fix(indexer): truncate chunks exceeding Ollama context window
This commit is contained in:
@@ -12,6 +12,7 @@ if TYPE_CHECKING:
|
|||||||
from obsidian_rag.config import ObsidianRagConfig
|
from obsidian_rag.config import ObsidianRagConfig
|
||||||
|
|
||||||
DEFAULT_TIMEOUT = 120.0 # seconds
|
DEFAULT_TIMEOUT = 120.0 # seconds
|
||||||
|
MAX_CHUNK_CHARS = 8000 # safe default for most Ollama models
|
||||||
|
|
||||||
|
|
||||||
class EmbeddingError(Exception):
|
class EmbeddingError(Exception):
|
||||||
@@ -44,7 +45,7 @@ class OllamaEmbedder:
|
|||||||
return
|
return
|
||||||
|
|
||||||
parsed = urllib.parse.urlparse(self.base_url)
|
parsed = urllib.parse.urlparse(self.base_url)
|
||||||
if parsed.hostname not in ['localhost', '127.0.0.1', '::1']:
|
if parsed.hostname not in ["localhost", "127.0.0.1", "::1"]:
|
||||||
raise SecurityError(
|
raise SecurityError(
|
||||||
f"Remote embedding service not allowed when local_only=True: {self.base_url}"
|
f"Remote embedding service not allowed when local_only=True: {self.base_url}"
|
||||||
)
|
)
|
||||||
@@ -84,23 +85,31 @@ class OllamaEmbedder:
|
|||||||
# For batch, call /api/embeddings multiple times sequentially
|
# For batch, call /api/embeddings multiple times sequentially
|
||||||
if len(batch) == 1:
|
if len(batch) == 1:
|
||||||
endpoint = f"{self.base_url}/api/embeddings"
|
endpoint = f"{self.base_url}/api/embeddings"
|
||||||
payload = {"model": self.model, "prompt": batch[0]}
|
prompt = batch[0][:MAX_CHUNK_CHARS]
|
||||||
|
payload = {"model": self.model, "prompt": prompt}
|
||||||
else:
|
else:
|
||||||
# For batch, POST each text to /api/embeddings as "prompt" (one call per text)
|
# For batch, POST each text to /api/embeddings as "prompt" (one call per text)
|
||||||
results = []
|
results = []
|
||||||
for text in batch:
|
for text in batch:
|
||||||
|
truncated = text[:MAX_CHUNK_CHARS]
|
||||||
try:
|
try:
|
||||||
resp = self._client.post(
|
resp = self._client.post(
|
||||||
f"{self.base_url}/api/embeddings",
|
f"{self.base_url}/api/embeddings",
|
||||||
json={"model": self.model, "prompt": text},
|
json={"model": self.model, "prompt": truncated},
|
||||||
timeout=DEFAULT_TIMEOUT,
|
timeout=DEFAULT_TIMEOUT,
|
||||||
)
|
)
|
||||||
except httpx.ConnectError as e:
|
except httpx.ConnectError as e:
|
||||||
raise OllamaUnavailableError(f"Cannot connect to Ollama at {self.base_url}") from e
|
raise OllamaUnavailableError(
|
||||||
|
f"Cannot connect to Ollama at {self.base_url}"
|
||||||
|
) from e
|
||||||
except httpx.TimeoutException as e:
|
except httpx.TimeoutException as e:
|
||||||
raise EmbeddingError(f"Embedding request timed out after {DEFAULT_TIMEOUT}s") from e
|
raise EmbeddingError(
|
||||||
|
f"Embedding request timed out after {DEFAULT_TIMEOUT}s"
|
||||||
|
) from e
|
||||||
if resp.status_code != 200:
|
if resp.status_code != 200:
|
||||||
raise EmbeddingError(f"Ollama returned {resp.status_code}: {resp.text}")
|
raise EmbeddingError(
|
||||||
|
f"Ollama returned {resp.status_code}: {resp.text}"
|
||||||
|
)
|
||||||
data = resp.json()
|
data = resp.json()
|
||||||
embedding = data.get("embedding", [])
|
embedding = data.get("embedding", [])
|
||||||
if not embedding:
|
if not embedding:
|
||||||
@@ -111,9 +120,13 @@ class OllamaEmbedder:
|
|||||||
try:
|
try:
|
||||||
resp = self._client.post(endpoint, json=payload, timeout=DEFAULT_TIMEOUT)
|
resp = self._client.post(endpoint, json=payload, timeout=DEFAULT_TIMEOUT)
|
||||||
except httpx.ConnectError as e:
|
except httpx.ConnectError as e:
|
||||||
raise OllamaUnavailableError(f"Cannot connect to Ollama at {self.base_url}") from e
|
raise OllamaUnavailableError(
|
||||||
|
f"Cannot connect to Ollama at {self.base_url}"
|
||||||
|
) from e
|
||||||
except httpx.TimeoutException as e:
|
except httpx.TimeoutException as e:
|
||||||
raise EmbeddingError(f"Embedding request timed out after {DEFAULT_TIMEOUT}s") from e
|
raise EmbeddingError(
|
||||||
|
f"Embedding request timed out after {DEFAULT_TIMEOUT}s"
|
||||||
|
) from e
|
||||||
|
|
||||||
if resp.status_code != 200:
|
if resp.status_code != 200:
|
||||||
raise EmbeddingError(f"Ollama returned {resp.status_code}: {resp.text}")
|
raise EmbeddingError(f"Ollama returned {resp.status_code}: {resp.text}")
|
||||||
|
|||||||
Reference in New Issue
Block a user