fix(indexer): truncate chunks exceeding Ollama context window

This commit is contained in:
2026-04-11 23:12:13 -04:00
parent 928a027cec
commit d946cf34e1

View File

@@ -12,6 +12,7 @@ if TYPE_CHECKING:
from obsidian_rag.config import ObsidianRagConfig from obsidian_rag.config import ObsidianRagConfig
DEFAULT_TIMEOUT = 120.0 # seconds DEFAULT_TIMEOUT = 120.0 # seconds
MAX_CHUNK_CHARS = 8000 # safe default for most Ollama models
class EmbeddingError(Exception): class EmbeddingError(Exception):
@@ -44,7 +45,7 @@ class OllamaEmbedder:
return return
parsed = urllib.parse.urlparse(self.base_url) parsed = urllib.parse.urlparse(self.base_url)
if parsed.hostname not in ['localhost', '127.0.0.1', '::1']: if parsed.hostname not in ["localhost", "127.0.0.1", "::1"]:
raise SecurityError( raise SecurityError(
f"Remote embedding service not allowed when local_only=True: {self.base_url}" f"Remote embedding service not allowed when local_only=True: {self.base_url}"
) )
@@ -84,23 +85,31 @@ class OllamaEmbedder:
# For batch, call /api/embeddings multiple times sequentially # For batch, call /api/embeddings multiple times sequentially
if len(batch) == 1: if len(batch) == 1:
endpoint = f"{self.base_url}/api/embeddings" endpoint = f"{self.base_url}/api/embeddings"
payload = {"model": self.model, "prompt": batch[0]} prompt = batch[0][:MAX_CHUNK_CHARS]
payload = {"model": self.model, "prompt": prompt}
else: else:
# For batch, POST to /api/embeddings once per text with "prompt" (multiple calls) # For batch, POST to /api/embeddings once per text with "prompt" (multiple calls)
results = [] results = []
for text in batch: for text in batch:
truncated = text[:MAX_CHUNK_CHARS]
try: try:
resp = self._client.post( resp = self._client.post(
f"{self.base_url}/api/embeddings", f"{self.base_url}/api/embeddings",
json={"model": self.model, "prompt": text}, json={"model": self.model, "prompt": truncated},
timeout=DEFAULT_TIMEOUT, timeout=DEFAULT_TIMEOUT,
) )
except httpx.ConnectError as e: except httpx.ConnectError as e:
raise OllamaUnavailableError(f"Cannot connect to Ollama at {self.base_url}") from e raise OllamaUnavailableError(
f"Cannot connect to Ollama at {self.base_url}"
) from e
except httpx.TimeoutException as e: except httpx.TimeoutException as e:
raise EmbeddingError(f"Embedding request timed out after {DEFAULT_TIMEOUT}s") from e raise EmbeddingError(
f"Embedding request timed out after {DEFAULT_TIMEOUT}s"
) from e
if resp.status_code != 200: if resp.status_code != 200:
raise EmbeddingError(f"Ollama returned {resp.status_code}: {resp.text}") raise EmbeddingError(
f"Ollama returned {resp.status_code}: {resp.text}"
)
data = resp.json() data = resp.json()
embedding = data.get("embedding", []) embedding = data.get("embedding", [])
if not embedding: if not embedding:
@@ -111,9 +120,13 @@ class OllamaEmbedder:
try: try:
resp = self._client.post(endpoint, json=payload, timeout=DEFAULT_TIMEOUT) resp = self._client.post(endpoint, json=payload, timeout=DEFAULT_TIMEOUT)
except httpx.ConnectError as e: except httpx.ConnectError as e:
raise OllamaUnavailableError(f"Cannot connect to Ollama at {self.base_url}") from e raise OllamaUnavailableError(
f"Cannot connect to Ollama at {self.base_url}"
) from e
except httpx.TimeoutException as e: except httpx.TimeoutException as e:
raise EmbeddingError(f"Embedding request timed out after {DEFAULT_TIMEOUT}s") from e raise EmbeddingError(
f"Embedding request timed out after {DEFAULT_TIMEOUT}s"
) from e
if resp.status_code != 200: if resp.status_code != 200:
raise EmbeddingError(f"Ollama returned {resp.status_code}: {resp.text}") raise EmbeddingError(f"Ollama returned {resp.status_code}: {resp.text}")