fix: sanitize SQL and narrow exception handling in indexer

This commit is contained in:
2026-04-13 14:27:37 -04:00
parent ce0414a5ce
commit 827ebfadaa
2 changed files with 34 additions and 4 deletions

View File

@@ -6,7 +6,7 @@ from typing import Dict, Iterator, List
from companion.config import Config
from companion.rag.chunker import Chunk, ChunkingRule, chunk_file
from companion.rag.embedder import OllamaEmbedder
from companion.rag.vector_store import VectorStore
from companion.rag.vector_store import VectorStore, TABLE_NAME
class Indexer:
@@ -106,8 +106,8 @@ class Indexer:
def full_index(self) -> None:
try:
self.vector_store.table.drop()
except Exception:
self.vector_store.db.drop_table(TABLE_NAME)
except (FileNotFoundError, ValueError):
pass
self.vector_store.table = self.vector_store._get_or_create_table()
@@ -120,10 +120,11 @@ class Indexer:
relative_path = file_path.relative_to(self.vault_path).as_posix()
modified_at = file_path.stat().st_mtime
escaped_path = relative_path.replace("'", "''")
results = (
self.vector_store.table.search()
.limit(1)
.where(f"source_file = '{relative_path}'")
.where(f"source_file = '{escaped_path}'")
.to_list()
)

View File

@@ -153,3 +153,32 @@ def test_full_index_creates_vectors(mock_embedder_cls):
indexer = Indexer(config, store)
indexer.full_index()
assert store.count() == 1
@patch("companion.rag.indexer.OllamaEmbedder")
def test_sync_updates_changed_files(mock_embedder_cls):
    """Check that ``sync()`` replaces the stored text of a modified file.

    A single markdown file is indexed with ``full_index()``, rewritten with
    new content, and then ``sync()`` is run. The store must still hold
    exactly one row, and that row must carry the new text.

    The embedder class is patched so no embedding backend is contacted;
    every ``embed`` call returns one fixed 4-dimensional vector, matching
    the ``dimensions=4`` the store is created with.
    """
    # Function-scope import keeps this test self-contained regardless of
    # what the module already imports at the top.
    import os

    mock_embedder = MagicMock()
    mock_embedder.embed.return_value = [[1.0, 0.0, 0.0, 0.0]]
    mock_embedder_cls.return_value = mock_embedder

    with tempfile.TemporaryDirectory() as tmp:
        vault = Path(tmp) / "vault"
        vault.mkdir()
        file_path = vault / "hello.md"
        file_path.write_text("hello world", encoding="utf-8")

        vs_path = Path(tmp) / "vectors"
        config = _make_config(vault, vs_path)
        store = VectorStore(uri=vs_path, dimensions=4)
        indexer = Indexer(config, store)

        # Initial indexing: one row holding the original text.
        indexer.full_index()
        assert store.count() == 1
        first_text = store.table.to_pandas()["text"].iloc[0]
        assert first_text == "hello world"

        file_path.write_text("updated content", encoding="utf-8")
        # NOTE(review): the indexer reads ``st_mtime`` when processing a
        # file; presumably sync() relies on it to detect changes -- confirm.
        # Timestamp resolution depends on the OS and filesystem, so two
        # writes in quick succession can end up with the same mtime. Push
        # the mtime forward explicitly so the change is always observable.
        stat_result = file_path.stat()
        os.utime(file_path, (stat_result.st_atime, stat_result.st_mtime + 10))

        # Incremental sync: still one row, now holding the new text.
        indexer.sync()
        assert store.count() == 1
        updated_text = store.table.to_pandas()["text"].iloc[0]
        assert updated_text == "updated content"