From 827ebfadaa453af1d9a93598d232ffea8918ed4d Mon Sep 17 00:00:00 2001
From: Santhosh Janardhanan
Date: Mon, 13 Apr 2026 14:27:37 -0400
Subject: [PATCH] fix: sanitize SQL and narrow exception handling in indexer

Indexer.full_index() now drops the table through
vector_store.db.drop_table(TABLE_NAME) instead of table.drop(), and only
swallows FileNotFoundError / ValueError rather than every Exception.

The per-file lookup doubles single quotes in the relative path before
interpolating it into the "source_file = '...'" filter, so a path that
contains an apostrophe no longer terminates the string literal early.

Add test_sync_updates_changed_files, which indexes a file, rewrites it,
runs sync() and checks that the single stored row carries the new text.

---
NOTE(review): the leading whitespace of the hunk lines below was
reconstructed from Python nesting; the depth of the third indexer.py hunk
is an assumption. If the context does not match, apply with
`git apply --ignore-whitespace` and confirm the result.

 src/companion/rag/indexer.py |  9 +++++----
 tests/test_indexer.py        | 29 +++++++++++++++++++++++++++++
 2 files changed, 34 insertions(+), 4 deletions(-)

diff --git a/src/companion/rag/indexer.py b/src/companion/rag/indexer.py
index 098c529..1fbc27f 100644
--- a/src/companion/rag/indexer.py
+++ b/src/companion/rag/indexer.py
@@ -6,7 +6,7 @@ from typing import Dict, Iterator, List
 from companion.config import Config
 from companion.rag.chunker import Chunk, ChunkingRule, chunk_file
 from companion.rag.embedder import OllamaEmbedder
-from companion.rag.vector_store import VectorStore
+from companion.rag.vector_store import VectorStore, TABLE_NAME
 
 
 class Indexer:
@@ -106,8 +106,8 @@ class Indexer:
 
     def full_index(self) -> None:
         try:
-            self.vector_store.table.drop()
-        except Exception:
+            self.vector_store.db.drop_table(TABLE_NAME)
+        except (FileNotFoundError, ValueError):
             pass
         self.vector_store.table = self.vector_store._get_or_create_table()
 
@@ -120,10 +120,11 @@ class Indexer:
             relative_path = file_path.relative_to(self.vault_path).as_posix()
             modified_at = file_path.stat().st_mtime
 
+            escaped_path = relative_path.replace("'", "''")
             results = (
                 self.vector_store.table.search()
                 .limit(1)
-                .where(f"source_file = '{relative_path}'")
+                .where(f"source_file = '{escaped_path}'")
                 .to_list()
             )
 
diff --git a/tests/test_indexer.py b/tests/test_indexer.py
index 1ba4f26..58d3185 100644
--- a/tests/test_indexer.py
+++ b/tests/test_indexer.py
@@ -153,3 +153,32 @@ def test_full_index_creates_vectors(mock_embedder_cls):
         indexer = Indexer(config, store)
         indexer.full_index()
         assert store.count() == 1
+
+
+@patch("companion.rag.indexer.OllamaEmbedder")
+def test_sync_updates_changed_files(mock_embedder_cls):
+    mock_embedder = MagicMock()
+    mock_embedder.embed.return_value = [[1.0, 0.0, 0.0, 0.0]]
+    mock_embedder_cls.return_value = mock_embedder
+
+    with tempfile.TemporaryDirectory() as tmp:
+        vault = Path(tmp) / "vault"
+        vault.mkdir()
+        file_path = vault / "hello.md"
+        file_path.write_text("hello world", encoding="utf-8")
+        vs_path = Path(tmp) / "vectors"
+        config = _make_config(vault, vs_path)
+        store = VectorStore(uri=vs_path, dimensions=4)
+        indexer = Indexer(config, store)
+
+        indexer.full_index()
+        assert store.count() == 1
+        first_text = store.table.to_pandas()["text"].iloc[0]
+        assert first_text == "hello world"
+
+        file_path.write_text("updated content", encoding="utf-8")
+        indexer.sync()
+
+        assert store.count() == 1
+        updated_text = store.table.to_pandas()["text"].iloc[0]
+        assert updated_text == "updated content"