fix: sanitize SQL and narrow exception handling in indexer
This commit is contained in:
@@ -6,7 +6,7 @@ from typing import Dict, Iterator, List
|
||||
from companion.config import Config
|
||||
from companion.rag.chunker import Chunk, ChunkingRule, chunk_file
|
||||
from companion.rag.embedder import OllamaEmbedder
|
||||
from companion.rag.vector_store import VectorStore
|
||||
from companion.rag.vector_store import VectorStore, TABLE_NAME
|
||||
|
||||
|
||||
class Indexer:
|
||||
@@ -106,8 +106,8 @@ class Indexer:
|
||||
|
||||
def full_index(self) -> None:
|
||||
try:
|
||||
self.vector_store.table.drop()
|
||||
except Exception:
|
||||
self.vector_store.db.drop_table(TABLE_NAME)
|
||||
except (FileNotFoundError, ValueError):
|
||||
pass
|
||||
self.vector_store.table = self.vector_store._get_or_create_table()
|
||||
|
||||
@@ -120,10 +120,11 @@ class Indexer:
|
||||
relative_path = file_path.relative_to(self.vault_path).as_posix()
|
||||
modified_at = file_path.stat().st_mtime
|
||||
|
||||
escaped_path = relative_path.replace("'", "''")
|
||||
results = (
|
||||
self.vector_store.table.search()
|
||||
.limit(1)
|
||||
.where(f"source_file = '{relative_path}'")
|
||||
.where(f"source_file = '{escaped_path}'")
|
||||
.to_list()
|
||||
)
|
||||
|
||||
|
||||
@@ -153,3 +153,32 @@ def test_full_index_creates_vectors(mock_embedder_cls):
|
||||
indexer = Indexer(config, store)
|
||||
indexer.full_index()
|
||||
assert store.count() == 1
|
||||
|
||||
|
||||
@patch("companion.rag.indexer.OllamaEmbedder")
def test_sync_updates_changed_files(mock_embedder_cls):
    """Verify that ``sync()`` replaces the stored text of a modified file.

    A single markdown note is indexed with ``full_index()``, rewritten on
    disk, and then re-processed with ``sync()``. After each step the store
    must hold exactly one row whose ``text`` equals the note's current
    content.
    """
    # Stub embedder: every embed() call yields one fixed 4-dimensional vector,
    # matching the ``dimensions=4`` the store is created with below.
    fake_embedder = MagicMock()
    mock_embedder_cls.return_value = fake_embedder
    fake_embedder.embed.return_value = [[1.0, 0.0, 0.0, 0.0]]

    with tempfile.TemporaryDirectory() as tmp:
        root = Path(tmp)

        # Vault holding a single note.
        vault = root / "vault"
        vault.mkdir()
        note = vault / "hello.md"
        note.write_text("hello world", encoding="utf-8")

        # Vector store lives next to the vault inside the same temp dir.
        vs_path = root / "vectors"
        config = _make_config(vault, vs_path)
        store = VectorStore(uri=vs_path, dimensions=4)
        indexer = Indexer(config, store)

        def stored_text():
            # Re-read ``store.table`` on every call rather than caching it.
            return store.table.to_pandas()["text"].iloc[0]

        # Initial index: one row carrying the original content.
        indexer.full_index()
        assert store.count() == 1
        assert stored_text() == "hello world"

        # Rewrite the note, then sync: still one row, now with the new content.
        note.write_text("updated content", encoding="utf-8")
        indexer.sync()
        assert store.count() == 1
        assert stored_text() == "updated content"
|
||||
|
||||
Reference in New Issue
Block a user