fix: sanitize SQL and narrow exception handling in indexer
This commit is contained in:
@@ -6,7 +6,7 @@ from typing import Dict, Iterator, List
|
|||||||
from companion.config import Config
|
from companion.config import Config
|
||||||
from companion.rag.chunker import Chunk, ChunkingRule, chunk_file
|
from companion.rag.chunker import Chunk, ChunkingRule, chunk_file
|
||||||
from companion.rag.embedder import OllamaEmbedder
|
from companion.rag.embedder import OllamaEmbedder
|
||||||
from companion.rag.vector_store import VectorStore
|
from companion.rag.vector_store import VectorStore, TABLE_NAME
|
||||||
|
|
||||||
|
|
||||||
class Indexer:
|
class Indexer:
|
||||||
@@ -106,8 +106,8 @@ class Indexer:
|
|||||||
|
|
||||||
def full_index(self) -> None:
|
def full_index(self) -> None:
|
||||||
try:
|
try:
|
||||||
self.vector_store.table.drop()
|
self.vector_store.db.drop_table(TABLE_NAME)
|
||||||
except Exception:
|
except (FileNotFoundError, ValueError):
|
||||||
pass
|
pass
|
||||||
self.vector_store.table = self.vector_store._get_or_create_table()
|
self.vector_store.table = self.vector_store._get_or_create_table()
|
||||||
|
|
||||||
@@ -120,10 +120,11 @@ class Indexer:
|
|||||||
relative_path = file_path.relative_to(self.vault_path).as_posix()
|
relative_path = file_path.relative_to(self.vault_path).as_posix()
|
||||||
modified_at = file_path.stat().st_mtime
|
modified_at = file_path.stat().st_mtime
|
||||||
|
|
||||||
|
escaped_path = relative_path.replace("'", "''")
|
||||||
results = (
|
results = (
|
||||||
self.vector_store.table.search()
|
self.vector_store.table.search()
|
||||||
.limit(1)
|
.limit(1)
|
||||||
.where(f"source_file = '{relative_path}'")
|
.where(f"source_file = '{escaped_path}'")
|
||||||
.to_list()
|
.to_list()
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -153,3 +153,32 @@ def test_full_index_creates_vectors(mock_embedder_cls):
|
|||||||
indexer = Indexer(config, store)
|
indexer = Indexer(config, store)
|
||||||
indexer.full_index()
|
indexer.full_index()
|
||||||
assert store.count() == 1
|
assert store.count() == 1
|
||||||
|
|
||||||
|
|
||||||
|
@patch("companion.rag.indexer.OllamaEmbedder")
def test_sync_updates_changed_files(mock_embedder_cls):
    """Check that ``sync()`` replaces the stored text of a file edited on disk.

    The embedder class is patched so that ``embed`` always returns a single
    fixed 4-element vector, matching the store's ``dimensions=4``. The test
    indexes one markdown file, rewrites it, syncs, and asserts that the store
    still holds exactly one row and that the row carries the new text.

    Args:
        mock_embedder_cls: The patched ``OllamaEmbedder`` class injected by
            ``@patch``.
    """
    import os  # local import: only needed for the explicit mtime bump below

    mock_embedder = MagicMock()
    mock_embedder.embed.return_value = [[1.0, 0.0, 0.0, 0.0]]
    mock_embedder_cls.return_value = mock_embedder

    with tempfile.TemporaryDirectory() as tmp:
        vault = Path(tmp) / "vault"
        vault.mkdir()
        file_path = vault / "hello.md"
        file_path.write_text("hello world", encoding="utf-8")
        vs_path = Path(tmp) / "vectors"
        config = _make_config(vault, vs_path)
        store = VectorStore(uri=vs_path, dimensions=4)
        indexer = Indexer(config, store)

        indexer.full_index()
        assert store.count() == 1
        first_text = store.table.to_pandas()["text"].iloc[0]
        assert first_text == "hello world"

        indexed_mtime = file_path.stat().st_mtime
        file_path.write_text("updated content", encoding="utf-8")
        # NOTE(review): the indexer reads ``st_mtime`` for each file, so
        # sync() presumably uses it to detect changes -- confirm. On
        # filesystems with coarse timestamp granularity the rewrite above can
        # land on the same mtime as the first write, so push it forward
        # explicitly to keep the test deterministic.
        bumped_mtime = indexed_mtime + 10
        os.utime(file_path, (bumped_mtime, bumped_mtime))
        indexer.sync()

        assert store.count() == 1
        updated_text = store.table.to_pandas()["text"].iloc[0]
        assert updated_text == "updated content"
|
||||||
|
|||||||
Reference in New Issue
Block a user