fix: improve watcher with per-file debounce, logging, and validation

This commit is contained in:
2026-04-13 14:37:12 -04:00
parent 67056d1f0f
commit e8d0f946b5

View File

@@ -1,5 +1,6 @@
from __future__ import annotations from __future__ import annotations
import logging
import time import time
from pathlib import Path from pathlib import Path
@@ -10,39 +11,49 @@ from companion.config import load_config
from companion.rag.indexer import Indexer from companion.rag.indexer import Indexer
from companion.rag.vector_store import VectorStore from companion.rag.vector_store import VectorStore
logger = logging.getLogger(__name__)
class VaultEventHandler(FileSystemEventHandler): class VaultEventHandler(FileSystemEventHandler):
"""Handles file system events in the vault directory and triggers indexing."""
def __init__(self, indexer: Indexer, debounce_seconds: float = 5.0): def __init__(self, indexer: Indexer, debounce_seconds: float = 5.0):
self.indexer = indexer self.indexer = indexer
self.debounce_seconds = debounce_seconds self.debounce_seconds = debounce_seconds
self._last_sync = 0.0 self._last_sync: dict[str, float] = {}
def on_any_event(self, event): def on_any_event(self, event):
"""Process a file system event and debounce per file path."""
if event.is_directory: if event.is_directory:
return return
if not event.src_path.endswith(".md"): if not event.src_path.endswith(".md"):
return return
path = event.src_path
now = time.time() now = time.time()
if now - self._last_sync < self.debounce_seconds: if now - self._last_sync.get(path, 0) < self.debounce_seconds:
return return
self._last_sync = now self._last_sync[path] = now
try: try:
self.indexer.sync() self.indexer.sync()
except Exception as exc: except (OSError, RuntimeError) as exc:
print(f"Sync failed: {exc}") logger.error("Sync failed: %s", exc)
def start_watcher(config_path: str = "config.json") -> None: def start_watcher(config_path: str = "config.json") -> None:
"""Load configuration and start watching the vault directory for changes."""
config = load_config(config_path) config = load_config(config_path)
vault_path = Path(config.vault.path)
if not vault_path.exists():
raise FileNotFoundError(f"Vault path does not exist: {vault_path}")
store = VectorStore( store = VectorStore(
uri=config.rag.vector_store.path, dimensions=config.rag.embedding.dimensions uri=config.rag.vector_store.path, dimensions=config.rag.embedding.dimensions
) )
indexer = Indexer(config, store) indexer = Indexer(config, store)
handler = VaultEventHandler(indexer) handler = VaultEventHandler(indexer)
observer = Observer() observer = Observer()
observer.schedule(handler, str(config.vault.path), recursive=True) observer.schedule(handler, str(vault_path), recursive=True)
observer.start() observer.start()
print(f"Watching {config.vault.path} for changes...") logger.info("Watching %s for changes...", vault_path)
try: try:
while True: while True:
time.sleep(1) time.sleep(1)