fix(indexer): write sync-result.json in reindex() not just sync()

reindex() consumed the full_index() generator but never called
_write_sync_result(), leaving sync-result.json stale even though the
CLI output showed the correct indexed_files/total_chunks.
commit 4ab504e87c
parent 9d919dc237
Date:   2026-04-12 01:49:43 -04:00

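Context for the diff below: full_index() is a generator that yields progress updates and finishes by yielding a summary dict, which is why reindex() materializes it with list() and keeps the last element. A minimal runnable sketch of that pattern and the missing persistence step; the file path, progress payloads, and chunk counts are stand-ins, not taken from the repo:

# Minimal sketch of the pattern the diff relies on, with stand-in data:
# full_index() yields progress dicts and ends with a summary dict, and
# _write_sync_result() persists that summary to sync-result.json.
import json
from pathlib import Path
from typing import Any, Generator

SYNC_RESULT = Path("sync-result.json")  # stand-in for _sync_result_path()


def _write_sync_result(indexed_files: int, total_chunks: int, errors: list) -> None:
    summary = {"indexed_files": indexed_files, "total_chunks": total_chunks, "errors": errors}
    SYNC_RESULT.write_text(json.dumps(summary))


def full_index() -> Generator[dict[str, Any], None, None]:
    files = ["a.md", "b.md"]  # stand-in for walking the vault
    for done, name in enumerate(files, 1):
        yield {"file": name, "progress": done / len(files)}  # illustrative shape
    # Final yield is the run summary that both sync() and reindex() need.
    yield {"indexed_files": len(files), "total_chunks": 6, "errors": []}


def reindex() -> dict[str, Any]:
    # Materializing the generator runs the whole pass; the summary is last.
    results = list(full_index())
    final = results[-1] if results else {"indexed_files": 0, "total_chunks": 0, "errors": []}
    # The fix: persist the summary here too, not only in sync(). Without this
    # call, sync-result.json kept the previous run's numbers even though the
    # CLI printed the fresh ones returned below.
    _write_sync_result(final["indexed_files"], final["total_chunks"], final["errors"])
    return final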

@@ -4,8 +4,6 @@ from __future__ import annotations
 import json
 import os
-import time
-import uuid
 from datetime import datetime, timezone
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Generator, Iterator

@@ -16,10 +14,13 @@ if TYPE_CHECKING:
 import obsidian_rag.config as config_mod
 from obsidian_rag.config import _resolve_data_dir
 from obsidian_rag.chunker import chunk_file
-from obsidian_rag.embedder import EmbeddingError, OllamaUnavailableError, SecurityError
+from obsidian_rag.embedder import OllamaUnavailableError
 from obsidian_rag.security import should_index_dir, validate_path
-from obsidian_rag.audit_logger import AuditLogger
-from obsidian_rag.vector_store import create_table_if_not_exists, delete_by_source_file, get_db, upsert_chunks
+from obsidian_rag.vector_store import (
+    create_table_if_not_exists,
+    get_db,
+    upsert_chunks,
+)

 # ----------------------------------------------------------------------
 # Pipeline
@@ -43,6 +44,7 @@ class Indexer:
     def embedder(self):
         if self._embedder is None:
             from obsidian_rag.embedder import OllamaEmbedder
+
             self._embedder = OllamaEmbedder(self.config)
         return self._embedder

@@ -50,6 +52,7 @@ class Indexer:
     def audit_logger(self):
         if self._audit_logger is None:
             from obsidian_rag.audit_logger import AuditLogger
+
             log_dir = _resolve_data_dir() / "audit"
             self._audit_logger = AuditLogger(log_dir / "audit.log")
         return self._audit_logger
@@ -64,9 +67,9 @@ class Indexer:

         for chunk in chunks:
             sensitivity = security.detect_sensitive(
-                chunk['chunk_text'],
+                chunk["chunk_text"],
                 self.config.security.sensitive_sections,
-                self.config.memory.patterns
+                self.config.memory.patterns,
             )

             for category in sensitive_categories:
@@ -99,7 +102,11 @@ class Indexer:
         """Index a single file. Returns (num_chunks, enriched_chunks)."""
         from obsidian_rag import security

-        mtime = str(datetime.fromtimestamp(filepath.stat().st_mtime, tz=timezone.utc).isoformat())
+        mtime = str(
+            datetime.fromtimestamp(
+                filepath.stat().st_mtime, tz=timezone.utc
+            ).isoformat()
+        )
         content = filepath.read_text(encoding="utf-8")
         # Sanitize
         content = security.sanitize_text(content)
@@ -151,18 +158,19 @@ class Indexer:
         # Log sensitive content access
         for chunk in enriched:
             from obsidian_rag import security
+
             sensitivity = security.detect_sensitive(
-                chunk['chunk_text'],
+                chunk["chunk_text"],
                 self.config.security.sensitive_sections,
-                self.config.memory.patterns
+                self.config.memory.patterns,
             )
-            for category in ['health', 'financial', 'relations']:
+            for category in ["health", "financial", "relations"]:
                 if sensitivity.get(category, False):
                     self.audit_logger.log_sensitive_access(
-                        str(chunk['source_file']),
+                        str(chunk["source_file"]),
                         category,
-                        'index',
-                        {'chunk_id': chunk['chunk_id']}
+                        "index",
+                        {"chunk_id": chunk["chunk_id"]},
                     )

         # Embed chunks
@@ -249,9 +257,16 @@ class Indexer:
         db = get_db(self.config)
         if "obsidian_chunks" in db.list_tables():
             db.drop_table("obsidian_chunks")

+        # full_index is a generator — materialize it to get the final dict
         results = list(self.full_index())
-        return results[-1] if results else {"indexed_files": 0, "total_chunks": 0, "errors": []}
+        final = (
+            results[-1]
+            if results
+            else {"indexed_files": 0, "total_chunks": 0, "errors": []}
+        )
+        self._write_sync_result(
+            final["indexed_files"], final["total_chunks"], final["errors"]
+        )
+        return final

     def _sync_result_path(self) -> Path:
         # Use the same dev-data-dir convention as config.py
@@ -313,18 +328,19 @@
         # Log sensitive content access
         for chunk in enriched:
             from obsidian_rag import security
+
             sensitivity = security.detect_sensitive(
-                chunk['chunk_text'],
+                chunk["chunk_text"],
                 self.config.security.sensitive_sections,
-                self.config.memory.patterns
+                self.config.memory.patterns,
             )
-            for category in ['health', 'financial', 'relations']:
+            for category in ["health", "financial", "relations"]:
                 if sensitivity.get(category, False):
                     self.audit_logger.log_sensitive_access(
-                        str(chunk['source_file']),
+                        str(chunk["source_file"]),
                         category,
-                        'index',
-                        {'chunk_id': chunk['chunk_id']}
+                        "index",
+                        {"chunk_id": chunk["chunk_id"]},
                     )

         # Embed chunks
@@ -353,4 +369,3 @@ class Indexer:
         if not data_dir.exists() and not (project_root / "KnowledgeVault").exists():
             data_dir = Path(os.path.expanduser("~/.obsidian-rag"))
         return data_dir / "sync-result.json"
-
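A quick end-to-end check of the new behavior. This is a hypothetical snippet: the module path, the Indexer constructor signature, and the load_config() helper are assumptions not confirmed by this diff; only reindex() and _sync_result_path() appear above.

# Hypothetical verification (module path, constructor signature, and
# load_config() are assumptions, not confirmed by this diff): after
# reindex(), the summary on disk should match the returned summary.
import json

from obsidian_rag.config import load_config  # assumed helper
from obsidian_rag.indexer import Indexer     # assumed module path

indexer = Indexer(load_config())
final = indexer.reindex()

on_disk = json.loads(indexer._sync_result_path().read_text())
assert on_disk["indexed_files"] == final["indexed_files"]
assert on_disk["total_chunks"] == final["total_chunks"]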