fix(indexer): use upsert_chunks return value for chunk count

Previously, total_chunks was accumulated from the value returned by process_file (num_chunks), which could differ from the number of chunks actually stored if the upsert silently failed. It now uses the stored count returned by upsert_chunks. Also fixes cli._index so that progress yields are skipped when building the result.
2026-04-12 02:16:19 -04:00
parent 4ab504e87c
commit 21b9704e21
2 changed files with 6 additions and 3 deletions
--- a/python/obsidian_rag/cli.py
+++ b/python/obsidian_rag/cli.py
@@ -51,7 +51,10 @@ def _index(config) -> int:
        gen = indexer.full_index()
        result: dict = {"indexed_files": 0, "total_chunks": 0, "errors": []}
        for item in gen:
-            result = item  # progress yields are dicts; final dict from return
+            if item.get("type") == "complete":
+                result = item
+            elif item.get("type") == "progress":
+                pass  # skip progress logs in result
        duration_ms = int((time.monotonic() - t0) * 1000)
        print(
            json.dumps(
--- a/python/obsidian_rag/indexer.py
+++ b/python/obsidian_rag/indexer.py
@@ -184,8 +184,8 @@ class Indexer:
                for e, v in zip(enriched, vectors):
                    e["vector"] = v
                # Store
-                upsert_chunks(table, enriched)
+                stored = upsert_chunks(table, enriched)
-                total_chunks += num_chunks
+                total_chunks += stored
                indexed_files += 1
            except Exception as exc:
                errors.append({"file": str(filepath), "error": str(exc)})