fix(indexer): use upsert_chunks return value for chunk count

Previously, total_chunks was accumulated from the process_file return
value (num_chunks), which could differ from the number of chunks actually
stored if the upsert silently failed. It now uses the stored count
returned by upsert_chunks.

Also fixes cli._index so that only the final "complete" item is kept as
the result; "progress" yields are ignored instead of overwriting it.
This commit is contained in:
2026-04-12 02:16:19 -04:00
parent 4ab504e87c
commit 21b9704e21
2 changed files with 6 additions and 3 deletions

View File

@@ -51,7 +51,10 @@ def _index(config) -> int:
gen = indexer.full_index() gen = indexer.full_index()
result: dict = {"indexed_files": 0, "total_chunks": 0, "errors": []} result: dict = {"indexed_files": 0, "total_chunks": 0, "errors": []}
for item in gen: for item in gen:
result = item # progress yields are dicts; final dict from return if item.get("type") == "complete":
result = item
elif item.get("type") == "progress":
pass # skip progress logs in result
duration_ms = int((time.monotonic() - t0) * 1000) duration_ms = int((time.monotonic() - t0) * 1000)
print( print(
json.dumps( json.dumps(

View File

@@ -184,8 +184,8 @@ class Indexer:
for e, v in zip(enriched, vectors): for e, v in zip(enriched, vectors):
e["vector"] = v e["vector"] = v
# Store # Store
upsert_chunks(table, enriched) stored = upsert_chunks(table, enriched)
total_chunks += num_chunks total_chunks += stored
indexed_files += 1 indexed_files += 1
except Exception as exc: except Exception as exc:
errors.append({"file": str(filepath), "error": str(exc)}) errors.append({"file": str(filepath), "error": str(exc)})