fix(indexer): use upsert_chunks return value for chunk count
Previously, total_chunks was accumulated from the num_chunks value returned by process_file, which could differ from the number of chunks actually stored if the upsert silently failed. total_chunks now adds the stored count returned by upsert_chunks instead. Also fixes cli._index so that progress yields are skipped and only the item with type "complete" is used as the result.
This commit is contained in:
@@ -51,7 +51,10 @@ def _index(config) -> int:
|
||||
gen = indexer.full_index()
|
||||
result: dict = {"indexed_files": 0, "total_chunks": 0, "errors": []}
|
||||
for item in gen:
|
||||
result = item # progress yields are dicts; final dict from return
|
||||
if item.get("type") == "complete":
|
||||
result = item
|
||||
elif item.get("type") == "progress":
|
||||
pass # skip progress logs in result
|
||||
duration_ms = int((time.monotonic() - t0) * 1000)
|
||||
print(
|
||||
json.dumps(
|
||||
|
||||
@@ -184,8 +184,8 @@ class Indexer:
|
||||
for e, v in zip(enriched, vectors):
|
||||
e["vector"] = v
|
||||
# Store
|
||||
upsert_chunks(table, enriched)
|
||||
total_chunks += num_chunks
|
||||
stored = upsert_chunks(table, enriched)
|
||||
total_chunks += stored
|
||||
indexed_files += 1
|
||||
except Exception as exc:
|
||||
errors.append({"file": str(filepath), "error": str(exc)})
|
||||
|
||||
Reference in New Issue
Block a user