fix(indexer): use upsert_chunks return value for chunk count
Previously, total_chunks was accumulated from the process_file return value (num_chunks), which could differ from the number of chunks actually stored if the upsert silently failed. It now accumulates the stored count returned by upsert_chunks. This commit also fixes cli._index so that progress yields are skipped when building the result.
This commit is contained in:
@@ -51,7 +51,10 @@ def _index(config) -> int:
|
|||||||
gen = indexer.full_index()
|
gen = indexer.full_index()
|
||||||
result: dict = {"indexed_files": 0, "total_chunks": 0, "errors": []}
|
result: dict = {"indexed_files": 0, "total_chunks": 0, "errors": []}
|
||||||
for item in gen:
|
for item in gen:
|
||||||
result = item # progress yields are dicts; final dict from return
|
if item.get("type") == "complete":
|
||||||
|
result = item
|
||||||
|
elif item.get("type") == "progress":
|
||||||
|
pass # skip progress logs in result
|
||||||
duration_ms = int((time.monotonic() - t0) * 1000)
|
duration_ms = int((time.monotonic() - t0) * 1000)
|
||||||
print(
|
print(
|
||||||
json.dumps(
|
json.dumps(
|
||||||
|
|||||||
@@ -184,8 +184,8 @@ class Indexer:
|
|||||||
for e, v in zip(enriched, vectors):
|
for e, v in zip(enriched, vectors):
|
||||||
e["vector"] = v
|
e["vector"] = v
|
||||||
# Store
|
# Store
|
||||||
upsert_chunks(table, enriched)
|
stored = upsert_chunks(table, enriched)
|
||||||
total_chunks += num_chunks
|
total_chunks += stored
|
||||||
indexed_files += 1
|
indexed_files += 1
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
errors.append({"file": str(filepath), "error": str(exc)})
|
errors.append({"file": str(filepath), "error": str(exc)})
|
||||||
|
|||||||
Reference in New Issue
Block a user