fix: add configSchema to openclaw.plugin.json, add search CLI command, fix total_docs stat
- Add required configSchema to openclaw.plugin.json for OpenClaw plugin discovery
- Add search command to CLI with --limit, --dir, --from-date, --to-date, --tags filters
- Fix get_stats() to properly count unique docs (was returning 0 for non-null values)
- Remove hardcoded max_results default of 5; search now returns all results by default
- Update INSTALL.md and design docs with correct OpenClaw extension path instructions
This commit is contained in:
@@ -117,7 +117,7 @@ def delete_by_source_file(table: Any, source_file: str) -> int:
|
||||
def search_chunks(
|
||||
table: Any,
|
||||
query_vector: list[float],
|
||||
limit: int = 5,
|
||||
limit: int | None = None,
|
||||
directory_filter: list[str] | None = None,
|
||||
date_range: dict | None = None,
|
||||
tags: list[str] | None = None,
|
||||
@@ -132,7 +132,7 @@ def search_chunks(
|
||||
conditions: list[str] = []
|
||||
if directory_filter:
|
||||
dir_list = ", ".join(f'"{d}"' for d in directory_filter)
|
||||
conditions.append(f'source_directory IN ({dir_list})')
|
||||
conditions.append(f"source_directory IN ({dir_list})")
|
||||
if date_range:
|
||||
if "from" in date_range:
|
||||
conditions.append(f"date >= '{date_range['from']}'")
|
||||
@@ -144,11 +144,13 @@ def search_chunks(
|
||||
|
||||
where_clause = " AND ".join(conditions) if conditions else None
|
||||
|
||||
results = (
|
||||
table.search(query_vector, vector_column_name="vector")
|
||||
.limit(limit)
|
||||
.where(where_clause) if where_clause else table.search(query_vector, vector_column_name="vector").limit(limit)
|
||||
).to_list()
|
||||
search_query = table.search(query_vector, vector_column_name="vector")
|
||||
if limit is not None:
|
||||
search_query = search_query.limit(limit)
|
||||
if where_clause:
|
||||
search_query = search_query.where(where_clause)
|
||||
|
||||
results = search_query.to_list()
|
||||
|
||||
return [
|
||||
SearchResult(
|
||||
@@ -156,7 +158,9 @@ def search_chunks(
|
||||
chunk_text=r["chunk_text"],
|
||||
source_file=r["source_file"],
|
||||
source_directory=r["source_directory"],
|
||||
section=r.get("section") if r.get("section") not in (None, "None") else None,
|
||||
section=r.get("section")
|
||||
if r.get("section") not in (None, "None")
|
||||
else None,
|
||||
date=r.get("date") if r.get("date") not in (None, "None") else None,
|
||||
tags=r.get("tags") or [],
|
||||
chunk_index=r.get("chunk_index") or 0,
|
||||
@@ -172,10 +176,17 @@ def get_stats(table: Any) -> dict[str, Any]:
|
||||
total_chunks = 0
|
||||
try:
|
||||
total_chunks = table.count_rows()
|
||||
# Count unique source files using pandas
|
||||
# Count non-null, non-empty source files
|
||||
all_data = table.to_pandas()
|
||||
total_docs = all_data["source_file"].nunique()
|
||||
total_docs = (
|
||||
all_data["source_file"]
|
||||
.dropna()
|
||||
.astype(str)
|
||||
.str.strip()
|
||||
.loc[lambda s: s.str.len() > 0]
|
||||
.nunique()
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return {"total_docs": total_docs, "total_chunks": total_chunks}
|
||||
return {"total_docs": total_docs, "total_chunks": total_chunks}
|
||||
|
||||
Reference in New Issue
Block a user