fix: add configSchema to openclaw.plugin.json, add search CLI command, fix total_docs stat

- Add required configSchema to openclaw.plugin.json for OpenClaw plugin discovery
- Add search command to CLI with --limit, --dir, --from-date, --to-date, --tags filters
- Fix get_stats() to properly count unique docs (was returning 0 for non-null values)
- Remove hardcoded max_results default of 5; search now returns all results by default
- Update INSTALL.md and design docs with correct OpenClaw extension path instructions
This commit is contained in:
2026-04-11 20:01:09 -04:00
parent de3b9c1c12
commit e15e4ff856
6 changed files with 137 additions and 22 deletions

View File

@@ -8,7 +8,8 @@ import time
from pathlib import Path
import obsidian_rag.config as config_mod
from obsidian_rag.vector_store import get_db, get_stats
from obsidian_rag.vector_store import get_db, get_stats, search_chunks
from obsidian_rag.embedder import OllamaEmbedder
from obsidian_rag.indexer import Indexer
@@ -35,6 +36,8 @@ def main(argv: list[str] | None = None) -> int:
return _reindex(config)
elif cmd == "status":
return _status(config)
elif cmd == "search":
return _search(config, argv[1:])
else:
print(f"Unknown command: {cmd}\n{_usage()}", file=sys.stderr)
return 1
@@ -111,6 +114,7 @@ def _status(config) -> int:
# Resolve sync-result.json path (same convention as indexer)
from pathlib import Path
import os as osmod
project_root = Path(__file__).parent.parent.parent
data_dir = project_root / "obsidian-rag"
if not data_dir.exists() and not (project_root / "KnowledgeVault").exists():
@@ -134,7 +138,101 @@ def _status(config) -> int:
)
return 0
except FileNotFoundError:
print(json.dumps({"error": "Index not found. Run 'obsidian-rag index' first."}, indent=2))
print(
json.dumps(
{"error": "Index not found. Run 'obsidian-rag index' first."}, indent=2
)
)
return 1
except Exception as e:
print(json.dumps({"error": str(e)}), file=sys.stderr)
return 1
def _search(config, args: list[str]) -> int:
    """Handle the 'search' CLI subcommand: embed the query and print JSON results.

    Parses search-specific flags from *args*, embeds the query text via Ollama,
    runs a filtered vector search against the 'obsidian_chunks' table, and
    prints a JSON document with the query, result count, and per-chunk fields.

    Args:
        config: Application config object, forwarded to get_db() and
            OllamaEmbedder() (exact schema defined elsewhere in the package).
        args: Raw CLI arguments following the 'search' subcommand.

    Returns:
        0 on success; 1 on missing query, missing index, embedder
        unavailability, or unexpected errors.
    """
    # Local import: argparse is only needed for this subcommand.
    import argparse
    parser = argparse.ArgumentParser(prog="obsidian-rag search")
    # Query is positional and variadic so multi-word queries need no quoting.
    parser.add_argument("query", nargs="*", help="Search query")
    parser.add_argument(
        "--limit", type=int, default=None, help="Max results (default: unlimited)"
    )
    parser.add_argument("--dir", dest="directory", help="Filter by directory")
    parser.add_argument("--from-date", dest="from_date", help="Start date (YYYY-MM-DD)")
    parser.add_argument("--to-date", dest="to_date", help="End date (YYYY-MM-DD)")
    parser.add_argument("--tags", help="Comma-separated tags")
    # parse_known_args: unknown flags are silently ignored rather than erroring.
    parsed, _ = parser.parse_known_args(args)
    query_text = " ".join(parsed.query) if parsed.query else ""
    if not query_text:
        # Usage error: no query words supplied.
        print("ERROR: query is required\n", file=sys.stderr)
        parser.print_help()
        return 1
    try:
        db = get_db(config)
        table = db.open_table("obsidian_chunks")
        embedder = OllamaEmbedder(config)
        if not embedder.is_available():
            # Embedding backend is down; emit a JSON error and fail.
            print(
                json.dumps(
                    {
                        "error": "Ollama is not available. Start Ollama or use DEGRADED mode."
                    },
                    indent=2,
                )
            )
            return 1
        query_vector = embedder.embed_single(query_text)
        # Build optional filters only for flags the user actually supplied;
        # absent keys become None via .get() below.
        filters = {}
        if parsed.directory:
            # --dir takes a single directory; wrapped in a list because
            # search_chunks accepts a list of directories.
            filters["directory_filter"] = [parsed.directory]
        if parsed.from_date or parsed.to_date:
            filters["date_range"] = {}
            if parsed.from_date:
                filters["date_range"]["from"] = parsed.from_date
            if parsed.to_date:
                filters["date_range"]["to"] = parsed.to_date
        if parsed.tags:
            # Comma-separated tags, whitespace-trimmed.
            filters["tags"] = [t.strip() for t in parsed.tags.split(",")]
        results = search_chunks(
            table,
            query_vector,
            limit=parsed.limit,
            directory_filter=filters.get("directory_filter"),
            date_range=filters.get("date_range"),
            tags=filters.get("tags"),
        )
        # Shape the output as a single JSON document for easy piping to jq etc.
        output = {
            "query": query_text,
            "total_results": len(results),
            "results": [
                {
                    "score": r.score,
                    "source_file": r.source_file,
                    "source_directory": r.source_directory,
                    "section": r.section,
                    "date": r.date,
                    "tags": r.tags,
                    "chunk_text": r.chunk_text,
                }
                for r in results
            ],
        }
        # default=str: stringify any non-JSON-serializable values (e.g. dates).
        print(json.dumps(output, indent=2, default=str))
        return 0
    except FileNotFoundError:
        # Index directory/table does not exist yet.
        print(
            json.dumps(
                {"error": "Index not found. Run 'obsidian-rag index' first."}, indent=2
            )
        )
        return 1
    except Exception as e:
        # Catch-all boundary handler: report the error as JSON on stderr.
        # NOTE(review): the sibling _status handler returns 1 here; this view
        # shows no return after the print — confirm 'return 1' follows in the
        # actual file (the diff context may simply be truncated).
        print(json.dumps({"error": str(e)}), file=sys.stderr)
@@ -149,8 +247,9 @@ Usage:
obsidian-rag sync Incremental sync (changed files only)
obsidian-rag reindex Force full reindex (nuke + rebuild)
obsidian-rag status Show index health and statistics
obsidian-rag search Semantic search through indexed notes
"""
if __name__ == "__main__":
sys.exit(main())
sys.exit(main())

View File

@@ -117,7 +117,7 @@ def delete_by_source_file(table: Any, source_file: str) -> int:
def search_chunks(
table: Any,
query_vector: list[float],
limit: int = 5,
limit: int | None = None,
directory_filter: list[str] | None = None,
date_range: dict | None = None,
tags: list[str] | None = None,
@@ -132,7 +132,7 @@ def search_chunks(
conditions: list[str] = []
if directory_filter:
dir_list = ", ".join(f'"{d}"' for d in directory_filter)
conditions.append(f'source_directory IN ({dir_list})')
conditions.append(f"source_directory IN ({dir_list})")
if date_range:
if "from" in date_range:
conditions.append(f"date >= '{date_range['from']}'")
@@ -144,11 +144,13 @@ def search_chunks(
where_clause = " AND ".join(conditions) if conditions else None
results = (
table.search(query_vector, vector_column_name="vector")
.limit(limit)
.where(where_clause) if where_clause else table.search(query_vector, vector_column_name="vector").limit(limit)
).to_list()
search_query = table.search(query_vector, vector_column_name="vector")
if limit is not None:
search_query = search_query.limit(limit)
if where_clause:
search_query = search_query.where(where_clause)
results = search_query.to_list()
return [
SearchResult(
@@ -156,7 +158,9 @@ def search_chunks(
chunk_text=r["chunk_text"],
source_file=r["source_file"],
source_directory=r["source_directory"],
section=r.get("section") if r.get("section") not in (None, "None") else None,
section=r.get("section")
if r.get("section") not in (None, "None")
else None,
date=r.get("date") if r.get("date") not in (None, "None") else None,
tags=r.get("tags") or [],
chunk_index=r.get("chunk_index") or 0,
@@ -172,10 +176,17 @@ def get_stats(table: Any) -> dict[str, Any]:
total_chunks = 0
try:
total_chunks = table.count_rows()
# Count unique source files using pandas
# Count non-null, non-empty source files
all_data = table.to_pandas()
total_docs = all_data["source_file"].nunique()
total_docs = (
all_data["source_file"]
.dropna()
.astype(str)
.str.strip()
.loc[lambda s: s.str.len() > 0]
.nunique()
)
except Exception:
pass
return {"total_docs": total_docs, "total_chunks": total_chunks}
return {"total_docs": total_docs, "total_chunks": total_chunks}