fix: add configSchema to openclaw.plugin.json, add search CLI command, fix total_docs stat
- Add required configSchema to openclaw.plugin.json for OpenClaw plugin discovery
- Add search command to CLI with --limit, --dir, --from-date, --to-date, --tags filters
- Fix get_stats() to properly count unique docs (was returning 0 for non-null values)
- Remove hardcoded max_results default of 5; search now returns all results by default
- Update INSTALL.md and design docs with correct OpenClaw extension path instructions
This commit is contained in:
@@ -139,10 +139,9 @@
|
|||||||
},
|
},
|
||||||
"max_results": {
|
"max_results": {
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"description": "Maximum number of chunks to return",
|
"description": "Maximum number of chunks to return (default: unlimited)",
|
||||||
"default": 5,
|
|
||||||
"minimum": 1,
|
"minimum": 1,
|
||||||
"maximum": 50
|
"maximum": 10000
|
||||||
},
|
},
|
||||||
"directory_filter": {
|
"directory_filter": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
|
|||||||
@@ -8,7 +8,8 @@ import time
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import obsidian_rag.config as config_mod
|
import obsidian_rag.config as config_mod
|
||||||
from obsidian_rag.vector_store import get_db, get_stats
|
from obsidian_rag.vector_store import get_db, get_stats, search_chunks
|
||||||
|
from obsidian_rag.embedder import OllamaEmbedder
|
||||||
from obsidian_rag.indexer import Indexer
|
from obsidian_rag.indexer import Indexer
|
||||||
|
|
||||||
|
|
||||||
@@ -35,6 +36,8 @@ def main(argv: list[str] | None = None) -> int:
|
|||||||
return _reindex(config)
|
return _reindex(config)
|
||||||
elif cmd == "status":
|
elif cmd == "status":
|
||||||
return _status(config)
|
return _status(config)
|
||||||
|
elif cmd == "search":
|
||||||
|
return _search(config, argv[1:])
|
||||||
else:
|
else:
|
||||||
print(f"Unknown command: {cmd}\n{_usage()}", file=sys.stderr)
|
print(f"Unknown command: {cmd}\n{_usage()}", file=sys.stderr)
|
||||||
return 1
|
return 1
|
||||||
@@ -111,6 +114,7 @@ def _status(config) -> int:
|
|||||||
# Resolve sync-result.json path (same convention as indexer)
|
# Resolve sync-result.json path (same convention as indexer)
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import os as osmod
|
import os as osmod
|
||||||
|
|
||||||
project_root = Path(__file__).parent.parent.parent
|
project_root = Path(__file__).parent.parent.parent
|
||||||
data_dir = project_root / "obsidian-rag"
|
data_dir = project_root / "obsidian-rag"
|
||||||
if not data_dir.exists() and not (project_root / "KnowledgeVault").exists():
|
if not data_dir.exists() and not (project_root / "KnowledgeVault").exists():
|
||||||
@@ -134,7 +138,101 @@ def _status(config) -> int:
|
|||||||
)
|
)
|
||||||
return 0
|
return 0
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
print(json.dumps({"error": "Index not found. Run 'obsidian-rag index' first."}, indent=2))
|
print(
|
||||||
|
json.dumps(
|
||||||
|
{"error": "Index not found. Run 'obsidian-rag index' first."}, indent=2
|
||||||
|
)
|
||||||
|
)
|
||||||
|
return 1
|
||||||
|
except Exception as e:
|
||||||
|
print(json.dumps({"error": str(e)}), file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
|
||||||
|
def _search(config, args: list[str]) -> int:
|
||||||
|
import argparse
|
||||||
|
|
||||||
|
parser = argparse.ArgumentParser(prog="obsidian-rag search")
|
||||||
|
parser.add_argument("query", nargs="*", help="Search query")
|
||||||
|
parser.add_argument(
|
||||||
|
"--limit", type=int, default=None, help="Max results (default: unlimited)"
|
||||||
|
)
|
||||||
|
parser.add_argument("--dir", dest="directory", help="Filter by directory")
|
||||||
|
parser.add_argument("--from-date", dest="from_date", help="Start date (YYYY-MM-DD)")
|
||||||
|
parser.add_argument("--to-date", dest="to_date", help="End date (YYYY-MM-DD)")
|
||||||
|
parser.add_argument("--tags", help="Comma-separated tags")
|
||||||
|
|
||||||
|
parsed, _ = parser.parse_known_args(args)
|
||||||
|
|
||||||
|
query_text = " ".join(parsed.query) if parsed.query else ""
|
||||||
|
if not query_text:
|
||||||
|
print("ERROR: query is required\n", file=sys.stderr)
|
||||||
|
parser.print_help()
|
||||||
|
return 1
|
||||||
|
|
||||||
|
try:
|
||||||
|
db = get_db(config)
|
||||||
|
table = db.open_table("obsidian_chunks")
|
||||||
|
embedder = OllamaEmbedder(config)
|
||||||
|
|
||||||
|
if not embedder.is_available():
|
||||||
|
print(
|
||||||
|
json.dumps(
|
||||||
|
{
|
||||||
|
"error": "Ollama is not available. Start Ollama or use DEGRADED mode."
|
||||||
|
},
|
||||||
|
indent=2,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
query_vector = embedder.embed_single(query_text)
|
||||||
|
|
||||||
|
filters = {}
|
||||||
|
if parsed.directory:
|
||||||
|
filters["directory_filter"] = [parsed.directory]
|
||||||
|
if parsed.from_date or parsed.to_date:
|
||||||
|
filters["date_range"] = {}
|
||||||
|
if parsed.from_date:
|
||||||
|
filters["date_range"]["from"] = parsed.from_date
|
||||||
|
if parsed.to_date:
|
||||||
|
filters["date_range"]["to"] = parsed.to_date
|
||||||
|
if parsed.tags:
|
||||||
|
filters["tags"] = [t.strip() for t in parsed.tags.split(",")]
|
||||||
|
|
||||||
|
results = search_chunks(
|
||||||
|
table,
|
||||||
|
query_vector,
|
||||||
|
limit=parsed.limit,
|
||||||
|
directory_filter=filters.get("directory_filter"),
|
||||||
|
date_range=filters.get("date_range"),
|
||||||
|
tags=filters.get("tags"),
|
||||||
|
)
|
||||||
|
|
||||||
|
output = {
|
||||||
|
"query": query_text,
|
||||||
|
"total_results": len(results),
|
||||||
|
"results": [
|
||||||
|
{
|
||||||
|
"score": r.score,
|
||||||
|
"source_file": r.source_file,
|
||||||
|
"source_directory": r.source_directory,
|
||||||
|
"section": r.section,
|
||||||
|
"date": r.date,
|
||||||
|
"tags": r.tags,
|
||||||
|
"chunk_text": r.chunk_text,
|
||||||
|
}
|
||||||
|
for r in results
|
||||||
|
],
|
||||||
|
}
|
||||||
|
print(json.dumps(output, indent=2, default=str))
|
||||||
|
return 0
|
||||||
|
except FileNotFoundError:
|
||||||
|
print(
|
||||||
|
json.dumps(
|
||||||
|
{"error": "Index not found. Run 'obsidian-rag index' first."}, indent=2
|
||||||
|
)
|
||||||
|
)
|
||||||
return 1
|
return 1
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(json.dumps({"error": str(e)}), file=sys.stderr)
|
print(json.dumps({"error": str(e)}), file=sys.stderr)
|
||||||
@@ -149,6 +247,7 @@ Usage:
|
|||||||
obsidian-rag sync Incremental sync (changed files only)
|
obsidian-rag sync Incremental sync (changed files only)
|
||||||
obsidian-rag reindex Force full reindex (nuke + rebuild)
|
obsidian-rag reindex Force full reindex (nuke + rebuild)
|
||||||
obsidian-rag status Show index health and statistics
|
obsidian-rag status Show index health and statistics
|
||||||
|
obsidian-rag search Semantic search through indexed notes
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -117,7 +117,7 @@ def delete_by_source_file(table: Any, source_file: str) -> int:
|
|||||||
def search_chunks(
|
def search_chunks(
|
||||||
table: Any,
|
table: Any,
|
||||||
query_vector: list[float],
|
query_vector: list[float],
|
||||||
limit: int = 5,
|
limit: int | None = None,
|
||||||
directory_filter: list[str] | None = None,
|
directory_filter: list[str] | None = None,
|
||||||
date_range: dict | None = None,
|
date_range: dict | None = None,
|
||||||
tags: list[str] | None = None,
|
tags: list[str] | None = None,
|
||||||
@@ -132,7 +132,7 @@ def search_chunks(
|
|||||||
conditions: list[str] = []
|
conditions: list[str] = []
|
||||||
if directory_filter:
|
if directory_filter:
|
||||||
dir_list = ", ".join(f'"{d}"' for d in directory_filter)
|
dir_list = ", ".join(f'"{d}"' for d in directory_filter)
|
||||||
conditions.append(f'source_directory IN ({dir_list})')
|
conditions.append(f"source_directory IN ({dir_list})")
|
||||||
if date_range:
|
if date_range:
|
||||||
if "from" in date_range:
|
if "from" in date_range:
|
||||||
conditions.append(f"date >= '{date_range['from']}'")
|
conditions.append(f"date >= '{date_range['from']}'")
|
||||||
@@ -144,11 +144,13 @@ def search_chunks(
|
|||||||
|
|
||||||
where_clause = " AND ".join(conditions) if conditions else None
|
where_clause = " AND ".join(conditions) if conditions else None
|
||||||
|
|
||||||
results = (
|
search_query = table.search(query_vector, vector_column_name="vector")
|
||||||
table.search(query_vector, vector_column_name="vector")
|
if limit is not None:
|
||||||
.limit(limit)
|
search_query = search_query.limit(limit)
|
||||||
.where(where_clause) if where_clause else table.search(query_vector, vector_column_name="vector").limit(limit)
|
if where_clause:
|
||||||
).to_list()
|
search_query = search_query.where(where_clause)
|
||||||
|
|
||||||
|
results = search_query.to_list()
|
||||||
|
|
||||||
return [
|
return [
|
||||||
SearchResult(
|
SearchResult(
|
||||||
@@ -156,7 +158,9 @@ def search_chunks(
|
|||||||
chunk_text=r["chunk_text"],
|
chunk_text=r["chunk_text"],
|
||||||
source_file=r["source_file"],
|
source_file=r["source_file"],
|
||||||
source_directory=r["source_directory"],
|
source_directory=r["source_directory"],
|
||||||
section=r.get("section") if r.get("section") not in (None, "None") else None,
|
section=r.get("section")
|
||||||
|
if r.get("section") not in (None, "None")
|
||||||
|
else None,
|
||||||
date=r.get("date") if r.get("date") not in (None, "None") else None,
|
date=r.get("date") if r.get("date") not in (None, "None") else None,
|
||||||
tags=r.get("tags") or [],
|
tags=r.get("tags") or [],
|
||||||
chunk_index=r.get("chunk_index") or 0,
|
chunk_index=r.get("chunk_index") or 0,
|
||||||
@@ -172,9 +176,16 @@ def get_stats(table: Any) -> dict[str, Any]:
|
|||||||
total_chunks = 0
|
total_chunks = 0
|
||||||
try:
|
try:
|
||||||
total_chunks = table.count_rows()
|
total_chunks = table.count_rows()
|
||||||
# Count unique source files using pandas
|
# Count non-null, non-empty source files
|
||||||
all_data = table.to_pandas()
|
all_data = table.to_pandas()
|
||||||
total_docs = all_data["source_file"].nunique()
|
total_docs = (
|
||||||
|
all_data["source_file"]
|
||||||
|
.dropna()
|
||||||
|
.astype(str)
|
||||||
|
.str.strip()
|
||||||
|
.loc[lambda s: s.str.len() > 0]
|
||||||
|
.nunique()
|
||||||
|
)
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|||||||
@@ -36,7 +36,7 @@ function makeSearchTool(config: ObsidianRagConfig): AnyAgentTool {
|
|||||||
type: "object",
|
type: "object",
|
||||||
properties: {
|
properties: {
|
||||||
query: { type: "string", description: "Natural language question or topic to search for" },
|
query: { type: "string", description: "Natural language question or topic to search for" },
|
||||||
max_results: { type: "integer", description: "Maximum number of chunks to return (default: 5, range: 1-50)", default: 5, minimum: 1, maximum: 50 },
|
max_results: { type: "integer", description: "Maximum number of chunks to return (default: unlimited)", minimum: 1, maximum: 10000 },
|
||||||
directory_filter: { type: "array", description: "Limit search to specific subdirectories", items: { type: "string" } },
|
directory_filter: { type: "array", description: "Limit search to specific subdirectories", items: { type: "string" } },
|
||||||
date_range: {
|
date_range: {
|
||||||
type: "object",
|
type: "object",
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ export async function searchTool(
|
|||||||
): Promise<ResponseEnvelope<{ results: SearchResult[]; sensitive_detected: boolean } | null>> {
|
): Promise<ResponseEnvelope<{ results: SearchResult[]; sensitive_detected: boolean } | null>> {
|
||||||
try {
|
try {
|
||||||
const results = await searchVectorDb(config, params.query, {
|
const results = await searchVectorDb(config, params.query, {
|
||||||
max_results: params.max_results ?? 5,
|
max_results: params.max_results ?? undefined,
|
||||||
directory_filter: params.directory_filter,
|
directory_filter: params.directory_filter,
|
||||||
date_range: params.date_range,
|
date_range: params.date_range,
|
||||||
tags: params.tags,
|
tags: params.tags,
|
||||||
|
|||||||
@@ -74,7 +74,7 @@ export async function searchVectorDb(
|
|||||||
}
|
}
|
||||||
const whereClause = conditions.length > 0 ? conditions.join(" AND ") : undefined;
|
const whereClause = conditions.length > 0 ? conditions.join(" AND ") : undefined;
|
||||||
|
|
||||||
const limit = options.max_results ?? 5;
|
const limit = options.max_results;
|
||||||
|
|
||||||
// Try vector search first; if Ollama is down embedQuery throws → fallback to FTS
|
// Try vector search first; if Ollama is down embedQuery throws → fallback to FTS
|
||||||
let rows: Record<string, unknown>[];
|
let rows: Record<string, unknown>[];
|
||||||
@@ -85,14 +85,20 @@ export async function searchVectorDb(
|
|||||||
if (whereClause) {
|
if (whereClause) {
|
||||||
queryBuilder = queryBuilder.filter(whereClause);
|
queryBuilder = queryBuilder.filter(whereClause);
|
||||||
}
|
}
|
||||||
rows = await queryBuilder.limit(limit).toArray();
|
if (limit !== undefined) {
|
||||||
|
queryBuilder = queryBuilder.limit(limit);
|
||||||
|
}
|
||||||
|
rows = await queryBuilder.toArray();
|
||||||
} catch {
|
} catch {
|
||||||
// Ollama unavailable — fallback to full-text search on chunk_text (BM25 scoring)
|
// Ollama unavailable — fallback to full-text search on chunk_text (BM25 scoring)
|
||||||
let ftsBuilder = table.query().fullTextSearch(query);
|
let ftsBuilder = table.query().fullTextSearch(query);
|
||||||
if (whereClause) {
|
if (whereClause) {
|
||||||
ftsBuilder = ftsBuilder.filter(whereClause);
|
ftsBuilder = ftsBuilder.filter(whereClause);
|
||||||
}
|
}
|
||||||
rows = await ftsBuilder.limit(limit).toArray();
|
if (limit !== undefined) {
|
||||||
|
ftsBuilder = ftsBuilder.limit(limit);
|
||||||
|
}
|
||||||
|
rows = await ftsBuilder.toArray();
|
||||||
}
|
}
|
||||||
|
|
||||||
return rows.map((r: Record<string, unknown>) => ({
|
return rows.map((r: Record<string, unknown>) => ({
|
||||||
|
|||||||
Reference in New Issue
Block a user