Files
obsidian-rag/python/obsidian_rag/cli.py
Santhosh Janardhanan 21b9704e21 fix(indexer): use upsert_chunks return value for chunk count
Previously, total_chunks was computed from the process_file return value
(num_chunks), which could differ from the actual stored count if upsert
silently failed. Now the stored count returned by upsert_chunks is used.

Also fixes cli._index to skip progress yields when building result.
2026-04-12 02:16:19 -04:00

259 lines
7.8 KiB
Python

"""CLI: obsidian-rag index | sync | reindex | status."""
from __future__ import annotations
import json
import sys
import time
from pathlib import Path
import obsidian_rag.config as config_mod
from obsidian_rag.vector_store import get_db, get_stats, search_chunks
from obsidian_rag.embedder import OllamaEmbedder
from obsidian_rag.indexer import Indexer
def main(argv: list[str] | None = None) -> int:
    """Entry point for the obsidian-rag CLI.

    Args:
        argv: Command-line arguments without the program name. When None,
            falls back to ``sys.argv[1:]``. An explicitly empty list prints
            usage instead of re-reading ``sys.argv``.

    Returns:
        Process exit code: 0 on success/help, 1 on usage or config errors,
        or whatever the dispatched subcommand returns.
    """
    # BUG FIX: `argv or sys.argv[1:]` treated an explicit empty list the
    # same as None and silently re-read sys.argv. Use an identity check so
    # main([]) deterministically prints usage.
    if argv is None:
        argv = sys.argv[1:]
    if not argv or argv[0] in ("--help", "-h"):
        print(_usage())
        return 0
    cmd = argv[0]
    try:
        config = config_mod.load_config()
    except FileNotFoundError as e:
        # Missing config is a user-facing setup error, not a crash.
        print(f"ERROR: {e}", file=sys.stderr)
        return 1
    if cmd == "index":
        return _index(config)
    if cmd == "sync":
        return _sync(config)
    if cmd == "reindex":
        return _reindex(config)
    if cmd == "status":
        return _status(config)
    if cmd == "search":
        return _search(config, argv[1:])
    print(f"Unknown command: {cmd}\n{_usage()}", file=sys.stderr)
    return 1
def _index(config) -> int:
    """Run a full vault index and emit a JSON summary on stdout.

    Returns 0 on success, 1 when the index run recorded errors, and 2 on a
    fatal exception.
    """
    indexer = Indexer(config)
    start = time.monotonic()
    try:
        # Default summary in case the generator never yields "complete".
        summary: dict = {"indexed_files": 0, "total_chunks": 0, "errors": []}
        for event in indexer.full_index():
            # Only the terminal "complete" event carries the totals;
            # intermediate "progress" events are deliberately not echoed.
            if event.get("type") == "complete":
                summary = event
        elapsed_ms = int((time.monotonic() - start) * 1000)
        payload = {
            "type": "complete",
            "indexed_files": summary["indexed_files"],
            "total_chunks": summary["total_chunks"],
            "duration_ms": elapsed_ms,
            "errors": summary["errors"],
        }
        print(json.dumps(payload, indent=2))
        return 1 if summary["errors"] else 0
    except Exception as e:
        print(json.dumps({"type": "error", "error": str(e)}), file=sys.stderr)
        return 2
def _sync(config) -> int:
    """Run an incremental sync (changed files only) and print JSON output.

    Returns 0 on success, 1 when the sync recorded errors, and 2 on a
    fatal exception.
    """
    indexer = Indexer(config)
    try:
        outcome = indexer.sync()
        print(json.dumps({"type": "complete", **outcome}, indent=2))
        return 1 if outcome["errors"] else 0
    except Exception as e:
        print(json.dumps({"type": "error", "error": str(e)}), file=sys.stderr)
        return 2
def _reindex(config) -> int:
    """Force a full reindex (drop and rebuild) and print a JSON summary.

    Returns:
        0 on success, 1 when the reindex recorded errors (consistent with
        ``_index`` and ``_sync``), 2 on a fatal exception.
    """
    indexer = Indexer(config)
    t0 = time.monotonic()
    try:
        result = indexer.reindex()
        duration_ms = int((time.monotonic() - t0) * 1000)
        print(
            json.dumps(
                {
                    "type": "complete",
                    "indexed_files": result["indexed_files"],
                    "total_chunks": result["total_chunks"],
                    "duration_ms": duration_ms,
                    "errors": result["errors"],
                },
                indent=2,
            )
        )
        # BUG FIX: previously returned 0 unconditionally, masking partial
        # failures; the sibling subcommands signal recorded errors with 1.
        return 0 if not result["errors"] else 1
    except Exception as e:
        print(json.dumps({"type": "error", "error": str(e)}), file=sys.stderr)
        return 2
def _status(config) -> int:
    """Print index health (doc/chunk counts, last sync timestamp) as JSON.

    Returns 0 on success, 1 when the index is missing or another error
    occurs.
    """
    try:
        db = get_db(config)
        table = db.open_table("obsidian_chunks")
        stats = get_stats(table)

        # Mirror the indexer's convention for locating sync-result.json:
        # prefer <project_root>/obsidian-rag, and fall back to
        # ~/.obsidian-rag when neither the data dir nor the vault marker
        # directory exists.
        from pathlib import Path
        import os as osmod

        project_root = Path(__file__).parent.parent.parent
        data_dir = project_root / "obsidian-rag"
        vault_marker = project_root / "KnowledgeVault"
        if not (data_dir.exists() or vault_marker.exists()):
            data_dir = Path(osmod.path.expanduser("~/.obsidian-rag"))
        sync_path = data_dir / "sync-result.json"

        last_sync = None
        if sync_path.exists():
            try:
                last_sync = json.loads(sync_path.read_text()).get("timestamp")
            except Exception:
                # Best-effort read: a corrupt sync file just means "unknown".
                pass

        report = {
            "total_docs": stats["total_docs"],
            "total_chunks": stats["total_chunks"],
            "last_sync": last_sync,
        }
        print(json.dumps(report, indent=2))
        return 0
    except FileNotFoundError:
        print(
            json.dumps(
                {"error": "Index not found. Run 'obsidian-rag index' first."}, indent=2
            )
        )
        return 1
    except Exception as e:
        print(json.dumps({"error": str(e)}), file=sys.stderr)
        return 1
def _search(config, args: list[str]) -> int:
    """Semantic search over indexed notes; prints JSON results on stdout.

    Args:
        config: Loaded application config.
        args: Raw CLI arguments after the ``search`` subcommand.

    Returns 0 on success, 1 on usage errors, a missing index, or an
    unavailable embedder.
    """
    import argparse

    parser = argparse.ArgumentParser(prog="obsidian-rag search")
    parser.add_argument("query", nargs="*", help="Search query")
    parser.add_argument(
        "--limit", type=int, default=None, help="Max results (default: unlimited)"
    )
    parser.add_argument("--dir", dest="directory", help="Filter by directory")
    parser.add_argument("--from-date", dest="from_date", help="Start date (YYYY-MM-DD)")
    parser.add_argument("--to-date", dest="to_date", help="End date (YYYY-MM-DD)")
    parser.add_argument("--tags", help="Comma-separated tags")
    # parse_known_args: unknown flags are tolerated rather than fatal.
    opts, _ = parser.parse_known_args(args)

    query_text = " ".join(opts.query) if opts.query else ""
    if not query_text:
        print("ERROR: query is required\n", file=sys.stderr)
        parser.print_help()
        return 1

    try:
        db = get_db(config)
        table = db.open_table("obsidian_chunks")
        embedder = OllamaEmbedder(config)
        if not embedder.is_available():
            print(
                json.dumps(
                    {
                        "error": "Ollama is not available. Start Ollama or use DEGRADED mode."
                    },
                    indent=2,
                )
            )
            return 1
        query_vector = embedder.embed_single(query_text)

        # Build each optional filter argument only when supplied; absent
        # filters are passed as None.
        directory_filter = [opts.directory] if opts.directory else None
        date_range = None
        if opts.from_date or opts.to_date:
            date_range = {}
            if opts.from_date:
                date_range["from"] = opts.from_date
            if opts.to_date:
                date_range["to"] = opts.to_date
        tags = [t.strip() for t in opts.tags.split(",")] if opts.tags else None

        hits = search_chunks(
            table,
            query_vector,
            limit=opts.limit,
            directory_filter=directory_filter,
            date_range=date_range,
            tags=tags,
        )
        print(
            json.dumps(
                {
                    "query": query_text,
                    "total_results": len(hits),
                    "results": [
                        {
                            "score": hit.score,
                            "source_file": hit.source_file,
                            "source_directory": hit.source_directory,
                            "section": hit.section,
                            "date": hit.date,
                            "tags": hit.tags,
                            "chunk_text": hit.chunk_text,
                        }
                        for hit in hits
                    ],
                },
                indent=2,
                default=str,
            )
        )
        return 0
    except FileNotFoundError:
        print(
            json.dumps(
                {"error": "Index not found. Run 'obsidian-rag index' first."}, indent=2
            )
        )
        return 1
    except Exception as e:
        print(json.dumps({"error": str(e)}), file=sys.stderr)
        return 1
def _usage() -> str:
return """obsidian-rag - Obsidian vault RAG indexer
Usage:
obsidian-rag index Full index of the vault
obsidian-rag sync Incremental sync (changed files only)
obsidian-rag reindex Force full reindex (nuke + rebuild)
obsidian-rag status Show index health and statistics
obsidian-rag search Semantic search through indexed notes
"""
if __name__ == "__main__":
    # Script entry point: propagate main()'s return value as the process
    # exit code.
    sys.exit(main())