"""CLI: obsidian-rag index | sync | reindex | status.""" from __future__ import annotations import json import sys import time from pathlib import Path import obsidian_rag.config as config_mod from obsidian_rag.vector_store import get_db, get_stats, search_chunks from obsidian_rag.embedder import OllamaEmbedder from obsidian_rag.indexer import Indexer def main(argv: list[str] | None = None) -> int: argv = argv or sys.argv[1:] if not argv or argv[0] in ("--help", "-h"): print(_usage()) return 0 cmd = argv[0] try: config = config_mod.load_config() except FileNotFoundError as e: print(f"ERROR: {e}", file=sys.stderr) return 1 if cmd == "index": return _index(config) elif cmd == "sync": return _sync(config) elif cmd == "reindex": return _reindex(config) elif cmd == "status": return _status(config) elif cmd == "search": return _search(config, argv[1:]) else: print(f"Unknown command: {cmd}\n{_usage()}", file=sys.stderr) return 1 def _index(config) -> int: indexer = Indexer(config) t0 = time.monotonic() try: gen = indexer.full_index() result: dict = {"indexed_files": 0, "total_chunks": 0, "errors": []} for item in gen: result = item # progress yields are dicts; final dict from return duration_ms = int((time.monotonic() - t0) * 1000) print( json.dumps( { "type": "complete", "indexed_files": result["indexed_files"], "total_chunks": result["total_chunks"], "duration_ms": duration_ms, "errors": result["errors"], }, indent=2, ) ) return 0 if not result["errors"] else 1 except Exception as e: print(json.dumps({"type": "error", "error": str(e)}), file=sys.stderr) return 2 def _sync(config) -> int: indexer = Indexer(config) try: result = indexer.sync() print(json.dumps({"type": "complete", **result}, indent=2)) return 0 if not result["errors"] else 1 except Exception as e: print(json.dumps({"type": "error", "error": str(e)}), file=sys.stderr) return 2 def _reindex(config) -> int: indexer = Indexer(config) t0 = time.monotonic() try: result = indexer.reindex() duration_ms = int((time.monotonic() - t0) * 1000) print( json.dumps( { "type": "complete", "indexed_files": result["indexed_files"], "total_chunks": result["total_chunks"], "duration_ms": duration_ms, "errors": result["errors"], }, indent=2, ) ) return 0 except Exception as e: print(json.dumps({"type": "error", "error": str(e)}), file=sys.stderr) return 2 def _status(config) -> int: try: db = get_db(config) table = db.open_table("obsidian_chunks") stats = get_stats(table) # Resolve sync-result.json path (same convention as indexer) from pathlib import Path import os as osmod project_root = Path(__file__).parent.parent.parent data_dir = project_root / "obsidian-rag" if not data_dir.exists() and not (project_root / "KnowledgeVault").exists(): data_dir = Path(osmod.path.expanduser("~/.obsidian-rag")) sync_path = data_dir / "sync-result.json" last_sync = None if sync_path.exists(): try: last_sync = json.loads(sync_path.read_text()).get("timestamp") except Exception: pass print( json.dumps( { "total_docs": stats["total_docs"], "total_chunks": stats["total_chunks"], "last_sync": last_sync, }, indent=2, ) ) return 0 except FileNotFoundError: print( json.dumps( {"error": "Index not found. Run 'obsidian-rag index' first."}, indent=2 ) ) return 1 except Exception as e: print(json.dumps({"error": str(e)}), file=sys.stderr) return 1 def _search(config, args: list[str]) -> int: import argparse parser = argparse.ArgumentParser(prog="obsidian-rag search") parser.add_argument("query", nargs="*", help="Search query") parser.add_argument( "--limit", type=int, default=None, help="Max results (default: unlimited)" ) parser.add_argument("--dir", dest="directory", help="Filter by directory") parser.add_argument("--from-date", dest="from_date", help="Start date (YYYY-MM-DD)") parser.add_argument("--to-date", dest="to_date", help="End date (YYYY-MM-DD)") parser.add_argument("--tags", help="Comma-separated tags") parsed, _ = parser.parse_known_args(args) query_text = " ".join(parsed.query) if parsed.query else "" if not query_text: print("ERROR: query is required\n", file=sys.stderr) parser.print_help() return 1 try: db = get_db(config) table = db.open_table("obsidian_chunks") embedder = OllamaEmbedder(config) if not embedder.is_available(): print( json.dumps( { "error": "Ollama is not available. Start Ollama or use DEGRADED mode." }, indent=2, ) ) return 1 query_vector = embedder.embed_single(query_text) filters = {} if parsed.directory: filters["directory_filter"] = [parsed.directory] if parsed.from_date or parsed.to_date: filters["date_range"] = {} if parsed.from_date: filters["date_range"]["from"] = parsed.from_date if parsed.to_date: filters["date_range"]["to"] = parsed.to_date if parsed.tags: filters["tags"] = [t.strip() for t in parsed.tags.split(",")] results = search_chunks( table, query_vector, limit=parsed.limit, directory_filter=filters.get("directory_filter"), date_range=filters.get("date_range"), tags=filters.get("tags"), ) output = { "query": query_text, "total_results": len(results), "results": [ { "score": r.score, "source_file": r.source_file, "source_directory": r.source_directory, "section": r.section, "date": r.date, "tags": r.tags, "chunk_text": r.chunk_text, } for r in results ], } print(json.dumps(output, indent=2, default=str)) return 0 except FileNotFoundError: print( json.dumps( {"error": "Index not found. Run 'obsidian-rag index' first."}, indent=2 ) ) return 1 except Exception as e: print(json.dumps({"error": str(e)}), file=sys.stderr) return 1 def _usage() -> str: return """obsidian-rag - Obsidian vault RAG indexer Usage: obsidian-rag index Full index of the vault obsidian-rag sync Incremental sync (changed files only) obsidian-rag reindex Force full reindex (nuke + rebuild) obsidian-rag status Show index health and statistics obsidian-rag search Semantic search through indexed notes """ if __name__ == "__main__": sys.exit(main())