Sprint 0-2: TS plugin scaffolding, LanceDB utils, tooling updates
- Add index-tool.ts command implementation
- Wire lancedb.ts vector search into plugin
- Update src/tools/index.ts exports
- Bump package deps (ts-jest, jest, typescript, lancedb)
- Add .claude/settings.local.json

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -8,7 +8,46 @@
|
|||||||
"Bash(git add:*)",
|
"Bash(git add:*)",
|
||||||
"Bash(git commit -m ':*)",
|
"Bash(git commit -m ':*)",
|
||||||
"WebFetch(domain:www.ollama.com)",
|
"WebFetch(domain:www.ollama.com)",
|
||||||
"mcp__web-reader__webReader"
|
"mcp__web-reader__webReader",
|
||||||
|
"Bash(ollama list:*)",
|
||||||
|
"Bash(python3:*)",
|
||||||
|
"Bash(pip install:*)",
|
||||||
|
"Bash(npm install:*)",
|
||||||
|
"Bash(obsidian-rag --help)",
|
||||||
|
"Bash(obsidian-rag status:*)",
|
||||||
|
"Bash(npm run:*)",
|
||||||
|
"Bash(obsidian-rag index:*)",
|
||||||
|
"Bash(curl -s http://localhost:11434/api/tags)",
|
||||||
|
"Bash(curl -s -X POST http://localhost:11434/api/embeddings -d '{\"model\":\"mxbai-embed-large\",\"prompt\":\"hello world\"}')",
|
||||||
|
"Bash(curl -s -X POST http://localhost:11434/api/embeddings -d '{\"model\":\"mxbai-embed-large:335m\",\"prompt\":\"hello world\"}')",
|
||||||
|
"Bash(curl:*)",
|
||||||
|
"Bash(find /Users/santhoshj/dev/obsidian-rag/python -name \"*.pyc\" -delete)",
|
||||||
|
"Bash(find /Users/santhoshj/dev/obsidian-rag/python -name \"__pycache__\" -exec rm -rf {} +)",
|
||||||
|
"Bash(npm test:*)",
|
||||||
|
"Bash(python -m pytest --collect-only)",
|
||||||
|
"Bash(python -m pytest tests/unit/test_chunker.py tests/unit/test_security.py -v)",
|
||||||
|
"Bash(python -m pytest tests/unit/test_chunker.py -v --tb=short)",
|
||||||
|
"mcp__plugin_ecc_context7__resolve-library-id",
|
||||||
|
"mcp__plugin_ecc_context7__query-docs",
|
||||||
|
"Bash(python -m pytest tests/unit/test_vector_store.py -v)",
|
||||||
|
"Bash(python -m pytest tests/unit/test_vector_store.py::test_search_chunks_with_tags_filter -v)",
|
||||||
|
"Bash(python:*)",
|
||||||
|
"Bash(npx tsx:*)",
|
||||||
|
"Bash(node test_lancedb_client.mjs)",
|
||||||
|
"Bash(node -e ':*)",
|
||||||
|
"Bash(node:*)",
|
||||||
|
"Bash(ls /Users/santhoshj/dev/obsidian-rag/*.config.*)",
|
||||||
|
"Bash(npx vitest:*)",
|
||||||
|
"Bash(git commit:*)",
|
||||||
|
"mcp__plugin_ecc_memory__add_observations",
|
||||||
|
"WebSearch",
|
||||||
|
"WebFetch(domain:docs.openclaw.ai)",
|
||||||
|
"Bash(ls node_modules/openclaw/dist/plugin-sdk/zod*)",
|
||||||
|
"Bash(ls:*)",
|
||||||
|
"Bash(npx ts-node:*)",
|
||||||
|
"Bash(pkill -f \"ollama serve\")"
|
||||||
]
|
]
|
||||||
}
|
},
|
||||||
|
"outputStyle": "default",
|
||||||
|
"spinnerTipsEnabled": false
|
||||||
}
|
}
|
||||||
|
|||||||
470
README.md
Normal file
470
README.md
Normal file
@@ -0,0 +1,470 @@
|
|||||||
|
# Obsidian RAG — Manual Testing Guide
|
||||||
|
|
||||||
|
**What it does:** Indexes an Obsidian vault → LanceDB → semantic search via Ollama embeddings. Powers OpenClaw agent tools for natural-language queries over 677+ personal notes.
|
||||||
|
|
||||||
|
**Stack:** Python indexer (CLI) → LanceDB → TypeScript plugin (OpenClaw)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Prerequisites
|
||||||
|
|
||||||
|
| Component | Version | Verify |
|
||||||
|
|---|---|---|
|
||||||
|
| Python | ≥3.11 | `python --version` |
|
||||||
|
| Node.js | ≥18 | `node --version` |
|
||||||
|
| Ollama | running | `curl http://localhost:11434/api/tags` |
|
||||||
|
| Ollama model | `mxbai-embed-large:335m` | `ollama list` |
|
||||||
|
|
||||||
|
**Install Ollama + model (if needed):**
|
||||||
|
```bash
|
||||||
|
# macOS/Linux
|
||||||
|
curl -fsSL https://ollama.com/install.sh | sh
|
||||||
|
|
||||||
|
# Pull embedding model
|
||||||
|
ollama pull mxbai-embed-large:335m
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Installation
|
||||||
|
|
||||||
|
### 1. Python CLI (indexer)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd /Users/santhoshj/dev/obsidian-rag
|
||||||
|
|
||||||
|
# Create virtual environment (optional but recommended)
|
||||||
|
python -m venv .venv
|
||||||
|
source .venv/bin/activate # macOS/Linux
|
||||||
|
# .\.venv\Scripts\Activate.ps1 # Windows PowerShell
|
||||||
|
# .venv\Scripts\activate.bat # Windows CMD
|
||||||
|
|
||||||
|
# Install in editable mode
|
||||||
|
pip install -e python/
|
||||||
|
```
|
||||||
|
|
||||||
|
**Verify:**
|
||||||
|
```bash
|
||||||
|
obsidian-rag --help
|
||||||
|
# → obsidian-rag index | sync | reindex | status
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. TypeScript Plugin (for OpenClaw integration)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
npm install
|
||||||
|
npm run build # → dist/index.js (131kb)
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. Start Ollama (skip if it is already running)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
ollama serve &
|
||||||
|
curl http://localhost:11434/api/tags
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
Edit `obsidian-rag/config.json` (path relative to the project root):
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"vault_path": "./KnowledgeVault/Default",
|
||||||
|
"embedding": {
|
||||||
|
"provider": "ollama",
|
||||||
|
"model": "mxbai-embed-large:335m",
|
||||||
|
"base_url": "http://localhost:11434",
|
||||||
|
"dimensions": 1024,
|
||||||
|
"batch_size": 64
|
||||||
|
},
|
||||||
|
"vector_store": {
|
||||||
|
"type": "lancedb",
|
||||||
|
"path": "./obsidian-rag/vectors.lance"
|
||||||
|
},
|
||||||
|
"indexing": {
|
||||||
|
"chunk_size": 500,
|
||||||
|
"chunk_overlap": 100,
|
||||||
|
"file_patterns": ["*.md"],
|
||||||
|
"deny_dirs": [".obsidian", ".trash", "zzz-Archive", ".git", ".logseq"],
|
||||||
|
"allow_dirs": []
|
||||||
|
},
|
||||||
|
"security": {
|
||||||
|
"require_confirmation_for": ["health", "financial_debt"],
|
||||||
|
"sensitive_sections": ["#mentalhealth", "#physicalhealth", "#Relations"],
|
||||||
|
"local_only": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
| Field | What it does |
|
||||||
|
|---|---|
|
||||||
|
| `vault_path` | Root of Obsidian vault (relative or absolute) |
|
||||||
|
| `embedding.model` | Ollama embedding model to use (e.g. `mxbai-embed-large:335m`) |
|
||||||
|
| `vector_store.path` | Where LanceDB data lives |
|
||||||
|
| `deny_dirs` | Always-skipped directories |
|
||||||
|
| `allow_dirs` | If non-empty, **only** these directories are indexed |
|
||||||
|
|
||||||
|
**Windows users:** Use `".\\KnowledgeVault\\Default"` or an absolute path like `"C:\\Users\\you\\KnowledgeVault\\Default"`.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## CLI Commands
|
||||||
|
|
||||||
|
All commands run from the project root (`/Users/santhoshj/dev/obsidian-rag`).
|
||||||
|
|
||||||
|
### `obsidian-rag index` — Full Index
|
||||||
|
|
||||||
|
First-time indexing. Scans all `.md` files → chunks → embeds → stores in LanceDB.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
obsidian-rag index
|
||||||
|
```
|
||||||
|
|
||||||
|
**Output:**
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"type": "complete",
|
||||||
|
"indexed_files": 627,
|
||||||
|
"total_chunks": 3764,
|
||||||
|
"duration_ms": 45230,
|
||||||
|
"errors": []
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**What happens:**
|
||||||
|
1. Walk vault (respects `deny_dirs` / `allow_dirs`)
|
||||||
|
2. Parse markdown: frontmatter, headings, tags, dates
|
||||||
|
3. Chunk: structured notes (journal) split by `# heading`; unstructured use 500-token sliding window
|
||||||
|
4. Embed: batch of 64 chunks → Ollama `/api/embeddings`
|
||||||
|
5. Upsert: write to LanceDB
|
||||||
|
6. Write `obsidian-rag/sync-result.json` atomically
|
||||||
|
|
||||||
|
**Time:** ~45s for 627 files on first run.
|
||||||
|
|
||||||
|
### `obsidian-rag sync` — Incremental Sync
|
||||||
|
|
||||||
|
Only re-indexes files changed since last sync (by `mtime`).
|
||||||
|
|
||||||
|
```bash
|
||||||
|
obsidian-rag sync
|
||||||
|
```
|
||||||
|
|
||||||
|
**Output:**
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"type": "complete",
|
||||||
|
"indexed_files": 3,
|
||||||
|
"total_chunks": 12,
|
||||||
|
"duration_ms": 1200,
|
||||||
|
"errors": []
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Use when:** You edited/added a few notes and want to update the index without a full rebuild.
|
||||||
|
|
||||||
|
### `obsidian-rag reindex` — Force Rebuild
|
||||||
|
|
||||||
|
Nukes the existing LanceDB table and rebuilds from scratch.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
obsidian-rag reindex
|
||||||
|
```
|
||||||
|
|
||||||
|
**Use when:**
|
||||||
|
- LanceDB schema changed
|
||||||
|
- Chunking strategy changed
|
||||||
|
- Index corrupted
|
||||||
|
- First run after upgrading (to pick up FTS index)
|
||||||
|
|
||||||
|
### `obsidian-rag status` — Index Health
|
||||||
|
|
||||||
|
```bash
|
||||||
|
obsidian-rag status
|
||||||
|
```
|
||||||
|
|
||||||
|
**Output:**
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"total_docs": 627,
|
||||||
|
"total_chunks": 3764,
|
||||||
|
"last_sync": "2026-04-11T00:30:00Z"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Re-index after schema upgrade (important!)
|
||||||
|
|
||||||
|
If you pulled a new version that changed the FTS index setup, you **must** reindex:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
obsidian-rag reindex
|
||||||
|
```
|
||||||
|
|
||||||
|
This drops and recreates the LanceDB table, rebuilding the FTS index on `chunk_text`.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Manual Testing Walkthrough
|
||||||
|
|
||||||
|
### Step 1 — Verify prerequisites
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Ollama up?
|
||||||
|
curl http://localhost:11434/api/tags
|
||||||
|
|
||||||
|
# Python CLI working?
|
||||||
|
obsidian-rag --help
|
||||||
|
|
||||||
|
# Vault accessible?
|
||||||
|
ls ./KnowledgeVault/Default | head -5
|
||||||
|
```
|
||||||
|
|
||||||
|
### Step 2 — Do a full index
|
||||||
|
|
||||||
|
```bash
|
||||||
|
obsidian-rag index
|
||||||
|
```
|
||||||
|
|
||||||
|
Expected: ~30-60s. JSON output with `indexed_files` and `total_chunks`.
|
||||||
|
|
||||||
|
### Step 3 — Check status
|
||||||
|
|
||||||
|
```bash
|
||||||
|
obsidian-rag status
|
||||||
|
```
|
||||||
|
|
||||||
|
### Step 4 — Test search via Python
|
||||||
|
|
||||||
|
The Python indexer doesn't have an interactive search CLI, but you can test via the LanceDB Python API directly:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python3 -c "
|
||||||
|
import sys
|
||||||
|
sys.path.insert(0, 'python')
|
||||||
|
from obsidian_rag.vector_store import get_db, search_chunks
|
||||||
|
from obsidian_rag.embedder import embed_texts
|
||||||
|
from obsidian_rag.config import load_config
|
||||||
|
|
||||||
|
config = load_config()
|
||||||
|
db = get_db(config)
|
||||||
|
table = db.open_table('obsidian_chunks')
|
||||||
|
|
||||||
|
# Embed a query
|
||||||
|
query_vec = embed_texts(['how was my mental health in 2024'], config)[0]
|
||||||
|
|
||||||
|
# Search
|
||||||
|
results = search_chunks(table, query_vec, limit=3)
|
||||||
|
for r in results:
|
||||||
|
print(f'[{r.score:.3f}] {r.source_file} | {r.section or \"(no section)\"}')
|
||||||
|
print(f' {r.chunk_text[:200]}...')
|
||||||
|
print()
|
||||||
|
"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Step 5 — Test TypeScript search (via Node)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
node --input-type=module -e "
|
||||||
|
import { loadConfig } from './src/utils/config.js';
|
||||||
|
import { searchVectorDb } from './src/utils/lancedb.js';
|
||||||
|
|
||||||
|
const config = loadConfig();
|
||||||
|
const results = await searchVectorDb(config, 'how was my mental health in 2024', { max_results: 3 });
|
||||||
|
for (const r of results) {
|
||||||
|
console.log(\`[\${r.score}] \${r.source_file} | \${r.section || '(no section)'}\`);
|
||||||
|
console.log(\` \${r.chunk_text.slice(0, 180)}...\`);
|
||||||
|
console.log();
|
||||||
|
}
|
||||||
|
"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Step 6 — Test DEGRADED mode (Ollama down)
|
||||||
|
|
||||||
|
Stop Ollama, then run the same search:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Stop Ollama
|
||||||
|
pkill -f ollama # macOS/Linux
|
||||||
|
|
||||||
|
# Now run search — should fall back to FTS
|
||||||
|
node --input-type=module -e "
|
||||||
|
...same as above...
|
||||||
|
"
|
||||||
|
```
|
||||||
|
|
||||||
|
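Expected: results come back using BM25 full-text search instead of vector similarity. You'll see different `score` values: in this mode `score` carries LanceDB's BM25 `_score` instead of the vector `_distance`, so it is not directly comparable with scores from vector search.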
|
||||||
|
|
||||||
|
### Step 7 — Test sync
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Edit a note
|
||||||
|
echo "# Test edit
|
||||||
|
This is a test note about Ollama being down." >> ./KnowledgeVault/Default/test-note.md
|
||||||
|
|
||||||
|
# Sync
|
||||||
|
obsidian-rag sync
|
||||||
|
|
||||||
|
# Check it was indexed
|
||||||
|
obsidian-rag status
|
||||||
|
```
|
||||||
|
|
||||||
|
### Step 8 — Test indexer health check
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Stop Ollama
|
||||||
|
pkill -f ollama
|
||||||
|
|
||||||
|
# Check status — will report Ollama as down but still show index stats
|
||||||
|
obsidian-rag status
|
||||||
|
|
||||||
|
# Restart Ollama
|
||||||
|
ollama serve
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Directory Filtering
|
||||||
|
|
||||||
|
Test searching only within `Journal`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
node --input-type=module -e "
|
||||||
|
import { loadConfig } from './src/utils/config.js';
|
||||||
|
import { searchVectorDb } from './src/utils/lancedb.js';
|
||||||
|
const config = loadConfig();
|
||||||
|
const results = await searchVectorDb(config, 'my mood and feelings', {
|
||||||
|
max_results: 3,
|
||||||
|
directory_filter: ['Journal']
|
||||||
|
});
|
||||||
|
results.forEach(r => console.log(\`[\${r.score}] \${r.source_file}\`));
|
||||||
|
"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## File Paths Reference
|
||||||
|
|
||||||
|
| File | Purpose |
|
||||||
|
|---|---|
|
||||||
|
| `obsidian-rag/vectors.lance/` | LanceDB data directory |
|
||||||
|
| `obsidian-rag/sync-result.json` | Last sync timestamp + stats |
|
||||||
|
| `python/obsidian_rag/` | Python package source |
|
||||||
|
| `src/` | TypeScript plugin source |
|
||||||
|
| `dist/index.js` | Built plugin bundle |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
### `FileNotFoundError: config.json`
|
||||||
|
|
||||||
|
The CLI could not find a config file. It looks in:
|
||||||
|
1. `./obsidian-rag/config.json` (relative to project root)
|
||||||
|
2. `~/.obsidian-rag/config.json` (home directory)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Verify config is found
|
||||||
|
python3 -c "
|
||||||
|
import sys; sys.path.insert(0,'python')
|
||||||
|
from obsidian_rag.config import load_config
|
||||||
|
c = load_config()
|
||||||
|
print('vault_path:', c.vault_path)
|
||||||
|
"
|
||||||
|
```
|
||||||
|
|
||||||
|
### `ERROR: Index not found. Run 'obsidian-rag index' first.`
|
||||||
|
|
||||||
|
LanceDB table doesn't exist yet. Run `obsidian-rag index`.
|
||||||
|
|
||||||
|
### Ollama connection refused
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://localhost:11434/api/tags
|
||||||
|
```
|
||||||
|
|
||||||
|
If this fails, Ollama isn't running:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
ollama serve &
|
||||||
|
ollama pull mxbai-embed-large:335m
|
||||||
|
```
|
||||||
|
|
||||||
|
### Vector search returns 0 results
|
||||||
|
|
||||||
|
1. Check index exists: `obsidian-rag status`
|
||||||
|
2. Rebuild index: `obsidian-rag reindex`
|
||||||
|
3. Check Ollama is up and model is available: `ollama list`
|
||||||
|
|
||||||
|
### FTS (DEGRADED mode) not working after upgrade
|
||||||
|
|
||||||
|
The FTS index on `chunk_text` was added in a recent change. **Reindex to rebuild with FTS:**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
obsidian-rag reindex
|
||||||
|
```
|
||||||
|
|
||||||
|
### Permission errors on Windows
|
||||||
|
|
||||||
|
Run terminal as Administrator, or install Python/Ollama to user-writable directories.
|
||||||
|
|
||||||
|
### Very slow embedding
|
||||||
|
|
||||||
|
Reduce `embedding.batch_size` in `config.json`:
|
||||||
|
|
||||||
|
```json
|
||||||
|
"batch_size": 32
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Project Structure
|
||||||
|
|
||||||
|
```
|
||||||
|
obsidian-rag/
|
||||||
|
├── obsidian-rag/
|
||||||
|
│ ├── config.json # Dev configuration
|
||||||
|
│ ├── vectors.lance/ # LanceDB data (created on first index)
|
||||||
|
│ └── sync-result.json # Last sync metadata
|
||||||
|
├── python/
|
||||||
|
│ ├── obsidian_rag/
|
||||||
|
│ │ ├── cli.py # obsidian-rag CLI entry point
|
||||||
|
│ │ ├── config.py # Config loader
|
||||||
|
│ │ ├── indexer.py # Full pipeline (scan → chunk → embed → store)
|
||||||
|
│ │ ├── chunker.py # Structured + sliding-window chunking
|
||||||
|
│ │ ├── embedder.py # Ollama /api/embeddings client
|
||||||
|
│ │ ├── vector_store.py # LanceDB CRUD
|
||||||
|
│ │ └── security.py # Path traversal, HTML strip, sensitive detection
|
||||||
|
│ └── tests/unit/ # 64 pytest tests
|
||||||
|
├── src/
|
||||||
|
│ ├── index.ts # OpenClaw plugin entry (definePluginEntry)
|
||||||
|
│ ├── tools/
|
||||||
|
│ │ ├── index.ts # 4× api.registerTool() calls
|
||||||
|
│ │ ├── index-tool.ts # obsidian_rag_index implementation
|
||||||
|
│ │ ├── search.ts # obsidian_rag_search implementation
|
||||||
|
│ │ ├── status.ts # obsidian_rag_status implementation
|
||||||
|
│ │ └── memory.ts # obsidian_rag_memory_store implementation
|
||||||
|
│ ├── services/
|
||||||
|
│ │ ├── health.ts # HEALTHY / DEGRADED / UNAVAILABLE state machine
|
||||||
|
│ │ ├── vault-watcher.ts # chokidar watcher + auto-sync
|
||||||
|
│ │ └── indexer-bridge.ts # Spawns Python CLI subprocess
|
||||||
|
│ └── utils/
|
||||||
|
│ ├── config.ts # TS config loader
|
||||||
|
│ ├── lancedb.ts # TS LanceDB query + FTS fallback
|
||||||
|
│ ├── types.ts # Shared types (SearchResult, ResponseEnvelope)
|
||||||
|
│ └── response.ts # makeEnvelope() factory
|
||||||
|
├── dist/index.js # Built plugin (do not edit)
|
||||||
|
├── openclaw.plugin.json # Plugin manifest
|
||||||
|
├── package.json
|
||||||
|
└── tsconfig.json
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Health States
|
||||||
|
|
||||||
|
| State | Meaning | Search |
|
||||||
|
|---|---|---|
|
||||||
|
| `HEALTHY` | Ollama up + index exists | Vector similarity (semantic) |
|
||||||
|
| `DEGRADED` | Ollama down + index exists | FTS on `chunk_text` (BM25) |
|
||||||
|
| `UNAVAILABLE` | No index / corrupted | Error — run `obsidian-rag index` first |
|
||||||
9589
package-lock.json
generated
9589
package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -14,6 +14,7 @@
|
|||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@lancedb/lancedb": "^0.12",
|
"@lancedb/lancedb": "^0.12",
|
||||||
"chokidar": "^3.6",
|
"chokidar": "^3.6",
|
||||||
|
"openclaw": "^2026.4.9",
|
||||||
"yaml": "^2.5"
|
"yaml": "^2.5"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
|
|||||||
@@ -79,6 +79,9 @@ def create_table_if_not_exists(db: Any) -> Any:
|
|||||||
)
|
)
|
||||||
|
|
||||||
tbl = db.create_table(TABLE_NAME, schema=schema, exist_ok=True)
|
tbl = db.create_table(TABLE_NAME, schema=schema, exist_ok=True)
|
||||||
|
# Create FTS index on chunk_text for DEGRADED mode fallback (Ollama down)
|
||||||
|
# replace=True makes this idempotent — safe to call on existing tables
|
||||||
|
tbl.create_fts_index("chunk_text", replace=True)
|
||||||
return tbl
|
return tbl
|
||||||
|
|
||||||
|
|
||||||
@@ -153,11 +156,11 @@ def search_chunks(
|
|||||||
chunk_text=r["chunk_text"],
|
chunk_text=r["chunk_text"],
|
||||||
source_file=r["source_file"],
|
source_file=r["source_file"],
|
||||||
source_directory=r["source_directory"],
|
source_directory=r["source_directory"],
|
||||||
section=r.get("section"),
|
section=r.get("section") if r.get("section") not in (None, "None") else None,
|
||||||
date=r.get("date"),
|
date=r.get("date") if r.get("date") not in (None, "None") else None,
|
||||||
tags=r.get("tags", []),
|
tags=r.get("tags") or [],
|
||||||
chunk_index=r.get("chunk_index", 0),
|
chunk_index=r.get("chunk_index") or 0,
|
||||||
score=r.get("_score", 0.0),
|
score=r.get("_distance") or 0.0,
|
||||||
)
|
)
|
||||||
for r in results
|
for r in results
|
||||||
]
|
]
|
||||||
|
|||||||
31
src/index.ts
31
src/index.ts
@@ -1,27 +1,34 @@
|
|||||||
|
/**
|
||||||
|
* OpenClaw plugin entry point.
|
||||||
|
* Registers 4 obsidian_rag_* tools via the OpenClaw SDK.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { definePluginEntry } from "openclaw/plugin-sdk/plugin-entry";
|
||||||
import { registerTools } from "./tools/index.js";
|
import { registerTools } from "./tools/index.js";
|
||||||
import { loadConfig } from "./utils/config.js";
|
import { loadConfig } from "./utils/config.js";
|
||||||
import { createHealthMachine, probeAll } from "./services/health.js";
|
import { createHealthMachine, probeAll } from "./services/health.js";
|
||||||
import { VaultWatcher } from "./services/vault-watcher.js";
|
import { VaultWatcher } from "./services/vault-watcher.js";
|
||||||
|
|
||||||
/** OpenClaw plugin entry point. */
|
export default definePluginEntry({
|
||||||
export async function onLoad(): Promise<void> {
|
id: "obsidian-rag",
|
||||||
|
name: "Obsidian RAG",
|
||||||
|
description:
|
||||||
|
"Semantic search through Obsidian vault notes using RAG. Powers natural language queries like 'How was my mental health in 2024?' across journal entries, financial records, health data, and more.",
|
||||||
|
register(api) {
|
||||||
const config = loadConfig();
|
const config = loadConfig();
|
||||||
const health = createHealthMachine(config);
|
const health = createHealthMachine(config);
|
||||||
|
|
||||||
// Probe dependencies immediately
|
|
||||||
const probe = await probeAll(config);
|
|
||||||
health.transition(probe);
|
|
||||||
|
|
||||||
// Start vault watcher for auto-sync
|
// Start vault watcher for auto-sync
|
||||||
const watcher = new VaultWatcher(config, health);
|
const watcher = new VaultWatcher(config, health);
|
||||||
watcher.start();
|
watcher.start();
|
||||||
|
|
||||||
// Register all 4 tools
|
// Register all 4 tools
|
||||||
await registerTools(config, health);
|
registerTools(api, config, health);
|
||||||
|
|
||||||
console.log("[obsidian-rag] Plugin loaded");
|
console.log("[obsidian-rag] Plugin loaded — tools registered");
|
||||||
}
|
|
||||||
|
|
||||||
export async function onUnload(): Promise<void> {
|
// Probe dependencies and start health reprobing in background
|
||||||
console.log("[obsidian-rag] Plugin unloading");
|
probeAll(config).then((probe) => health.transition(probe));
|
||||||
}
|
health.startReprobing(() => probeAll(config));
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|||||||
44
src/tools/index-tool.ts
Normal file
44
src/tools/index-tool.ts
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
/** obsidian_rag_index tool — spawns the Python indexer CLI. */
|
||||||
|
|
||||||
|
import type { ObsidianRagConfig } from "../utils/config.js";
|
||||||
|
import type { HealthState } from "../services/health.js";
|
||||||
|
import type { ResponseEnvelope } from "../utils/types.js";
|
||||||
|
import { makeEnvelope } from "../utils/response.js";
|
||||||
|
import { spawnIndexer } from "../services/indexer-bridge.js";
|
||||||
|
|
||||||
|
/** Parameters accepted by the obsidian_rag_index tool (see runIndexTool). */
export interface IndexParams {
|
||||||
|
// Requested indexing mode; runIndexTool maps "full" to the CLI subcommand
// "index" and passes "sync" / "reindex" through unchanged.
mode: "full" | "sync" | "reindex";
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Start an indexing run by spawning the Python indexer CLI.
 *
 * The tool-level mode is translated to a CLI subcommand ("full" becomes
 * "index"; "sync" and "reindex" pass through). On success the spawned job is
 * recorded on the health handle via `setActiveJob` and a "started" payload is
 * returned.
 *
 * @param config - Plugin configuration, forwarded to `spawnIndexer`.
 * @param health - Health handle; only `setActiveJob` is used here.
 * @param params - Requested indexing mode.
 * @returns A "healthy" envelope describing the started job, or an
 *   "unavailable" envelope with code `INDEXER_SPAWN_FAILED` if anything in the
 *   spawn path throws.
 */
export async function runIndexTool(
  config: ObsidianRagConfig,
  health: { get: () => { state: HealthState }; setActiveJob: (job: { id: string; mode: string; progress: number } | null) => void },
  params: IndexParams,
): Promise<ResponseEnvelope<{ job_id: string; status: string; mode: string; message: string } | null>> {
  // Tool mode -> CLI subcommand; resolved before the try so only the
  // spawn/bookkeeping path is covered by the error envelope below.
  const cliCommand = ({ full: "index", sync: "sync", reindex: "reindex" } as const)[params.mode];

  try {
    const started = await spawnIndexer(cliCommand, config);

    // Publish only the fields the health handle expects.
    health.setActiveJob({ id: started.id, mode: started.mode, progress: started.progress });

    return makeEnvelope(
      "healthy",
      {
        job_id: started.id,
        status: "started",
        mode: params.mode,
        message: `Indexing job ${started.id} started in ${params.mode} mode`,
      },
      null,
    );
  } catch (cause) {
    // Any failure above is reported as a recoverable spawn error.
    return makeEnvelope("unavailable", null, {
      code: "INDEXER_SPAWN_FAILED",
      message: String(cause),
      recoverable: true,
      suggestion: "Ensure the Python indexer is installed: pip install -e python/",
    });
  }
}
|
||||||
@@ -1,12 +1,118 @@
|
|||||||
/** Tool registration — wires all 4 obsidian_rag_* tools into OpenClaw. */
|
/** Tool registration — wires all 4 obsidian_rag_* tools into OpenClaw. */
|
||||||
|
|
||||||
|
import type { AgentToolResult } from "@mariozechner/pi-agent-core";
|
||||||
|
import type { OpenClawPluginApi } from "openclaw/plugin-sdk/plugin-entry";
|
||||||
import type { ObsidianRagConfig } from "../utils/config.js";
|
import type { ObsidianRagConfig } from "../utils/config.js";
|
||||||
import type { HealthState } from "../services/health.js";
|
import type { HealthState } from "../services/health.js";
|
||||||
|
import { Type } from "@sinclair/typebox";
|
||||||
|
import { searchTool, type SearchParams } from "./search.js";
|
||||||
|
import { runIndexTool, type IndexParams } from "./index-tool.js";
|
||||||
|
import { statusTool } from "./status.js";
|
||||||
|
import { memoryStoreTool, type MemoryStoreParams } from "./memory.js";
|
||||||
|
|
||||||
export async function registerTools(
|
function textEnvelope<T>(text: string, details: T): AgentToolResult<T> {
|
||||||
_config: ObsidianRagConfig,
|
return { content: [{ type: "text", text }], details };
|
||||||
_health: { get: () => { state: HealthState } },
|
}
|
||||||
): Promise<void> {
|
|
||||||
// TODO: Wire into OpenClaw tool registry once SDK is available
|
export function registerTools(
|
||||||
console.log("[obsidian-rag] Tools registered (stub — OpenClaw SDK TBD)");
|
api: OpenClawPluginApi,
|
||||||
|
config: ObsidianRagConfig,
|
||||||
|
health: { get: () => { state: HealthState }; setActiveJob: (job: { id: string; mode: string; progress: number } | null) => void },
|
||||||
|
): void {
|
||||||
|
// obsidian_rag_search — primary semantic search
|
||||||
|
api.registerTool({
|
||||||
|
name: "obsidian_rag_search",
|
||||||
|
description:
|
||||||
|
"Primary semantic search tool. Given a natural language query, searches the Obsidian vault index and returns the most relevant note chunks ranked by semantic similarity. Supports filtering by directory, date range, and tags.",
|
||||||
|
label: "Search Obsidian Vault",
|
||||||
|
parameters: Type.Object({
|
||||||
|
query: Type.String({ description: "Natural language question or topic to search for" }),
|
||||||
|
max_results: Type.Optional(
|
||||||
|
Type.Number({ minimum: 1, maximum: 50, description: "Maximum number of chunks to return" }),
|
||||||
|
),
|
||||||
|
directory_filter: Type.Optional(
|
||||||
|
Type.Array(Type.String(), {
|
||||||
|
description: "Limit search to specific vault subdirectories (e.g. ['Journal', 'Finance'])",
|
||||||
|
}),
|
||||||
|
),
|
||||||
|
date_range: Type.Optional(
|
||||||
|
Type.Object({
|
||||||
|
from: Type.Optional(Type.String({ description: "Start date (YYYY-MM-DD)" })),
|
||||||
|
to: Type.Optional(Type.String({ description: "End date (YYYY-MM-DD)" })),
|
||||||
|
}),
|
||||||
|
),
|
||||||
|
tags: Type.Optional(
|
||||||
|
Type.Array(Type.String(), {
|
||||||
|
description: "Filter by hashtags found in notes (e.g. ['#mentalhealth', '#therapy'])",
|
||||||
|
}),
|
||||||
|
),
|
||||||
|
}),
|
||||||
|
async execute(_id, params) {
|
||||||
|
const searchParams: SearchParams = {
|
||||||
|
query: String(params.query),
|
||||||
|
max_results: params.max_results != null ? Number(params.max_results) : undefined,
|
||||||
|
directory_filter: params.directory_filter as string[] | undefined,
|
||||||
|
date_range: params.date_range as { from?: string; to?: string } | undefined,
|
||||||
|
tags: params.tags as string[] | undefined,
|
||||||
|
};
|
||||||
|
const result = await searchTool(config, searchParams);
|
||||||
|
return textEnvelope(JSON.stringify(result), result);
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
// obsidian_rag_index — trigger indexing
|
||||||
|
api.registerTool({
|
||||||
|
name: "obsidian_rag_index",
|
||||||
|
description:
|
||||||
|
"Trigger indexing of the Obsidian vault. Use 'full' for first-time setup, 'sync' for incremental updates, 'reindex' to force a clean rebuild.",
|
||||||
|
label: "Index Obsidian Vault",
|
||||||
|
parameters: Type.Object({
|
||||||
|
mode: Type.Union(
|
||||||
|
[Type.Literal("full"), Type.Literal("sync"), Type.Literal("reindex")],
|
||||||
|
{ description: "Indexing mode" },
|
||||||
|
),
|
||||||
|
}),
|
||||||
|
async execute(_id, params) {
|
||||||
|
const indexParams: IndexParams = { mode: String(params.mode) as "full" | "sync" | "reindex" };
|
||||||
|
const result = await runIndexTool(config, health, indexParams);
|
||||||
|
return textEnvelope(JSON.stringify(result), result);
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
// obsidian_rag_status — health check
|
||||||
|
api.registerTool({
|
||||||
|
name: "obsidian_rag_status",
|
||||||
|
description:
|
||||||
|
"Check the health of the Obsidian RAG plugin — index statistics, last sync time, unindexed files, and Ollama status. Call this first when unsure if the index is ready.",
|
||||||
|
label: "Obsidian RAG Status",
|
||||||
|
parameters: Type.Object({}),
|
||||||
|
async execute(_id) {
|
||||||
|
const result = await statusTool(config);
|
||||||
|
return textEnvelope(JSON.stringify(result), result);
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
// obsidian_rag_memory_store — commit facts to memory
|
||||||
|
api.registerTool({
|
||||||
|
name: "obsidian_rag_memory_store",
|
||||||
|
description:
|
||||||
|
"Commit an important fact from search results to OpenClaw's memory for faster future retrieval. Use after finding significant information (e.g. 'I owe Sreenivas $50') that should be remembered.",
|
||||||
|
label: "Store in Memory",
|
||||||
|
parameters: Type.Object({
|
||||||
|
key: Type.String({ description: "Identifier for the fact (e.g. 'debt_to_sreenivas')" }),
|
||||||
|
value: Type.String({ description: "The fact to remember" }),
|
||||||
|
source: Type.String({
|
||||||
|
description: "Source file path in the vault (e.g. 'Journal/2025-03-15.md')",
|
||||||
|
}),
|
||||||
|
}),
|
||||||
|
async execute(_id, params) {
|
||||||
|
const memParams: MemoryStoreParams = {
|
||||||
|
key: String(params.key),
|
||||||
|
value: String(params.value),
|
||||||
|
source: String(params.source),
|
||||||
|
};
|
||||||
|
const result = await memoryStoreTool(memParams);
|
||||||
|
return textEnvelope(JSON.stringify(result), result);
|
||||||
|
},
|
||||||
|
});
|
||||||
}
|
}
|
||||||
@@ -52,9 +52,6 @@ export async function searchVectorDb(
|
|||||||
}
|
}
|
||||||
const table = await db.openTable("obsidian_chunks");
|
const table = await db.openTable("obsidian_chunks");
|
||||||
|
|
||||||
// Embed the query text
|
|
||||||
const queryVector = await embedQuery(query, config);
|
|
||||||
|
|
||||||
// Build WHERE clause from filters
|
// Build WHERE clause from filters
|
||||||
const conditions: string[] = [];
|
const conditions: string[] = [];
|
||||||
if (options.directory_filter && options.directory_filter.length > 0) {
|
if (options.directory_filter && options.directory_filter.length > 0) {
|
||||||
@@ -79,12 +76,24 @@ export async function searchVectorDb(
|
|||||||
|
|
||||||
const limit = options.max_results ?? 5;
|
const limit = options.max_results ?? 5;
|
||||||
|
|
||||||
// LanceDB JS SDK: table.vectorSearch(vector).filter(...).limit(...).toArray()
|
// Try vector search first; if Ollama is down embedQuery throws → fallback to FTS
|
||||||
|
let rows: Record<string, unknown>[];
|
||||||
|
try {
|
||||||
|
const queryVector = await embedQuery(query, config);
|
||||||
|
|
||||||
let queryBuilder = table.vectorSearch(queryVector);
|
let queryBuilder = table.vectorSearch(queryVector);
|
||||||
if (whereClause) {
|
if (whereClause) {
|
||||||
queryBuilder = queryBuilder.filter(whereClause);
|
queryBuilder = queryBuilder.filter(whereClause);
|
||||||
}
|
}
|
||||||
const rows = await queryBuilder.limit(limit).toArray();
|
rows = await queryBuilder.limit(limit).toArray();
|
||||||
|
} catch {
|
||||||
|
// Ollama unavailable — fallback to full-text search on chunk_text (BM25 scoring)
|
||||||
|
let ftsBuilder = table.query().fullTextSearch(query);
|
||||||
|
if (whereClause) {
|
||||||
|
ftsBuilder = ftsBuilder.filter(whereClause);
|
||||||
|
}
|
||||||
|
rows = await ftsBuilder.limit(limit).toArray();
|
||||||
|
}
|
||||||
|
|
||||||
return rows.map((r: Record<string, unknown>) => ({
|
return rows.map((r: Record<string, unknown>) => ({
|
||||||
chunk_id: r["chunk_id"] as string,
|
chunk_id: r["chunk_id"] as string,
|
||||||
@@ -95,6 +104,6 @@ export async function searchVectorDb(
|
|||||||
date: (r["date"] as string) ?? null,
|
date: (r["date"] as string) ?? null,
|
||||||
tags: (r["tags"] as string[]) ?? [],
|
tags: (r["tags"] as string[]) ?? [],
|
||||||
chunk_index: (r["chunk_index"] as number) ?? 0,
|
chunk_index: (r["chunk_index"] as number) ?? 0,
|
||||||
score: (r["_distance"] as number) ?? 0.0,
|
score: (r["_score"] as number) ?? (r["_distance"] as number) ?? 0.0,
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user