Sprint 0-1: Python indexer, TS plugin scaffolding, and test suite
## What's new

**Python indexer (`python/obsidian_rag/`)** — full pipeline from scan to LanceDB:

- `config.py` — JSON config loader with cross-platform path resolution
- `security.py` — path traversal prevention, HTML stripping, sensitive content detection, dir allow/deny lists
- `chunker.py` — section-split for journal entries (date-named files), sliding-window for unstructured notes
- `embedder.py` — Ollama `/api/embeddings` client with batched requests and timeout/error handling
- `vector_store.py` — LanceDB schema, upsert (merge_insert), delete, search with filters, stats
- `indexer.py` — full/sync/reindex pipeline orchestrator with progress yields
- `cli.py` — `index | sync | reindex | status` CLI commands

**TypeScript plugin (`src/`)** — OpenClaw plugin scaffold:

- `utils/` — config loader, TypeScript types, response envelope factory, LanceDB client
- `services/` — health state machine (HEALTHY/DEGRADED/UNAVAILABLE), vault watcher with debounce/batching, indexer bridge (subprocess spawner)
- `tools/` — 4 tool stubs: search, index, status, memory_store (OpenClaw wiring pending)
- `index.ts` — plugin entry point with health probe + vault watcher startup

**Config** (`obsidian-rag/config.json`, `openclaw.plugin.json`):

- 627 files / 3764 chunks indexed in dev vault

**Tests: 76 passing**

- Python: 64 pytest tests (chunker, security, vector_store, config)
- TypeScript: 12 vitest tests (lancedb client, response envelope)

## Bugs fixed

- LanceDB `tags` column filter: `LIKE '%tag%'` → `list_contains(tags, 'tag')` (`List<String>` column)
- LanceDB JS `db.list_tables()` returns `ListTablesResponse` object, not plain array
- LanceDB JS result score field: `_score` → `_distance`
- TypeScript regex literal with unescaped `/` in path-resolve regex
- Python: `create_table_if_not_exists` identity check → name comparison

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
12
src/tools/index.ts
Normal file
12
src/tools/index.ts
Normal file
@@ -0,0 +1,12 @@
|
||||
/** Tool registration — wires all 4 obsidian_rag_* tools into OpenClaw. */
|
||||
|
||||
import type { ObsidianRagConfig } from "../utils/config.js";
|
||||
import type { HealthState } from "../services/health.js";
|
||||
|
||||
/**
 * Placeholder for registering the obsidian_rag_* tools with OpenClaw.
 *
 * Neither argument is read yet; the function only writes a notice to the
 * console so the startup sequence can be observed.
 *
 * @param _config - Plugin configuration (currently unused).
 * @param _health - Accessor for the current health state (currently unused).
 * @returns A promise that resolves after the notice has been logged.
 */
export async function registerTools(
  _config: ObsidianRagConfig,
  _health: { get: () => { state: HealthState } },
): Promise<void> {
  // TODO: Wire into OpenClaw tool registry once SDK is available
  const stubNotice = "[obsidian-rag] Tools registered (stub — OpenClaw SDK TBD)";
  console.log(stubNotice);
}
|
||||
27
src/tools/memory.ts
Normal file
27
src/tools/memory.ts
Normal file
@@ -0,0 +1,27 @@
|
||||
/** obsidian_rag_memory_store tool implementation. */
|
||||
|
||||
import type { ResponseEnvelope } from "../utils/types.js";
|
||||
import { makeEnvelope } from "../utils/response.js";
|
||||
|
||||
/** Input accepted by `memoryStoreTool`. */
export interface MemoryStoreParams {
  /** Identifier of the entry; logged and echoed back in the response data. */
  key: string;

  /** Content associated with `key`; currently only written to the log. */
  value: string;

  /** Origin label for the entry; currently only written to the log. */
  source: string;
}
|
||||
|
||||
/**
 * Acknowledges a memory-store request.
 *
 * In a real OpenClaw integration this would persist the entry in the agent's
 * memory system. For now nothing is stored: the request is logged and a
 * "healthy" envelope reporting `stored: true` is returned.
 *
 * @param params - Key, value and source of the entry to acknowledge.
 * @returns An envelope whose data echoes the key with `stored: true`.
 */
export async function memoryStoreTool(
  params: MemoryStoreParams,
): Promise<ResponseEnvelope<{ stored: boolean; key: string }>> {
  const { key, value, source } = params;
  console.log(`[obsidian-rag] memory_store: ${key} = ${value} (source: ${source})`);

  const acknowledgement = { stored: true, key };
  return makeEnvelope("healthy", acknowledgement, null);
}
|
||||
44
src/tools/search.ts
Normal file
44
src/tools/search.ts
Normal file
@@ -0,0 +1,44 @@
|
||||
/** obsidian_rag_search tool implementation. */
|
||||
|
||||
import type { ObsidianRagConfig } from "../utils/config.js";
|
||||
import type { ResponseEnvelope } from "../utils/types.js";
|
||||
import type { SearchResult } from "../utils/types.js";
|
||||
import { makeEnvelope } from "../utils/response.js";
|
||||
import { searchVectorDb } from "../utils/lancedb.js";
|
||||
|
||||
/** Input accepted by `searchTool`. */
export interface SearchParams {
  /** Query text handed to the vector search. */
  query: string;

  /** Upper bound on returned results; `searchTool` falls back to 5 when omitted. */
  max_results?: number;

  /** Forwarded unchanged to the vector search as its directory filter. */
  directory_filter?: string[];

  /**
   * Forwarded unchanged to the vector search as its date filter.
   * NOTE(review): the expected string format of `from`/`to` is not visible here — confirm.
   */
  date_range?: {
    from?: string;
    to?: string;
  };

  /** Forwarded unchanged to the vector search as its tag filter. */
  tags?: string[];
}
|
||||
|
||||
/**
 * Runs a vector search and wraps the outcome in a response envelope.
 *
 * On success the envelope status is "healthy" when at least one result came
 * back and "degraded" when the result list is empty. Any error thrown by the
 * search is caught and reported as a recoverable `SEARCH_FAILED` error in a
 * "degraded" envelope with `null` data; this function does not rethrow.
 *
 * @param config - Plugin configuration passed through to the vector search.
 * @param params - Query text plus optional filters; `max_results` defaults to 5.
 * @returns An envelope carrying the results, or `null` data with error details.
 */
export async function searchTool(
  config: ObsidianRagConfig,
  params: SearchParams,
): Promise<ResponseEnvelope<{ results: SearchResult[]; sensitive_detected: boolean } | null>> {
  // Captured before the search so query_time_ms reflects real elapsed time
  // instead of a hard-coded 0.
  const startedAt = Date.now();

  try {
    const results = await searchVectorDb(config, params.query, {
      max_results: params.max_results ?? 5,
      directory_filter: params.directory_filter,
      date_range: params.date_range,
      tags: params.tags,
    });

    // TODO: Run sensitive content detection once we have actual results
    return makeEnvelope(
      results.length > 0 ? "healthy" : "degraded",
      { results, sensitive_detected: false },
      null,
      // chunks_scanned mirrors the number of returned results; the count of
      // chunks actually scanned is not available at this level.
      { query_time_ms: Date.now() - startedAt, chunks_scanned: results.length },
    );
  } catch (err) {
    return makeEnvelope("degraded", null, {
      code: "SEARCH_FAILED",
      message: String(err),
      recoverable: true,
      suggestion: "Check if the index exists with obsidian_rag_status",
    });
  }
}
|
||||
44
src/tools/status.ts
Normal file
44
src/tools/status.ts
Normal file
@@ -0,0 +1,44 @@
|
||||
/** obsidian_rag_status tool implementation. */
|
||||
|
||||
import type { ObsidianRagConfig } from "../utils/config.js";
|
||||
import type { ResponseEnvelope } from "../utils/types.js";
|
||||
import { makeEnvelope } from "../utils/response.js";
|
||||
import { readSyncResult } from "../services/indexer-bridge.js";
|
||||
|
||||
/**
 * Builds the status report for the plugin.
 *
 * Health is derived solely from whether a sync result can be read: "healthy"
 * when one is present, "unavailable" otherwise. Ollama reachability is
 * reported separately in `ollama_status` and does not affect the health value.
 * `unindexed_files` and `active_job` are fixed placeholders (0 and null).
 *
 * @param config - Plugin configuration used to read the sync result and probe Ollama.
 * @returns An envelope containing the status fields.
 */
export async function statusTool(
  config: ObsidianRagConfig,
): Promise<ResponseEnvelope<{
  plugin_health: string;
  total_docs: number;
  total_chunks: number;
  last_sync: string | null;
  unindexed_files: number;
  ollama_status: string;
  active_job: null;
}>> {
  const lastSync = readSyncResult(config);
  const ollamaReachable = await checkOllama(config);

  // Same value is used for the envelope status and the plugin_health field.
  const health = lastSync ? "healthy" : "unavailable";
  const ollamaStatus = ollamaReachable ? "up" : "down";

  return makeEnvelope(
    health,
    {
      plugin_health: health,
      total_docs: lastSync?.indexed_files ?? 0,
      total_chunks: lastSync?.total_chunks ?? 0,
      last_sync: lastSync?.timestamp ?? null,
      unindexed_files: 0,
      ollama_status: ollamaStatus,
      active_job: null,
    },
    null,
  );
}
|
||||
|
||||
/**
 * Probes the configured embedding server's `/api/tags` endpoint.
 *
 * The request is aborted after 3000 ms. Any failure — including a thrown
 * error while building the request — is treated as "not reachable".
 *
 * @param config - Plugin configuration; `embedding.base_url` is the server root.
 * @returns `true` when the response status is OK, `false` otherwise.
 */
async function checkOllama(config: ObsidianRagConfig): Promise<boolean> {
  let reachable = false;

  try {
    // Built inside the try so a malformed config is reported as "down" too.
    const tagsUrl = `${config.embedding.base_url}/api/tags`;
    const timeoutSignal = AbortSignal.timeout(3000);
    const response = await fetch(tagsUrl, { signal: timeoutSignal });
    reachable = response.ok;
  } catch {
    // Deliberate best-effort probe: errors leave `reachable` as false.
  }

  return reachable;
}
|
||||
Reference in New Issue
Block a user