Sprint 0-1: Python indexer, TS plugin scaffolding, and test suite
## What's new

**Python indexer (`python/obsidian_rag/`)** — full pipeline from scan to LanceDB:
- `config.py` — JSON config loader with cross-platform path resolution
- `security.py` — path traversal prevention, HTML stripping, sensitive content detection, dir allow/deny lists
- `chunker.py` — section-split for journal entries (date-named files), sliding-window for unstructured notes
- `embedder.py` — Ollama `/api/embeddings` client with batched requests and timeout/error handling
- `vector_store.py` — LanceDB schema, upsert (merge_insert), delete, search with filters, stats
- `indexer.py` — full/sync/reindex pipeline orchestrator with progress yields
- `cli.py` — `index | sync | reindex | status` CLI commands

**TypeScript plugin (`src/`)** — OpenClaw plugin scaffold:
- `utils/` — config loader, TypeScript types, response envelope factory, LanceDB client
- `services/` — health state machine (HEALTHY/DEGRADED/UNAVAILABLE), vault watcher with debounce/batching, indexer bridge (subprocess spawner)
- `tools/` — 4 tool stubs: search, index, status, memory_store (OpenClaw wiring pending)
- `index.ts` — plugin entry point with health probe + vault watcher startup

**Config** (`obsidian-rag/config.json`, `openclaw.plugin.json`):
- 627 files / 3764 chunks indexed in dev vault

**Tests: 76 passing**
- Python: 64 pytest tests (chunker, security, vector_store, config)
- TypeScript: 12 vitest tests (lancedb client, response envelope)

## Bugs fixed
- LanceDB `tags` column filter: `LIKE '%tag%'` → `list_contains(tags, 'tag')` (List<String> column)
- LanceDB JS `db.list_tables()` returns `ListTablesResponse` object, not plain array
- LanceDB JS result score field: `_score` → `_distance`
- TypeScript regex literal with unescaped `/` in path-resolve regex
- Python: `create_table_if_not_exists` identity check → name comparison

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
100
src/utils/lancedb.ts
Normal file
100
src/utils/lancedb.ts
Normal file
@@ -0,0 +1,100 @@
|
||||
/** LanceDB client for TypeScript — searches the pre-built index. */
|
||||
|
||||
import { resolve } from "path";
|
||||
import type { ObsidianRagConfig } from "./config.js";
|
||||
import type { SearchResult } from "./types.js";
|
||||
|
||||
/**
 * Determines the filesystem path of the vector database from `config.vector_store.path`.
 *
 * The configured value is returned verbatim only when it looks absolute (leading `/`
 * or a drive-letter prefix such as `C:`) and neither starts with `./obsidian-rag/`
 * nor contains `../`. In every other case it is resolved against the current
 * working directory.
 *
 * @param config - Plugin configuration; only `vector_store.path` is read.
 * @returns The path to hand to the LanceDB connection.
 */
export function resolveVectorDbPath(config: ObsidianRagConfig): string {
  const configuredPath = config.vector_store.path;

  // Paths under ./obsidian-rag/ or containing a parent-directory hop always go through resolve().
  const forcesResolution =
    configuredPath.startsWith("./obsidian-rag/") || configuredPath.includes("../");
  // POSIX root or Windows drive-letter prefix.
  const looksAbsolute = configuredPath.startsWith("/") || /^[A-Za-z]:/.test(configuredPath);

  return looksAbsolute && !forcesResolution
    ? configuredPath
    : resolve(process.cwd(), configuredPath);
}
|
||||
|
||||
/**
 * Requests an embedding vector for `text` from the configured embedding server.
 *
 * Sends a JSON POST to `<base_url>/api/embeddings` containing the configured model
 * and the text as `prompt`. The request is aborted after 30 seconds.
 *
 * @param text - Text to embed.
 * @param config - Plugin configuration; reads `embedding.base_url` and `embedding.model`.
 * @returns The response's `embedding` field, otherwise the first entry of `embeddings`,
 *   otherwise an empty array when neither is present. Callers should treat an empty
 *   array as "no embedding available".
 * @throws Error when the server responds with a non-OK HTTP status.
 */
export async function embedQuery(
  text: string,
  config: ObsidianRagConfig,
): Promise<number[]> {
  // Drop trailing slashes so "http://host:11434/" does not produce "//api/embeddings".
  const baseUrl = config.embedding.base_url.replace(/\/+$/, "");
  const url = `${baseUrl}/api/embeddings`;

  const response = await fetch(url, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ model: config.embedding.model, prompt: text }),
    signal: AbortSignal.timeout(30_000),
  });
  if (!response.ok) {
    throw new Error(`Embedding request failed: ${response.status} ${response.statusText}`);
  }

  // Accept both the single-vector (`embedding`) and batched (`embeddings`) response shapes.
  const data = (await response.json()) as { embedding?: number[]; embeddings?: number[][] };
  return data.embedding ?? data.embeddings?.[0] ?? [];
}
|
||||
|
||||
/** Filter-related subset of the options accepted by {@link searchVectorDb}. */
interface VectorSearchFilters {
  directory_filter?: string[];
  date_range?: { from?: string; to?: string };
  tags?: string[];
}

/** Renders `value` as a single-quoted SQL string literal, doubling embedded single quotes. */
function quoteSqlString(value: string): string {
  return `'${value.replace(/'/g, "''")}'`;
}

/** Combines the supplied filters into one AND-joined SQL predicate; `undefined` when no filter applies. */
function buildWhereClause(filters: VectorSearchFilters): string | undefined {
  const conditions: string[] = [];

  if (filters.directory_filter && filters.directory_filter.length > 0) {
    // String literals must be single-quoted: double quotes denote identifiers in the filter SQL.
    const dirs = filters.directory_filter.map(quoteSqlString).join(", ");
    conditions.push(`source_directory IN (${dirs})`);
  }

  if (filters.date_range?.from) {
    conditions.push(`date >= ${quoteSqlString(filters.date_range.from)}`);
  }
  if (filters.date_range?.to) {
    conditions.push(`date <= ${quoteSqlString(filters.date_range.to)}`);
  }

  // LanceDB stores tags as List<String>; every requested tag must be present.
  for (const tag of filters.tags ?? []) {
    conditions.push(`array_contains(tags, ${quoteSqlString(tag)})`);
  }

  return conditions.length > 0 ? conditions.join(" AND ") : undefined;
}

/**
 * Runs a vector similarity search over the `obsidian_chunks` table.
 *
 * Opens the database at the path resolved from `config`, embeds `query` via
 * `embedQuery`, applies the optional filters and returns the nearest rows.
 *
 * @param config - Plugin configuration (vector store location and embedding settings).
 * @param query - Natural-language query text to embed and search for.
 * @param options - Optional search controls:
 *   - `max_results`: maximum number of rows to return (default 5).
 *   - `directory_filter`: restrict to rows whose `source_directory` is one of these values.
 *   - `date_range`: inclusive `from` / `to` bounds compared against the `date` column.
 *   - `tags`: every listed tag must be contained in the row's `tags` list.
 * @returns Matching chunks; an empty array when the `obsidian_chunks` table does not exist.
 *   `score` carries the row's `_distance` value
 *   (NOTE(review): a distance, so presumably smaller means closer — confirm against callers).
 * @throws Error when the embedding request fails or yields an empty vector.
 */
export async function searchVectorDb(
  config: ObsidianRagConfig,
  query: string,
  options: {
    max_results?: number;
    directory_filter?: string[];
    date_range?: { from?: string; to?: string };
    tags?: string[];
  } = {},
): Promise<SearchResult[]> {
  const dbPath = resolveVectorDbPath(config);

  // Dynamically import LanceDB to avoid issues at import time when not needed
  const { connect } = await import("@lancedb/lancedb");

  const db = await connect(dbPath);
  const tableNames = await db.tableNames();
  if (!tableNames.includes("obsidian_chunks")) {
    // Nothing has been indexed yet; skip the embedding round-trip entirely.
    return [];
  }
  const table = await db.openTable("obsidian_chunks");

  // Embed the query text
  const queryVector = await embedQuery(query, config);
  if (queryVector.length === 0) {
    // embedQuery falls back to [] when the response carries no embedding; fail with a
    // clear message instead of handing a zero-length vector to the vector search.
    throw new Error("Embedding request returned no vector for the query");
  }

  const whereClause = buildWhereClause(options);
  const limit = options.max_results ?? 5;

  // LanceDB JS SDK: table.vectorSearch(vector).filter(...).limit(...).toArray()
  let queryBuilder = table.vectorSearch(queryVector);
  if (whereClause) {
    queryBuilder = queryBuilder.filter(whereClause);
  }
  const rows = await queryBuilder.limit(limit).toArray();

  return rows.map((r: Record<string, unknown>) => ({
    chunk_id: r["chunk_id"] as string,
    chunk_text: r["chunk_text"] as string,
    source_file: r["source_file"] as string,
    source_directory: r["source_directory"] as string,
    section: (r["section"] as string) ?? null,
    date: (r["date"] as string) ?? null,
    tags: (r["tags"] as string[]) ?? [],
    chunk_index: (r["chunk_index"] as number) ?? 0,
    score: (r["_distance"] as number) ?? 0.0,
  }));
}
|
||||
Reference in New Issue
Block a user