Sprint 0-1: Python indexer, TS plugin scaffolding, and test suite
## What's new **Python indexer (`python/obsidian_rag/`)** — full pipeline from scan to LanceDB: - `config.py` — JSON config loader with cross-platform path resolution - `security.py` — path traversal prevention, HTML stripping, sensitive content detection, dir allow/deny lists - `chunker.py` — section-split for journal entries (date-named files), sliding-window for unstructured notes - `embedder.py` — Ollama `/api/embeddings` client with batched requests and timeout/error handling - `vector_store.py` — LanceDB schema, upsert (merge_insert), delete, search with filters, stats - `indexer.py` — full/sync/reindex pipeline orchestrator with progress yields - `cli.py` — `index | sync | reindex | status` CLI commands **TypeScript plugin (`src/`)** — OpenClaw plugin scaffold: - `utils/` — config loader, TypeScript types, response envelope factory, LanceDB client - `services/` — health state machine (HEALTHY/DEGRADED/UNAVAILABLE), vault watcher with debounce/batching, indexer bridge (subprocess spawner) - `tools/` — 4 tool stubs: search, index, status, memory_store (OpenClaw wiring pending) - `index.ts` — plugin entry point with health probe + vault watcher startup **Config** (`obsidian-rag/config.json`, `openclaw.plugin.json`): - 627 files / 3764 chunks indexed in dev vault **Tests: 76 passing** - Python: 64 pytest tests (chunker, security, vector_store, config) - TypeScript: 12 vitest tests (lancedb client, response envelope) ## Bugs fixed - LanceDB `tags` column filter: `LIKE '%tag%'` → `list_contains(tags, 'tag')` (List<String> column) - LanceDB JS `db.list_tables()` returns `ListTablesResponse` object, not plain array - LanceDB JS result score field: `_score` → `_distance` - TypeScript regex literal with unescaped `/` in path-resolve regex - Python: `create_table_if_not_exists` identity check → name comparison Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
27
src/index.ts
Normal file
27
src/index.ts
Normal file
@@ -0,0 +1,27 @@
|
||||
import { registerTools } from "./tools/index.js";
|
||||
import { loadConfig } from "./utils/config.js";
|
||||
import { createHealthMachine, probeAll } from "./services/health.js";
|
||||
import { VaultWatcher } from "./services/vault-watcher.js";
|
||||
|
||||
/** OpenClaw plugin entry point. */
|
||||
export async function onLoad(): Promise<void> {
|
||||
const config = loadConfig();
|
||||
const health = createHealthMachine(config);
|
||||
|
||||
// Probe dependencies immediately
|
||||
const probe = await probeAll(config);
|
||||
health.transition(probe);
|
||||
|
||||
// Start vault watcher for auto-sync
|
||||
const watcher = new VaultWatcher(config, health);
|
||||
watcher.start();
|
||||
|
||||
// Register all 4 tools
|
||||
await registerTools(config, health);
|
||||
|
||||
console.log("[obsidian-rag] Plugin loaded");
|
||||
}
|
||||
|
||||
export async function onUnload(): Promise<void> {
|
||||
console.log("[obsidian-rag] Plugin unloading");
|
||||
}
|
||||
130
src/services/health.ts
Normal file
130
src/services/health.ts
Normal file
@@ -0,0 +1,130 @@
|
||||
/** Health state machine: HEALTHY / DEGRADED / UNAVAILABLE. */
|
||||
|
||||
import { existsSync, readFileSync } from "fs";
|
||||
import { resolve } from "path";
|
||||
import type { ObsidianRagConfig } from "../utils/config.js";
|
||||
|
||||
/** Overall plugin health; maps 1:1 to tool response `status` values. */
export type HealthState = "healthy" | "degraded" | "unavailable";

/** Full health snapshot returned by the health machine's `get()`. */
export interface HealthStatus {
  /** Derived state: unavailable (index/vault missing), degraded (Ollama down), healthy. */
  state: HealthState;
  /** True when the Ollama server answered the last probe. */
  ollama_up: boolean;
  /** True when the LanceDB index path exists on disk. */
  index_exists: boolean;
  /** True when the configured vault directory exists. */
  vault_exists: boolean;
  /** Document count from the last sync summary (0 when unknown). */
  total_docs: number;
  /** Chunk count from the last sync summary (0 when unknown). */
  total_chunks: number;
  /** Timestamp of the last sync summary, or null when none found. */
  last_sync: string | null;
  /** Currently running index job, or null when idle. */
  active_job: { id: string; mode: string; progress: number } | null;
}
|
||||
|
||||
/** Raw dependency probe result — HealthStatus minus the derived state and job. */
export interface ProbeResult {
  ollama_up: boolean;
  index_exists: boolean;
  vault_exists: boolean;
  total_docs: number;
  total_chunks: number;
  last_sync: string | null;
}
|
||||
|
||||
const REPROBE_INTERVAL_MS = 30_000;
|
||||
|
||||
export function createHealthMachine(_config: ObsidianRagConfig) {
|
||||
let currentState: HealthState = "unavailable";
|
||||
let status: ProbeResult = {
|
||||
ollama_up: false,
|
||||
index_exists: false,
|
||||
vault_exists: false,
|
||||
total_docs: 0,
|
||||
total_chunks: 0,
|
||||
last_sync: null,
|
||||
};
|
||||
let activeJob: { id: string; mode: string; progress: number } | null = null;
|
||||
let reprobeTimer: ReturnType<typeof setInterval> | null = null;
|
||||
|
||||
function transition(probe: ProbeResult): void {
|
||||
status = probe;
|
||||
const prev = currentState;
|
||||
if (!probe.index_exists || !probe.vault_exists) {
|
||||
currentState = "unavailable";
|
||||
} else if (!probe.ollama_up) {
|
||||
currentState = "degraded";
|
||||
} else {
|
||||
currentState = "healthy";
|
||||
}
|
||||
if (prev !== currentState) {
|
||||
console.log(`[obsidian-rag] Health: ${prev} → ${currentState}`);
|
||||
}
|
||||
}
|
||||
|
||||
function get(): HealthStatus {
|
||||
return { state: currentState, ...status, active_job: activeJob };
|
||||
}
|
||||
|
||||
function setActiveJob(job: { id: string; mode: string; progress: number } | null): void {
|
||||
activeJob = job;
|
||||
}
|
||||
|
||||
function startReprobing(fn: () => Promise<ProbeResult>): void {
|
||||
if (reprobeTimer) clearInterval(reprobeTimer);
|
||||
reprobeTimer = setInterval(async () => {
|
||||
const probe = await fn();
|
||||
transition(probe);
|
||||
}, REPROBE_INTERVAL_MS);
|
||||
}
|
||||
|
||||
function stop(): void {
|
||||
if (reprobeTimer) {
|
||||
clearInterval(reprobeTimer);
|
||||
reprobeTimer = null;
|
||||
}
|
||||
}
|
||||
|
||||
return { get, transition, setActiveJob, startReprobing, stop };
|
||||
}
|
||||
|
||||
/**
 * Probes all plugin dependencies in one pass: vault directory, LanceDB
 * index path, the Ollama server, and the latest sync summary on disk.
 * Never throws — missing/corrupt summary files simply leave counters at 0.
 */
export async function probeAll(config: ObsidianRagConfig): Promise<ProbeResult> {
  // Lazy import to avoid a static circular dependency with the utils layer.
  const { resolveVectorDbPath } = await import("../utils/lancedb.js");

  const vaultPath = resolve(process.cwd(), config.vault_path);
  const dbPath = resolveVectorDbPath(config);

  const vaultExists = existsSync(vaultPath);
  const indexExists = existsSync(String(dbPath));
  const ollamaUp = await probeOllama(config.embedding.base_url);

  let totalDocs = 0;
  let totalChunks = 0;
  let lastSync: string | null = null;

  if (indexExists) {
    try {
      // NOTE(review): this looks for sync-result.json NEXT TO the .lance
      // directory, while indexer-bridge.readSyncResult reads it from
      // `.obsidian-rag/` under cwd — confirm both resolve to the same file.
      const syncPath = resolve(dbPath, "..", "sync-result.json");
      if (existsSync(syncPath)) {
        const data = JSON.parse(readFileSync(syncPath, "utf-8"));
        lastSync = data.timestamp ?? null;
        totalDocs = data.indexed_files ?? 0;
        totalChunks = data.total_chunks ?? 0;
      }
    } catch {
      // ignore — a corrupt summary just leaves the counters at zero
    }
  }

  return {
    ollama_up: ollamaUp,
    index_exists: indexExists,
    vault_exists: vaultExists,
    total_docs: totalDocs,
    total_chunks: totalChunks,
    last_sync: lastSync,
  };
}
|
||||
|
||||
async function probeOllama(baseUrl: string): Promise<boolean> {
|
||||
try {
|
||||
const res = await fetch(`${baseUrl}/api/tags`, { signal: AbortSignal.timeout(3000) });
|
||||
return res.ok;
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
120
src/services/indexer-bridge.ts
Normal file
120
src/services/indexer-bridge.ts
Normal file
@@ -0,0 +1,120 @@
|
||||
/** Bridge to the Python indexer CLI — spawns subprocess, tracks job progress. */
|
||||
|
||||
import { spawn } from "child_process";
|
||||
import { readFileSync, existsSync } from "fs";
|
||||
import { resolve } from "path";
|
||||
import type { ObsidianRagConfig } from "../utils/config.js";
|
||||
|
||||
/** Progress/result record for one indexer subprocess run. */
export interface JobStatus {
  /** Unique job id of the form `job-<timestamp>-<random>`. */
  id: string;
  /** CLI mode the job was started with ("index" | "sync" | "reindex"). */
  mode: string;
  /** 0–100, driven by the CLI's JSON progress lines. */
  progress: number;
  status: "running" | "complete" | "failed";
  /** Populated from the CLI's "complete" event when available. */
  indexed_files?: number;
  total_chunks?: number;
  duration_ms?: number;
  /** Per-file failures from the CLI, or a single synthetic entry on spawn/CLI error. */
  errors?: Array<{ file: string; error: string }>;
}
|
||||
|
||||
const runningJobs = new Map<string, JobStatus>();
|
||||
|
||||
export function spawnIndexer(
|
||||
mode: "index" | "sync" | "reindex",
|
||||
config: ObsidianRagConfig,
|
||||
): Promise<JobStatus> {
|
||||
const jobId = `job-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
|
||||
const status: JobStatus = {
|
||||
id: jobId,
|
||||
mode,
|
||||
progress: 0,
|
||||
status: "running",
|
||||
};
|
||||
runningJobs.set(jobId, status);
|
||||
|
||||
return new Promise((resolveJob) => {
|
||||
const pythonCmd = "python";
|
||||
const args = ["-m", "obsidian_rag.cli", mode];
|
||||
|
||||
const child = spawn(pythonCmd, args, {
|
||||
cwd: resolve(process.cwd(), "python"),
|
||||
stdio: ["ignore", "pipe", "pipe"],
|
||||
});
|
||||
|
||||
let stdout = "";
|
||||
let stderr = "";
|
||||
|
||||
child.stdout?.on("data", (chunk: Buffer) => {
|
||||
stdout += chunk.toString();
|
||||
try {
|
||||
const lines = stdout.split("\n").filter(Boolean);
|
||||
for (const line of lines) {
|
||||
try {
|
||||
const obj = JSON.parse(line);
|
||||
if (obj.type === "progress") {
|
||||
const total = obj.total ?? 1;
|
||||
const current = obj.current ?? 0;
|
||||
status.progress = Math.round((current / total) * 100);
|
||||
} else if (obj.type === "complete") {
|
||||
status.status = obj.errors?.length ? "failed" : "complete";
|
||||
status.indexed_files = obj.indexed_files;
|
||||
status.total_chunks = obj.total_chunks;
|
||||
status.duration_ms = obj.duration_ms;
|
||||
status.errors = obj.errors ?? [];
|
||||
status.progress = 100;
|
||||
runningJobs.delete(jobId);
|
||||
resolveJob(status);
|
||||
} else if (obj.type === "error") {
|
||||
status.status = "failed";
|
||||
status.errors = [{ file: "cli", error: obj.error }];
|
||||
runningJobs.delete(jobId);
|
||||
resolveJob(status);
|
||||
}
|
||||
} catch {
|
||||
// Not JSON — ignore partial lines
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
// ignore parse errors
|
||||
}
|
||||
});
|
||||
|
||||
child.stderr?.on("data", (chunk: Buffer) => {
|
||||
stderr += chunk.toString();
|
||||
});
|
||||
|
||||
child.on("close", (code) => {
|
||||
if (status.status === "running") {
|
||||
status.status = code === 0 ? "complete" : "failed";
|
||||
runningJobs.delete(jobId);
|
||||
resolveJob(status);
|
||||
}
|
||||
});
|
||||
|
||||
child.on("error", (err) => {
|
||||
status.status = "failed";
|
||||
status.errors = [{ file: "subprocess", error: err.message }];
|
||||
runningJobs.delete(jobId);
|
||||
resolveJob(status);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
export function getJobStatus(jobId: string): JobStatus | null {
|
||||
return runningJobs.get(jobId) ?? null;
|
||||
}
|
||||
|
||||
export function readSyncResult(config: ObsidianRagConfig): {
|
||||
timestamp: string | null;
|
||||
indexed_files: number;
|
||||
total_chunks: number;
|
||||
errors: Array<{ file: string; error: string }>;
|
||||
} | null {
|
||||
const dataDir = resolve(process.cwd(), ".obsidian-rag");
|
||||
const path = resolve(dataDir, "sync-result.json");
|
||||
if (!existsSync(path)) return null;
|
||||
try {
|
||||
return JSON.parse(readFileSync(path, "utf-8"));
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
81
src/services/vault-watcher.ts
Normal file
81
src/services/vault-watcher.ts
Normal file
@@ -0,0 +1,81 @@
|
||||
/** Vault watcher — chokidar-based file system monitor with debounce + batching. */
|
||||
|
||||
import { watch, FSWatcher } from "chokidar";
|
||||
import type { ObsidianRagConfig } from "../utils/config.js";
|
||||
import type { HealthState } from "./health.js";
|
||||
|
||||
const DEBOUNCE_MS = 2_000;
|
||||
const COLLECT_WINDOW_MS = 5_000;
|
||||
|
||||
export class VaultWatcher {
|
||||
private watcher: FSWatcher | null = null;
|
||||
private debounceTimer: ReturnType<typeof setTimeout> | null = null;
|
||||
private pending = new Set<string>();
|
||||
private collectTimer: ReturnType<typeof setTimeout> | null = null;
|
||||
|
||||
constructor(
|
||||
private config: ObsidianRagConfig,
|
||||
private health: { get: () => { state: HealthState } },
|
||||
) {}
|
||||
|
||||
start(): void {
|
||||
const vaultPath = this.config.vault_path;
|
||||
this.watcher = watch(vaultPath, {
|
||||
persistent: true,
|
||||
ignoreInitial: true,
|
||||
depth: 99,
|
||||
});
|
||||
|
||||
this.watcher.on("add", (p) => this.onEvent(p));
|
||||
this.watcher.on("change", (p) => this.onEvent(p));
|
||||
this.watcher.on("unlink", (p) => this.onEvent(p));
|
||||
}
|
||||
|
||||
private onEvent(filepath: string): void {
|
||||
if (!filepath.endsWith(".md")) return;
|
||||
// Apply deny list check
|
||||
const parts = filepath.replace("\\", "/").split("/");
|
||||
const dir = parts[parts.length - 2] ?? "";
|
||||
if (this.config.indexing.deny_dirs.includes(dir)) return;
|
||||
|
||||
this.pending.add(filepath);
|
||||
this.scheduleFlush();
|
||||
}
|
||||
|
||||
private scheduleFlush(): void {
|
||||
if (this.debounceTimer) clearTimeout(this.debounceTimer);
|
||||
this.debounceTimer = setTimeout(() => {
|
||||
this.flush();
|
||||
}, DEBOUNCE_MS);
|
||||
}
|
||||
|
||||
private flush(): void {
|
||||
if (this.pending.size === 0) return;
|
||||
const files = [...this.pending];
|
||||
this.pending.clear();
|
||||
|
||||
if (this.collectTimer) clearTimeout(this.collectTimer);
|
||||
this.collectTimer = setTimeout(() => {
|
||||
this.triggerSync(files);
|
||||
}, COLLECT_WINDOW_MS);
|
||||
}
|
||||
|
||||
private async triggerSync(_files: string[]): Promise<void> {
|
||||
// Import dynamically to avoid circular issues
|
||||
const { spawnIndexer } = await import("./indexer-bridge.js");
|
||||
const health = this.health.get();
|
||||
if (health.state === "unavailable") {
|
||||
console.log("[obsidian-rag] Skipping sync — index unavailable");
|
||||
return;
|
||||
}
|
||||
console.log(`[obsidian-rag] Triggering sync for ${_files.length} files`);
|
||||
await spawnIndexer("sync", this.config);
|
||||
}
|
||||
|
||||
stop(): void {
|
||||
this.watcher?.close();
|
||||
this.watcher = null;
|
||||
if (this.debounceTimer) clearTimeout(this.debounceTimer);
|
||||
if (this.collectTimer) clearTimeout(this.collectTimer);
|
||||
}
|
||||
}
|
||||
12
src/tools/index.ts
Normal file
12
src/tools/index.ts
Normal file
@@ -0,0 +1,12 @@
|
||||
/** Tool registration — wires all 4 obsidian_rag_* tools into OpenClaw. */
|
||||
|
||||
import type { ObsidianRagConfig } from "../utils/config.js";
|
||||
import type { HealthState } from "../services/health.js";
|
||||
|
||||
/**
 * Registers the four obsidian_rag_* tools with the OpenClaw runtime.
 * Currently a stub: the OpenClaw tool-registration SDK is not yet
 * available, so this only logs. The parameters are accepted now so the
 * call sites don't change once the real wiring lands.
 *
 * @param _config - loaded plugin configuration (unused until wired).
 * @param _health - health machine accessor (unused until wired).
 */
export async function registerTools(
  _config: ObsidianRagConfig,
  _health: { get: () => { state: HealthState } },
): Promise<void> {
  // TODO: Wire into OpenClaw tool registry once SDK is available
  console.log("[obsidian-rag] Tools registered (stub — OpenClaw SDK TBD)");
}
|
||||
27
src/tools/memory.ts
Normal file
27
src/tools/memory.ts
Normal file
@@ -0,0 +1,27 @@
|
||||
/** obsidian_rag_memory_store tool implementation. */
|
||||
|
||||
import type { ResponseEnvelope } from "../utils/types.js";
|
||||
import { makeEnvelope } from "../utils/response.js";
|
||||
|
||||
/** Input to the obsidian_rag_memory_store tool. */
export interface MemoryStoreParams {
  /** Memory key under which the value should be stored. */
  key: string;
  /** Memory value; may contain sensitive vault content. */
  value: string;
  /** Originating note/source the memory was extracted from. */
  source: string;
}
|
||||
|
||||
// In a real OpenClaw integration, this would store to the agent's memory system.
|
||||
// For now, we just acknowledge the store operation.
|
||||
export async function memoryStoreTool(
|
||||
params: MemoryStoreParams,
|
||||
): Promise<ResponseEnvelope<{ stored: boolean; key: string }>> {
|
||||
console.log(`[obsidian-rag] memory_store: ${params.key} = ${params.value} (source: ${params.source})`);
|
||||
|
||||
return makeEnvelope(
|
||||
"healthy",
|
||||
{
|
||||
stored: true,
|
||||
key: params.key,
|
||||
},
|
||||
null,
|
||||
);
|
||||
}
|
||||
44
src/tools/search.ts
Normal file
44
src/tools/search.ts
Normal file
@@ -0,0 +1,44 @@
|
||||
/** obsidian_rag_search tool implementation. */
|
||||
|
||||
import type { ObsidianRagConfig } from "../utils/config.js";
|
||||
import type { ResponseEnvelope } from "../utils/types.js";
|
||||
import type { SearchResult } from "../utils/types.js";
|
||||
import { makeEnvelope } from "../utils/response.js";
|
||||
import { searchVectorDb } from "../utils/lancedb.js";
|
||||
|
||||
/** Input to the obsidian_rag_search tool. */
export interface SearchParams {
  /** Natural-language query to embed and search for. */
  query: string;
  /** Maximum hits to return (defaults to 5). */
  max_results?: number;
  /** Restrict hits to these source directories. */
  directory_filter?: string[];
  /** Inclusive date bounds applied to the chunk's `date` column. */
  date_range?: { from?: string; to?: string };
  /** Only chunks carrying ALL of these tags (filters are AND-ed). */
  tags?: string[];
}
|
||||
|
||||
export async function searchTool(
|
||||
config: ObsidianRagConfig,
|
||||
params: SearchParams,
|
||||
): Promise<ResponseEnvelope<{ results: SearchResult[]; sensitive_detected: boolean } | null>> {
|
||||
try {
|
||||
const results = await searchVectorDb(config, params.query, {
|
||||
max_results: params.max_results ?? 5,
|
||||
directory_filter: params.directory_filter,
|
||||
date_range: params.date_range,
|
||||
tags: params.tags,
|
||||
});
|
||||
|
||||
// TODO: Run sensitive content detection once we have actual results
|
||||
return makeEnvelope(
|
||||
results.length > 0 ? "healthy" : "degraded",
|
||||
{ results, sensitive_detected: false },
|
||||
null,
|
||||
{ query_time_ms: 0, chunks_scanned: results.length },
|
||||
);
|
||||
} catch (err) {
|
||||
return makeEnvelope("degraded", null, {
|
||||
code: "SEARCH_FAILED",
|
||||
message: String(err),
|
||||
recoverable: true,
|
||||
suggestion: "Check if the index exists with obsidian_rag_status",
|
||||
});
|
||||
}
|
||||
}
|
||||
44
src/tools/status.ts
Normal file
44
src/tools/status.ts
Normal file
@@ -0,0 +1,44 @@
|
||||
/** obsidian_rag_status tool implementation. */
|
||||
|
||||
import type { ObsidianRagConfig } from "../utils/config.js";
|
||||
import type { ResponseEnvelope } from "../utils/types.js";
|
||||
import { makeEnvelope } from "../utils/response.js";
|
||||
import { readSyncResult } from "../services/indexer-bridge.js";
|
||||
|
||||
/**
 * obsidian_rag_status tool: reports plugin health derived from the last
 * sync summary on disk plus a live Ollama reachability check.
 * No sync summary at all reads as "unavailable".
 */
export async function statusTool(
  config: ObsidianRagConfig,
): Promise<ResponseEnvelope<{
  plugin_health: string;
  total_docs: number;
  total_chunks: number;
  last_sync: string | null;
  unindexed_files: number;
  ollama_status: string;
  active_job: null;
}>> {
  const sync = readSyncResult(config);
  const ollamaUp = await checkOllama(config);

  return makeEnvelope(
    sync ? "healthy" : "unavailable",
    {
      plugin_health: sync ? "healthy" : "unavailable",
      total_docs: sync?.indexed_files ?? 0,
      total_chunks: sync?.total_chunks ?? 0,
      last_sync: sync?.timestamp ?? null,
      // NOTE(review): hard-coded to 0 — not yet computed from the vault.
      unindexed_files: 0,
      ollama_status: ollamaUp ? "up" : "down",
      // NOTE(review): always null here; running jobs are tracked by the
      // health machine / indexer-bridge but not surfaced yet — confirm.
      active_job: null,
    },
    null,
  );
}
|
||||
|
||||
async function checkOllama(config: ObsidianRagConfig): Promise<boolean> {
|
||||
try {
|
||||
const res = await fetch(`${config.embedding.base_url}/api/tags`, { signal: AbortSignal.timeout(3000) });
|
||||
return res.ok;
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
111
src/utils/config.ts
Normal file
111
src/utils/config.ts
Normal file
@@ -0,0 +1,111 @@
|
||||
/** Config loader + TypeScript interfaces mirroring the Python config. */
|
||||
|
||||
import { readFileSync } from "fs";
|
||||
import { resolve } from "path";
|
||||
|
||||
/** Embedding backend settings. */
export interface EmbeddingConfig {
  /** Provider id (currently "ollama"). */
  provider: string;
  /** Embedding model name, e.g. "mxbai-embed-large". */
  model: string;
  /** Base URL of the embedding server, e.g. http://localhost:11434. */
  base_url: string;
  /** Embedding vector dimensionality. */
  dimensions: number;
  /** Texts sent per embedding request batch. */
  batch_size: number;
}

/** Vector database settings. */
export interface VectorStoreConfig {
  /** Backend id (currently "lancedb"). */
  type: string;
  /** Store path; relative paths resolve against cwd. */
  path: string;
}

/** Chunking and file-selection settings for the indexer. */
export interface IndexingConfig {
  chunk_size: number;
  chunk_overlap: number;
  /** Glob patterns of files to index, e.g. ["*.md"]. */
  file_patterns: string[];
  /** Directory names excluded from indexing and watching. */
  deny_dirs: string[];
  /** Directory allow-list (enforced Python-side — TODO confirm semantics). */
  allow_dirs: string[];
}

/** Privacy and confirmation settings. */
export interface SecurityConfig {
  /** Content categories requiring user confirmation before exposure. */
  require_confirmation_for: string[];
  /** Tags/sections treated as sensitive. */
  sensitive_sections: string[];
  /** When true, no vault data should leave the local machine. */
  local_only: boolean;
}

/** Keyword patterns used to auto-suggest memories, by category. */
export interface MemoryPatterns {
  financial: string[];
  health: string[];
  commitments: string[];
}

/** Memory auto-suggestion settings. */
export interface MemoryConfig {
  auto_suggest: boolean;
  patterns: MemoryPatterns;
}

/** Root plugin configuration, mirroring the Python-side config schema. */
export interface ObsidianRagConfig {
  /** Path to the Obsidian vault root. */
  vault_path: string;
  embedding: EmbeddingConfig;
  vector_store: VectorStoreConfig;
  indexing: IndexingConfig;
  security: SecurityConfig;
  memory: MemoryConfig;
}
|
||||
|
||||
/**
 * Built-in defaults — used when no config file is present and as the
 * merge base for partial user configs (see loadConfig/deepMerge).
 */
function defaults(): ObsidianRagConfig {
  return {
    vault_path: "./KnowledgeVault/Default",
    embedding: {
      provider: "ollama",
      model: "mxbai-embed-large",
      base_url: "http://localhost:11434", // Ollama's default port
      dimensions: 1024,
      batch_size: 64,
    },
    vector_store: {
      type: "lancedb",
      path: "./obsidian-rag/vectors.lance",
    },
    indexing: {
      chunk_size: 500,
      chunk_overlap: 100,
      file_patterns: ["*.md"],
      // App-internal and archive directories never worth indexing.
      deny_dirs: [".obsidian", ".trash", "zzz-Archive", ".git", ".logseq"],
      allow_dirs: [],
    },
    security: {
      require_confirmation_for: ["health", "financial_debt"],
      sensitive_sections: ["#mentalhealth", "#physicalhealth", "#Relations"],
      local_only: true,
    },
    memory: {
      auto_suggest: true,
      patterns: {
        financial: ["owe", "owed", "debt", "paid", "$", "spent", "spend"],
        health: ["#mentalhealth", "#physicalhealth", "medication", "therapy"],
        commitments: ["shopping list", "costco", "amazon", "grocery"],
      },
    },
  };
}
|
||||
|
||||
export function loadConfig(configPath?: string): ObsidianRagConfig {
|
||||
const defaultPath = resolve(process.cwd(), ".obsidian-rag", "config.json");
|
||||
const path = configPath ?? defaultPath;
|
||||
try {
|
||||
const raw = JSON.parse(readFileSync(path, "utf-8"));
|
||||
return deepMerge(defaults(), raw) as ObsidianRagConfig;
|
||||
} catch {
|
||||
return defaults();
|
||||
}
|
||||
}
|
||||
|
||||
function deepMerge<T extends object>(target: T, source: Partial<T>): T {
|
||||
const out = { ...target };
|
||||
for (const [key, val] of Object.entries(source)) {
|
||||
if (val && typeof val === "object" && !Array.isArray(val)) {
|
||||
(out as any)[key] = deepMerge((target as any)[key] ?? {}, val);
|
||||
} else if (val !== undefined) {
|
||||
(out as any)[key] = val;
|
||||
}
|
||||
}
|
||||
return out;
|
||||
}
|
||||
100
src/utils/lancedb.ts
Normal file
100
src/utils/lancedb.ts
Normal file
@@ -0,0 +1,100 @@
|
||||
/** LanceDB client for TypeScript — searches the pre-built index. */
|
||||
|
||||
import { resolve } from "path";
|
||||
import type { ObsidianRagConfig } from "./config.js";
|
||||
import type { SearchResult } from "./types.js";
|
||||
|
||||
export function resolveVectorDbPath(config: ObsidianRagConfig): string {
|
||||
const vsp = config.vector_store.path;
|
||||
// Special case: resolve nested paths where vector_store.path is itself inside data dir
|
||||
if (vsp.startsWith("./obsidian-rag/") || vsp.includes("../")) return resolve(process.cwd(), vsp);
|
||||
if (vsp.startsWith("/") || /^[A-Za-z]:/.test(vsp)) return vsp;
|
||||
return resolve(process.cwd(), vsp);
|
||||
}
|
||||
|
||||
export async function embedQuery(
|
||||
text: string,
|
||||
config: ObsidianRagConfig,
|
||||
): Promise<number[]> {
|
||||
const url = `${config.embedding.base_url}/api/embeddings`;
|
||||
const response = await fetch(url, {
|
||||
method: "POST",
|
||||
headers: { "Content-Type": "application/json" },
|
||||
body: JSON.stringify({ model: config.embedding.model, prompt: text }),
|
||||
signal: AbortSignal.timeout(30_000),
|
||||
});
|
||||
if (!response.ok) {
|
||||
throw new Error(`Embedding request failed: ${response.status} ${response.statusText}`);
|
||||
}
|
||||
const data = (await response.json()) as { embedding?: number[]; embeddings?: number[][] };
|
||||
return data.embedding ?? data.embeddings?.[0] ?? [];
|
||||
}
|
||||
|
||||
export async function searchVectorDb(
|
||||
config: ObsidianRagConfig,
|
||||
query: string,
|
||||
options: {
|
||||
max_results?: number;
|
||||
directory_filter?: string[];
|
||||
date_range?: { from?: string; to?: string };
|
||||
tags?: string[];
|
||||
} = {},
|
||||
): Promise<SearchResult[]> {
|
||||
const dbPath = resolveVectorDbPath(config);
|
||||
|
||||
// Dynamically import LanceDB to avoid issues at import time when not needed
|
||||
const { connect } = await import("@lancedb/lancedb");
|
||||
|
||||
const db = await connect(dbPath);
|
||||
const tableNames = await db.tableNames();
|
||||
if (!tableNames.includes("obsidian_chunks")) {
|
||||
return [];
|
||||
}
|
||||
const table = await db.openTable("obsidian_chunks");
|
||||
|
||||
// Embed the query text
|
||||
const queryVector = await embedQuery(query, config);
|
||||
|
||||
// Build WHERE clause from filters
|
||||
const conditions: string[] = [];
|
||||
if (options.directory_filter && options.directory_filter.length > 0) {
|
||||
const dirs = options.directory_filter.map((d) => `"${d}"`).join(", ");
|
||||
conditions.push(`source_directory IN (${dirs})`);
|
||||
}
|
||||
if (options.date_range) {
|
||||
if (options.date_range.from) {
|
||||
conditions.push(`date >= '${options.date_range.from}'`);
|
||||
}
|
||||
if (options.date_range.to) {
|
||||
conditions.push(`date <= '${options.date_range.to}'`);
|
||||
}
|
||||
}
|
||||
if (options.tags && options.tags.length > 0) {
|
||||
for (const tag of options.tags) {
|
||||
// LanceDB stores tags as List<String>; use array_contains SQL function
|
||||
conditions.push(`array_contains(tags, '${tag}')`);
|
||||
}
|
||||
}
|
||||
const whereClause = conditions.length > 0 ? conditions.join(" AND ") : undefined;
|
||||
|
||||
const limit = options.max_results ?? 5;
|
||||
|
||||
// LanceDB JS SDK: table.vectorSearch(vector).filter(...).limit(...).toArray()
|
||||
let queryBuilder = table.vectorSearch(queryVector);
|
||||
if (whereClause) {
|
||||
queryBuilder = queryBuilder.filter(whereClause);
|
||||
}
|
||||
const rows = await queryBuilder.limit(limit).toArray();
|
||||
|
||||
return rows.map((r: Record<string, unknown>) => ({
|
||||
chunk_id: r["chunk_id"] as string,
|
||||
chunk_text: r["chunk_text"] as string,
|
||||
source_file: r["source_file"] as string,
|
||||
source_directory: r["source_directory"] as string,
|
||||
section: (r["section"] as string) ?? null,
|
||||
date: (r["date"] as string) ?? null,
|
||||
tags: (r["tags"] as string[]) ?? [],
|
||||
chunk_index: (r["chunk_index"] as number) ?? 0,
|
||||
score: (r["_distance"] as number) ?? 0.0,
|
||||
}));
|
||||
}
|
||||
32
src/utils/response.ts
Normal file
32
src/utils/response.ts
Normal file
@@ -0,0 +1,32 @@
|
||||
/** Response envelope factory + error normalization. */
|
||||
|
||||
import type { ResponseEnvelope } from "./types.js";
|
||||
|
||||
export function makeEnvelope<T>(
|
||||
status: "healthy" | "degraded" | "unavailable",
|
||||
data: T | null,
|
||||
error: ResponseEnvelope<T>["error"],
|
||||
meta: Partial<ResponseEnvelope<T>["meta"]> = {},
|
||||
): ResponseEnvelope<T> {
|
||||
return {
|
||||
status,
|
||||
data,
|
||||
error,
|
||||
meta: {
|
||||
query_time_ms: 0,
|
||||
chunks_scanned: 0,
|
||||
index_version: "0.1.0",
|
||||
vault_mtime: new Date().toISOString(),
|
||||
...meta,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
export function errorEnvelope(
|
||||
code: string,
|
||||
message: string,
|
||||
recoverable: boolean,
|
||||
suggestion: string,
|
||||
) {
|
||||
return makeEnvelope<null>("unavailable", null, { code, message, recoverable, suggestion });
|
||||
}
|
||||
32
src/utils/types.ts
Normal file
32
src/utils/types.ts
Normal file
@@ -0,0 +1,32 @@
|
||||
/** Shared TypeScript types across the plugin. */
|
||||
|
||||
/** One search hit returned by searchVectorDb / the search tool. */
export interface SearchResult {
  chunk_id: string;
  chunk_text: string;
  source_file: string;
  source_directory: string;
  /** Section the chunk belongs to, when known. */
  section: string | null;
  /** Entry date for journal chunks, when known. */
  date: string | null;
  tags: string[];
  /** Position of this chunk within its source file. */
  chunk_index: number;
  /** LanceDB `_distance` value — lower means a closer match. */
  score: number;
}

/** Uniform wrapper returned by every obsidian_rag_* tool. */
export interface ResponseEnvelope<T> {
  status: "healthy" | "degraded" | "unavailable";
  /** Payload, or null when the call failed. */
  data: T | null;
  /** Error details, or null on success. */
  error: {
    code: string;
    message: string;
    /** True when retrying might succeed. */
    recoverable: boolean;
    /** Human-readable next step for the caller. */
    suggestion: string;
  } | null;
  meta: {
    query_time_ms: number;
    chunks_scanned: number;
    index_version: string;
    /** ISO timestamp; currently filled with "now" rather than the actual vault mtime. */
    vault_mtime: string;
  };
}

/** Alias for the envelope status values. */
export type ToolStatus = "healthy" | "degraded" | "unavailable";
|
||||
Reference in New Issue
Block a user