Sprint 0-1: Python indexer, TS plugin scaffolding, and test suite

## What's new

**Python indexer (`python/obsidian_rag/`)** — full pipeline from scan to LanceDB:
- `config.py` — JSON config loader with cross-platform path resolution
- `security.py` — path traversal prevention, HTML stripping, sensitive content detection, dir allow/deny lists
- `chunker.py` — section-split for journal entries (date-named files), sliding-window for unstructured notes
- `embedder.py` — Ollama `/api/embeddings` client with batched requests and timeout/error handling
- `vector_store.py` — LanceDB schema, upsert (merge_insert), delete, search with filters, stats
- `indexer.py` — full/sync/reindex pipeline orchestrator with progress yields
- `cli.py` — `index | sync | reindex | status` CLI commands

**TypeScript plugin (`src/`)** — OpenClaw plugin scaffold:
- `utils/` — config loader, TypeScript types, response envelope factory, LanceDB client
- `services/` — health state machine (HEALTHY/DEGRADED/UNAVAILABLE), vault watcher with debounce/batching, indexer bridge (subprocess spawner)
- `tools/` — 4 tool stubs: search, index, status, memory_store (OpenClaw wiring pending)
- `index.ts` — plugin entry point with health probe + vault watcher startup

**Config** (`obsidian-rag/config.json`, `openclaw.plugin.json`):
- 627 files / 3764 chunks indexed in dev vault

**Tests: 76 passing**
- Python: 64 pytest tests (chunker, security, vector_store, config)
- TypeScript: 12 vitest tests (lancedb client, response envelope)

## Bugs fixed

- LanceDB `tags` column filter: `LIKE '%tag%'` → `list_contains(tags, 'tag')` (List<String> column)
- LanceDB JS `db.list_tables()` returns `ListTablesResponse` object, not plain array
- LanceDB JS result score field: `_score` → `_distance`
- TypeScript regex literal with unescaped `/` in path-resolve regex
- Python: `create_table_if_not_exists` identity check → name comparison

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-10 22:56:50 -04:00
parent 18ad47e100
commit 5c281165c7
40 changed files with 5814 additions and 59 deletions

111
src/utils/config.ts Normal file
View File

@@ -0,0 +1,111 @@
/** Config loader + TypeScript interfaces mirroring the Python config. */
import { readFileSync } from "fs";
import { resolve } from "path";
/**
 * Settings for the embedding backend (the `embedding` section of the config).
 * The built-in defaults in `defaults()` point at a local Ollama server.
 */
export interface EmbeddingConfig {
/** Provider identifier; the built-in default is "ollama". */
provider: string;
/** Embedding model name; the built-in default is "mxbai-embed-large". */
model: string;
/** Provider endpoint; the built-in default is "http://localhost:11434". */
base_url: string;
/** Presumably the length of each embedding vector (default 1024) — confirm against the model. */
dimensions: number;
/** Presumably the number of texts sent per embedding request (default 64) — confirm against the indexer. */
batch_size: number;
}
/** Settings for the vector database (the `vector_store` section of the config). */
export interface VectorStoreConfig {
/** Store backend identifier; the built-in default is "lancedb". */
type: string;
/** Location of the store; the built-in default is "./obsidian-rag/vectors.lance". */
path: string;
}
/** Settings for how vault files are selected and chunked (the `indexing` section of the config). */
export interface IndexingConfig {
/** Chunk size (default 500). NOTE(review): the unit (characters/words/tokens) is not visible here — confirm against the chunker. */
chunk_size: number;
/** Overlap between consecutive chunks (default 100), presumably in the same unit as `chunk_size` — confirm. */
chunk_overlap: number;
/** File-name patterns to include; the built-in default is ["*.md"]. */
file_patterns: string[];
/** Directory names on the deny list, presumably skipped during indexing — confirm against the indexer. */
deny_dirs: string[];
/** Directory names on the allow list (default empty). NOTE(review): the meaning of an empty list is not visible here — confirm. */
allow_dirs: string[];
}
/** Privacy-related settings (the `security` section of the config). */
export interface SecurityConfig {
/** Category names that require confirmation; the built-in default is ["health", "financial_debt"]. */
require_confirmation_for: string[];
/** Section markers treated as sensitive; the built-in defaults are hashtag-style strings such as "#mentalhealth". */
sensitive_sections: string[];
/** Defaults to true. NOTE(review): how this flag is enforced is not visible in this file — confirm against its consumers. */
local_only: boolean;
}
/**
 * Keyword lists grouped by category (the `memory.patterns` section of the config).
 * NOTE(review): how the strings are matched (substring, word, case sensitivity) is
 * not visible in this file — confirm against the consumer.
 */
export interface MemoryPatterns {
/** Finance-related keywords, e.g. "owe", "debt", "$" in the built-in defaults. */
financial: string[];
/** Health-related keywords, e.g. "medication", "therapy" in the built-in defaults. */
health: string[];
/** Commitment-related keywords, e.g. "shopping list", "grocery" in the built-in defaults. */
commitments: string[];
}
/** Memory-suggestion settings (the `memory` section of the config). */
export interface MemoryConfig {
/** Defaults to true; presumably toggles automatic memory suggestions — confirm against the consumer. */
auto_suggest: boolean;
/** Keyword lists per category. */
patterns: MemoryPatterns;
}
/**
 * Root configuration object, as returned by `loadConfig`.
 * Property names are snake_case to mirror the Python-side config.
 */
export interface ObsidianRagConfig {
/** Path to the vault; the built-in default is "./KnowledgeVault/Default". */
vault_path: string;
/** Embedding backend settings. */
embedding: EmbeddingConfig;
/** Vector database settings. */
vector_store: VectorStoreConfig;
/** File selection and chunking settings. */
indexing: IndexingConfig;
/** Privacy-related settings. */
security: SecurityConfig;
/** Memory-suggestion settings. */
memory: MemoryConfig;
}
/**
 * Produce the built-in configuration used for any value the config file
 * does not supply.
 *
 * Every call assembles brand-new objects and arrays, so callers may mutate
 * the result without affecting later calls.
 *
 * @returns A fully populated configuration object.
 */
function defaults(): ObsidianRagConfig {
  const embedding = {
    provider: "ollama",
    model: "mxbai-embed-large",
    base_url: "http://localhost:11434",
    dimensions: 1024,
    batch_size: 64,
  };

  const vectorStore = {
    type: "lancedb",
    path: "./obsidian-rag/vectors.lance",
  };

  const indexing = {
    chunk_size: 500,
    chunk_overlap: 100,
    file_patterns: ["*.md"],
    deny_dirs: [".obsidian", ".trash", "zzz-Archive", ".git", ".logseq"],
    allow_dirs: [],
  };

  const security = {
    require_confirmation_for: ["health", "financial_debt"],
    sensitive_sections: ["#mentalhealth", "#physicalhealth", "#Relations"],
    local_only: true,
  };

  const patterns = {
    financial: ["owe", "owed", "debt", "paid", "$", "spent", "spend"],
    health: ["#mentalhealth", "#physicalhealth", "medication", "therapy"],
    commitments: ["shopping list", "costco", "amazon", "grocery"],
  };

  const memory = { auto_suggest: true, patterns };

  // Key order matches the ObsidianRagConfig declaration.
  return {
    vault_path: "./KnowledgeVault/Default",
    embedding,
    vector_store: vectorStore,
    indexing,
    security,
    memory,
  };
}
/**
 * Load the plugin configuration from a JSON file and layer it over the
 * built-in defaults.
 *
 * Loading is deliberately best-effort: if the file cannot be read, does not
 * contain valid JSON, or its top-level value is not a JSON object, the
 * built-in defaults are returned unchanged and no error is raised.
 *
 * @param configPath - Path to the JSON config file. When omitted,
 *   `<cwd>/.obsidian-rag/config.json` is used.
 * @returns The defaults with any values from the file merged on top.
 */
export function loadConfig(configPath?: string): ObsidianRagConfig {
  // NOTE(review): the default directory here is ".obsidian-rag" (leading dot),
  // while the default vector store path in defaults() is under "./obsidian-rag".
  // Confirm which directory name is intended.
  const path = configPath ?? resolve(process.cwd(), ".obsidian-rag", "config.json");
  try {
    const raw: unknown = JSON.parse(readFileSync(path, "utf-8"));
    // Only a JSON object can carry config sections. A top-level array or
    // string would otherwise be merged index-by-index and inject junk keys
    // such as "0" and "1" into the result.
    if (typeof raw !== "object" || raw === null || Array.isArray(raw)) {
      return defaults();
    }
    return deepMerge(defaults(), raw as Partial<ObsidianRagConfig>);
  } catch {
    // Missing/unreadable file or malformed JSON: fall back to the defaults.
    return defaults();
  }
}
/** True for non-null, non-array objects, i.e. values that are merged key-by-key. */
function isMergeableObject(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}

/**
 * Recursively layer `source` on top of `target` and return the result.
 *
 * Neither argument is mutated; the result and every merged nested object are
 * new objects. Merge rules per key of `source`:
 * - a non-array object is merged recursively into the matching `target`
 *   value, or replaces it when that value is not itself a non-array object;
 * - `undefined` is ignored, so the `target` value is kept;
 * - anything else (primitives, `null`, arrays) replaces the `target` value.
 *
 * @param target - Base object supplying values for keys `source` omits.
 * @param source - Overrides to apply on top of `target`.
 * @returns A new object containing the merged values.
 */
function deepMerge<T extends object>(target: T, source: Partial<T>): T {
  const base = target as Record<string, unknown>;
  const out: Record<string, unknown> = { ...base };
  for (const [key, val] of Object.entries(source)) {
    // JSON.parse creates "__proto__" as an ordinary own key; assigning it
    // below would go through the prototype setter and swap the result's
    // prototype. Skip the keys that can reach a prototype.
    if (key === "__proto__" || key === "constructor" || key === "prototype") {
      continue;
    }
    if (isMergeableObject(val)) {
      const existing = base[key];
      // Spreading a string or array target would leak its indices into the
      // merged object, so anything that is not a plain container starts empty.
      const nestedBase: Record<string, unknown> = isMergeableObject(existing) ? existing : {};
      out[key] = deepMerge(nestedBase, val);
    } else if (val !== undefined) {
      out[key] = val;
    }
  }
  return out as T;
}