Sprint 0-1: Python indexer, TS plugin scaffolding, and test suite
## What's new

**Python indexer (`python/obsidian_rag/`)** — full pipeline from scan to LanceDB:
- `config.py` — JSON config loader with cross-platform path resolution
- `security.py` — path traversal prevention, HTML stripping, sensitive content detection, dir allow/deny lists
- `chunker.py` — section-split for journal entries (date-named files), sliding-window for unstructured notes
- `embedder.py` — Ollama `/api/embeddings` client with batched requests and timeout/error handling
- `vector_store.py` — LanceDB schema, upsert (merge_insert), delete, search with filters, stats
- `indexer.py` — full/sync/reindex pipeline orchestrator with progress yields
- `cli.py` — `index | sync | reindex | status` CLI commands

**TypeScript plugin (`src/`)** — OpenClaw plugin scaffold:
- `utils/` — config loader, TypeScript types, response envelope factory, LanceDB client
- `services/` — health state machine (HEALTHY/DEGRADED/UNAVAILABLE), vault watcher with debounce/batching, indexer bridge (subprocess spawner)
- `tools/` — 4 tool stubs: search, index, status, memory_store (OpenClaw wiring pending)
- `index.ts` — plugin entry point with health probe + vault watcher startup

**Config** (`obsidian-rag/config.json`, `openclaw.plugin.json`):
- 627 files / 3764 chunks indexed in dev vault

**Tests: 76 passing**
- Python: 64 pytest tests (chunker, security, vector_store, config)
- TypeScript: 12 vitest tests (lancedb client, response envelope)

## Bugs fixed
- LanceDB `tags` column filter: `LIKE '%tag%'` → `list_contains(tags, 'tag')` (`List<String>` column)
- LanceDB JS `db.list_tables()` returns `ListTablesResponse` object, not plain array
- LanceDB JS result score field: `_score` → `_distance`
- TypeScript regex literal with unescaped `/` in path-resolve regex
- Python: `create_table_if_not_exists` identity check → name comparison

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
111
src/utils/config.ts
Normal file
111
src/utils/config.ts
Normal file
@@ -0,0 +1,111 @@
|
||||
/** Config loader + TypeScript interfaces mirroring the Python config. */
|
||||
|
||||
import { readFileSync } from "fs";
|
||||
import { resolve } from "path";
|
||||
|
||||
/**
 * Settings for the embedding backend.
 *
 * Keys are snake_case because this shape mirrors the Python-side config.
 */
export interface EmbeddingConfig {
  /** Embedding backend identifier; the built-in default is "ollama". */
  provider: string;

  /** Model name passed to the provider; the built-in default is "mxbai-embed-large". */
  model: string;

  /** Base URL of the provider's HTTP API; the built-in default is "http://localhost:11434". */
  base_url: string;

  /** Presumably the embedding vector length (built-in default 1024) — confirm against the embedder. */
  dimensions: number;

  /** Presumably the number of texts sent per embedding request (built-in default 64) — confirm against the embedder. */
  batch_size: number;
}
|
||||
|
||||
/** Where and how the vector index is persisted. */
export interface VectorStoreConfig {
  /** Vector store backend identifier; the built-in default is "lancedb". */
  type: string;

  /** Filesystem location of the store; the built-in default is "./obsidian-rag/vectors.lance". */
  path: string;
}
|
||||
|
||||
/** Controls which vault files are indexed and how they are chunked. */
export interface IndexingConfig {
  /** Target chunk size (built-in default 500). Unit is not visible here — TODO confirm against the chunker. */
  chunk_size: number;

  /** Overlap between adjacent chunks (built-in default 100), presumably in the same unit as `chunk_size`. */
  chunk_overlap: number;

  /** File name patterns to index; the built-in default is `["*.md"]`. */
  file_patterns: string[];

  /** Directory names excluded from indexing (e.g. ".obsidian", ".trash", ".git"). */
  deny_dirs: string[];

  /** Directory allow list; empty by default. Presumably empty means "no restriction" — verify against the scanner. */
  allow_dirs: string[];
}
|
||||
|
||||
/** Privacy and safety switches for handling sensitive vault content. */
export interface SecurityConfig {
  /** Category names that need explicit confirmation; defaults are "health" and "financial_debt". */
  require_confirmation_for: string[];

  /** Section/tag markers treated as sensitive (defaults: "#mentalhealth", "#physicalhealth", "#Relations"). */
  sensitive_sections: string[];

  /** Defaults to true. Presumably restricts processing to local services — confirm against the consumers of this flag. */
  local_only: boolean;
}
|
||||
|
||||
/** Keyword lists, grouped by topic, used by the memory feature. */
export interface MemoryPatterns {
  /** Money-related trigger terms (defaults include "owe", "debt", "paid", "$"). */
  financial: string[];

  /** Health-related trigger terms and tags (defaults include "#mentalhealth", "medication", "therapy"). */
  health: string[];

  /** Commitment/errand trigger terms (defaults include "shopping list", "costco", "grocery"). */
  commitments: string[];
}
|
||||
|
||||
/** Settings for the memory feature. */
export interface MemoryConfig {
  /** Defaults to true. Presumably enables proactive memory suggestions — confirm against the memory tool. */
  auto_suggest: boolean;

  /** Per-topic keyword lists. */
  patterns: MemoryPatterns;
}
|
||||
|
||||
/**
 * Root configuration object for the plugin.
 *
 * Keys are snake_case because this shape mirrors the Python-side config.
 */
export interface ObsidianRagConfig {
  /** Location of the Obsidian vault; the built-in default is "./KnowledgeVault/Default". */
  vault_path: string;

  /** Embedding backend settings. */
  embedding: EmbeddingConfig;

  /** Vector index persistence settings. */
  vector_store: VectorStoreConfig;

  /** File selection and chunking settings. */
  indexing: IndexingConfig;

  /** Sensitive-content handling settings. */
  security: SecurityConfig;

  /** Memory feature settings. */
  memory: MemoryConfig;
}
|
||||
|
||||
/**
 * Builds the built-in configuration used when no config file is available
 * and as the base layer that user settings are merged onto.
 *
 * A brand-new object graph is created on every call, so callers may mutate
 * the result without affecting later calls.
 *
 * @returns A complete configuration populated with the built-in values.
 */
function defaults(): ObsidianRagConfig {
  // Each section is assembled on its own, then combined in the same key
  // order as the config interface.
  const embedding = {
    provider: "ollama",
    model: "mxbai-embed-large",
    base_url: "http://localhost:11434",
    dimensions: 1024,
    batch_size: 64,
  };

  const vector_store = {
    type: "lancedb",
    path: "./obsidian-rag/vectors.lance",
  };

  const indexing = {
    chunk_size: 500,
    chunk_overlap: 100,
    file_patterns: ["*.md"],
    deny_dirs: [".obsidian", ".trash", "zzz-Archive", ".git", ".logseq"],
    allow_dirs: [] as string[],
  };

  const security = {
    require_confirmation_for: ["health", "financial_debt"],
    sensitive_sections: ["#mentalhealth", "#physicalhealth", "#Relations"],
    local_only: true,
  };

  const patterns = {
    financial: ["owe", "owed", "debt", "paid", "$", "spent", "spend"],
    health: ["#mentalhealth", "#physicalhealth", "medication", "therapy"],
    commitments: ["shopping list", "costco", "amazon", "grocery"],
  };

  const memory = { auto_suggest: true, patterns };

  return {
    vault_path: "./KnowledgeVault/Default",
    embedding,
    vector_store,
    indexing,
    security,
    memory,
  };
}
|
||||
|
||||
/**
 * Loads the plugin configuration from a JSON file and layers it over the
 * built-in defaults.
 *
 * This function never throws. Any failure results in the built-in defaults:
 * - a missing file is the normal "not configured yet" case and is silent;
 * - an unreadable file, malformed JSON, or a JSON root that is not an object
 *   also falls back to defaults, but emits a warning so a broken config is
 *   not silently ignored.
 *
 * @param configPath Path to the JSON config file. When omitted,
 *   `<cwd>/.obsidian-rag/config.json` is used.
 * @returns The defaults with the file's settings merged on top.
 */
export function loadConfig(configPath?: string): ObsidianRagConfig {
  // NOTE(review): the default directory is ".obsidian-rag" (leading dot) while
  // the default vector store path uses "./obsidian-rag" — confirm this is intended.
  const path = configPath ?? resolve(process.cwd(), ".obsidian-rag", "config.json");

  try {
    const raw: unknown = JSON.parse(readFileSync(path, "utf-8"));

    // Only a JSON object can be merged key-by-key. An array or string root
    // would otherwise leak index keys ("0", "1", ...) into the config.
    if (typeof raw !== "object" || raw === null || Array.isArray(raw)) {
      console.warn(`[obsidian-rag] config at ${path} is not a JSON object; using defaults`);
      return defaults();
    }

    return deepMerge(defaults(), raw as Partial<ObsidianRagConfig>);
  } catch (err: unknown) {
    const code =
      typeof err === "object" && err !== null && "code" in err
        ? (err as { code?: unknown }).code
        : undefined;

    // A missing config file is expected; everything else is worth surfacing.
    if (code !== "ENOENT") {
      const reason = err instanceof Error ? err.message : String(err);
      console.warn(`[obsidian-rag] could not load config from ${path}: ${reason}; using defaults`);
    }
    return defaults();
  }
}
|
||||
|
||||
/** Returns true for non-null, non-array objects — the only values merged key-by-key. */
function isMergeableObject(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}

/**
 * Recursively overlays `source` onto a copy of `target`.
 *
 * Neither argument is mutated: each nesting level of the result is a new
 * object. Merge rules per key of `source`:
 * - object value: merged recursively with the target's value for that key;
 *   if the target's value is not itself an object (missing, primitive, array),
 *   the source object replaces it;
 * - `undefined`: ignored, the target's value is kept;
 * - anything else (primitives, arrays, `null`): replaces the target's value.
 *
 * An own `__proto__` key (which `JSON.parse` can produce) is skipped, because
 * assigning it would replace the result's prototype instead of storing data.
 *
 * @param target Base object supplying default values.
 * @param source Overrides to apply on top of `target`.
 * @returns A new object combining both inputs.
 */
function deepMerge<T extends object>(target: T, source: Partial<T>): T {
  const out = { ...target };

  for (const [key, val] of Object.entries(source)) {
    // Writing "__proto__" goes through the prototype setter rather than
    // creating a data property; never let config data do that.
    if (key === "__proto__") continue;

    if (isMergeableObject(val)) {
      const base: unknown = Reflect.get(target, key);
      // Spreading an array or string base would leak index keys into the result.
      Reflect.set(out, key, deepMerge(isMergeableObject(base) ? base : {}, val));
    } else if (val !== undefined) {
      Reflect.set(out, key, val);
    }
  }

  return out;
}
|
||||
Reference in New Issue
Block a user