## What's new

**Python indexer (`python/obsidian_rag/`)** — full pipeline from scan to LanceDB:

- `config.py` — JSON config loader with cross-platform path resolution
- `security.py` — path traversal prevention, HTML stripping, sensitive content detection, dir allow/deny lists
- `chunker.py` — section-split for journal entries (date-named files), sliding-window for unstructured notes
- `embedder.py` — Ollama `/api/embeddings` client with batched requests and timeout/error handling
- `vector_store.py` — LanceDB schema, upsert (merge_insert), delete, search with filters, stats
- `indexer.py` — full/sync/reindex pipeline orchestrator with progress yields
- `cli.py` — `index | sync | reindex | status` CLI commands

**TypeScript plugin (`src/`)** — OpenClaw plugin scaffold:

- `utils/` — config loader, TypeScript types, response envelope factory, LanceDB client
- `services/` — health state machine (HEALTHY/DEGRADED/UNAVAILABLE), vault watcher with debounce/batching, indexer bridge (subprocess spawner)
- `tools/` — 4 tool stubs: search, index, status, memory_store (OpenClaw wiring pending)
- `index.ts` — plugin entry point with health probe + vault watcher startup

**Config** (`obsidian-rag/config.json`, `openclaw.plugin.json`):

- 627 files / 3764 chunks indexed in dev vault

**Tests: 76 passing**

- Python: 64 pytest tests (chunker, security, vector_store, config)
- TypeScript: 12 vitest tests (lancedb client, response envelope)

## Bugs fixed

- LanceDB `tags` column filter: `LIKE '%tag%'` → `list_contains(tags, 'tag')` (List<String> column)
- LanceDB JS `db.list_tables()` returns `ListTablesResponse` object, not plain array
- LanceDB JS result score field: `_score` → `_distance`
- TypeScript regex literal with unescaped `/` in path-resolve regex
- Python: `create_table_if_not_exists` identity check → name comparison

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
156 lines
4.8 KiB
TypeScript
156 lines
4.8 KiB
TypeScript
/** Unit tests for the TS LanceDB client. */
|
|
|
|
import { describe, it, expect, vi, beforeEach } from "vitest";
|
|
import { resolve } from "path";
|
|
|
|
// ------------------------------------------------------------------
|
|
// resolveVectorDbPath — test the standalone logic
|
|
// ------------------------------------------------------------------
|
|
|
|
/**
 * Resolve the configured vector-store path to the path handed to the DB.
 *
 * - Absolute-looking paths (leading `/`, or a Windows drive prefix such as
 *   `C:`) are returned verbatim, unless they contain a `../` segment.
 * - Every other path, including `./obsidian-rag/...` and anything containing
 *   `../`, is resolved against the current working directory, which also
 *   normalises `..` segments.
 *
 * @param config - Object exposing `vector_store.path`.
 * @returns The path to use for the vector store.
 */
function resolveVectorDbPath(config: {
  vector_store: { path: string };
}): string {
  const configuredPath = config.vector_store.path;
  const looksAbsolute =
    configuredPath.startsWith("/") || /^[A-Za-z]:/.test(configuredPath);

  // Only absolute paths without parent traversal are passed through as-is;
  // a "./obsidian-rag/" prefix can never be absolute, so it needs no
  // dedicated branch and falls through to resolve() like any relative path.
  if (looksAbsolute && !configuredPath.includes("../")) {
    return configuredPath;
  }
  return resolve(process.cwd(), configuredPath);
}
|
|
|
|
/**
 * Build a complete config fixture for the tests in this file.
 *
 * Every field is a fixed literal except `vector_store.path`, which is taken
 * from the argument. A fresh object is returned on every call.
 *
 * @param vectorPath - Value placed in `vector_store.path`.
 */
const makeConfig = (vectorPath: string) => ({
  vault_path: "./KnowledgeVault/Default",
  embedding: {
    provider: "ollama",
    model: "mxbai-embed-large",
    base_url: "http://localhost:11434",
    dimensions: 1024,
    batch_size: 64,
  },
  vector_store: { type: "lancedb", path: vectorPath },
  indexing: {
    chunk_size: 500,
    chunk_overlap: 100,
    file_patterns: ["*.md"],
    deny_dirs: [],
    allow_dirs: [],
  },
  security: {
    require_confirmation_for: [],
    sensitive_sections: [],
    local_only: true,
  },
  memory: {
    auto_suggest: true,
    patterns: { financial: [], health: [], commitments: [] },
  },
});
|
|
|
|
// Expected values are built with resolve() so the assertions are exact and do
// not depend on the platform's path separator.
describe("resolveVectorDbPath", () => {
  it("returns absolute paths unchanged", () => {
    const result = resolveVectorDbPath(makeConfig("/absolute/path/to/db.lance"));
    expect(result).toBe("/absolute/path/to/db.lance");
  });

  it("resolves relative paths against cwd", () => {
    const result = resolveVectorDbPath(makeConfig("./local/path.db"));
    expect(result).toBe(resolve(process.cwd(), "local/path.db"));
  });

  it("detects nested obsidian-rag path prefix", () => {
    const result = resolveVectorDbPath(makeConfig("./obsidian-rag/vectors.lance"));
    expect(result).toBe(resolve(process.cwd(), "obsidian-rag/vectors.lance"));
  });

  it("resolves parent traversal paths normally (resolve() strips ..)", () => {
    // resolve() folds the leading ".." into the path, so the result points
    // at <parent of cwd>/escape/path.db rather than keeping a literal "..".
    const result = resolveVectorDbPath(makeConfig("../escape/path.db"));
    expect(result).toBe(resolve(process.cwd(), "..", "escape", "path.db"));
  });
});
|
|
|
|
// ------------------------------------------------------------------
|
|
// embedQuery — test the standalone logic
|
|
// ------------------------------------------------------------------
|
|
|
|
/**
 * Request an embedding vector for `text` from the configured embeddings
 * endpoint (`<base_url>/api/embeddings`).
 *
 * Standalone version of the logic under test, driven here with a mocked
 * global `fetch`.
 *
 * @param text - Text sent as the `prompt` field of the JSON body.
 * @param config - Supplies `embedding.base_url` and `embedding.model`.
 * @returns The response's `embedding`; otherwise the first entry of
 *   `embeddings`; otherwise an empty array.
 * @throws Error when the HTTP response is not ok; the message carries the
 *   status code and status text.
 */
async function embedQuery(
  text: string,
  config: { embedding: { base_url: string; model: string } }
): Promise<number[]> {
  // Strip trailing slashes so a base URL such as "http://host/" does not
  // yield "http://host//api/embeddings".
  const baseUrl = config.embedding.base_url.replace(/\/+$/, "");
  const url = `${baseUrl}/api/embeddings`;

  const response = await fetch(url, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ model: config.embedding.model, prompt: text }),
    // Abort the request if it has not completed within 30 seconds.
    signal: AbortSignal.timeout(30_000),
  });

  if (!response.ok) {
    throw new Error(
      `Embedding request failed: ${response.status} ${response.statusText}`
    );
  }

  // Accept either a single `embedding` or a batched `embeddings` payload.
  const data = (await response.json()) as {
    embedding?: number[];
    embeddings?: number[][];
  };
  return data.embedding ?? data.embeddings?.[0] ?? [];
}
|
|
|
|
describe("embedQuery", () => {
  /** Queue `response` as the result of the next call to the mocked fetch. */
  const queueFetchResponse = (response: Partial<Response>): void => {
    (global.fetch as ReturnType<typeof vi.fn>).mockResolvedValueOnce(
      response as Response
    );
  };

  // Replace the global fetch with a fresh mock before every test so call
  // history and queued responses never carry over between tests.
  beforeEach(() => {
    global.fetch = vi.fn();
  });

  it("posts to the correct embeddings endpoint", async () => {
    queueFetchResponse({
      ok: true,
      json: async () => ({ embedding: [0.1, 0.2, 0.3] }),
    });

    const config = makeConfig("./vectors.lance");
    const result = await embedQuery("hello world", config);

    expect(global.fetch).toHaveBeenCalledTimes(1);
    expect(global.fetch).toHaveBeenCalledWith(
      "http://localhost:11434/api/embeddings",
      expect.objectContaining({
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          model: "mxbai-embed-large",
          prompt: "hello world",
        }),
        signal: expect.any(AbortSignal),
      })
    );
    // The top-level `embedding` field is returned as-is.
    expect(result).toEqual([0.1, 0.2, 0.3]);
  });

  it("throws on non-ok response", async () => {
    queueFetchResponse({
      ok: false,
      status: 500,
      statusText: "Internal Server Error",
    });

    await expect(
      embedQuery("test", makeConfig("./vectors.lance"))
    ).rejects.toThrow("Embedding request failed: 500 Internal Server Error");
  });

  it("falls back to embeddings[0] when top-level embedding absent", async () => {
    queueFetchResponse({
      ok: true,
      json: async () => ({ embeddings: [[0.5, 0.6, 0.7]] }),
    });

    const result = await embedQuery("test", makeConfig("./vectors.lance"));
    expect(result).toEqual([0.5, 0.6, 0.7]);
  });

  it("returns empty array when no embedding in response", async () => {
    queueFetchResponse({
      ok: true,
      json: async () => ({}),
    });

    const result = await embedQuery("test", makeConfig("./vectors.lance"));
    expect(result).toEqual([]);
  });
});