Files
obsidian-rag/tests/unit/lancedb.test.ts
Santhosh Janardhanan 5c281165c7 Sprint 0-1: Python indexer, TS plugin scaffolding, and test suite
## What's new

**Python indexer (`python/obsidian_rag/`)** — full pipeline from scan to LanceDB:
- `config.py` — JSON config loader with cross-platform path resolution
- `security.py` — path traversal prevention, HTML stripping, sensitive content detection, dir allow/deny lists
- `chunker.py` — section-split for journal entries (date-named files), sliding-window for unstructured notes
- `embedder.py` — Ollama `/api/embeddings` client with batched requests and timeout/error handling
- `vector_store.py` — LanceDB schema, upsert (merge_insert), delete, search with filters, stats
- `indexer.py` — full/sync/reindex pipeline orchestrator with progress yields
- `cli.py` — `index | sync | reindex | status` CLI commands

**TypeScript plugin (`src/`)** — OpenClaw plugin scaffold:
- `utils/` — config loader, TypeScript types, response envelope factory, LanceDB client
- `services/` — health state machine (HEALTHY/DEGRADED/UNAVAILABLE), vault watcher with debounce/batching, indexer bridge (subprocess spawner)
- `tools/` — 4 tool stubs: search, index, status, memory_store (OpenClaw wiring pending)
- `index.ts` — plugin entry point with health probe + vault watcher startup

**Config** (`obsidian-rag/config.json`, `openclaw.plugin.json`):
- 627 files / 3764 chunks indexed in dev vault

**Tests: 76 passing**
- Python: 64 pytest tests (chunker, security, vector_store, config)
- TypeScript: 12 vitest tests (lancedb client, response envelope)

## Bugs fixed

- LanceDB `tags` column filter: `LIKE '%tag%'` → `list_contains(tags, 'tag')` (List<String> column)
- LanceDB JS `db.list_tables()` returns `ListTablesResponse` object, not plain array
- LanceDB JS result score field: `_score` → `_distance`
- TypeScript regex literal with unescaped `/` in path-resolve regex
- Python: `create_table_if_not_exists` identity check → name comparison

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-10 22:56:50 -04:00

156 lines
4.8 KiB
TypeScript

/** Unit tests for the TS LanceDB client. */
import { describe, it, expect, vi, beforeEach } from "vitest";
import { resolve } from "path";
// ------------------------------------------------------------------
// resolveVectorDbPath — test the standalone logic
// ------------------------------------------------------------------
/**
 * Turn the configured vector-store path into the path handed to the DB client.
 *
 * A path that looks absolute (leading "/" or a drive-letter prefix such as
 * "C:") is returned verbatim, unless it contains a "../" segment. Every other
 * path is resolved against the current working directory.
 *
 * @param config - Object carrying `vector_store.path`.
 * @returns The pass-through or cwd-resolved path.
 */
function resolveVectorDbPath(config: {
  vector_store: { path: string };
}): string {
  const { path: configuredPath } = config.vector_store;

  const hasParentSegment = configuredPath.includes("../");
  const looksAbsolute =
    configuredPath.startsWith("/") || /^[A-Za-z]:/.test(configuredPath);

  // Only an absolute path free of "../" is passed through untouched; relative
  // paths (including the "./obsidian-rag/" prefix) and anything containing
  // "../" go through resolve().
  if (looksAbsolute && !hasParentSegment) {
    return configuredPath;
  }
  return resolve(process.cwd(), configuredPath);
}
/**
 * Build a full config fixture for the tests in this file.
 *
 * Every field is a fixed literal except `vector_store.path`, which is taken
 * from the argument. A fresh object graph is created on each call.
 *
 * @param vectorPath - Value stored in `vector_store.path`.
 */
const makeConfig = (vectorPath: string) => {
  const embedding = {
    provider: "ollama",
    model: "mxbai-embed-large",
    base_url: "http://localhost:11434",
    dimensions: 1024,
    batch_size: 64,
  };

  const vector_store = { type: "lancedb", path: vectorPath };

  const indexing = {
    chunk_size: 500,
    chunk_overlap: 100,
    file_patterns: ["*.md"],
    deny_dirs: [],
    allow_dirs: [],
  };

  const security = {
    require_confirmation_for: [],
    sensitive_sections: [],
    local_only: true,
  };

  const memory = {
    auto_suggest: true,
    patterns: { financial: [], health: [], commitments: [] },
  };

  return {
    vault_path: "./KnowledgeVault/Default",
    embedding,
    vector_store,
    indexing,
    security,
    memory,
  };
};
describe("resolveVectorDbPath", () => {
  // Expected values are built with resolve() instead of hard-coded "/" strings
  // so the assertions pin the exact result and do not depend on the platform's
  // path separator.
  const fromCwd = (...segments: string[]): string =>
    resolve(process.cwd(), ...segments);

  it("returns absolute paths unchanged", () => {
    const result = resolveVectorDbPath(makeConfig("/absolute/path/to/db.lance"));
    expect(result).toBe("/absolute/path/to/db.lance");
  });

  it("resolves relative paths against cwd", () => {
    const result = resolveVectorDbPath(makeConfig("./local/path.db"));
    expect(result).toBe(fromCwd("local", "path.db"));
  });

  it("detects nested obsidian-rag path prefix", () => {
    const result = resolveVectorDbPath(makeConfig("./obsidian-rag/vectors.lance"));
    expect(result).toBe(fromCwd("obsidian-rag", "vectors.lance"));
  });

  it("resolves parent traversal paths normally (resolve() strips ..)", () => {
    // resolve() collapses the ".." segment, so "../escape/path.db" lands in the
    // PARENT of cwd (<parent-of-cwd>/escape/path.db), not inside cwd. The
    // function performs no containment check; it simply resolves the path.
    const result = resolveVectorDbPath(makeConfig("../escape/path.db"));
    expect(result).toBe(fromCwd("..", "escape", "path.db"));
  });
});
// ------------------------------------------------------------------
// embedQuery — test the standalone logic
// ------------------------------------------------------------------
/**
 * Request an embedding vector for `text` from the configured embeddings server.
 *
 * Sends a JSON POST of `{ model, prompt }` to `<base_url>/api/embeddings`,
 * aborting after 30 seconds.
 *
 * @param text - Text to embed; sent as the `prompt` field.
 * @param config - Supplies `embedding.base_url` and `embedding.model`.
 * @returns The response's `embedding` if present, otherwise the first entry of
 *   `embeddings`, otherwise an empty array.
 * @throws Error when the response is not ok; the message carries the status
 *   code and status text.
 */
async function embedQuery(
  text: string,
  config: { embedding: { base_url: string; model: string } }
): Promise<number[]> {
  const { base_url: baseUrl, model } = config.embedding;
  const payload = JSON.stringify({ model, prompt: text });

  const response = await fetch(`${baseUrl}/api/embeddings`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: payload,
    signal: AbortSignal.timeout(30_000),
  });

  if (!response.ok) {
    const { status, statusText } = response;
    throw new Error(`Embedding request failed: ${status} ${statusText}`);
  }

  const data = (await response.json()) as {
    embedding?: number[];
    embeddings?: number[][];
  };

  // Prefer the single-vector field; fall back to the first batched vector.
  if (data.embedding != null) {
    return data.embedding;
  }
  return data.embeddings?.[0] ?? [];
}
describe("embedQuery", () => {
  /** Queue `reply` as the resolved value of the next mocked fetch() call. */
  const queueFetchReply = (reply: Partial<Response>): void => {
    (global.fetch as ReturnType<typeof vi.fn>).mockResolvedValueOnce(
      reply as Response
    );
  };

  /** Config fixture shared by every case in this suite. */
  const vectorConfig = () => makeConfig("./vectors.lance");

  beforeEach(() => {
    // Install a fresh mock per test so queued replies and recorded calls
    // never carry over from one case to the next.
    global.fetch = vi.fn();
  });

  it("posts to the correct embeddings endpoint", async () => {
    queueFetchReply({
      ok: true,
      json: async () => ({ embedding: [0.1, 0.2, 0.3] }),
    });

    await embedQuery("hello world", vectorConfig());

    const expectedBody = JSON.stringify({
      model: "mxbai-embed-large",
      prompt: "hello world",
    });
    expect(global.fetch).toHaveBeenCalledWith(
      "http://localhost:11434/api/embeddings",
      expect.objectContaining({ method: "POST", body: expectedBody })
    );
  });

  it("throws on non-ok response", async () => {
    queueFetchReply({
      ok: false,
      status: 500,
      statusText: "Internal Server Error",
    });

    const pending = embedQuery("test", vectorConfig());

    await expect(pending).rejects.toThrow(
      "Embedding request failed: 500 Internal Server Error"
    );
  });

  it("falls back to embeddings[0] when top-level embedding absent", async () => {
    queueFetchReply({
      ok: true,
      json: async () => ({ embeddings: [[0.5, 0.6, 0.7]] }),
    });

    const vector = await embedQuery("test", vectorConfig());

    expect(vector).toEqual([0.5, 0.6, 0.7]);
  });

  it("returns empty array when no embedding in response", async () => {
    queueFetchReply({
      ok: true,
      json: async () => ({}),
    });

    const vector = await embedQuery("test", vectorConfig());

    expect(vector).toEqual([]);
  });
});