- Add required configSchema to openclaw.plugin.json for OpenClaw plugin discovery - Add search command to CLI with --limit, --dir, --from-date, --to-date, --tags filters - Fix get_stats() to properly count unique docs (was returning 0 for non-null values) - Remove hardcoded max_results default of 5; search now returns all results by default - Update INSTALL.md and design docs with correct OpenClaw extension path instructions
116 lines
4.0 KiB
TypeScript
116 lines
4.0 KiB
TypeScript
/** LanceDB client for TypeScript — searches the pre-built index. */
|
|
|
|
import { resolve } from "path";
|
|
import type { ObsidianRagConfig } from "./config.js";
|
|
import type { SearchResult } from "./types.js";
|
|
|
|
/**
 * Work out the filesystem path of the LanceDB store from the configuration.
 *
 * - Paths that start with `./obsidian-rag/` or contain `../` are always resolved
 *   against the current working directory (which also normalizes them).
 * - Any other path that looks absolute (leading `/`, or a drive-letter prefix
 *   such as `C:`) is handed back exactly as configured.
 * - Everything else is treated as relative to the current working directory.
 *
 * @param config - Plugin configuration; only `vector_store.path` is read.
 * @returns The path to open as the vector database.
 */
export function resolveVectorDbPath(config: ObsidianRagConfig): string {
  const configured = config.vector_store.path;

  // Special case: nested data-dir paths and anything with a parent-directory hop.
  const forcesResolution =
    configured.startsWith("./obsidian-rag/") || configured.includes("../");
  const looksAbsolute = configured.startsWith("/") || /^[A-Za-z]:/.test(configured);

  // Absolute paths pass through verbatim unless the special case above applies.
  if (looksAbsolute && !forcesResolution) {
    return configured;
  }
  return resolve(process.cwd(), configured);
}
|
|
|
|
/**
 * Request an embedding vector for `text` from the configured embedding server.
 *
 * Sends a JSON `POST` to `<base_url>/api/embeddings` carrying the configured
 * model name and the text as `prompt`. The request is aborted after 30 seconds.
 * Trailing slashes on `base_url` are stripped first so that a value such as
 * `http://host:11434/` does not produce a `//api/embeddings` request path.
 *
 * @param text - The text to embed.
 * @param config - Plugin configuration; `embedding.base_url` and
 *   `embedding.model` are read.
 * @returns The response's `embedding` field, or else the first entry of its
 *   `embeddings` field, or else an empty array when neither is present.
 * @throws Error when the server answers with a non-OK HTTP status. Network
 *   failures and the timeout abort propagate from `fetch` unchanged.
 */
export async function embedQuery(
  text: string,
  config: ObsidianRagConfig,
): Promise<number[]> {
  // Normalize the base so "http://host/" and "http://host" hit the same endpoint.
  const baseUrl = config.embedding.base_url.replace(/\/+$/, "");
  const url = `${baseUrl}/api/embeddings`;

  const response = await fetch(url, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ model: config.embedding.model, prompt: text }),
    signal: AbortSignal.timeout(30_000),
  });
  if (!response.ok) {
    throw new Error(`Embedding request failed: ${response.status} ${response.statusText}`);
  }

  // Accept both the single-vector (`embedding`) and batched (`embeddings`) shapes.
  const data = (await response.json()) as { embedding?: number[]; embeddings?: number[][] };
  return data.embedding ?? data.embeddings?.[0] ?? [];
}
|
|
|
|
/** Filter options understood by {@link buildSearchFilter}. */
type SearchFilterOptions = {
  directory_filter?: string[];
  date_range?: { from?: string; to?: string };
  tags?: string[];
};

/** Render `value` as a single-quoted SQL string literal, doubling embedded single quotes. */
function quoteSqlString(value: string): string {
  return `'${value.replace(/'/g, "''")}'`;
}

/**
 * Translate the optional search filters into one SQL predicate.
 *
 * Every caller-supplied value is emitted as an escaped string literal so that a
 * directory, date or tag containing a quote cannot break or alter the predicate.
 *
 * @returns The conditions joined with `AND`, or `undefined` when no filter applies.
 */
function buildSearchFilter(options: SearchFilterOptions): string | undefined {
  const { directory_filter: directories, date_range: dateRange, tags } = options;
  const conditions: string[] = [];

  if (directories && directories.length > 0) {
    const dirs = directories.map((d) => quoteSqlString(d)).join(", ");
    conditions.push(`source_directory IN (${dirs})`);
  }
  if (dateRange?.from) {
    conditions.push(`date >= ${quoteSqlString(dateRange.from)}`);
  }
  if (dateRange?.to) {
    conditions.push(`date <= ${quoteSqlString(dateRange.to)}`);
  }
  for (const tag of tags ?? []) {
    // LanceDB stores tags as List<String>; use array_contains SQL function
    conditions.push(`array_contains(tags, ${quoteSqlString(tag)})`);
  }

  return conditions.length > 0 ? conditions.join(" AND ") : undefined;
}

/**
 * Search the `obsidian_chunks` table of the pre-built LanceDB index.
 *
 * A vector search is attempted first, using an embedding of `query` obtained
 * via `embedQuery`. If obtaining the embedding or running the vector search
 * throws for any reason, the function falls back to a full-text search with
 * the raw query string. Errors from the fallback itself propagate to the caller.
 *
 * @param config - Plugin configuration (vector store path, embedding settings).
 * @param query - The user's search text.
 * @param options - Optional restrictions:
 *   - `max_results`: passed to the query builder's `limit` when defined; when
 *     omitted no limit is set here (NOTE(review): LanceDB may apply its own
 *     default limit — confirm against the library documentation).
 *   - `directory_filter`: keep rows whose `source_directory` is one of these.
 *   - `date_range`: inclusive `from` / `to` bounds compared against `date`.
 *   - `tags`: every listed tag must be contained in the row's `tags`.
 * @returns The matching rows mapped to `SearchResult`; an empty array when the
 *   `obsidian_chunks` table does not exist.
 */
export async function searchVectorDb(
  config: ObsidianRagConfig,
  query: string,
  options: {
    max_results?: number;
    directory_filter?: string[];
    date_range?: { from?: string; to?: string };
    tags?: string[];
  } = {},
): Promise<SearchResult[]> {
  const dbPath = resolveVectorDbPath(config);

  // Dynamically import LanceDB to avoid issues at import time when not needed
  const { connect } = await import("@lancedb/lancedb");

  const db = await connect(dbPath);
  const tableNames = await db.tableNames();
  if (!tableNames.includes("obsidian_chunks")) {
    return [];
  }
  const table = await db.openTable("obsidian_chunks");

  // Build WHERE clause from filters
  const whereClause = buildSearchFilter(options);
  const limit = options.max_results;

  // Try vector search first; if Ollama is down embedQuery throws → fallback to FTS.
  // The bare catch also covers failures of the vector search itself.
  let rows: Record<string, unknown>[];
  try {
    const queryVector = await embedQuery(query, config);

    let queryBuilder = table.vectorSearch(queryVector);
    if (whereClause) {
      queryBuilder = queryBuilder.filter(whereClause);
    }
    if (limit !== undefined) {
      queryBuilder = queryBuilder.limit(limit);
    }
    rows = await queryBuilder.toArray();
  } catch {
    // Ollama unavailable — fallback to full-text search on chunk_text (BM25 scoring)
    let ftsBuilder = table.query().fullTextSearch(query);
    if (whereClause) {
      ftsBuilder = ftsBuilder.filter(whereClause);
    }
    if (limit !== undefined) {
      ftsBuilder = ftsBuilder.limit(limit);
    }
    rows = await ftsBuilder.toArray();
  }

  return rows.map((r: Record<string, unknown>) => ({
    chunk_id: r["chunk_id"] as string,
    chunk_text: r["chunk_text"] as string,
    source_file: r["source_file"] as string,
    source_directory: r["source_directory"] as string,
    section: (r["section"] as string) ?? null,
    date: (r["date"] as string) ?? null,
    tags: (r["tags"] as string[]) ?? [],
    chunk_index: (r["chunk_index"] as number) ?? 0,
    // `_score` is preferred when present, otherwise `_distance`, otherwise 0.
    // NOTE(review): these presumably come from the FTS and vector paths
    // respectively and may not be comparable — confirm before ranking across them.
    score: (r["_score"] as number) ?? (r["_distance"] as number) ?? 0.0,
  }));
}
|