/** LanceDB client for TypeScript — searches the pre-built index. */

import { resolve } from "path";
import type { ObsidianRagConfig } from "./config.js";
import type { SearchResult } from "./types.js";

/** Name of the LanceDB table that holds the indexed chunks. */
const CHUNKS_TABLE = "obsidian_chunks";

/** Subset of the search options that is translated into a SQL filter. */
interface SearchFilters {
  directory_filter?: string[];
  date_range?: { from?: string; to?: string };
  tags?: string[];
}

/**
 * Resolves the configured vector store location to a filesystem path.
 *
 * Paths starting with "/" or a drive letter ("C:") are returned unchanged,
 * unless they contain "../", in which case they are resolved (and thereby
 * normalized) like every other path. Everything else is resolved against the
 * current working directory.
 *
 * @param config - Configuration providing `vector_store.path`.
 * @returns The path handed to LanceDB's `connect`.
 */
export function resolveVectorDbPath(config: ObsidianRagConfig): string {
  const configured = config.vector_store.path;

  const needsResolving =
    configured.startsWith("./obsidian-rag/") || configured.includes("../");
  const looksAbsolute =
    configured.startsWith("/") || /^[A-Za-z]:/.test(configured);

  if (looksAbsolute && !needsResolving) {
    return configured;
  }
  return resolve(process.cwd(), configured);
}

/**
 * Requests an embedding vector for `text` from the configured embedding
 * service (`POST {base_url}/api/embeddings`).
 *
 * @param text - Text to embed.
 * @param config - Configuration providing `embedding.base_url` and `embedding.model`.
 * @returns The `embedding` field of the response, else the first entry of
 *   `embeddings`, else an empty array when the response carries neither.
 * @throws Error when the service answers with a non-2xx status. Network
 *   failures and the 30 s timeout surface as rejections from `fetch`.
 */
export async function embedQuery(
  text: string,
  config: ObsidianRagConfig,
): Promise<number[]> {
  // Strip trailing slashes so "http://host/" does not yield "//api/embeddings".
  const baseUrl = config.embedding.base_url.replace(/\/+$/, "");
  const url = `${baseUrl}/api/embeddings`;

  const response = await fetch(url, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ model: config.embedding.model, prompt: text }),
    signal: AbortSignal.timeout(30_000),
  });
  if (!response.ok) {
    throw new Error(`Embedding request failed: ${response.status} ${response.statusText}`);
  }

  const data = (await response.json()) as {
    embedding?: number[];
    embeddings?: number[][];
  };
  return data.embedding ?? data.embeddings?.[0] ?? [];
}

/** Renders `value` as a single-quoted SQL string literal, doubling embedded quotes. */
function sqlStringLiteral(value: string): string {
  return `'${value.replace(/'/g, "''")}'`;
}

/**
 * Builds the SQL filter expression for the given filters.
 *
 * All values are emitted as escaped single-quoted string literals;
 * double-quoted tokens would be parsed as identifiers rather than strings.
 *
 * @returns The conditions joined with AND, or `undefined` when no filter applies.
 */
function buildWhereClause(filters: SearchFilters): string | undefined {
  const conditions: string[] = [];

  const directories = filters.directory_filter ?? [];
  if (directories.length > 0) {
    const list = directories.map((dir) => sqlStringLiteral(dir)).join(", ");
    conditions.push(`source_directory IN (${list})`);
  }

  const { from, to } = filters.date_range ?? {};
  if (from) {
    conditions.push(`date >= ${sqlStringLiteral(from)}`);
  }
  if (to) {
    conditions.push(`date <= ${sqlStringLiteral(to)}`);
  }

  // tags is a list column; one array_contains condition per tag means every
  // requested tag must be present on a chunk.
  for (const tag of filters.tags ?? []) {
    conditions.push(`array_contains(tags, ${sqlStringLiteral(tag)})`);
  }

  return conditions.length > 0 ? conditions.join(" AND ") : undefined;
}

/**
 * Searches the pre-built chunk table for `query`.
 *
 * Tries a vector search using an embedding of the query. If that fails for
 * any reason (for example the embedding service is unreachable) or the
 * embedding comes back empty, falls back to a full-text search.
 *
 * @param config - Configuration providing the vector store path and embedding settings.
 * @param query - The user's search text.
 * @param options - Optional result limit and directory / date / tag filters.
 * @returns Matching chunks; an empty array when the chunk table does not exist.
 */
export async function searchVectorDb(
  config: ObsidianRagConfig,
  query: string,
  options: {
    max_results?: number;
    directory_filter?: string[];
    date_range?: { from?: string; to?: string };
    tags?: string[];
  } = {},
): Promise<SearchResult[]> {
  const dbPath = resolveVectorDbPath(config);

  // Dynamically import LanceDB to avoid issues at import time when not needed.
  const { connect } = await import("@lancedb/lancedb");
  const db = await connect(dbPath);

  const tableNames = await db.tableNames();
  if (!tableNames.includes(CHUNKS_TABLE)) {
    return [];
  }
  const table = await db.openTable(CHUNKS_TABLE);

  const whereClause = buildWhereClause(options);
  const limit = options.max_results;

  let rows: Record<string, unknown>[];
  try {
    const queryVector = await embedQuery(query, config);
    if (queryVector.length === 0) {
      // Nothing to search with; take the full-text path below instead of
      // handing an empty vector to LanceDB.
      throw new Error("Embedding response contained no vector");
    }

    let vectorQuery = table.vectorSearch(queryVector);
    if (whereClause) {
      vectorQuery = vectorQuery.filter(whereClause);
    }
    if (limit !== undefined) {
      vectorQuery = vectorQuery.limit(limit);
    }
    rows = await vectorQuery.toArray();
  } catch {
    // Vector search unavailable — fall back to full-text search on the query text.
    let textQuery = table.query().fullTextSearch(query);
    if (whereClause) {
      textQuery = textQuery.filter(whereClause);
    }
    if (limit !== undefined) {
      textQuery = textQuery.limit(limit);
    }
    rows = await textQuery.toArray();
  }

  return rows.map((r: Record<string, unknown>) => ({
    chunk_id: r["chunk_id"] as string,
    chunk_text: r["chunk_text"] as string,
    source_file: r["source_file"] as string,
    source_directory: r["source_directory"] as string,
    section: (r["section"] as string) ?? null,
    date: (r["date"] as string) ?? null,
    tags: (r["tags"] as string[]) ?? [],
    chunk_index: (r["chunk_index"] as number) ?? 0,
    // Prefer the full-text `_score`, otherwise the vector `_distance`.
    score: (r["_score"] as number) ?? (r["_distance"] as number) ?? 0.0,
  }));
}