feat: Phase 5 - citations, source highlighting, and UI polish

This commit is contained in:
2026-04-13 15:47:47 -04:00
parent e77fa69b31
commit 732555cf55
8 changed files with 381 additions and 103 deletions

View File

@@ -1,9 +1,52 @@
from dataclasses import dataclass
from typing import Any
from companion.rag.embedder import OllamaEmbedder
from companion.rag.vector_store import VectorStore
@dataclass
class SearchResult:
"""Structured search result with citation information."""
id: str
text: str
source_file: str
source_directory: str
section: str | None
date: str | None
tags: list[str]
chunk_index: int
total_chunks: int
distance: float
@property
def citation(self) -> str:
"""Generate a citation string for this result."""
parts = [self.source_file]
if self.section:
parts.append(f"#{self.section}")
if self.date:
parts.append(f"({self.date})")
return " - ".join(parts)
def to_dict(self) -> dict[str, Any]:
"""Convert to dictionary for API serialization."""
return {
"id": self.id,
"text": self.text,
"source_file": self.source_file,
"source_directory": self.source_directory,
"section": self.section,
"date": self.date,
"tags": self.tags,
"chunk_index": self.chunk_index,
"total_chunks": self.total_chunks,
"distance": self.distance,
"citation": self.citation,
}
class SearchEngine:
"""Search engine for semantic search using vector embeddings.
@@ -50,7 +93,7 @@ class SearchEngine:
query: str,
top_k: int | None = None,
filters: dict[str, Any] | None = None,
) -> list[dict[str, Any]]:
) -> list[SearchResult]:
"""Search for relevant documents using semantic similarity.
Args:
@@ -59,7 +102,7 @@ class SearchEngine:
filters: Optional metadata filters to apply
Returns:
List of matching documents with similarity scores
List of SearchResult objects with similarity scores
Raises:
RuntimeError: If embedding generation fails
@@ -76,14 +119,33 @@ class SearchEngine:
except RuntimeError as e:
raise RuntimeError(f"Failed to generate embedding for query: {e}") from e
results = self.vector_store.search(query_embedding, top_k=k, filters=filters)
raw_results = self.vector_store.search(
query_embedding, top_k=k, filters=filters
)
if self.similarity_threshold > 0 and results:
results = [
if self.similarity_threshold > 0 and raw_results:
raw_results = [
r
for r in results
for r in raw_results
if r.get(self._DISTANCE_FIELD, float("inf"))
<= self.similarity_threshold
]
# Convert raw results to SearchResult objects
results: list[SearchResult] = []
for r in raw_results:
result = SearchResult(
id=r.get("id", ""),
text=r.get("text", ""),
source_file=r.get("source_file", ""),
source_directory=r.get("source_directory", ""),
section=r.get("section"),
date=r.get("date"),
tags=r.get("tags") or [],
chunk_index=r.get("chunk_index", 0),
total_chunks=r.get("total_chunks", 1),
distance=r.get(self._DISTANCE_FIELD, 1.0),
)
results.append(result)
return results