feat: Phase 5 - citations, source highlighting, and UI polish
This commit is contained in:
@@ -1,9 +1,52 @@
|
||||
from dataclasses import dataclass
|
||||
from typing import Any
|
||||
|
||||
from companion.rag.embedder import OllamaEmbedder
|
||||
from companion.rag.vector_store import VectorStore
|
||||
|
||||
|
||||
@dataclass
|
||||
class SearchResult:
|
||||
"""Structured search result with citation information."""
|
||||
|
||||
id: str
|
||||
text: str
|
||||
source_file: str
|
||||
source_directory: str
|
||||
section: str | None
|
||||
date: str | None
|
||||
tags: list[str]
|
||||
chunk_index: int
|
||||
total_chunks: int
|
||||
distance: float
|
||||
|
||||
@property
|
||||
def citation(self) -> str:
|
||||
"""Generate a citation string for this result."""
|
||||
parts = [self.source_file]
|
||||
if self.section:
|
||||
parts.append(f"#{self.section}")
|
||||
if self.date:
|
||||
parts.append(f"({self.date})")
|
||||
return " - ".join(parts)
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
"""Convert to dictionary for API serialization."""
|
||||
return {
|
||||
"id": self.id,
|
||||
"text": self.text,
|
||||
"source_file": self.source_file,
|
||||
"source_directory": self.source_directory,
|
||||
"section": self.section,
|
||||
"date": self.date,
|
||||
"tags": self.tags,
|
||||
"chunk_index": self.chunk_index,
|
||||
"total_chunks": self.total_chunks,
|
||||
"distance": self.distance,
|
||||
"citation": self.citation,
|
||||
}
|
||||
|
||||
|
||||
class SearchEngine:
|
||||
"""Search engine for semantic search using vector embeddings.
|
||||
|
||||
@@ -50,7 +93,7 @@ class SearchEngine:
|
||||
query: str,
|
||||
top_k: int | None = None,
|
||||
filters: dict[str, Any] | None = None,
|
||||
) -> list[dict[str, Any]]:
|
||||
) -> list[SearchResult]:
|
||||
"""Search for relevant documents using semantic similarity.
|
||||
|
||||
Args:
|
||||
@@ -59,7 +102,7 @@ class SearchEngine:
|
||||
filters: Optional metadata filters to apply
|
||||
|
||||
Returns:
|
||||
List of matching documents with similarity scores
|
||||
List of SearchResult objects with similarity scores
|
||||
|
||||
Raises:
|
||||
RuntimeError: If embedding generation fails
|
||||
@@ -76,14 +119,33 @@ class SearchEngine:
|
||||
except RuntimeError as e:
|
||||
raise RuntimeError(f"Failed to generate embedding for query: {e}") from e
|
||||
|
||||
results = self.vector_store.search(query_embedding, top_k=k, filters=filters)
|
||||
raw_results = self.vector_store.search(
|
||||
query_embedding, top_k=k, filters=filters
|
||||
)
|
||||
|
||||
if self.similarity_threshold > 0 and results:
|
||||
results = [
|
||||
if self.similarity_threshold > 0 and raw_results:
|
||||
raw_results = [
|
||||
r
|
||||
for r in results
|
||||
for r in raw_results
|
||||
if r.get(self._DISTANCE_FIELD, float("inf"))
|
||||
<= self.similarity_threshold
|
||||
]
|
||||
|
||||
# Convert raw results to SearchResult objects
|
||||
results: list[SearchResult] = []
|
||||
for r in raw_results:
|
||||
result = SearchResult(
|
||||
id=r.get("id", ""),
|
||||
text=r.get("text", ""),
|
||||
source_file=r.get("source_file", ""),
|
||||
source_directory=r.get("source_directory", ""),
|
||||
section=r.get("section"),
|
||||
date=r.get("date"),
|
||||
tags=r.get("tags") or [],
|
||||
chunk_index=r.get("chunk_index", 0),
|
||||
total_chunks=r.get("total_chunks", 1),
|
||||
distance=r.get(self._DISTANCE_FIELD, 1.0),
|
||||
)
|
||||
results.append(result)
|
||||
|
||||
return results
|
||||
|
||||
Reference in New Issue
Block a user