feat: add search engine interface with embedding and filtering
This commit is contained in:
47
src/companion/rag/search.py
Normal file
47
src/companion/rag/search.py
Normal file
@@ -0,0 +1,47 @@
|
||||
from typing import Any
|
||||
|
||||
from companion.rag.embedder import OllamaEmbedder
|
||||
from companion.rag.vector_store import VectorStore
|
||||
|
||||
|
||||
class SearchEngine:
    """Query a vector store using embeddings produced by ``OllamaEmbedder``.

    ``search`` embeds the query text, asks the vector store for the closest
    entries and optionally drops hits whose ``_distance`` is too large.

    The hybrid-search settings (``hybrid_search_enabled``, ``keyword_weight``
    and ``semantic_weight``) are stored on the instance but are not read by
    ``search`` in this class.
    """

    def __init__(
        self,
        vector_store: VectorStore,
        embedder_base_url: str,
        embedder_model: str,
        embedder_batch_size: int,
        default_top_k: int,
        similarity_threshold: float,
        hybrid_search_enabled: bool,
        keyword_weight: float = 0.3,
        semantic_weight: float = 0.7,
    ) -> None:
        """Store the search configuration and build the embedder.

        Args:
            vector_store: Store that ``search`` queries for nearest entries.
            embedder_base_url: Passed to ``OllamaEmbedder`` as ``base_url``.
            embedder_model: Passed to ``OllamaEmbedder`` as ``model``.
            embedder_batch_size: Passed to ``OllamaEmbedder`` as
                ``batch_size``.
            default_top_k: Result count used when ``search`` gets no
                ``top_k``.
            similarity_threshold: Upper bound applied to each hit's
                ``_distance``; a value ``<= 0`` disables the filtering.
            hybrid_search_enabled: Stored as-is; not used by ``search``.
            keyword_weight: Stored as-is; not used by ``search``.
            semantic_weight: Stored as-is; not used by ``search``.
        """
        self.vector_store = vector_store
        self.embedder = OllamaEmbedder(
            base_url=embedder_base_url,
            model=embedder_model,
            batch_size=embedder_batch_size,
        )
        self.default_top_k = default_top_k
        self.similarity_threshold = similarity_threshold
        self.hybrid_search_enabled = hybrid_search_enabled
        self.keyword_weight = keyword_weight
        self.semantic_weight = semantic_weight

    @staticmethod
    def _distance_of(hit: dict[str, Any]) -> float:
        """Return the hit's ``_distance``, or infinity when absent or None."""
        distance = hit.get("_distance")
        return float("inf") if distance is None else distance

    def search(
        self,
        query: str,
        top_k: int | None = None,
        filters: dict[str, Any] | None = None,
    ) -> list[dict[str, Any]]:
        """Return the vector-store hits closest to ``query``.

        Args:
            query: Text to embed and search for. An empty or whitespace-only
                query returns ``[]`` without calling the embedder or the
                vector store.
            top_k: Number of hits to request. ``None`` (or any falsy value,
                such as ``0``) falls back to ``default_top_k``.
            filters: Forwarded unchanged to the vector store.

        Returns:
            The hits returned by the vector store. When
            ``similarity_threshold`` is positive, only hits whose
            ``_distance`` is less than or equal to it are kept; hits without
            a usable ``_distance`` are treated as infinitely far away.
        """
        # Nothing meaningful to embed; skip the embedder round trip.
        if not query or not query.strip():
            return []

        k = top_k or self.default_top_k
        query_embedding = self.embedder.embed([query])[0]
        results = self.vector_store.search(query_embedding, top_k=k, filters=filters)

        limit = self.similarity_threshold
        if limit > 0 and results:
            # NOTE: despite its name, the threshold is compared against a
            # distance, so smaller values are stricter.
            results = [hit for hit in results if self._distance_of(hit) <= limit]
        return results
|
||||
Reference in New Issue
Block a user