# Commit 0116d50 (Anshrathore01): Implement core pipelines and web UI
"""Simple semantic search over review embeddings."""
from __future__ import annotations
from dataclasses import dataclass
from typing import List, Sequence, Tuple
import numpy as np
from sklearn.neighbors import NearestNeighbors
from sentence_transformers import SentenceTransformer
@dataclass
class QueryEngine:
    """Semantic search over a fixed set of documents and their embeddings.

    After the dataclass fields are set, ``__post_init__`` loads the
    sentence-transformer named by ``model_name`` and fits a cosine-metric
    ``NearestNeighbors`` index on ``embeddings``.

    Attributes:
        embeddings: Precomputed document embeddings. Entry ``i`` belongs to
            ``documents[i]`` (neighbour indices returned by the index are used
            to look up documents). Presumably a 2-D array produced by the same
            model as ``model_name`` -- confirm against the caller.
        documents: The texts being searched, aligned with ``embeddings``.
        model_name: Name passed to ``SentenceTransformer`` to encode queries.
        top_k: Maximum number of results returned by :meth:`search`.
    """

    embeddings: np.ndarray
    documents: Sequence[str]
    model_name: str = "sentence-transformers/all-MiniLM-L6-v2"
    top_k: int = 5

    def __post_init__(self) -> None:
        """Validate alignment, load the encoder and build the neighbour index.

        Raises:
            ValueError: If ``documents`` and ``embeddings`` differ in length.
        """
        if len(self.documents) != len(self.embeddings):
            raise ValueError("Embeddings and documents must be aligned")
        self.model = SentenceTransformer(self.model_name)
        self.index = NearestNeighbors(metric="cosine")
        self.index.fit(self.embeddings)

    def search(self, query: str) -> List[Tuple[str, float]]:
        """Return the documents closest to ``query``.

        Args:
            query: Free-text query; it is encoded with ``self.model``.

        Returns:
            A list of ``(document, similarity)`` pairs in the order produced by
            the index, where ``similarity`` is ``1 - cosine distance``. At most
            ``top_k`` pairs are returned, and never more than there are
            documents.
        """
        query_emb = self.model.encode([query])
        # Asking the index for more neighbours than it holds samples raises,
        # so a small corpus must cap the request at its own size.
        n_neighbors = min(self.top_k, len(self.documents))
        distances, indices = self.index.kneighbors(
            query_emb, n_neighbors=n_neighbors
        )
        # Only one query was encoded, so row 0 holds all of its neighbours.
        return [
            (self.documents[idx], float(1 - dist))
            for dist, idx in zip(distances[0], indices[0])
        ]
# Public names exported by a star-import of this module.
__all__ = ["QueryEngine"]