""" retriever.py Performs cosine-similarity search against the FAISS index. """ from __future__ import annotations import numpy as np import faiss from rag.embedder import VectorIndex DEFAULT_TOP_K = 5 # Chunks with a cosine similarity below this threshold are considered # too dissimilar to the query and are dropped before reaching the LLM. # This prevents low-quality context from polluting the answer. # Range: 0.0 (no filtering) → 1.0 (exact match only). 0.30 is a safe default. MIN_SCORE = 0.30 def retrieve(query: str, vector_index: VectorIndex, top_k: int = DEFAULT_TOP_K) -> list[dict]: """ Embed the query and return top_k most similar chunks above MIN_SCORE. Each result: {"source": str, "text": str, "score": float} Scores are cosine similarities (higher = more relevant). """ if vector_index is None or vector_index.index is None: return [] query_embedding = vector_index.embedder.encode([query], show_progress_bar=False) query_embedding = np.array(query_embedding, dtype="float32") faiss.normalize_L2(query_embedding) # Must match IndexFlatIP cosine index n_results = min(top_k, vector_index.index.ntotal) scores, indices = vector_index.index.search(query_embedding, n_results) results = [] for score, idx in zip(scores[0], indices[0]): if idx == -1: continue if float(score) < MIN_SCORE: continue # Drop chunks below relevance threshold chunk = vector_index.chunks[idx] results.append({ "source": chunk["source"], "text": chunk["text"], "score": float(score), # cosine similarity (0–1 range) }) return results