Spaces:
Running
Running
Bhaskar Ram
fix: sentence-aware chunking, score threshold, DOCX tables, streaming error handling, LLM_MODEL env var
2623b17 | """ | |
| retriever.py | |
| Performs cosine-similarity search against the FAISS index. | |
| """ | |
| from __future__ import annotations | |
| import numpy as np | |
| import faiss | |
| from rag.embedder import VectorIndex | |
# Number of chunks returned to the caller when top_k is not specified.
DEFAULT_TOP_K = 5

# Chunks with a cosine similarity below this threshold are considered
# too dissimilar to the query and are dropped before reaching the LLM.
# This prevents low-quality context from polluting the answer.
# Range: 0.0 (no filtering) → 1.0 (exact match only). 0.30 is a safe default.
MIN_SCORE = 0.30
def retrieve(query: str, vector_index: VectorIndex, top_k: int = DEFAULT_TOP_K) -> list[dict]:
    """
    Embed the query and return the top_k most similar chunks above MIN_SCORE.

    Each result dict has the shape:
        {"source": str, "text": str, "score": float}
    Scores are cosine similarities (higher = more relevant).

    Returns an empty list when the index is missing/empty or top_k <= 0.
    """
    if vector_index is None or vector_index.index is None:
        return []

    # Guard before calling FAISS: index.search() requires k >= 1, so an
    # empty index or a non-positive top_k must short-circuit here instead
    # of raising inside FAISS.
    n_results = min(top_k, vector_index.index.ntotal)
    if n_results <= 0:
        return []

    query_embedding = np.asarray(
        vector_index.embedder.encode([query], show_progress_bar=False),
        dtype="float32",
    )
    # Normalization must match the IndexFlatIP build step so inner product
    # equals cosine similarity.
    faiss.normalize_L2(query_embedding)

    scores, indices = vector_index.index.search(query_embedding, n_results)

    results = []
    for score, idx in zip(scores[0], indices[0]):
        if idx == -1:
            # FAISS pads with -1 when fewer than k vectors are available.
            continue
        if float(score) < MIN_SCORE:
            continue  # Drop chunks below relevance threshold
        chunk = vector_index.chunks[int(idx)]
        results.append({
            "source": chunk["source"],
            "text": chunk["text"],
            "score": float(score),  # cosine similarity (0–1 range)
        })
    return results