# Custom-LLM-Chat / rag/retriever.py
# Author: Bhaskar Ram
# fix: sentence-aware chunking, score threshold, DOCX tables, streaming error handling, LLM_MODEL env var
# commit 2623b17
"""
retriever.py
Performs cosine-similarity search against the FAISS index.
"""
from __future__ import annotations
import numpy as np
import faiss
from rag.embedder import VectorIndex
DEFAULT_TOP_K = 5
# Chunks with a cosine similarity below this threshold are considered
# too dissimilar to the query and are dropped before reaching the LLM.
# This prevents low-quality context from polluting the answer.
# Range: 0.0 (no filtering) → 1.0 (exact match only). 0.30 is a safe default.
MIN_SCORE = 0.30
def retrieve(query: str, vector_index: VectorIndex, top_k: int = DEFAULT_TOP_K) -> list[dict]:
"""
Embed the query and return top_k most similar chunks above MIN_SCORE.
Each result: {"source": str, "text": str, "score": float}
Scores are cosine similarities (higher = more relevant).
"""
if vector_index is None or vector_index.index is None:
return []
query_embedding = vector_index.embedder.encode([query], show_progress_bar=False)
query_embedding = np.array(query_embedding, dtype="float32")
faiss.normalize_L2(query_embedding) # Must match IndexFlatIP cosine index
n_results = min(top_k, vector_index.index.ntotal)
scores, indices = vector_index.index.search(query_embedding, n_results)
results = []
for score, idx in zip(scores[0], indices[0]):
if idx == -1:
continue
if float(score) < MIN_SCORE:
continue # Drop chunks below relevance threshold
chunk = vector_index.chunks[idx]
results.append({
"source": chunk["source"],
"text": chunk["text"],
"score": float(score), # cosine similarity (0–1 range)
})
return results