""" retrieval.py ------------ Sanyu RAG — Retrieval Module Loads pre-computed L2-normalised numpy embeddings from the .pkl file and performs retrieval via a simple dot-product similarity search (pure numpy). No FAISS dependency at runtime — avoids FAISS SWIG binary incompatibilities between build environments (Windows vs Linux HF Space). The .pkl file is located at: data/sanyu_knowledge_base.pkl (hardcoded as agreed). """ import pickle import numpy as np from sentence_transformers import SentenceTransformer # Path to the serialised knowledge base — hardcoded as confirmed with Atwine. DEFAULT_PKL_PATH = 'data/sanyu_knowledge_base.pkl' def load_index(pkl_path: str = DEFAULT_PKL_PATH) -> tuple: """ Loads the embeddings and chunk metadata from the .pkl file. Returns: (embeddings, chunks, model_name) - embeddings: np.ndarray float32, shape (n, d), L2-normalised - chunks: list of chunk dicts (text + metadata) - model_name: str, the embedding model used to build the index """ with open(pkl_path, 'rb') as f: payload = pickle.load(f) return payload['embeddings'], payload['chunks'], payload['embedding_model'] def retrieve(query: str, embeddings: np.ndarray, chunks: list, model: SentenceTransformer, top_k: int = 4) -> list: """ Retrieves the top_k most relevant chunks for a given query. Uses a numpy dot product against pre-computed L2-normalised embeddings (equivalent to cosine similarity). No FAISS required at runtime. Args: query: The user's input string. embeddings: np.ndarray of shape (n, d), L2-normalised chunk embeddings. chunks: The list of chunk dicts corresponding to the embeddings. model: A loaded SentenceTransformer model instance. top_k: Number of results to return (default 4). Returns: List of chunk dicts with an added 'similarity_score' key, ordered from most to least relevant. """ query_embedding = model.encode([query], normalize_embeddings=True) query_embedding = np.array(query_embedding, dtype='float32') # shape (1, d) # Cosine similarity via dot product (both sides are L2-normalised) scores = (query_embedding @ embeddings.T).flatten() # shape (n,) # Get top_k indices sorted by descending score top_indices = np.argsort(scores)[::-1][:top_k] results = [] for idx in top_indices: chunk = chunks[int(idx)].copy() chunk['similarity_score'] = float(scores[idx]) results.append(chunk) return results