import numpy as np


def _score_candidates(query_emb, candidates, doc_embeddings):
    """Score the given candidate documents against the query, best first.

    Shared by ``ivf_retrieve`` and ``rerank_retrieve`` so both rank
    candidates identically.

    Args:
        query_emb: Query embedding; assumed to be a 1-D vector so that each
            dot product is a scalar -- TODO confirm against callers.
        candidates: Non-empty sequence of integer row indices into
            ``doc_embeddings``.
        doc_embeddings: Array indexed by document id along its first axis.

    Returns:
        A list of ``(doc_id, score)`` tuples sorted by descending score.
        Ids are builtin ``int`` and scores builtin ``float``. Candidates with
        equal scores keep their relative input order.
    """
    sims = np.dot(doc_embeddings[candidates], query_emb)
    # Normalise to builtin types so the output matches dense_only_retrieve,
    # even when the ids arrive as NumPy integers.
    scored = [(int(doc), float(sim)) for doc, sim in zip(candidates, sims)]
    # list.sort is stable, including with reverse=True, so ties keep the
    # order in which the candidates were supplied.
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored


def dense_only_retrieve(query_emb, doc_embeddings, top_k=1000):
    """Rank every document by dot-product similarity to the query.

    Args:
        query_emb: Query embedding; assumed 1-D -- TODO confirm.
        doc_embeddings: Array with one row per document (assumed 2-D).
        top_k: Maximum number of results to return.

    Returns:
        Up to ``top_k`` ``(doc_index, score)`` tuples in descending score
        order, where ``doc_index`` is the row index into ``doc_embeddings``.
    """
    sims = np.dot(doc_embeddings, query_emb)
    # argsort is ascending; reverse it to get best-first, then truncate.
    top_indices = np.argsort(sims)[::-1][:top_k]
    return [(int(i), float(sims[i])) for i in top_indices]


def ivf_retrieve(query_emb, cluster_manager, doc_embeddings, top_clusters=5):
    """Retrieve documents from the clusters whose centroids best match the query.

    Args:
        query_emb: Query embedding; assumed 1-D -- TODO confirm.
        cluster_manager: Object exposing ``centroids`` (array with one row per
            cluster) and ``cluster_to_docs`` (indexable by cluster index,
            yielding an iterable of document ids).
        doc_embeddings: Array indexed by document id along its first axis.
        top_clusters: Number of best-matching clusters to probe.

    Returns:
        ``(doc_id, score)`` tuples for every document in the probed clusters,
        sorted by descending score; an empty list when those clusters hold no
        documents.
    """
    c_sims = np.dot(cluster_manager.centroids, query_emb)
    top_c = np.argsort(c_sims)[::-1][:top_clusters]

    # Gather candidates cluster by cluster, best cluster first.
    candidate_docs = []
    for c in top_c:
        candidate_docs.extend(cluster_manager.cluster_to_docs[c])
    if not candidate_docs:
        return []

    return _score_candidates(query_emb, candidate_docs, doc_embeddings)


def rerank_retrieve(query_emb, sparse_results, doc_embeddings, top_sparse=1000):
    """Re-rank the head of a sparse result list by dense similarity.

    Args:
        query_emb: Query embedding; assumed 1-D -- TODO confirm.
        sparse_results: Sequence whose items carry the document id at
            index 0 (presumably ``(doc_id, sparse_score)`` pairs -- verify
            against the caller).
        doc_embeddings: Array indexed by document id along its first axis.
        top_sparse: How many leading sparse results to re-rank.

    Returns:
        ``(doc_id, dense_score)`` tuples sorted by descending dense score;
        an empty list when there are no candidates.
    """
    candidates = [r[0] for r in sparse_results[:top_sparse]]
    if not candidates:
        return []

    return _score_candidates(query_emb, candidates, doc_embeddings)