Spaces:
Sleeping
Sleeping
| import numpy as np | |
def dense_only_retrieve(query_emb, doc_embeddings, top_k=1000):
    """Rank every document by its dot-product score against the query.

    Args:
        query_emb: Query vector. It is passed to ``np.dot`` together with
            ``doc_embeddings``, so it must be dot-compatible with each row.
        doc_embeddings: Array-like with one document vector per row. The row
            index doubles as the document id in the result.
        top_k: Maximum number of hits to return. ``None`` returns every
            document; zero or a negative value returns no hits.

    Returns:
        A list of ``(doc_id, score)`` tuples (plain ``int`` and ``float``),
        ordered from highest to lowest score.
    """
    # A negative limit would otherwise slice from the end (``[:-k]``) and
    # quietly drop the lowest-scoring tail instead of limiting the result.
    if top_k is not None and top_k <= 0:
        return []
    # An empty corpus given as ``[]`` or a shape-(0,) array would make
    # ``np.dot`` raise a shape-mismatch error; there is nothing to rank.
    if len(doc_embeddings) == 0:
        return []

    sims = np.dot(doc_embeddings, query_emb)
    # argsort is ascending, so reverse it to get best-first order.
    ranked = np.argsort(sims)[::-1][:top_k]
    return [(int(doc_id), float(sims[doc_id])) for doc_id in ranked]
def ivf_retrieve(
    query_emb,
    cluster_manager,
    doc_embeddings,
    top_clusters=5,
    top_k=None,
):
    """Score only the documents that belong to the best-matching clusters.

    The query is first compared against ``cluster_manager.centroids``; the
    documents listed under the highest-scoring clusters are then scored by
    dot product and returned best-first.

    Args:
        query_emb: Query vector, dot-compatible with both the centroids and
            the rows of ``doc_embeddings``.
        cluster_manager: Object exposing ``centroids`` (one centroid per row)
            and ``cluster_to_docs`` (indexable by cluster index, yielding the
            document ids of that cluster).
            NOTE(review): the concrete container types are not visible here;
            confirm against the cluster manager's definition.
        doc_embeddings: Array of document vectors, indexed by document id.
        top_clusters: Number of clusters to probe. ``None`` probes all of
            them; zero or a negative value returns no hits.
        top_k: Optional cap on the number of hits returned. ``None`` (the
            default) returns every candidate, as before.

    Returns:
        A list of ``(doc_id, score)`` tuples (plain ``int`` and ``float``),
        ordered from highest to lowest score. Empty when the probed clusters
        contain no documents.
    """
    # Negative limits would otherwise slice from the end instead of limiting.
    if top_clusters is not None and top_clusters <= 0:
        return []
    if top_k is not None and top_k <= 0:
        return []

    centroid_scores = np.dot(cluster_manager.centroids, query_emb)
    probed_clusters = np.argsort(centroid_scores)[::-1][:top_clusters]

    candidate_ids = []
    for cluster in probed_clusters:
        # Normalise ids to plain ints so the result matches what
        # dense_only_retrieve returns, whatever the container stores.
        candidate_ids.extend(
            int(doc_id) for doc_id in cluster_manager.cluster_to_docs[cluster]
        )
    # A document listed under several probed clusters must be scored and
    # returned once; dict.fromkeys dedupes while keeping first-seen order.
    candidate_ids = list(dict.fromkeys(candidate_ids))
    if not candidate_ids:
        return []

    sims = np.dot(doc_embeddings[candidate_ids], query_emb)
    results = [
        (doc_id, float(score)) for doc_id, score in zip(candidate_ids, sims)
    ]
    results.sort(key=lambda hit: hit[1], reverse=True)
    return results[:top_k]
def rerank_retrieve(
    query_emb,
    sparse_results,
    doc_embeddings,
    top_sparse=1000,
    top_k=None,
):
    """Re-order the leading sparse hits by dense dot-product score.

    Args:
        query_emb: Query vector, dot-compatible with the rows of
            ``doc_embeddings``.
        sparse_results: Sequence of hits whose first element is the document
            id; only that first element is read here.
        doc_embeddings: Array of document vectors, indexed by document id.
        top_sparse: How many leading sparse hits to re-rank. ``None`` uses
            all of them; zero or a negative value returns no hits.
        top_k: Optional cap on the number of hits returned. ``None`` (the
            default) returns every re-ranked candidate, as before.

    Returns:
        A list of ``(doc_id, score)`` tuples (plain ``int`` and ``float``),
        ordered from highest to lowest dense score. Empty when there are no
        sparse hits to re-rank.
    """
    # Negative limits would otherwise slice from the end instead of limiting.
    if top_sparse is not None and top_sparse <= 0:
        return []
    if top_k is not None and top_k <= 0:
        return []

    # Normalise ids to plain ints so the result matches what
    # dense_only_retrieve returns, whatever the sparse stage produced.
    candidates = [int(hit[0]) for hit in sparse_results[:top_sparse]]
    if not candidates:
        return []

    sims = np.dot(doc_embeddings[candidates], query_emb)
    reranked = [
        (doc_id, float(score)) for doc_id, score in zip(candidates, sims)
    ]
    reranked.sort(key=lambda hit: hit[1], reverse=True)
    return reranked[:top_k]