Spaces:
Sleeping
Sleeping
| import numpy as np | |
| from rank_bm25 import BM25Okapi | |
class SparseRetriever:
    """BM25 keyword retriever over an in-memory list of document texts.

    Documents and queries are tokenized identically: lower-cased, then split
    on whitespace. Results identify documents by their position in the
    ``doc_texts`` sequence given to the constructor.
    """

    def __init__(self, doc_texts):
        """Tokenize ``doc_texts`` and build the BM25 index.

        Args:
            doc_texts: Iterable of document strings. May be empty, in which
                case no index is built and ``retrieve`` always returns ``[]``.
        """
        self.tokenized_corpus = [text.lower().split() for text in doc_texts]
        # BM25Okapi computes an average document length by dividing by the
        # corpus size, so it cannot be constructed over an empty corpus.
        # Keep ``bm25`` as None in that case instead of crashing.
        self.bm25 = (
            BM25Okapi(self.tokenized_corpus) if self.tokenized_corpus else None
        )

    def retrieve(self, query_text, top_k=1000):
        """Return the best-scoring documents for ``query_text``.

        Args:
            query_text: Query string; lower-cased and whitespace-split the
                same way the corpus was.
            top_k: Maximum number of results. ``None`` returns every
                document; zero or a negative value returns no results.

        Returns:
            A list of ``(doc_index, score)`` tuples sorted by descending
            score. Documents with equal scores are ordered by ascending
            index, so the output is deterministic.
        """
        if self.bm25 is None:
            return []
        # A negative slice bound would silently drop the *lowest*-ranked hits
        # rather than return nothing, so reject non-positive limits up front.
        if top_k is not None and top_k <= 0:
            return []

        tokens = query_text.lower().split()
        scores = np.asarray(self.bm25.get_scores(tokens), dtype=float)
        # Stable sort on the negated scores: descending by score, ties broken
        # by original document order (the default sort leaves tie order
        # unspecified, and BM25 produces many ties at score 0).
        ranked = np.argsort(-scores, kind="stable")[:top_k]
        return [(int(i), float(scores[i])) for i in ranked]