from typing import Iterable, List, Optional, Tuple

import numpy as np
from rank_bm25 import BM25Okapi


class SparseRetriever:
    """BM25 (Okapi) lexical retriever over an in-memory list of texts.

    Documents and queries are tokenised the same way: lowercased, then split
    on whitespace. A document is identified by its position in the
    ``doc_texts`` iterable given to the constructor.
    """

    def __init__(self, doc_texts: Iterable[str]) -> None:
        """Tokenise ``doc_texts`` and build the BM25 index over them.

        Args:
            doc_texts: Raw document strings, in the order that defines the
                document indices returned by :meth:`retrieve`.
        """
        self.tokenized_corpus = [self._tokenize(text) for text in doc_texts]
        # rank_bm25 computes the average document length by dividing by the
        # corpus size, so an empty corpus cannot be indexed. Keep ``None``
        # in that case and let ``retrieve`` return no hits.
        self.bm25 = (
            BM25Okapi(self.tokenized_corpus) if self.tokenized_corpus else None
        )

    @staticmethod
    def _tokenize(text: str) -> List[str]:
        """Lowercase ``text`` and split it on whitespace."""
        return text.lower().split()

    def retrieve(
        self, query_text: str, top_k: Optional[int] = 1000
    ) -> List[Tuple[int, float]]:
        """Score every document against ``query_text`` and return the best.

        Args:
            query_text: Raw query string; tokenised like the documents.
            top_k: Maximum number of hits to return. ``None`` returns every
                document. Zero or a negative value returns an empty list
                (a negative value would otherwise slice as ``[:-k]`` and
                silently drop only the lowest-ranked documents).

        Returns:
            ``(doc_index, score)`` pairs sorted by descending BM25 score,
            with plain ``int`` / ``float`` values rather than NumPy scalars.
        """
        if self.bm25 is None:
            return []
        if top_k is not None and top_k <= 0:
            return []

        scores = self.bm25.get_scores(self._tokenize(query_text))
        # argsort is ascending; reverse it for best-first order, then cut.
        ranked = np.argsort(scores)[::-1][:top_k]
        return [(int(idx), float(scores[idx])) for idx in ranked]