| import numpy as np | |
| import joblib | |
| from gensim.utils import simple_preprocess | |
| from rank_bm25 import BM25Okapi | |
def bm25_pipeline(
    query,
    bm25_path="Retrieval/savedModels/bm25-1_0.pkl",
    ids_path="Retrieval/savedModels/ids.pkl",
    k=100,
):
    """Return the ids of the ``k`` best BM25 matches for ``query``.

    The BM25 model and the id lookup are loaded from disk on every call,
    the query is tokenized with ``simple_preprocess``, every document is
    scored, and the ids of the top-scoring documents are returned in
    descending score order.

    Args:
        query: Raw query text.
        bm25_path: Path of the joblib-serialized BM25 model.
        ids_path: Path of the joblib-serialized id lookup. It is indexed
            with the position of a document in the score vector
            (presumably a list/array aligned with the BM25 corpus --
            confirm against the code that writes this file).
        k: Maximum number of ids to return.

    Returns:
        A NumPy array with at most ``k`` ids, best match first. Its dtype
        follows the stored ids (e.g. integer ids give an integer array,
        string ids give a string array).
    """
    # NOTE(review): joblib.load unpickles its input, so both paths must
    # point to trusted files only.
    bm25 = joblib.load(bm25_path)
    ids = joblib.load(ids_path)

    scores = np.asarray(bm25.get_scores(simple_preprocess(query)))
    # argsort is ascending; reverse it so the best score comes first, then
    # keep the first k positions.
    top_positions = np.argsort(scores)[::-1][:k]
    if top_positions.size == 0:
        # Nothing to map; hand back the empty index array as before.
        return top_positions

    # Build a new array instead of writing the ids back into the integer
    # index array: that in-place assignment raised for non-numeric ids and
    # silently truncated non-integer ones.
    return np.array([ids[position] for position in top_positions])