Spaces:
Sleeping
Sleeping
| import faiss, json | |
| from script.preprocessing_text import Preprocessor | |
| from rank_bm25 import BM25Okapi | |
| import numpy as np | |
class KnowledgeBase:
    """Retrieval front-end combining a BM25 index and a FAISS vector index.

    The BM25 model is built from a JSON corpus file and the vector index is
    read from disk with ``faiss.read_index``; both happen at construction.
    """

    # Shared default so instances created without ``__init__`` (or by
    # subclasses that override it) still work; the real preprocessor is
    # created lazily on the first BM25 query and then reused.
    _preprocessor = None

    def __init__(self, faiss_path: str, preprocessing_path: str) -> None:
        """Load both indexes.

        Args:
            faiss_path: Path to a serialized FAISS index.
            preprocessing_path: Path to a JSON file holding the BM25 corpus
                (presumably a list of tokenized documents -- confirm against
                the script that produces it).
        """
        self.BM25_model = BM25Okapi(self._load(preprocessing_path))
        self.vector_base = faiss.read_index(faiss_path)

    def _load(self, path: str):
        """Return the parsed content of the JSON file at *path*."""
        # Binary mode lets ``json.load`` detect the UTF encoding itself
        # instead of depending on the platform's default text encoding.
        with open(path, 'rb') as file:
            return json.load(file)

    def search_by_BM25(self, query: str, k: int = 5) -> list:
        """Return the indices of the *k* best BM25 matches for *query*.

        Args:
            query: Raw query text; it is run through
                ``Preprocessor.preprocessing_text`` before scoring.
            k: Maximum number of document indices to return.

        Returns:
            A list of document indices ordered by descending BM25 score.
            It is shorter than *k* when the corpus has fewer documents and
            empty when *k* is not positive.
        """
        if k <= 0:
            # A negative k would otherwise slice "all but the last |k|".
            return []

        if self._preprocessor is None:
            # NOTE(review): reuses one Preprocessor across queries instead of
            # building one per call; assumes it keeps no per-query state.
            self._preprocessor = Preprocessor()

        prep_query = self._preprocessor.preprocessing_text(query)
        doc_scores = self.BM25_model.get_scores(prep_query)
        # Negate so that argsort's ascending order yields best-first.
        ranked = np.argsort(-doc_scores)
        return ranked[:k].tolist()

    def search_by_embedding(self, embedding, k: int = 5):
        """Return the indices of the *k* nearest vectors in the FAISS index.

        Args:
            embedding: A single query vector (1-D) or a batch of query
                vectors (2-D). Any array-like is accepted and converted to
                a contiguous float32 array before being handed to FAISS.
            k: Number of neighbours to retrieve per query vector.

        Returns:
            The index array returned by ``vector_base.search`` -- one row per
            query vector, *k* columns. A 1-D input yields a single row.
        """
        queries = np.ascontiguousarray(embedding, dtype=np.float32)
        if queries.ndim == 1:
            # FAISS searches batches; promote a lone vector to a 1-row batch.
            queries = queries.reshape(1, -1)
        _, indexes = self.vector_base.search(queries, k)
        return indexes