Spaces:
Sleeping
Sleeping
| from pinecone import Pinecone | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| import os | |
| from text_embedder_encoder import encoder_model_name | |
class Reranker:
    """Rerank retrieved answers by cosine similarity to a query vector.

    Answer embeddings and their ``answer`` metadata are fetched from a
    Pinecone index; the answers whose embeddings are most similar to the
    query vector are returned, best first.
    """

    def __init__(self, pinecone_api_key=None, answer_index_name=None):
        """Create the Pinecone client and remember the answer index name.

        Args:
            pinecone_api_key: API key handed to ``Pinecone``. When ``None``,
                it is read from the ``pinecone_api_key`` environment variable.
            answer_index_name: Name of the index to fetch answers from. When
                ``None``, it is derived from ``encoder_model_name`` (slashes
                replaced by dashes, lower-cased).

        Raises:
            KeyError: If no key is passed and the ``pinecone_api_key``
                environment variable is not set.
        """
        # Defaults are resolved here instead of in the signature so that
        # merely importing the module does not require the environment
        # variable, and an explicitly passed key is always sufficient.
        if pinecone_api_key is None:
            pinecone_api_key = os.environ["pinecone_api_key"]
        if answer_index_name is None:
            answer_index_name = f"hebrew-dentist-answers-{encoder_model_name.replace('/', '-')}".lower()
        self.pc = Pinecone(api_key=pinecone_api_key)
        self.answer_index_name = answer_index_name

    def rerank(self, query_vector, retrieved_answers_ids, top_n=5, *, min_score=0.7):
        """Return the stored answers most similar to ``query_vector``.

        Args:
            query_vector: Query embedding; compared as a single row against
                the fetched answer embeddings.
            retrieved_answers_ids: Ids of the candidate answers to fetch from
                the answer index.
            top_n: Maximum number of best-scoring candidates to consider.
            min_score: Minimum cosine similarity a candidate must reach to be
                included in the result.

        Returns:
            A list of ``answer`` metadata values ordered from most to least
            similar. The list is empty when there are no candidates, when no
            candidate reaches ``min_score``, or when any error occurs (the
            error is printed rather than raised).
        """
        try:
            # len() rather than truthiness so array-like id containers work;
            # nothing to rank means no reason to call the index at all.
            if len(retrieved_answers_ids) == 0:
                return []

            index = self.pc.Index(self.answer_index_name)
            fetched_vectors = index.fetch(ids=retrieved_answers_ids)['vectors']

            doc_embeddings = []
            answers = []
            for answer_id in retrieved_answers_ids:
                try:
                    record = fetched_vectors[answer_id]
                except KeyError:
                    # Id not present in the fetch response: skip it instead
                    # of discarding every other candidate.
                    continue
                doc_embeddings.append(record['values'])
                answers.append(record['metadata']['answer'])

            if not doc_embeddings:
                return []

            similarity_scores = cosine_similarity([query_vector], doc_embeddings)[0]
            # Sorting (score, position) pairs in reverse keeps the original
            # ordering, including how equal scores are ordered.
            ranked = sorted(zip(similarity_scores, range(len(answers))), reverse=True)
            return [answers[idx] for score, idx in ranked[:top_n] if score >= min_score]
        except Exception as e:
            # Deliberate best-effort: report the problem and return nothing.
            print(f"Error performing rerank: {e}")
            return []