Spaces:
Running
Running
File size: 1,518 Bytes
32a7233 1d0a7da 32a7233 8b208f1 81e5d6f 32a7233 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 | from langchain_text_splitters import CharacterTextSplitter
from langchain_core.documents import Document # Added import for Document
from sentence_transformers import SentenceTransformer, util
import pandas as pd
class rag_text_chooser:
    """Pick the stored question/answer pair that best matches a query.

    Question/answer pairs are read from an Excel sheet: the first column
    supplies the questions and the second column the answers. The questions
    are embedded once at construction time with a sentence-transformers
    model, and each query is matched against them by cosine similarity.
    """

    def __init__(self, data_rag,
                 model_name='sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2'):
        """Load the sheet and pre-compute the question embeddings.

        Args:
            data_rag: Path (or any other input accepted by
                ``pandas.read_excel``) of the question/answer workbook.
            model_name: Name of the sentence-transformers model to load.
                Defaults to the model this class has always used.
        """
        self.data_rag = pd.read_excel(data_rag)
        self.corpus, self.answers = self.get_questions_Answers()
        self.model = SentenceTransformer(model_name)
        # Encoded once here so each lookup only has to embed the query.
        self.corpus_embeddings = self.model.encode(self.corpus)

    def get_questions_Answers(self):
        """Return the questions and answers as two index-aligned lists.

        Rows whose question or answer cell is empty are skipped: an empty
        cell is read as NaN, which cannot be embedded as a question and is
        useless as an answer.

        Returns:
            A ``(questions, answers)`` tuple of lists; ``questions[i]``
            belongs with ``answers[i]``. Both are empty for an empty sheet.
        """
        if self.data_rag.empty:
            return [], []
        # Only the first two columns are used; any further columns are ignored.
        pairs = self.data_rag.iloc[:, :2].dropna()
        questions = pairs.iloc[:, 0].tolist()
        answers = pairs.iloc[:, 1].tolist()
        return questions, answers

    def get_relevant_question(self, query, threshold=0.7):
        """Return the best-matching question/answer pair for ``query``.

        Args:
            query: Text to look up.
            threshold: Minimum cosine similarity the best match must reach
                to be considered related to the query.

        Returns:
            A formatted string containing the matched question, its answer
            and the similarity score, or ``None`` when there is nothing to
            match against or the best score is below ``threshold``.
        """
        if not self.corpus:
            return None

        query_embedding = self.model.encode(query)
        cos_scores = util.cos_sim(query_embedding, self.corpus_embeddings)[0]

        # Only the highest score can pass the threshold, so a full ranking
        # of the corpus is unnecessary.
        best = int(cos_scores.argmax())
        best_score = float(cos_scores[best])
        if best_score < threshold:
            # Even the closest stored question is most likely unrelated.
            return None
        return (f"Questions {self.corpus[best]} \n Answer {self.answers[best]} \n (score: {best_score:.4f})")