# CodeBuddyAI/agent/rag/rag.py
# TahaFawzyElshrif
# uploaded files
# 8b208f1
from langchain_text_splitters import CharacterTextSplitter
from langchain_core.documents import Document # Added import for Document
from sentence_transformers import SentenceTransformer, util
import pandas as pd
class rag_text_chooser:
    """Pick the stored question/answer pair closest in meaning to a query.

    The pairs are read from an Excel sheet: the first column holds the
    questions and the second column the answers. Every question is embedded
    once at construction time, so a lookup only has to embed the query.
    """

    # Sentence-embedding model loaded when the caller does not name one.
    DEFAULT_MODEL_NAME = 'sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2'
    # Lowest cosine similarity that is still reported as a match.
    DEFAULT_MIN_SCORE = 0.7

    def __init__(self, data_rag, model_name=DEFAULT_MODEL_NAME):
        """Load the sheet, the embedding model and the question embeddings.

        Args:
            data_rag: Whatever ``pandas.read_excel`` accepts as its first
                argument (for example a path to the workbook).
            model_name: Identifier handed to ``SentenceTransformer``.
        """
        self.data_rag = pd.read_excel(data_rag)
        self.corpus, self.answers = self.get_questions_Answers()
        self.model = SentenceTransformer(model_name)
        self.corpus_embeddings = self.model.encode(self.corpus)

    def get_questions_Answers(self):
        """Return ``(questions, answers)`` as two parallel lists.

        Questions are taken from the first column of ``self.data_rag`` and
        answers from the second; any further columns are ignored.

        Raises:
            IndexError: If the sheet has rows but fewer than two columns.
        """
        table = self.data_rag
        if len(table) == 0:
            # No rows: same result a row-by-row walk would produce.
            return [], []
        # Positional column access avoids materialising a Series per row.
        questions = table.iloc[:, 0].tolist()
        answers = table.iloc[:, 1].tolist()
        return questions, answers

    def get_relevant_question(self, query, threshold=DEFAULT_MIN_SCORE):
        """Return the best-matching question/answer pair for ``query``.

        Args:
            query: Text to look up.
            threshold: Minimum cosine similarity the best candidate must
                reach to be reported.

        Returns:
            A formatted string containing the matched question, its answer
            and the similarity score, or ``None`` when no questions are
            stored or the best score is below ``threshold``.
        """
        if not self.corpus:
            # Nothing to compare against, so skip the model entirely.
            return None

        query_embedding = self.model.encode(query)
        # Row 0 holds the similarity of the single query to every question.
        cos_scores = util.cos_sim(query_embedding, self.corpus_embeddings)[0]

        # Only the highest score can decide the outcome: if it misses the
        # threshold, every lower-ranked candidate misses it as well.
        best_idx = int(cos_scores.argmax())
        best_score = cos_scores[best_idx]
        if best_score < threshold:
            return None
        return (f"Questions {self.corpus[best_idx]} \n Answer {self.answers[best_idx]} \n (score: {best_score:.4f})")