Spaces:

taha454
/

CodeBuddyAI

Running

TahaFawzyElshrif

uploaded files

8b208f1 about 2 months ago

1.52 kB

	from langchain_text_splitters import CharacterTextSplitter
	from langchain_core.documents import Document # Added import for Document
	from sentence_transformers import SentenceTransformer, util
	import pandas as pd

	class rag_text_chooser:
	    """Pick the stored question/answer pair that best matches a query.

	    The question/answer pairs are read from an Excel sheet: the first column
	    holds the questions and the second column the answers. Every question is
	    embedded once at construction time; each query is then embedded and
	    compared against those embeddings with cosine similarity.
	    """

	    # Sentence-embedding model loaded when the caller does not name one.
	    DEFAULT_MODEL_NAME = 'sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2'
	    # Minimum cosine similarity for the best candidate to count as a match.
	    DEFAULT_THRESHOLD = .7

	    def __init__(self, data_rag, model_name=DEFAULT_MODEL_NAME):
	        """Load the spreadsheet and pre-compute the question embeddings.

	        Args:
	            data_rag: Anything ``pandas.read_excel`` accepts (path, buffer, ...).
	            model_name: Name or path handed to ``SentenceTransformer``.
	        """
	        self.data_rag = pd.read_excel(data_rag)

	        self.corpus, self.answers = self.get_questions_Answers()
	        self.model = SentenceTransformer(model_name)
	        # Nothing to embed for an empty sheet; get_relevant_question checks
	        # the corpus before it ever touches the embeddings.
	        self.corpus_embeddings = (
	            self.model.encode(self.corpus) if self.corpus else None
	        )

	    def get_questions_Answers(self):
	        """Return ``(questions, answers)`` as two index-aligned lists.

	        Questions come from the first column of ``self.data_rag`` and answers
	        from the second. Rows in which either cell is missing are skipped so
	        that a blank cell is never embedded or returned as an answer. A frame
	        without any rows or columns yields two empty lists.

	        Raises:
	            IndexError: If the frame has rows but fewer than two columns.
	        """
	        if self.data_rag.empty:
	            return [], []

	        question_col = self.data_rag.iloc[:, 0]
	        answer_col = self.data_rag.iloc[:, 1]
	        # One shared mask keeps both lists aligned index-for-index.
	        complete = question_col.notna() & answer_col.notna()

	        return question_col[complete].tolist(), answer_col[complete].tolist()

	    def get_relevant_question(self, query, threshold=DEFAULT_THRESHOLD):
	        """Return the best matching question/answer as formatted text.

	        Args:
	            query: Text to look up.
	            threshold: Minimum cosine similarity the best candidate must
	                reach; defaults to ``DEFAULT_THRESHOLD``.

	        Returns:
	            A string containing the matched question, its answer and the
	            similarity score, or ``None`` when the corpus is empty or the
	            best score is below ``threshold``.
	        """
	        if not self.corpus:
	            return None

	        query_embedding = self.model.encode(query)

	        # cos_sim returns a (1, len(corpus)) matrix; take its single row.
	        cos_scores = util.cos_sim(query_embedding, self.corpus_embeddings)[0]

	        # Only the top-scoring entry is ever considered, so a full sort of
	        # the scores is unnecessary.
	        best_idx = int(cos_scores.argmax())
	        best_score = float(cos_scores[best_idx])

	        if best_score < threshold:
	            return None

	        return (f"Questions {self.corpus[best_idx]} \n Answer {self.answers[best_idx]} \n (score: {best_score:.4f})")