# Portfolio1/src/evaluation.py
# (Hugging Face upload metadata: uploaded by PranavReddy18, "Upload 22 files",
# commit b41fa31 verified)
import os
from dotenv import load_dotenv
from datasets import Dataset
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors.cross_encoder_rerank import CrossEncoderReranker
from langchain_community.cross_encoders import HuggingFaceCrossEncoder
from langchain_groq import ChatGroq
from ragas import evaluate
from ragas.metrics import context_precision, context_recall, faithfulness, answer_relevancy
from ragas.run_config import RunConfig
from src.retrival_generation import RetrievalGeneration
class Evaluation:
    """Evaluate the RAG pipeline with RAGAS metrics (context precision/recall,
    faithfulness, answer relevancy), optionally putting a cross-encoder
    reranker in front of the FAISS retriever."""

    def __init__(self, vectorstore_path: str, llm_model: str = "llama-3.3-70b-versatile"):
        """Load the FAISS index, the Groq judge LLM, and the RAG chain.

        Args:
            vectorstore_path: Directory containing the persisted FAISS index.
            llm_model: Groq model name used as the RAGAS judge LLM.

        Raises:
            ValueError: If GROQ_API_KEY is not set in the environment/.env.
        """
        load_dotenv()
        self.groq_api_key = os.getenv("GROQ_API_KEY")
        # Fail fast: a missing key would otherwise surface later as an
        # opaque authentication error inside the first Groq API call.
        if not self.groq_api_key:
            raise ValueError("GROQ_API_KEY is not set; add it to the environment or a .env file")
        self.llm = ChatGroq(api_key=self.groq_api_key, model=llm_model)
        self.embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
        self.vectorstore_path = vectorstore_path
        # allow_dangerous_deserialization is required because FAISS indexes
        # are pickled on disk; only load indexes you created yourself.
        self.vectorstore = FAISS.load_local(
            self.vectorstore_path,
            self.embeddings,
            allow_dangerous_deserialization=True,
        )
        self.rg = RetrievalGeneration(vectorstore_path=vectorstore_path)
        self.rg.init_vectorstore()
        self.qa = self.rg.build_rag_chain()

    def _build_retriever(self, use_reranker: bool):
        """Return the base k=10 retriever, optionally wrapped with a
        cross-encoder reranker that keeps only the top 3 documents."""
        base_retriever = self.vectorstore.as_retriever(search_kwargs={"k": 10})
        if not use_reranker:
            return base_retriever
        cross_encoder_model = HuggingFaceCrossEncoder(model_name="cross-encoder/ms-marco-MiniLM-L-6-v2")
        compressor = CrossEncoderReranker(model=cross_encoder_model, top_n=3)
        return ContextualCompressionRetriever(
            base_compressor=compressor,
            base_retriever=base_retriever,
        )

    def run(self, questions: list, ground_truth: list, use_reranker: bool = False):
        """Run evaluation with or without reranking.

        Args:
            questions: Evaluation questions, one per sample.
            ground_truth: Reference answers, aligned with ``questions``.
            use_reranker: If True, rerank the retrieved contexts with a
                cross-encoder before scoring.

        Returns:
            The RAGAS evaluation result for the four configured metrics.

        Raises:
            ValueError: If ``questions`` and ``ground_truth`` differ in length.
        """
        if len(questions) != len(ground_truth):
            raise ValueError("questions and ground_truth must have the same length")
        retriever = self._build_retriever(use_reranker)
        answers, contexts = [], []
        for query in questions:
            answers.append(self.qa.invoke(query))
            # NOTE(review): contexts are fetched independently of the QA
            # chain's own retrieval, so they may not be the exact documents
            # the chain saw when answering — acceptable for comparing
            # metric runs, but worth confirming against RetrievalGeneration.
            # .invoke() is the current retriever API; get_relevant_documents
            # is deprecated in modern LangChain.
            contexts.append([doc.page_content for doc in retriever.invoke(query)])
        dataset = Dataset.from_dict({
            "question": questions,
            "ground_truth": ground_truth,
            "answer": answers,
            "contexts": contexts,
        })
        # Conservative run config (single worker, long timeout, retries with
        # backoff) to stay under the Groq endpoint's rate limits.
        run_config = RunConfig(
            timeout=290,
            max_retries=5,
            max_wait=30,
            max_workers=1,
        )
        return evaluate(
            dataset=dataset,
            metrics=[context_precision, context_recall, faithfulness, answer_relevancy],
            llm=self.llm,
            embeddings=self.embeddings,
            run_config=run_config,
            batch_size=1,
        )
def _main():
    """Run baseline and reranked RAGAS evaluations on a small resume QA set."""
    base_dir = os.path.dirname(os.path.abspath(__file__))
    vectorstore_path = os.path.join(base_dir, "..", "local_faiss_index")
    evaluation = Evaluation(vectorstore_path)

    questions = [
        "What were Katta Sai Pranav Reddy’s 10th class marks and CGPA?",
        "What subjects did Pranav Reddy study in 12th (Intermediate) and what were his marks?",
        "Can you summarize Pranav Reddy’s professional and project experience?",
    ]
    ground_truth = [
        "Katta Sai Pranav Reddy completed his SSC in March 2019 at Ekalavya Foundation School, Nalgonda, securing A1 grades in most subjects and a B1 in Hindi, with an overall CGPA of 9.5.",
        "In March 2021, Pranav Reddy finished his Intermediate education, achieving nearly full marks in English, Sanskrit, HE, and optional subjects like Mathematics, Physics, and Chemistry, with a total of 982 marks.",
        "Pranav Reddy is an AI and ML engineer with internship experience at iNeuron Intelligence and Unified Mentor, where he worked on customer segmentation and attrition prediction. His projects include the BigBasket SmartCart AI Assistant and Netflix Churn Prediction, showcasing skills in Python, ML pipelines, FAISS, FastAPI, and Generative AI solutions.",
    ]

    # Baseline pass: plain k=10 FAISS retrieval.
    print("🔹 Baseline Evaluation (no reranker)")
    print(evaluation.run(questions, ground_truth, use_reranker=False))

    # Second pass: same questions, cross-encoder reranking enabled.
    print("\n🔹 Evaluation with Reranker")
    print(evaluation.run(questions, ground_truth, use_reranker=True))


if __name__ == "__main__":
    _main()