Spaces:
Sleeping
Sleeping
File size: 1,023 Bytes
9806c71 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 |
from datasets import Dataset
from ragas import evaluate
from ragas.llms import llm_factory
from ragas.metrics.collections import Faithfulness, ResponseRelevancy, ContextPrecision
from src.agent import app
# Judge LLM handed to `evaluate(...)` in `evaluate_agent` to score the metrics.
# It is constructed once, when this module is imported.
# NOTE(review): only a model name is given to `llm_factory`; confirm that the
# installed Ragas version can resolve a Gemini model without an explicit
# provider/client argument.
judge_llm = llm_factory("gemini-2.0-flash")
def evaluate_agent(questions: list, references: list):
    """Run the agent on every question and score the answers with Ragas.

    For each ``(question, reference)`` pair the agent ``app`` is invoked with
    a single user message. One evaluation record is built from the result:

    * ``user_input`` - the question as given.
    * ``response`` - the ``content`` of the last message in the agent output.
    * ``retrieved_contexts`` - the ``content`` of every output message that
      carries a ``tool_call_id`` attribute (presumably tool/retriever
      results; verify against the agent's message types).
    * ``reference`` - the expected answer as given.

    Args:
        questions: Inputs sent to the agent, one per evaluation sample.
        references: Ground-truth answers, positionally aligned with
            ``questions``.

    Returns:
        Whatever ``ragas.evaluate`` returns for the assembled dataset.

    Raises:
        ValueError: If ``questions`` and ``references`` differ in length.
    """
    # zip() would silently drop the unmatched tail and the scores would be
    # computed over fewer samples than the caller supplied, so fail loudly.
    if len(questions) != len(references):
        raise ValueError(
            "questions and references must have the same length "
            f"(got {len(questions)} and {len(references)})"
        )

    results = []
    for question, reference in zip(questions, references):
        output = app.invoke({"messages": [("user", question)]})
        messages = output["messages"]
        results.append({
            "user_input": question,
            "response": messages[-1].content,
            "retrieved_contexts": [
                m.content for m in messages if hasattr(m, "tool_call_id")
            ],
            "reference": reference,
        })

    dataset = Dataset.from_list(results)
    # NOTE(review): the metrics are constructed without arguments and rely on
    # the `llm=` passed to evaluate(); confirm this matches the installed
    # Ragas version's API for `ragas.metrics.collections`.
    metrics = [Faithfulness(), ResponseRelevancy(), ContextPrecision()]
    # Score every record using the module-level judge LLM.
    return evaluate(dataset=dataset, metrics=metrics, llm=judge_llm)