agentic-corrective-rag / hf_backend /eval_dataset.json
3v324v23's picture
Auto deploy backend
a977e38
[
{
"question": "What is the maximum number of retries the self-correcting agent performs?",
"ground_truth": "The self-correcting agent retries up to 3 times if validation fails."
},
{
"question": "Which embedding model is used for dense semantic search?",
"ground_truth": "The embedding model used is all-MiniLM-L6-v2, which produces 384-dimensional vectors for semantic search."
},
{
"question": "What LLM is used for both answer generation and hallucination detection?",
"ground_truth": "LLaMA 3.3 70B running on Groq is used for both answer generation and hallucination validation."
},
{
"question": "What are the three MCP tools exposed by the MCP server?",
"ground_truth": "The three MCP tools are query_rag which runs the full corrective RAG pipeline, ingest_document which uploads and indexes a PDF or TXT file, and clear_session which clears conversation memory for a session."
},
{
"question": "What is the hallucination detection rate of the system?",
"ground_truth": "The hallucination detection rate is 94%."
},
{
"question": "How many conversation turns does the session memory remember?",
"ground_truth": "The session memory remembers the last 5 conversation turns per session."
},
{
"question": "What reranking model is used and what is its role?",
"ground_truth": "The reranker is cross-encoder/ms-marco-MiniLM-L-6-v2 and its role is precision re-scoring of the top-k retrieved candidates."
},
{
"question": "What is the end-to-end latency of the system when no retries are needed?",
"ground_truth": "The end-to-end latency with no retries is approximately 3 seconds."
},
{
"question": "What retrieval methods are combined in the hybrid retrieval pipeline?",
"ground_truth": "Hybrid retrieval combines FAISS semantic search and BM25 keyword search, fused using Reciprocal Rank Fusion to produce the top 5 combined results, followed by cross-encoder reranking."
},
{
"question": "What framework is used to build the self-correcting agent pipeline?",
"ground_truth": "The self-correcting agent pipeline is built using LangGraph."
}
]