Spaces:
Sleeping
Sleeping
| [ | |
| { | |
| "question": "What is the maximum number of retries the self-correcting agent performs?", | |
| "ground_truth": "The self-correcting agent retries up to 3 times if validation fails." | |
| }, | |
| { | |
| "question": "Which embedding model is used for dense semantic search?", | |
| "ground_truth": "The embedding model used is all-MiniLM-L6-v2, which produces 384-dimensional vectors for semantic search." | |
| }, | |
| { | |
| "question": "What LLM is used for both answer generation and hallucination detection?", | |
| "ground_truth": "LLaMA 3.3 70B running on Groq is used for both answer generation and hallucination validation." | |
| }, | |
| { | |
| "question": "What are the three MCP tools exposed by the MCP server?", | |
| "ground_truth": "The three MCP tools are query_rag which runs the full corrective RAG pipeline, ingest_document which uploads and indexes a PDF or TXT file, and clear_session which clears conversation memory for a session." | |
| }, | |
| { | |
| "question": "What is the hallucination detection rate of the system?", | |
| "ground_truth": "The hallucination detection rate is 94%." | |
| }, | |
| { | |
| "question": "How many conversation turns does the session memory remember?", | |
| "ground_truth": "The session memory remembers the last 5 conversation turns per session." | |
| }, | |
| { | |
| "question": "What reranking model is used and what is its role?", | |
| "ground_truth": "The reranker is cross-encoder/ms-marco-MiniLM-L-6-v2 and its role is precision re-scoring of the top-k retrieved candidates." | |
| }, | |
| { | |
| "question": "What is the end-to-end latency of the system when no retries are needed?", | |
| "ground_truth": "The end-to-end latency with no retries is approximately 3 seconds." | |
| }, | |
| { | |
| "question": "What retrieval methods are combined in the hybrid retrieval pipeline?", | |
| "ground_truth": "Hybrid retrieval combines FAISS semantic search and BM25 keyword search, fused using Reciprocal Rank Fusion to produce the top 5 combined results, followed by cross-encoder reranking." | |
| }, | |
| { | |
| "question": "What framework is used to build the self-correcting agent pipeline?", | |
| "ground_truth": "The self-correcting agent pipeline is built using LangGraph." | |
| } | |
| ] | |