[ { "question": "What is the maximum number of retries the self-correcting agent performs?", "ground_truth": "The self-correcting agent retries up to 3 times if validation fails." }, { "question": "Which embedding model is used for dense semantic search?", "ground_truth": "The embedding model used is all-MiniLM-L6-v2, which produces 384-dimensional vectors for semantic search." }, { "question": "What LLM is used for both answer generation and hallucination detection?", "ground_truth": "LLaMA 3.3 70B running on Groq is used for both answer generation and hallucination validation." }, { "question": "What are the three MCP tools exposed by the MCP server?", "ground_truth": "The three MCP tools are query_rag which runs the full corrective RAG pipeline, ingest_document which uploads and indexes a PDF or TXT file, and clear_session which clears conversation memory for a session." }, { "question": "What is the hallucination detection rate of the system?", "ground_truth": "The hallucination detection rate is 94%." }, { "question": "How many conversation turns does the session memory remember?", "ground_truth": "The session memory remembers the last 5 conversation turns per session." }, { "question": "What reranking model is used and what is its role?", "ground_truth": "The reranker is cross-encoder/ms-marco-MiniLM-L-6-v2 and its role is precision re-scoring of the top-k retrieved candidates." }, { "question": "What is the end-to-end latency of the system when no retries are needed?", "ground_truth": "The end-to-end latency with no retries is approximately 3 seconds." }, { "question": "What retrieval methods are combined in the hybrid retrieval pipeline?", "ground_truth": "Hybrid retrieval combines FAISS semantic search and BM25 keyword search, fused using Reciprocal Rank Fusion to produce the top 5 combined results, followed by cross-encoder reranking." }, { "question": "What framework is used to build the self-correcting agent pipeline?", "ground_truth": "The self-correcting agent pipeline is built using LangGraph." } ]