File size: 2,289 Bytes
a977e38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
[
    {
        "question": "What is the maximum number of retries the self-correcting agent performs?",
        "ground_truth": "The self-correcting agent retries up to 3 times if validation fails."
    },
    {
        "question": "Which embedding model is used for dense semantic search?",
        "ground_truth": "The embedding model used is all-MiniLM-L6-v2, which produces 384-dimensional vectors for semantic search."
    },
    {
        "question": "What LLM is used for both answer generation and hallucination detection?",
        "ground_truth": "LLaMA 3.3 70B running on Groq is used for both answer generation and hallucination validation."
    },
    {
        "question": "What are the three MCP tools exposed by the MCP server?",
        "ground_truth": "The three MCP tools are query_rag, which runs the full corrective RAG pipeline; ingest_document, which uploads and indexes a PDF or TXT file; and clear_session, which clears conversation memory for a session."
    },
    {
        "question": "What is the hallucination detection rate of the system?",
        "ground_truth": "The hallucination detection rate is 94%."
    },
    {
        "question": "How many conversation turns does the session memory remember?",
        "ground_truth": "The session memory remembers the last 5 conversation turns per session."
    },
    {
        "question": "What reranking model is used and what is its role?",
        "ground_truth": "The reranker is cross-encoder/ms-marco-MiniLM-L-6-v2 and its role is precision re-scoring of the top-k retrieved candidates."
    },
    {
        "question": "What is the end-to-end latency of the system when no retries are needed?",
        "ground_truth": "The end-to-end latency with no retries is approximately 3 seconds."
    },
    {
        "question": "What retrieval methods are combined in the hybrid retrieval pipeline?",
        "ground_truth": "Hybrid retrieval combines FAISS semantic search and BM25 keyword search, fused using Reciprocal Rank Fusion to produce the top 5 combined results, followed by cross-encoder reranking."
    },
    {
        "question": "What framework is used to build the self-correcting agent pipeline?",
        "ground_truth": "The self-correcting agent pipeline is built using LangGraph."
    }
]