File size: 3,927 Bytes
2a83c3b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
"""LangGraph state schema for the multi-agent RAG workflow."""

from __future__ import annotations

from operator import add
from typing import Annotated, TypedDict


class DocumentGrade(TypedDict):
    """A retrieved document chunk together with its relevance grading.

    Attributes:
        doc_id: Unique identifier of the document chunk.
        text: Text content of the document chunk.
        score: Relevance score produced by retrieval.
        relevant: True when the grader judged the document relevant.
        metadata: Metadata attached to the chunk (source, page,
            sensitivity, etc.).
    """

    doc_id: str
    text: str
    score: float
    relevant: bool
    metadata: dict


class Citation(TypedDict):
    """Reference to a source document that backs part of an answer.

    Attributes:
        source_file: Name or path of the original file.
        page_number: Page of the source document being cited.
        chunk_text: Excerpt of the cited text.
        relevance_score: Score indicating how relevant the excerpt is
            to the answer.
    """

    source_file: str
    page_number: int
    chunk_text: str
    relevance_score: float


class GraphState(TypedDict):
    """Shared state passed between the nodes of the multi-agent RAG graph.

    Every field that flows through the LangGraph workflow is declared
    here. An individual node reads from and writes to only the subset of
    fields it is concerned with.
    """

    # ---- Input ----
    query: str
    # UserContext serialized as a dict.
    user_context: dict

    # ---- Inference routing preferences (set by UI / API caller) ----
    # True when the caller opts into cloud providers for LOW/MEDIUM.
    prefer_cloud: bool
    # "" or one of "ollama" / "groq" / "openai" / "anthropic".
    override_provider: str

    # Optional tone hint injected into the synthesizer's system prompt.
    # An empty string selects the default research-assistant voice. The
    # BYOK demo endpoint fills this in from the X-Demo-Persona header so
    # that the three personas produce visibly distinct answers.
    persona_style: str

    # BYOK visitor session id. When it is set, the retriever node also
    # queries the visitor's per-session Qdrant collection, so documents the
    # visitor uploaded compete with the demo corpus by ranking. An empty
    # string means demo-only retrieval (the production /query path).
    byok_session_id: str

    # Streaming dispatch flag, set by run_rag_pipeline_stream. It makes the
    # synthesizer choose call_llm_stream over call_llm_with_decision and
    # push tokens through the LangGraph stream writer. This is internal
    # pipeline plumbing rather than public API, hence the leading underscore.
    _stream: bool

    # ---- Router ----
    # One of "simple", "complex", "out_of_scope".
    query_type: str
    rewritten_query: str
    # "low" | "medium" | "high", inferred from the query itself.
    query_sensitivity: str

    # ---- Guardrails (prompt-injection / jailbreak detection) ----
    guardrails_passed: bool
    guardrails_reason: str

    # ---- Security ----
    security_passed: bool
    security_message: str

    # ---- Retrieval ----
    documents: list[DocumentGrade]

    # ---- Grading ----
    relevant_documents: list[DocumentGrade]
    relevance_ratio: float

    # ---- Corrective RAG ----
    retry_count: int
    max_retries: int

    # ---- Generation ----
    generation: str
    citations: list[Citation]
    confidence_score: float
    # Provenance of the synthesizer LLM call; these four fields are set by
    # synthesize_answer/_stream.
    # synth_provider is "ollama" | "groq" | "openai" | "anthropic".
    synth_provider: str
    synth_model: str
    # Shape: {prompt_tokens, completion_tokens, total_tokens}.
    synth_usage: dict
    synth_latency_ms: float

    # ---- Faithfulness (NLI-gated) ----
    # Entailed sentences / total cited sentences.
    faithfulness_ratio: float
    # Shape: [{"sentence": str, "cited": [int], "verdict": str}].
    faithfulness_unsupported: list[dict]

    # ---- Evaluation ----
    needs_human_review: bool
    evaluation_notes: str

    # ---- Audit ----
    # Append-only: the `add` reducer in the annotation is what accumulates
    # entries instead of overwriting the list.
    audit_trail: Annotated[list[dict], add]