Spaces:
Running
Running
File size: 3,927 Bytes
"""LangGraph state schema for the multi-agent RAG workflow."""
from __future__ import annotations
from operator import add
from typing import Annotated, TypedDict
class DocumentGrade(TypedDict):
    """A retrieved document chunk together with its grading outcome."""

    # Unique identifier of the document chunk.
    doc_id: str
    # Text content of the document chunk.
    text: str
    # Relevance score produced by retrieval.
    score: float
    # True when the grader judged the document relevant.
    relevant: bool
    # Metadata attached to the chunk (source, page, sensitivity, etc.).
    metadata: dict
class Citation(TypedDict):
    """Reference to a source document that backs part of an answer."""

    # Original file name or path of the source.
    source_file: str
    # Page number within the source document.
    page_number: int
    # Excerpt of the text being cited.
    chunk_text: str
    # Score indicating how relevant the excerpt is to the answer.
    relevance_score: float
class GraphState(TypedDict):
    """Shared state carried through the multi-agent RAG graph.

    Every field that flows through the LangGraph workflow is declared
    here; an individual node reads from and writes to only a subset of
    these fields.
    """

    # ---- Input ----------------------------------------------------------
    query: str
    # UserContext serialized as dict.
    user_context: dict

    # ---- Inference routing preferences (set by UI / API caller) ---------
    # True when caller opts into cloud providers for LOW/MEDIUM.
    prefer_cloud: bool
    # "" or one of "ollama" / "groq" / "openai" / "anthropic".
    override_provider: str

    # Optional tone hint injected into the synthesizer's system prompt.
    # An empty string selects the default research-assistant voice. The
    # BYOK demo endpoint fills this in from the X-Demo-Persona header so
    # that the three personas produce visibly distinct answers.
    persona_style: str

    # BYOK visitor session id. When set, the retriever node also queries
    # the visitor's per-session Qdrant collection, so documents uploaded
    # by the visitor compete with the demo corpus by ranking. An empty
    # string means demo-only retrieval (production /query path).
    byok_session_id: str

    # Streaming dispatch flag, set by run_rag_pipeline_stream so that the
    # synthesizer picks call_llm_stream over call_llm_with_decision and
    # pushes tokens through the LangGraph stream writer. Not part of the
    # public API; the leading underscore marks it as internal pipeline
    # plumbing.
    _stream: bool

    # ---- Router ---------------------------------------------------------
    # "simple", "complex", "out_of_scope"
    query_type: str
    rewritten_query: str
    # "low" | "medium" | "high" — inferred from the query itself.
    query_sensitivity: str

    # ---- Guardrails (prompt-injection / jailbreak detection) ------------
    guardrails_passed: bool
    guardrails_reason: str

    # ---- Security -------------------------------------------------------
    security_passed: bool
    security_message: str

    # ---- Retrieval ------------------------------------------------------
    documents: list[DocumentGrade]

    # ---- Grading --------------------------------------------------------
    relevant_documents: list[DocumentGrade]
    relevance_ratio: float

    # ---- Corrective RAG -------------------------------------------------
    retry_count: int
    max_retries: int

    # ---- Generation -----------------------------------------------------
    generation: str
    citations: list[Citation]
    confidence_score: float

    # ---- Provenance of the synthesizer LLM call -------------------------
    # (set by synthesize_answer/_stream)
    # "ollama" | "groq" | "openai" | "anthropic"
    synth_provider: str
    synth_model: str
    # {prompt_tokens, completion_tokens, total_tokens}
    synth_usage: dict
    synth_latency_ms: float

    # ---- Faithfulness (NLI-gated) ---------------------------------------
    # entailed sentences / total cited sentences
    faithfulness_ratio: float
    # [{"sentence": str, "cited": [int], "verdict": str}]
    faithfulness_unsupported: list[dict]

    # ---- Evaluation -----------------------------------------------------
    needs_human_review: bool
    evaluation_notes: str

    # ---- Audit ----------------------------------------------------------
    # Append-only via reducer.
    audit_trail: Annotated[list[dict], add]
|