# LeomordKaly's picture
# deploy: phase 3 BYOK backend (Dockerfile.hf, FastAPI on 7860)
# 2a83c3b verified
"""LangGraph state schema for the multi-agent RAG workflow."""
from __future__ import annotations
from operator import add
from typing import Annotated, TypedDict
class DocumentGrade(TypedDict):
    """A retrieved document chunk together with its grading result.

    Attributes:
        doc_id: Unique identifier of the document chunk.
        text: Text content of the chunk.
        score: Relevance score produced by retrieval.
        relevant: True when the grader judged the chunk relevant.
        metadata: Metadata attached to the chunk (source, page,
            sensitivity, etc.).
    """

    # Identity and content of the chunk.
    doc_id: str
    text: str

    # Retrieval score and the grader's verdict.
    score: float
    relevant: bool

    # Associated metadata mapping.
    metadata: dict
class Citation(TypedDict):
    """Reference to a source document that backs an answer.

    Attributes:
        source_file: Name or path of the original file.
        page_number: Page of the source document being cited.
        chunk_text: Excerpt of the text that is cited.
        relevance_score: Score expressing how relevant the excerpt is
            to the answer.
    """

    # Where the cited text comes from.
    source_file: str
    page_number: int

    # What is cited and how relevant it is.
    chunk_text: str
    relevance_score: float
class GraphState(TypedDict):
    """Shared state passed between the nodes of the multi-agent RAG graph.

    Every field that flows through the LangGraph workflow is declared
    here. An individual node reads from, and writes to, only a subset of
    these fields.
    """

    # ---- Input ---------------------------------------------------------
    query: str
    # UserContext serialized as a dict.
    user_context: dict

    # ---- Inference routing preferences (set by the UI / API caller) ----
    # True when the caller opts into cloud providers for LOW/MEDIUM.
    prefer_cloud: bool
    # "" or one of "ollama" / "groq" / "openai" / "anthropic".
    override_provider: str

    # Optional tone hint that is injected into the synthesizer's system
    # prompt. An empty string selects the default research-assistant
    # voice. The BYOK demo endpoint fills this in from the X-Demo-Persona
    # header so that the three personas give visibly distinct answers.
    persona_style: str

    # Session id of a BYOK visitor. When set, the retriever node also
    # queries the visitor's per-session Qdrant collection, so documents
    # uploaded by the visitor compete with the demo corpus by ranking.
    # An empty string means demo-only retrieval (production /query path).
    byok_session_id: str

    # Streaming dispatch flag. run_rag_pipeline_stream sets it so the
    # synthesizer picks call_llm_stream instead of call_llm_with_decision
    # and pushes tokens through the LangGraph stream writer. It is not
    # part of the public API; the leading underscore marks it as
    # internal pipeline plumbing.
    _stream: bool

    # ---- Router --------------------------------------------------------
    # One of "simple", "complex", "out_of_scope".
    query_type: str
    rewritten_query: str
    # "low" | "medium" | "high" -- inferred from the query itself.
    query_sensitivity: str

    # ---- Guardrails (prompt-injection / jailbreak detection) -----------
    guardrails_passed: bool
    guardrails_reason: str

    # ---- Security ------------------------------------------------------
    security_passed: bool
    security_message: str

    # ---- Retrieval -----------------------------------------------------
    documents: list[DocumentGrade]

    # ---- Grading -------------------------------------------------------
    relevant_documents: list[DocumentGrade]
    relevance_ratio: float

    # ---- Corrective RAG ------------------------------------------------
    retry_count: int
    max_retries: int

    # ---- Generation ----------------------------------------------------
    generation: str
    citations: list[Citation]
    confidence_score: float

    # ---- Synthesizer provenance ----------------------------------------
    # Describes the synthesizer LLM call; set by synthesize_answer/_stream.
    # "ollama" | "groq" | "openai" | "anthropic"
    synth_provider: str
    synth_model: str
    # {prompt_tokens, completion_tokens, total_tokens}
    synth_usage: dict
    synth_latency_ms: float

    # ---- Faithfulness (NLI-gated) --------------------------------------
    # Entailed sentences / total cited sentences.
    faithfulness_ratio: float
    # [{"sentence": str, "cited": [int], "verdict": str}]
    faithfulness_unsupported: list[dict]

    # ---- Evaluation ----------------------------------------------------
    needs_human_review: bool
    evaluation_notes: str

    # ---- Audit ---------------------------------------------------------
    # Append-only: the `add` reducer attached via Annotated concatenates
    # entries instead of replacing the list.
    audit_trail: Annotated[list[dict], add]