Spaces:

LeomordKaly
/

secureagentrag-api

Running

App Files Files Community

secureagentrag-api / core /state.py

LeomordKaly

deploy: phase 3 BYOK backend (Dockerfile.hf, FastAPI on 7860)

2a83c3b verified 11 days ago

raw

history blame contribute delete

3.93 kB

	"""LangGraph state schema for the multi-agent RAG workflow."""

	from __future__ import annotations

	from operator import add
	from typing import Annotated, TypedDict


	class DocumentGrade(TypedDict):
	    """Graded record for a single retrieved document chunk.

	    Attributes:
	        doc_id: Unique identifier for the document chunk.
	        text: The text content of the document chunk.
	        score: Relevance score from retrieval.
	        relevant: Whether the document was judged relevant by the grader.
	        metadata: Associated metadata (source, page, sensitivity, etc.).
	    """

	    doc_id: str
	    text: str
	    score: float
	    relevant: bool
	    metadata: dict


	class Citation(TypedDict):
	    """Reference to the source passage backing part of an answer.

	    Attributes:
	        source_file: Original file name or path.
	        page_number: Page number in the source document.
	        chunk_text: Excerpt of the cited text.
	        relevance_score: Score indicating relevance to the answer.
	    """

	    source_file: str
	    page_number: int
	    chunk_text: str
	    relevance_score: float


	class GraphState(TypedDict):
	    """State for the multi-agent RAG graph.

	    This TypedDict defines all fields flowing through the LangGraph workflow.
	    Each node reads from and writes to subsets of this state. Fields are
	    grouped below by the pipeline stage they belong to.
	    """

	    # --- Input ---
	    query: str
	    user_context: dict  # UserContext serialized as dict

	    # --- Inference routing preferences (set by UI / API caller) ---
	    prefer_cloud: bool  # True when caller opts into cloud providers for LOW/MEDIUM
	    override_provider: str  # "" or one of "ollama" / "groq" / "openai" / "anthropic"

	    # Optional tone hint injected into the synthesizer's system prompt.
	    # Empty string = use the default research-assistant voice. The BYOK
	    # demo endpoint populates this from the X-Demo-Persona header so the
	    # three personas produce visibly distinct answers.
	    persona_style: str

	    # BYOK visitor session id. When set, the retriever node also queries the
	    # visitor's per-session Qdrant collection so any docs the visitor
	    # uploaded compete with the demo corpus by ranking. Empty string =
	    # demo-only retrieval (production /query path).
	    byok_session_id: str

	    # Streaming dispatch flag — set by run_rag_pipeline_stream so the
	    # synthesizer chooses call_llm_stream over call_llm_with_decision and
	    # pushes tokens through the LangGraph stream writer. Not part of the
	    # public API; leading underscore signals "internal pipeline plumbing".
	    _stream: bool

	    # --- Router ---
	    query_type: str  # "simple", "complex", "out_of_scope"
	    rewritten_query: str
	    query_sensitivity: str  # "low" | "medium" | "high" — inferred from the query itself

	    # --- Guardrails (prompt-injection / jailbreak detection) ---
	    guardrails_passed: bool
	    guardrails_reason: str

	    # --- Security ---
	    security_passed: bool
	    security_message: str

	    # --- Retrieval ---
	    documents: list[DocumentGrade]

	    # --- Grading ---
	    relevant_documents: list[DocumentGrade]
	    relevance_ratio: float

	    # --- Corrective RAG ---
	    retry_count: int
	    max_retries: int

	    # --- Generation ---
	    generation: str
	    citations: list[Citation]
	    confidence_score: float
	    # Provenance of the synthesizer LLM call (set by synthesize_answer/_stream).
	    synth_provider: str  # "ollama" | "groq" | "openai" | "anthropic"
	    synth_model: str
	    synth_usage: dict  # {prompt_tokens, completion_tokens, total_tokens}
	    synth_latency_ms: float

	    # --- Faithfulness (NLI-gated) ---
	    faithfulness_ratio: float  # entailed sentences / total cited sentences
	    # Entries shaped like {"sentence": str, "cited": [int], "verdict": str}.
	    faithfulness_unsupported: list[dict]

	    # --- Evaluation ---
	    needs_human_review: bool
	    evaluation_notes: str

	    # --- Audit ---
	    # Append-only via reducer: the `operator.add` metadata makes updates to
	    # this key concatenate onto the existing list rather than replace it.
	    audit_trail: Annotated[list[dict], add]