"""
Pydantic models for Government Fraud Detection OpenEnv.
Implements the full OpenEnv typed spec: Observation, Action, Reward.
"""

from __future__ import annotations

from typing import Any, Dict, List, Literal, Optional

from pydantic import BaseModel, Field

# NOTE: class docstrings below are intentionally left as-is (and not added to
# classes that lack one): pydantic exposes a model's docstring as the
# `description` of its JSON schema, so editing them changes emitted schemas.
# Extra documentation is therefore written as `#` comments.


# ---------------------------------------------------------------------------
# Action space
# ---------------------------------------------------------------------------

class Action(BaseModel):
    """Every action the agent can take."""

    # Discriminator for the action; the only required field on this model.
    action_type: Literal[
        "read_document",
        "flag_duplicate",
        "flag_shell_company",
        "trace_ownership",
        "flag_overbilling",
        "submit_finding",
        "request_more_docs",
    ] = Field(..., description="Type of action to perform")

    # Every remaining field is optional and defaults to None. Which fields are
    # meaningful for which action_type is not enforced by this model.
    document_id: Optional[str] = Field(None, description="Target document ID")
    entity_ids: Optional[List[str]] = Field(
        None, description="List of entity/claim IDs"
    )
    finding_type: Optional[
        Literal[
            "duplicate_billing",
            "shell_company",
            "overbilling",
            "fca_violation",
            "clean",
        ]
    ] = Field(None, description="Type of fraud finding (for submit_finding)")
    evidence: Optional[List[str]] = Field(
        None, description="Evidence document IDs cited"
    )
    defendant: Optional[str] = Field(None, description="Name of defendant entity")
    amount_at_risk: Optional[float] = Field(
        None, description="Estimated dollar amount at risk"
    )
    # The section sign was previously stored as mis-decoded bytes ("ยง") and
    # split across a line break; it is restored to the intended "§" here.
    legal_basis: Optional[str] = Field(
        None, description="Legal statute cited e.g. 31 U.S.C. §3729"
    )
    reasoning: Optional[str] = Field(None, description="Agent's reasoning text")
    request_target: Optional[str] = Field(
        None,
        description="Target entity/topic when requesting more docs (e.g., 'FastBuild LLC bank records')",
    )
    requested_doc_type: Optional[str] = Field(
        None,
        description="Requested document type (e.g., bank_records, audit_memo, compliance_review)",
    )

    class Config:
        # Fields not declared above are accepted instead of being rejected.
        extra = "allow"


# ---------------------------------------------------------------------------
# Observation space
# ---------------------------------------------------------------------------

# One entry of Observation.available_documents: identifying metadata plus a
# preview string and a read flag (False by default).
class DocumentSummary(BaseModel):
    doc_id: str
    doc_type: str
    title: str
    preview: str
    is_read: bool = False


# One entry of Observation.detected_signals; severity is restricted to the
# four listed levels.
class FraudSignal(BaseModel):
    signal_type: str
    description: str
    severity: Literal["low", "medium", "high", "critical"]


class Observation(BaseModel):
    """Returned by reset() and step()."""

    # Required task metadata (no defaults).
    task_id: str = Field(..., description="Current task identifier")
    task_description: str = Field(
        ..., description="What the agent must accomplish"
    )
    difficulty: Literal["easy", "medium", "hard"]

    # Collection fields use default_factory so each instance gets its own
    # fresh container.
    available_documents: List[DocumentSummary] = Field(
        default_factory=list,
        description="All documents available for reading",
    )
    read_documents: Dict[str, Any] = Field(
        default_factory=dict,
        description="Full content of documents the agent has read",
    )
    detected_signals: List[FraudSignal] = Field(
        default_factory=list,
        description="Fraud signals the env has pre-flagged for the agent",
    )

    # Episode progress counters and running reward total.
    steps_taken: int = 0
    steps_remaining: int = 10
    cumulative_reward: float = 0.0

    # Outcome of the most recent action; both default to None.
    last_action_result: Optional[str] = None
    last_action_error: Optional[str] = None

    done: bool = False
    info: Dict[str, Any] = Field(default_factory=dict)


# ---------------------------------------------------------------------------
# Reward
# ---------------------------------------------------------------------------

class Reward(BaseModel):
    """Reward returned after each step."""

    # Validated to lie in the closed interval [0.0, 1.0].
    # NOTE(review): because of ge=0.0 a penalty cannot be expressed as a
    # negative value; confirm that is_penalty is the intended way to signal it.
    value: float = Field(..., ge=0.0, le=1.0, description="Reward this step [0,1]")
    breakdown: Dict[str, float] = Field(
        default_factory=dict,
        description="Component-wise reward breakdown",
    )
    reason: str = Field("", description="Human-readable explanation")
    is_bonus: bool = False
    is_penalty: bool = False