"""Pydantic data models for audit actions, observations, rewards, and state."""

from typing import Any, Dict, List, Literal, Optional

from pydantic import BaseModel, Field

# Closed set of values accepted wherever a ``task_id`` field is declared.
TaskId = Literal["easy", "medium", "hard"]

# Closed set of values accepted by ``AuditAction.action_type``.
ActionType = Literal["submit_finding", "flag_human_review", "noop"]


class AuditFinding(BaseModel):
    """A reported potential violation located in a single document."""

    document_id: str = Field(
        description="ID of the document containing potential violation"
    )
    violation_type: str = Field(description="Violation category")
    # Defaults to a fresh empty list per instance (default_factory).
    evidence: List[str] = Field(
        default_factory=list, description="Supporting document IDs"
    )
    # Required; validated to lie within the closed interval [0.0, 1.0].
    confidence: float = Field(ge=0.0, le=1.0)


class AuditAction(BaseModel):
    """An action request: its kind, the task it targets, and optional payload."""

    action_type: ActionType
    task_id: TaskId
    # Optional payload. NOTE(review): presumably expected when action_type is
    # "submit_finding", but this model does not enforce that -- confirm with
    # the code that consumes AuditAction.
    finding: Optional[AuditFinding] = None
    # Free-form text; empty string when omitted.
    note: str = ""


class AuditObservation(BaseModel):
    """Snapshot of a session returned as an observation; all fields required."""

    session_id: str
    task_id: TaskId
    # Each document is an untyped mapping; its key schema is not defined here.
    documents: List[Dict[str, Any]]
    findings_submitted: int
    steps_remaining: int
    current_partial_score: float


class AuditReward(BaseModel):
    """Reward signal: a raw value, a bounded normalized value, and a reason."""

    # Unconstrained float.
    value: float
    # Validated to lie within the closed interval [0.0, 1.0].
    normalized: float = Field(ge=0.0, le=1.0)
    reason: str


class StepResult(BaseModel):
    """Bundle of observation, reward, completion flag, and extra info."""

    observation: AuditObservation
    reward: AuditReward
    done: bool
    # Arbitrary extra data; defaults to a fresh empty dict per instance.
    info: Dict[str, Any] = Field(default_factory=dict)


class EnvState(BaseModel):
    """Per-session environment state; all fields required."""

    session_id: str
    task_id: TaskId
    steps_remaining: int
    findings_submitted: int
    partial_score: float
    # NOTE(review): presumably keys of ground-truth items already matched;
    # the key format is not visible in this file -- confirm against the env.
    found_truth_keys: List[str]