from __future__ import annotations

from typing import Any, Literal

from pydantic import BaseModel, Field

# Closed vocabularies shared by the models below.

# Kinds of action an ``Action`` may carry.
ActionType = Literal[
    "read_ticket",
    "classify_ticket",
    "draft_reply",
    "resolve_ticket",
]

# Allowed values for ``Action.priority``.
PriorityType = Literal[
    "low",
    "medium",
    "high",
    "urgent",
]

# Allowed values for ``Action.category``.
CategoryType = Literal[
    "account",
    "billing",
    "technical",
    "abuse",
    "general",
]


class TicketView(BaseModel):
    """Summary of a single ticket, as listed in ``Observation.inbox``."""

    # Identifier of the ticket.
    ticket_id: str
    # Subject line of the ticket.
    subject: str
    # Customer tier; restricted to one of the three listed literals.
    customer_tier: Literal["free", "pro", "enterprise"]
    # Ticket age in minutes (no range constraint is enforced here).
    age_minutes: int
    # Read flag; defaults to False.
    read: bool = False


class Observation(BaseModel):
    """Observation payload: task info, step counters, inbox and notes."""

    # Identifier of the task.
    task_id: str
    # Text describing the task objective.
    objective: str
    # Step counters; presumably steps taken so far vs. the allowed budget
    # (confirm against the producer of this model).
    step_count: int
    max_steps: int
    # Ticket summaries currently in the inbox.
    inbox: list[TicketView]
    # Content of the current ticket, or None when absent (the default).
    current_ticket_content: str | None = None
    # Most recent system note; empty string by default.
    latest_system_note: str = ""
    # Named float hints; a fresh empty dict is created per instance.
    score_hint: dict[str, float] = Field(default_factory=dict)


class Action(BaseModel):
    """An action request: a required action type plus optional arguments.

    Only ``action_type`` is required. Every other field defaults to
    ``None``; this model does not enforce which arguments a given action
    type needs.
    """

    # Which action to perform; must be one of ``ActionType``.
    action_type: ActionType
    # Target ticket, when given.
    ticket_id: str | None = None
    # Priority value, when given; one of ``PriorityType``.
    priority: PriorityType | None = None
    # Category value, when given; one of ``CategoryType``.
    category: CategoryType | None = None
    # Escalation flag, when given.
    needs_escalation: bool | None = None
    # Free-text message, when given.
    message: str | None = None


class Reward(BaseModel):
    """Reward with a bounded scalar value and an optional breakdown."""

    # Required scalar reward, validated to lie within [0.0, 1.0].
    value: float = Field(ge=0.0, le=1.0)
    # Named float components; a fresh empty dict is created per instance.
    components: dict[str, float] = Field(default_factory=dict)
    # Free-text explanation; empty string by default.
    reasoning: str = ""


class StepInfo(BaseModel):
    """Per-step info record; all fields are required."""

    # Identifier of the task.
    task_id: str
    # Termination status; "ongoing" is one of the four permitted values.
    done_reason: Literal["ongoing", "resolved", "max_steps", "invalid_action"]
    # Score from the grader (no range constraint is enforced here).
    grader_score: float
    # Named float reward components.
    reward_components: dict[str, float]
    # Named float penalties.
    penalties: dict[str, float]
    # Arbitrary string-keyed snapshot of state.
    state_snapshot: dict[str, Any]