| from __future__ import annotations |
|
|
| from typing import Any, Literal |
|
|
| from pydantic import BaseModel, Field |
|
|
|
|
# Closed vocabularies shared by the models in this module. Each alias is a
# ``typing.Literal``, so a field annotated with it accepts only the exact
# strings listed here.

# Allowed values for ``Action.action_type``.
ActionType = Literal[
    "read_ticket",
    "classify_ticket",
    "draft_reply",
    "resolve_ticket",
]

# Allowed values for ``Action.priority``.
PriorityType = Literal[
    "low",
    "medium",
    "high",
    "urgent",
]

# Allowed values for ``Action.category``.
CategoryType = Literal[
    "account",
    "billing",
    "technical",
    "abuse",
    "general",
]
|
|
|
|
class TicketView(BaseModel):
    """Summary record for a single ticket.

    Instances of this model are the element type of ``Observation.inbox``.
    Every field except ``read`` is required.
    """

    # Identifier string for the ticket.
    ticket_id: str
    # Subject line text.
    subject: str
    # Restricted to exactly one of the three listed tier names.
    customer_tier: Literal["free", "pro", "enterprise"]
    # Presumably the ticket's age in minutes (inferred from the field name;
    # no range check is applied here).
    age_minutes: int
    # Defaults to False when not supplied.
    read: bool = False
|
|
|
|
class Observation(BaseModel):
    """Observation payload: task metadata, step counters and the inbox.

    ``task_id``, ``objective``, ``step_count``, ``max_steps`` and ``inbox``
    are required; the remaining fields fall back to the defaults noted below.
    """

    task_id: str
    objective: str
    # Plain integers; this model does not enforce step_count <= max_steps.
    step_count: int
    max_steps: int
    # Ticket summaries, one ``TicketView`` per entry.
    inbox: list[TicketView]
    # Optional text; None when not supplied.
    current_ticket_content: str | None = Field(default=None)
    # Defaults to the empty string.
    latest_system_note: str = Field(default="")
    # String-keyed float mapping; each instance gets its own fresh empty dict.
    score_hint: dict[str, float] = Field(default_factory=dict)
|
|
|
|
class Action(BaseModel):
    """A single action request.

    Only ``action_type`` is required. Every other field is optional and
    defaults to ``None``. This model declares no cross-field validation, so
    which optional fields accompany which action type is not checked here.
    """

    # Must be one of the ``ActionType`` literal values.
    action_type: ActionType

    # Optional ticket identifier.
    ticket_id: str | None = None

    # Optional classification details, each limited to its Literal alias.
    priority: PriorityType | None = None
    category: CategoryType | None = None
    needs_escalation: bool | None = None

    # Optional free-form text.
    message: str | None = None
|
|
|
|
class Reward(BaseModel):
    """Reward value with an optional breakdown and explanation.

    ``value`` is required and validated to lie in the closed interval
    [0.0, 1.0]. ``components`` and ``reasoning`` are optional.
    """

    # Required (``...``); bounded below by 0.0 and above by 1.0, inclusive.
    value: float = Field(..., ge=0.0, le=1.0)
    # String-keyed float mapping; each instance gets its own fresh empty dict.
    components: dict[str, float] = Field(default_factory=dict)
    # Defaults to the empty string.
    reasoning: str = Field(default="")
|
|
|
|
class StepInfo(BaseModel):
    """Per-step diagnostic record.

    All fields are required; none declares a default.
    """

    task_id: str
    # Restricted to exactly one of the four listed status strings.
    done_reason: Literal["ongoing", "resolved", "max_steps", "invalid_action"]
    # Unconstrained float (no bounds are declared on this field).
    grader_score: float
    # String-keyed float mappings.
    reward_components: dict[str, float]
    penalties: dict[str, float]
    # String-keyed mapping with values of any type.
    state_snapshot: dict[str, Any]
|
|