"""Typed models for the RecallTrace OpenEnv environment."""
|
|
from __future__ import annotations

from enum import Enum, unique
from typing import Any, Dict, List, Optional

from pydantic import BaseModel, ConfigDict, Field
|
|
|
|
@unique
class ActionType(str, Enum):
    """Kinds of action an agent can submit.

    Mixing in ``str`` makes every member a string equal to its value, so a
    member compares equal to the plain string it wraps
    (``ActionType.NOTIFY == "notify"``).
    """

    # ``@unique`` turns an accidentally duplicated value into an error at
    # class-creation time instead of silently aliasing the second name.
    INSPECT_NODE = "inspect_node"
    TRACE_LOT = "trace_lot"
    QUARANTINE = "quarantine"
    NOTIFY = "notify"
    FINALIZE = "finalize"
|
|
|
|
class RecallAction(BaseModel):
    """Action submitted by an agent.

    Only ``type`` is required. The model itself does not enforce which of the
    optional fields a particular action type needs; it only validates their
    types and ranges. Unknown fields are rejected (``extra="forbid"``).
    """

    model_config = ConfigDict(extra="forbid")

    # Which action to perform.
    type: ActionType
    # Optional target identifiers (both default to None).
    node_id: Optional[str] = None
    lot_id: Optional[str] = None
    # Optional amount; when supplied it must be an integer >= 1.
    quantity: Optional[int] = Field(default=None, ge=1)
    # Optional free-text explanation accompanying the action.
    rationale: Optional[str] = None
|
|
|
|
class RewardSignal(BaseModel):
    """Typed reward payload.

    Unknown fields are rejected (``extra="forbid"``).
    """

    model_config = ConfigDict(extra="forbid")

    # Scalar reward, validated to lie in the closed interval [-1.0, 1.0].
    value: float = Field(ge=-1.0, le=1.0)
    # Text explaining the reward (required).
    reason: str
    # Named float contributions; a fresh empty dict per instance by default.
    components: Dict[str, float] = Field(default_factory=dict)
|
|
|
|
class InspectionEvidence(BaseModel):
    """Evidence revealed after inspecting a node.

    Unlike the stricter models in this module, extra fields are accepted and
    kept on the instance (``extra="allow"``).
    """

    model_config = ConfigDict(extra="allow")

    # Status label for the inspected item (free-form string).
    status: str
    # Non-negative count flagged as unsafe.
    unsafe_quantity: int = Field(ge=0)
    # Textual description of the evidence.
    evidence: str
    # Optional non-negative count considered safe; None when not reported.
    safe_quantity: Optional[int] = Field(default=None, ge=0)
|
|
|
|
class TaskDefinition(BaseModel):
    """Static task descriptor.

    All fields are required; unknown fields are rejected
    (``extra="forbid"``).
    """

    model_config = ConfigDict(extra="forbid")

    # Identifier and human-readable name of the task.
    task_id: str
    name: str
    # Difficulty label; any string is accepted by this model.
    difficulty: str
    # Description of what the agent is expected to achieve.
    objective: str
    # Step limit for the task; must be at least 1.
    max_steps: int = Field(ge=1)
|
|
|
|
class RecallObservation(BaseModel):
    """Observable state exposed to the agent.

    All fields are required; unknown fields are rejected
    (``extra="forbid"``). The meaning of the mapping keys is defined by the
    environment that builds the observation, not by this model.
    """

    model_config = ConfigDict(extra="forbid")

    task_id: str
    phase: int
    # Text of the recall notice shown to the agent.
    recall_notice: str
    # Action names as plain strings (not validated against ActionType here).
    available_actions: List[str]
    # Two-level str -> str -> int mapping.
    # NOTE(review): presumably node id -> lot id -> quantity; confirm
    # against the environment that populates it.
    inventory: Dict[str, Dict[str, int]]
    discovered_shipments: Dict[str, List[str]]
    inspected_nodes: List[str]
    # Two-level mapping whose leaves are validated InspectionEvidence models.
    inspection_results: Dict[str, Dict[str, InspectionEvidence]]
    # Two-level mapping with unvalidated leaf values.
    trace_results: Dict[str, Dict[str, Any]]
    notified_nodes: List[str]
    # Same shape as ``inventory``.
    quarantined_inventory: Dict[str, Dict[str, int]]
    # Log entries as strings.
    history: List[str]
    # Both counters are validated to be non-negative.
    steps_taken: int = Field(ge=0)
    remaining_step_budget: int = Field(ge=0)
|
|
|
|
class StepInfo(BaseModel):
    """Structured info payload returned after each step.

    Extra fields are accepted and kept on the instance (``extra="allow"``).
    """

    model_config = ConfigDict(extra="allow")

    # Message describing the step outcome.
    message: str
    # Action name as a plain string (not validated against ActionType here).
    action_type: str
    # Optional score in [0.0, 1.0]; None when no score is attached.
    score: Optional[float] = Field(default=None, ge=0.0, le=1.0)
    # Named float contributions; a fresh empty dict per instance by default.
    reward_breakdown: Dict[str, float] = Field(default_factory=dict)
|
|
|
|
class EnvironmentState(BaseModel):
    """Full internal state for debugging and grading.

    All fields are required; unknown fields are rejected
    (``extra="forbid"``).
    """

    model_config = ConfigDict(extra="forbid")

    # Completion flag for the episode.
    done: bool
    # Descriptor of the task this state belongs to.
    task: TaskDefinition
    # Non-negative step counter.
    steps_taken: int = Field(ge=0)
    # Free-form mappings; their contents are not validated by this model.
    state_data: Dict[str, Any]
    ground_truth: Dict[str, Any]
|
|
|
|
class TaskGrade(BaseModel):
    """Deterministic grader output.

    All fields are required; unknown fields are rejected
    (``extra="forbid"``). The model does not cross-check ``steps_taken``
    against ``max_steps``.
    """

    model_config = ConfigDict(extra="forbid")

    task_id: str
    # Final score, validated to lie in [0.0, 1.0].
    score: float = Field(ge=0.0, le=1.0)
    success: bool
    # Steps used (>= 0) and the step limit (>= 1).
    steps_taken: int = Field(ge=0)
    max_steps: int = Field(ge=1)
    # Reward total as a float; no range constraint is applied here.
    reward_total: float
    # Free-form mapping; contents are not validated by this model.
    final_info: Dict[str, Any]
|
|