| """Typed models for the RecallTrace OpenEnv environment.""" | |
| from __future__ import annotations | |
| from enum import Enum | |
| from typing import Any, Dict, List, Optional | |
| from pydantic import BaseModel, ConfigDict, Field | |
class ActionType(str, Enum):
    # Closed set of action kinds. Mixing in ``str`` makes every member
    # compare equal to (and serialize as) its lowercase string value.
    # No docstring is set here on purpose: the original class had none.

    INSPECT_NODE = "inspect_node"
    TRACE_LOT = "trace_lot"
    QUARANTINE = "quarantine"
    NOTIFY = "notify"
    FINALIZE = "finalize"
class RecallAction(BaseModel):
    """Action submitted by an agent."""

    # Payloads carrying keys not declared below fail validation.
    model_config = ConfigDict(extra="forbid")

    # The only required field; must be an ActionType member or its value.
    type: ActionType

    # Optional arguments; each is None when omitted. Which action types
    # actually consume which argument is not decided in this model.
    node_id: Optional[str] = Field(default=None)
    lot_id: Optional[str] = Field(default=None)
    # When supplied, the quantity must be at least 1.
    quantity: Optional[int] = Field(None, ge=1)
    rationale: Optional[str] = Field(default=None)
class RewardSignal(BaseModel):
    """Typed reward payload."""

    # Undeclared keys are rejected at validation time.
    model_config = ConfigDict(extra="forbid")

    # Required scalar, validated to lie in the closed interval [-1.0, 1.0].
    value: float = Field(..., ge=-1.0, le=1.0)
    # Required free-text explanation.
    reason: str
    # Named float parts; a fresh empty dict per instance when omitted.
    # NOTE(review): nothing here checks that the parts sum to ``value``.
    components: Dict[str, float] = Field(default_factory=dict)
class InspectionEvidence(BaseModel):
    """Evidence revealed after inspecting a node."""

    # Unlike the strict models in this file, undeclared keys are accepted
    # and kept on the instance.
    model_config = ConfigDict(extra="allow")

    status: str
    # Required, non-negative.
    unsafe_quantity: int = Field(..., ge=0)
    evidence: str
    # Optional; non-negative when present, None when omitted.
    safe_quantity: Optional[int] = Field(None, ge=0)
class TaskDefinition(BaseModel):
    """Static task descriptor."""

    # Undeclared keys are rejected at validation time.
    model_config = ConfigDict(extra="forbid")

    # Four required strings. ``difficulty`` is free-form here: no set of
    # allowed values is enforced by this model.
    task_id: str
    name: str
    difficulty: str
    objective: str

    # Required step limit; must be at least 1.
    max_steps: int = Field(..., ge=1)
class RecallObservation(BaseModel):
    """Observable state exposed to the agent."""

    # Undeclared keys are rejected at validation time.
    model_config = ConfigDict(extra="forbid")

    # --- Task context (all required) ---
    task_id: str
    phase: int
    recall_notice: str
    # Plain strings; presumably ActionType values -- TODO confirm with producer.
    available_actions: List[str]

    # --- Nested str -> str -> int mapping ---
    # Presumably node id -> lot id -> quantity; verify against the environment.
    inventory: Dict[str, Dict[str, int]]

    # --- What the agent has uncovered so far ---
    discovered_shipments: Dict[str, List[str]]
    inspected_nodes: List[str]
    # Two-level mapping whose leaves are validated InspectionEvidence models.
    inspection_results: Dict[str, Dict[str, InspectionEvidence]]
    # Leaf payloads are untyped (Any), so their shape is not validated here.
    trace_results: Dict[str, Dict[str, Any]]

    # --- Actions already taken ---
    notified_nodes: List[str]
    # Same str -> str -> int shape as ``inventory``.
    quarantined_inventory: Dict[str, Dict[str, int]]
    history: List[str]

    # --- Step accounting; both counters must be non-negative ---
    steps_taken: int = Field(..., ge=0)
    remaining_step_budget: int = Field(..., ge=0)
class StepInfo(BaseModel):
    """Structured info payload returned after each step."""

    # Undeclared keys are accepted and kept on the instance.
    model_config = ConfigDict(extra="allow")

    message: str
    # Stored as a plain string; not validated against ActionType here.
    action_type: str
    # Optional; when present it must lie in the closed interval [0.0, 1.0].
    score: Optional[float] = Field(None, ge=0.0, le=1.0)
    # Named float parts; a fresh empty dict per instance when omitted.
    reward_breakdown: Dict[str, float] = Field(default_factory=dict)
class EnvironmentState(BaseModel):
    """Full internal state for debugging and grading."""

    # Undeclared keys are rejected at validation time.
    model_config = ConfigDict(extra="forbid")

    done: bool
    # Nested model, validated as a TaskDefinition.
    task: TaskDefinition
    # Required, non-negative.
    steps_taken: int = Field(..., ge=0)

    # Both payloads are string-keyed dicts with unvalidated (Any) values.
    state_data: Dict[str, Any]
    ground_truth: Dict[str, Any]
class TaskGrade(BaseModel):
    """Deterministic grader output."""

    # Undeclared keys are rejected at validation time.
    model_config = ConfigDict(extra="forbid")

    task_id: str
    # Required; must lie in the closed interval [0.0, 1.0].
    score: float = Field(..., ge=0.0, le=1.0)
    success: bool

    # Step accounting: steps_taken >= 0 and max_steps >= 1.
    # NOTE(review): no cross-field check ties steps_taken to max_steps.
    steps_taken: int = Field(..., ge=0)
    max_steps: int = Field(..., ge=1)

    # Unbounded float, unlike ``score``.
    reward_total: float
    # String-keyed dict with unvalidated (Any) values.
    final_info: Dict[str, Any]