Spaces:

Majen
/

new_model

Sleeping

Ayu

feat: RecallTrace Tasks 1-9 complete - belief calibration + curriculum + plots

d19137b about 1 month ago

2.96 kB

	"""Typed models for the RecallTrace OpenEnv environment."""

	from __future__ import annotations

	from enum import Enum
	from typing import Any, Dict, List, Optional

	from pydantic import BaseModel, ConfigDict, Field


	class ActionType(str, Enum):
	    """Closed set of action kinds an agent can submit.

	    Because the enum also subclasses ``str``, every member compares equal to
	    its plain string value (e.g. ``ActionType.NOTIFY == "notify"``) and can be
	    looked up from that string with ``ActionType("notify")``.
	    """

	    INSPECT_NODE = "inspect_node"
	    TRACE_LOT = "trace_lot"
	    QUARANTINE = "quarantine"
	    NOTIFY = "notify"
	    FINALIZE = "finalize"


	class RecallAction(BaseModel):
	    """Action submitted by an agent.

	    Only ``type`` is required; every other field defaults to ``None``.
	    Unknown fields are rejected (``extra="forbid"``).

	    NOTE(review): this model does not enforce which optional fields must
	    accompany a given action type -- presumably the environment checks that
	    when the action is applied; confirm against the caller.
	    """

	    model_config = ConfigDict(extra="forbid")

	    # Which kind of action is being requested.
	    type: ActionType
	    # Optional target identifiers; presumably a supply-chain node and a
	    # product lot -- TODO confirm against the environment implementation.
	    node_id: Optional[str] = None
	    lot_id: Optional[str] = None
	    # Optional amount; when supplied it must be at least 1.
	    quantity: Optional[int] = Field(default=None, ge=1)
	    # Optional free-text justification supplied with the action.
	    rationale: Optional[str] = None


	class RewardSignal(BaseModel):
	    """Typed reward payload.

	    Unknown fields are rejected (``extra="forbid"``).
	    """

	    model_config = ConfigDict(extra="forbid")

	    # Scalar reward, validated to lie in the closed interval [-1.0, 1.0].
	    value: float = Field(ge=-1.0, le=1.0)
	    # Required text explaining the reward.
	    reason: str
	    # Named float contributions; defaults to a fresh empty dict per instance.
	    # NOTE(review): nothing here checks that the components sum to ``value``.
	    components: Dict[str, float] = Field(default_factory=dict)


	class InspectionEvidence(BaseModel):
	    """Evidence revealed after inspecting a node.

	    Unlike most models in this module, extra fields are accepted and kept
	    (``extra="allow"``), so payloads may carry additional keys beyond the ones
	    declared here.
	    """

	    model_config = ConfigDict(extra="allow")

	    # Free-form status string; the set of valid values is not constrained here.
	    status: str
	    # Required, non-negative count of unsafe units.
	    unsafe_quantity: int = Field(ge=0)
	    # Required textual description of the evidence.
	    evidence: str
	    # Optional, non-negative count of safe units; ``None`` when not reported.
	    safe_quantity: Optional[int] = Field(default=None, ge=0)


	class TaskDefinition(BaseModel):
	    """Static task descriptor.

	    All fields are required and unknown fields are rejected
	    (``extra="forbid"``).
	    """

	    model_config = ConfigDict(extra="forbid")

	    task_id: str
	    name: str
	    # Plain string; not validated against a fixed set of difficulty levels.
	    difficulty: str
	    objective: str
	    # Step limit for the task; must be at least 1.
	    max_steps: int = Field(ge=1)


	class RecallObservation(BaseModel):
	    """Observable state exposed to the agent.

	    All fields are required and unknown fields are rejected
	    (``extra="forbid"``).

	    NOTE(review): the meaning of the keys in the nested dictionaries is not
	    visible in this module; the per-field comments below mark those readings
	    as assumptions to confirm against the environment implementation.
	    """

	    model_config = ConfigDict(extra="forbid")

	    task_id: str
	    # Integer phase indicator; no range constraint is applied here.
	    phase: int
	    recall_notice: str
	    # Action names as plain strings (not typed as ``ActionType``).
	    available_actions: List[str]
	    # Two-level mapping to integer counts; presumably node -> lot -> quantity
	    # -- TODO confirm.
	    inventory: Dict[str, Dict[str, int]]
	    # Mapping from a string key to a list of strings; presumably source node
	    # -> destination nodes -- TODO confirm.
	    discovered_shipments: Dict[str, List[str]]
	    inspected_nodes: List[str]
	    # Two-level mapping to ``InspectionEvidence`` records; presumably
	    # node -> lot -> evidence -- TODO confirm.
	    inspection_results: Dict[str, Dict[str, InspectionEvidence]]
	    # Mapping from a string key to an untyped result dictionary.
	    trace_results: Dict[str, Dict[str, Any]]
	    notified_nodes: List[str]
	    # Same shape as ``inventory``; presumably node -> lot -> quarantined
	    # quantity -- TODO confirm.
	    quarantined_inventory: Dict[str, Dict[str, int]]
	    # Ordered list of history entries as strings.
	    history: List[str]
	    # Both counters are validated to be non-negative.
	    steps_taken: int = Field(ge=0)
	    remaining_step_budget: int = Field(ge=0)


	class StepInfo(BaseModel):
	    """Structured info payload returned after each step.

	    Extra fields are accepted and kept (``extra="allow"``), so the payload
	    may carry additional keys beyond the ones declared here.
	    """

	    model_config = ConfigDict(extra="allow")

	    message: str
	    # Action name as a plain string (not typed as ``ActionType``).
	    action_type: str
	    # Optional score in the closed interval [0.0, 1.0]; ``None`` when absent.
	    score: Optional[float] = Field(default=None, ge=0.0, le=1.0)
	    # Named float reward parts; defaults to a fresh empty dict per instance.
	    reward_breakdown: Dict[str, float] = Field(default_factory=dict)


	class EnvironmentState(BaseModel):
	    """Full internal state for debugging and grading.

	    All fields are required and unknown fields are rejected
	    (``extra="forbid"``).
	    """

	    model_config = ConfigDict(extra="forbid")

	    # Whether the episode has finished.
	    done: bool
	    # Descriptor of the task this state belongs to.
	    task: TaskDefinition
	    # Non-negative count of steps taken so far.
	    steps_taken: int = Field(ge=0)
	    # Untyped dictionaries; their schema is not defined in this module.
	    state_data: Dict[str, Any]
	    ground_truth: Dict[str, Any]


	class TaskGrade(BaseModel):
	    """Deterministic grader output.

	    All fields are required and unknown fields are rejected
	    (``extra="forbid"``).
	    """

	    model_config = ConfigDict(extra="forbid")

	    task_id: str
	    # Final score, validated to lie in the closed interval [0.0, 1.0].
	    score: float = Field(ge=0.0, le=1.0)
	    success: bool
	    # Non-negative number of steps used.
	    steps_taken: int = Field(ge=0)
	    # Step limit of the graded task; must be at least 1.
	    # NOTE(review): nothing here checks ``steps_taken <= max_steps``.
	    max_steps: int = Field(ge=1)
	    # Reward total as a float; unbounded in this model.
	    reward_total: float
	    # Untyped dictionary; its schema is not defined in this module.
	    final_info: Dict[str, Any]