Spaces:
Running
Running
| """ | |
| Pydantic models for the Enterprise Supply Chain & Tax Reconciliation Environment. | |
| Defines the Action, Observation, and State types used for communication | |
| between the agent and the environment. Designed for type-safe interaction | |
| with an ERP-like tool suite. | |
| """ | |
| from typing import Any, Dict, List, Literal, Optional | |
| from pydantic import BaseModel, ConfigDict, Field | |
| # --------------------------------------------------------------------------- | |
| # Action — what the agent sends to the environment | |
| # --------------------------------------------------------------------------- | |
# Closed set of tool verbs accepted in ``ESCTRAction.action_type``.
_ToolVerb = Literal[
    "query_database",
    "read_document",
    "communicate_vendor",
    "submit_financial_decision",
]


class ESCTRAction(BaseModel):
    """A single tool call issued by the agent to the ESCTR environment.

    The agent acts as an autonomous financial controller and selects one of
    four tool verbs through ``action_type``:

    - 'query_database': search procurement, accounts payable, shipping, or
      warehouse databases
    - 'read_document': retrieve a specific contract, SLA, PO, or invoice by
      ``document_id``
    - 'communicate_vendor': send a negotiation message to the simulated vendor
    - 'submit_financial_decision': submit the final ledger adjustment
      (terminal action)

    Only ``action_type`` is required. Every other field is optional and
    defaults to ``None``; this model does not check that the payload fields
    matching the chosen verb are actually populated.
    """

    # Unknown keys are rejected rather than silently ignored.
    model_config = ConfigDict(extra="forbid")

    # Required: the Ellipsis default marks the field as mandatory.
    action_type: _ToolVerb = Field(
        default=...,
        description=(
            "The tool verb to execute. One of: "
            "'query_database', 'read_document', "
            "'communicate_vendor', or 'submit_financial_decision'."
        ),
    )

    # Free-form payload; its keys are not validated here.
    query_parameters: Optional[Dict[str, Any]] = Field(
        description=(
            "Structured query for database lookups. "
            "Example: "
            '{"table": "shipping_logs", "tracking_id": "TRK-9921"}'
        ),
        default=None,
    )

    document_id: Optional[str] = Field(
        description=(
            "Unique alphanumeric identifier of the document to read "
            "(e.g. 'PO-2024-0055')."
        ),
        default=None,
    )

    message_content: Optional[str] = Field(
        description=(
            "Natural language message for vendor negotiation "
            "(used with 'communicate_vendor')."
        ),
        default=None,
    )

    adjustment_amount: Optional[float] = Field(
        description=(
            "The precise monetary adjustment to submit "
            "(used with 'submit_financial_decision'). "
            "Must be the exact floating-point value calculated from contract terms."
        ),
        default=None,
    )

    adjustment_reason: Optional[str] = Field(
        description=(
            "Brief explanation of the adjustment rationale "
            "(used with 'submit_financial_decision')."
        ),
        default=None,
    )
| # --------------------------------------------------------------------------- | |
| # Observation — what the environment returns after each step | |
| # --------------------------------------------------------------------------- | |
class ESCTRObservation(BaseModel):
    """Result handed back by the ESCTR environment after every step.

    Carries structured telemetry so the agent can interpret the outcome of
    its last action and decide what to do next. Every field has a default,
    so an observation can be constructed with no arguments.
    """

    # Unknown keys are rejected rather than silently ignored.
    model_config = ConfigDict(extra="forbid")

    done: bool = Field(
        description="Whether the episode has ended",
        default=False,
    )

    # The 0.0-1.0 range is stated in the description only; no numeric bound
    # is enforced on this field.
    reward: float = Field(
        description="Reward signal for this step (0.0-1.0)",
        default=0.0,
    )

    system_response: str = Field(
        description=(
            "Output from the tool: database results, document text, "
            "vendor reply, or grader feedback."
        ),
        default="",
    )

    last_action_status: Literal["success", "error"] = Field(
        description="Whether the last action was valid and executed successfully.",
        default="success",
    )

    error_message: Optional[str] = Field(
        description="Diagnostic error message if last_action_status is 'error'.",
        default=None,
    )

    current_step: int = Field(
        description="Current step number within the episode (0-indexed at reset).",
        default=0,
    )

    max_steps: int = Field(
        description="Maximum steps allowed for this task.",
        default=15,
    )

    accumulated_reward: float = Field(
        description="Total reward accumulated across all steps in this episode.",
        default=0.0,
    )

    task_name: str = Field(
        description="Current task name.",
        default="",
    )

    # Factories give each instance its own fresh list / dict.
    available_tools: List[str] = Field(
        description="List of tool verbs available in this task.",
        default_factory=list,
    )

    metadata: Dict[str, Any] = Field(
        description="Additional structured metadata (scores, milestones, etc.).",
        default_factory=dict,
    )
| # --------------------------------------------------------------------------- | |
| # State — internal environment state (exposed via GET /state) | |
| # --------------------------------------------------------------------------- | |
class ESCTRState(BaseModel):
    """Bookkeeping the ESCTR environment keeps for the current episode.

    Every field has a default, so a blank state can be built with no
    arguments.
    """

    # Unlike the action and observation models, extra keys are accepted here.
    model_config = ConfigDict(extra="allow")

    episode_id: Optional[str] = Field(
        description="Current episode ID",
        default=None,
    )

    # The only numerically constrained field: negative counts are rejected.
    step_count: int = Field(
        description="Steps taken in current episode",
        default=0,
        ge=0,
    )

    task_name: str = Field(
        description="Current task name",
        default="",
    )

    seed: int = Field(
        description="Seed used for procedural generation",
        default=0,
    )

    accumulated_reward: float = Field(
        description="Total reward accumulated",
        default=0.0,
    )

    outcome_submitted: bool = Field(
        description="Whether final decision was submitted",
        default=False,
    )

    # Factory gives each state instance its own fresh list.
    milestones_hit: List[str] = Field(
        description=(
            "Trajectory milestones achieved "
            "(e.g. 'retrieved_po', 'retrieved_sla')."
        ),
        default_factory=list,
    )

    best_score: float = Field(
        description="Best score achieved",
        default=0.0,
    )