"""
Pydantic models for the Enterprise Supply Chain & Tax Reconciliation Environment.
Defines the Action, Observation, and State types used for communication
between the agent and the environment. Designed for type-safe interaction
with an ERP-like tool suite.
"""
from typing import Any, Dict, List, Literal, Optional
from pydantic import BaseModel, ConfigDict, Field
# ---------------------------------------------------------------------------
# Action — what the agent sends to the environment
# ---------------------------------------------------------------------------
class ESCTRAction(BaseModel):
    """Action sent by the agent to the ESCTR environment.
    The agent operates as an autonomous financial controller using 4 tool verbs:
    - 'query_database': Search procurement, accounts payable, shipping, or warehouse databases
    - 'read_document': Retrieve a specific contract, SLA, PO, or invoice by document_id
    - 'communicate_vendor': Send a negotiation message to the simulated vendor
    - 'submit_financial_decision': Submit the final ledger adjustment (terminal action)
    """

    # NOTE: the docstring above is kept word-for-word; pydantic uses the class
    # docstring as the model description in the generated JSON schema.

    # Unknown keys in an incoming payload are a validation error.
    model_config = ConfigDict(extra="forbid")

    # --- Required: which tool verb to run ---------------------------------
    action_type: Literal[
        "query_database", "read_document",
        "communicate_vendor", "submit_financial_decision",
    ] = Field(
        ...,
        description=(
            "The tool verb to execute. "
            "One of: 'query_database', 'read_document', "
            "'communicate_vendor', or 'submit_financial_decision'."
        ),
    )

    # --- Optional per-verb arguments (all default to None) ----------------
    # The model itself does not check that the argument matching the chosen
    # verb is present.
    query_parameters: Optional[Dict[str, Any]] = Field(
        None,
        description=(
            "Structured query for database lookups. "
            "Example: "
            '{"table": "shipping_logs", "tracking_id": "TRK-9921"}'
        ),
    )
    document_id: Optional[str] = Field(
        None,
        description=(
            "Unique alphanumeric identifier of the document to read "
            "(e.g. 'PO-2024-0055')."
        ),
    )
    message_content: Optional[str] = Field(
        None,
        description=(
            "Natural language message for vendor negotiation "
            "(used with 'communicate_vendor')."
        ),
    )
    adjustment_amount: Optional[float] = Field(
        None,
        description=(
            "The precise monetary adjustment to submit "
            "(used with 'submit_financial_decision'). "
            "Must be the exact floating-point value calculated from contract terms."
        ),
    )
    adjustment_reason: Optional[str] = Field(
        None,
        description=(
            "Brief explanation of the adjustment rationale "
            "(used with 'submit_financial_decision')."
        ),
    )
# ---------------------------------------------------------------------------
# Observation — what the environment returns after each step
# ---------------------------------------------------------------------------
class ESCTRObservation(BaseModel):
    """Observation returned by the ESCTR environment after each step.
    Provides structured telemetry to help the agent understand the
    outcome of its action and plan the next move.
    """

    # NOTE: the docstring above is kept word-for-word; pydantic uses the class
    # docstring as the model description in the generated JSON schema.

    # Unknown keys are a validation error.
    model_config = ConfigDict(extra="forbid")

    # --- Episode signal ----------------------------------------------------
    done: bool = Field(False, description="Whether the episode has ended")
    # The "(0.0-1.0)" range is stated in the description only; no numeric
    # bound is declared on the field.
    reward: float = Field(
        0.0,
        description="Reward signal for this step (0.0-1.0)",
    )

    # --- Result of the last action ----------------------------------------
    system_response: str = Field(
        "",
        description=(
            "Output from the tool: database results, document text, "
            "vendor reply, or grader feedback."
        ),
    )
    last_action_status: Literal["success", "error"] = Field(
        "success",
        description=(
            "Whether the last action was valid and executed successfully."
        ),
    )
    error_message: Optional[str] = Field(
        None,
        description=(
            "Diagnostic error message if last_action_status is 'error'."
        ),
    )

    # --- Progress counters -------------------------------------------------
    current_step: int = Field(
        0,
        description=(
            "Current step number within the episode (0-indexed at reset)."
        ),
    )
    max_steps: int = Field(
        15,
        description="Maximum steps allowed for this task.",
    )
    accumulated_reward: float = Field(
        0.0,
        description=(
            "Total reward accumulated across all steps in this episode."
        ),
    )

    # --- Task context ------------------------------------------------------
    task_name: str = Field("", description="Current task name.")
    # Factories give every instance its own fresh list / dict.
    available_tools: List[str] = Field(
        description="List of tool verbs available in this task.",
        default_factory=list,
    )
    metadata: Dict[str, Any] = Field(
        description=(
            "Additional structured metadata (scores, milestones, etc.)."
        ),
        default_factory=dict,
    )
# ---------------------------------------------------------------------------
# State — internal environment state (exposed via GET /state)
# ---------------------------------------------------------------------------
class ESCTRState(BaseModel):
    """Internal environment state for the ESCTR environment."""

    # NOTE: the docstring above is kept word-for-word; pydantic uses the class
    # docstring as the model description in the generated JSON schema.

    # Unlike the action/observation models, extra keys are accepted here.
    model_config = ConfigDict(extra="allow")

    # --- Episode identity --------------------------------------------------
    episode_id: Optional[str] = Field(
        None,
        description="Current episode ID",
    )
    task_name: str = Field("", description="Current task name")
    seed: int = Field(
        0,
        description="Seed used for procedural generation",
    )

    # --- Progress ----------------------------------------------------------
    # The only bounded field in this model: negative counts are rejected.
    step_count: int = Field(
        0,
        ge=0,
        description="Steps taken in current episode",
    )
    outcome_submitted: bool = Field(
        False,
        description="Whether final decision was submitted",
    )
    milestones_hit: List[str] = Field(
        description=(
            "Trajectory milestones achieved "
            "(e.g. 'retrieved_po', 'retrieved_sla')."
        ),
        default_factory=list,
    )

    # --- Scoring -----------------------------------------------------------
    accumulated_reward: float = Field(
        0.0,
        description="Total reward accumulated",
    )
    best_score: float = Field(0.0, description="Best score achieved")
|