# Space: musharraf7/esctr-environment (status: Running)
# File size: 5,689 Bytes

"""
Pydantic models for the Enterprise Supply Chain & Tax Reconciliation Environment.

Defines the Action, Observation, and State types used for communication
between the agent and the environment. Designed for type-safe interaction
with an ERP-like tool suite.
"""

from typing import Any, Dict, List, Literal, Optional

from pydantic import BaseModel, ConfigDict, Field


# ---------------------------------------------------------------------------
# Action — what the agent sends to the environment
# ---------------------------------------------------------------------------

class ESCTRAction(BaseModel):
    """Action sent by the agent to the ESCTR environment.

    The agent operates as an autonomous financial controller using 4 tool verbs:
      - 'query_database': Search procurement, accounts payable, shipping, or warehouse databases
      - 'read_document': Retrieve a specific contract, SLA, PO, or invoice by document_id
      - 'communicate_vendor': Send a negotiation message to the simulated vendor
      - 'submit_financial_decision': Submit the final ledger adjustment (terminal action)
    """

    # Payloads carrying keys that are not declared below fail validation
    # instead of being silently accepted.
    model_config = ConfigDict(extra="forbid")

    # The only required field (Ellipsis default); it selects the tool verb.
    action_type: Literal[
        "query_database",
        "read_document",
        "communicate_vendor",
        "submit_financial_decision",
    ] = Field(
        ...,
        description=(
            "The tool verb to execute. "
            "One of: 'query_database', 'read_document', 'communicate_vendor', "
            "or 'submit_financial_decision'."
        ),
    )

    # Every field below is an optional, verb-specific payload defaulting to
    # None. This model declares no cross-field validation, so pairing a
    # payload with the matching action_type is left to the consumer.
    query_parameters: Optional[Dict[str, Any]] = Field(
        None,
        description=(
            "Structured query for database lookups. "
            'Example: {"table": "shipping_logs", "tracking_id": "TRK-9921"}'
        ),
    )
    document_id: Optional[str] = Field(
        None,
        description=(
            "Unique alphanumeric identifier of the document to read "
            "(e.g. 'PO-2024-0055')."
        ),
    )
    message_content: Optional[str] = Field(
        None,
        description=(
            "Natural language message for vendor negotiation "
            "(used with 'communicate_vendor')."
        ),
    )
    adjustment_amount: Optional[float] = Field(
        None,
        description=(
            "The precise monetary adjustment to submit "
            "(used with 'submit_financial_decision'). "
            "Must be the exact floating-point value calculated from contract terms."
        ),
    )
    adjustment_reason: Optional[str] = Field(
        None,
        description=(
            "Brief explanation of the adjustment rationale "
            "(used with 'submit_financial_decision')."
        ),
    )


# ---------------------------------------------------------------------------
# Observation — what the environment returns after each step
# ---------------------------------------------------------------------------

class ESCTRObservation(BaseModel):
    """Observation returned by the ESCTR environment after each step.

    Provides structured telemetry to help the agent understand the
    outcome of its action and plan the next move.
    """

    # Undeclared keys are rejected at validation time.
    model_config = ConfigDict(extra="forbid")

    # --- Episode signal -----------------------------------------------------
    done: bool = Field(False, description="Whether the episode has ended")
    # The 0.0-1.0 range is stated in the description only; no numeric bound
    # is declared on the field itself.
    reward: float = Field(0.0, description="Reward signal for this step (0.0-1.0)")

    # --- Result of the last tool call ---------------------------------------
    system_response: str = Field(
        "",
        description=(
            "Output from the tool: database results, document text, "
            "vendor reply, or grader feedback."
        ),
    )
    last_action_status: Literal["success", "error"] = Field(
        "success",
        description="Whether the last action was valid and executed successfully.",
    )
    error_message: Optional[str] = Field(
        None,
        description="Diagnostic error message if last_action_status is 'error'.",
    )

    # --- Progress bookkeeping -----------------------------------------------
    current_step: int = Field(
        0,
        description="Current step number within the episode (0-indexed at reset).",
    )
    max_steps: int = Field(15, description="Maximum steps allowed for this task.")
    accumulated_reward: float = Field(
        0.0,
        description="Total reward accumulated across all steps in this episode.",
    )

    # --- Task context -------------------------------------------------------
    task_name: str = Field("", description="Current task name.")
    # default_factory gives every instance its own fresh list / dict.
    available_tools: List[str] = Field(
        default_factory=list,
        description="List of tool verbs available in this task.",
    )
    metadata: Dict[str, Any] = Field(
        default_factory=dict,
        description="Additional structured metadata (scores, milestones, etc.).",
    )


# ---------------------------------------------------------------------------
# State — internal environment state (exposed via GET /state)
# ---------------------------------------------------------------------------

class ESCTRState(BaseModel):
    """Internal environment state for the ESCTR environment."""

    # Unlike the action/observation models in this file, extra keys are
    # accepted here rather than rejected.
    model_config = ConfigDict(extra="allow")

    # Episode identity and progress.
    episode_id: Optional[str] = Field(None, description="Current episode ID")
    # ge=0: a negative step count fails validation.
    step_count: int = Field(0, ge=0, description="Steps taken in current episode")
    task_name: str = Field("", description="Current task name")
    seed: int = Field(0, description="Seed used for procedural generation")

    # Scoring and outcome tracking.
    accumulated_reward: float = Field(0.0, description="Total reward accumulated")
    outcome_submitted: bool = Field(
        False,
        description="Whether final decision was submitted",
    )
    # default_factory gives every instance its own fresh list.
    milestones_hit: List[str] = Field(
        default_factory=list,
        description=(
            "Trajectory milestones achieved "
            "(e.g. 'retrieved_po', 'retrieved_sla')."
        ),
    )
    best_score: float = Field(0.0, description="Best score achieved")