Spaces:

musharraf7
/

esctr-environment

Running

App Files Files Community

esctr-environment / server /models.py

musharraf7

feat: ESCTR pivot — Enterprise Supply Chain & Tax Reconciliation

a363048 21 days ago

raw

history blame contribute delete

5.69 kB

	"""
	Pydantic models for the Enterprise Supply Chain & Tax Reconciliation Environment.

	Defines the Action, Observation, and State types used for communication
	between the agent and the environment. Designed for type-safe interaction
	with an ERP-like tool suite.
	"""

	from typing import Any, Dict, List, Literal, Optional

	from pydantic import BaseModel, ConfigDict, Field


	# ---------------------------------------------------------------------------
	# Action — what the agent sends to the environment
	# ---------------------------------------------------------------------------

	class ESCTRAction(BaseModel):
	"""Action sent by the agent to the ESCTR environment.

	The agent operates as an autonomous financial controller using 4 tool verbs:
	- 'query_database': Search procurement, accounts payable, shipping, or warehouse databases
	- 'read_document': Retrieve a specific contract, SLA, PO, or invoice by document_id
	- 'communicate_vendor': Send a negotiation message to the simulated vendor
	- 'submit_financial_decision': Submit the final ledger adjustment (terminal action)
	"""

	model_config = ConfigDict(extra="forbid")

	action_type: Literal[
	"query_database",
	"read_document",
	"communicate_vendor",
	"submit_financial_decision",
	] = Field(
	...,
	description=(
	"The tool verb to execute. One of: 'query_database', 'read_document', "
	"'communicate_vendor', or 'submit_financial_decision'."
	),
	)
	query_parameters: Optional[Dict[str, Any]] = Field(
	default=None,
	description=(
	"Structured query for database lookups. Example: "
	'{"table": "shipping_logs", "tracking_id": "TRK-9921"}'
	),
	)
	document_id: Optional[str] = Field(
	default=None,
	description="Unique alphanumeric identifier of the document to read (e.g. 'PO-2024-0055').",
	)
	message_content: Optional[str] = Field(
	default=None,
	description="Natural language message for vendor negotiation (used with 'communicate_vendor').",
	)
	adjustment_amount: Optional[float] = Field(
	default=None,
	description=(
	"The precise monetary adjustment to submit (used with 'submit_financial_decision'). "
	"Must be the exact floating-point value calculated from contract terms."
	),
	)
	adjustment_reason: Optional[str] = Field(
	default=None,
	description="Brief explanation of the adjustment rationale (used with 'submit_financial_decision').",
	)


	# ---------------------------------------------------------------------------
	# Observation — what the environment returns after each step
	# ---------------------------------------------------------------------------

	class ESCTRObservation(BaseModel):
	"""Observation returned by the ESCTR environment after each step.

	Provides structured telemetry to help the agent understand the
	outcome of its action and plan the next move.
	"""

	model_config = ConfigDict(extra="forbid")

	done: bool = Field(default=False, description="Whether the episode has ended")
	reward: float = Field(default=0.0, description="Reward signal for this step (0.0-1.0)")
	system_response: str = Field(
	default="",
	description="Output from the tool: database results, document text, vendor reply, or grader feedback.",
	)
	last_action_status: Literal["success", "error"] = Field(
	default="success",
	description="Whether the last action was valid and executed successfully.",
	)
	error_message: Optional[str] = Field(
	default=None,
	description="Diagnostic error message if last_action_status is 'error'.",
	)
	current_step: int = Field(
	default=0,
	description="Current step number within the episode (0-indexed at reset).",
	)
	max_steps: int = Field(
	default=15,
	description="Maximum steps allowed for this task.",
	)
	accumulated_reward: float = Field(
	default=0.0,
	description="Total reward accumulated across all steps in this episode.",
	)
	task_name: str = Field(default="", description="Current task name.")
	available_tools: List[str] = Field(
	default_factory=list,
	description="List of tool verbs available in this task.",
	)
	metadata: Dict[str, Any] = Field(
	default_factory=dict,
	description="Additional structured metadata (scores, milestones, etc.).",
	)


	# ---------------------------------------------------------------------------
	# State — internal environment state (exposed via GET /state)
	# ---------------------------------------------------------------------------

	class ESCTRState(BaseModel):
	"""Internal environment state for the ESCTR environment."""

	model_config = ConfigDict(extra="allow")

	episode_id: Optional[str] = Field(default=None, description="Current episode ID")
	step_count: int = Field(default=0, ge=0, description="Steps taken in current episode")
	task_name: str = Field(default="", description="Current task name")
	seed: int = Field(default=0, description="Seed used for procedural generation")
	accumulated_reward: float = Field(default=0.0, description="Total reward accumulated")
	outcome_submitted: bool = Field(default=False, description="Whether final decision was submitted")
	milestones_hit: List[str] = Field(
	default_factory=list,
	description="Trajectory milestones achieved (e.g. 'retrieved_po', 'retrieved_sla').",
	)
	best_score: float = Field(default=0.0, description="Best score achieved")