Spaces:

Arun-Sanjay
/

dispatchpulse

Sleeping

App Files Files Community

dispatchpulse / server /environment.py

Arun-Sanjay

Trim task_definitions.py to match minimal passing pattern

82713c7 about 1 month ago

raw

history blame contribute delete

11.9 kB

	"""DispatchPulse OpenEnv environment.

	Inherits from ``openenv.core.env_server.interfaces.Environment`` and implements
	the standard ``reset() / step() / state`` Gym-style API. The wire types
	``DispatchPulseAction`` and ``DispatchPulseObservation`` are defined in
	``models.py`` and inherit from the OpenEnv ``Action`` / ``Observation`` base
	classes.

	This is a thin wrapper around the in-process ``DispatchSimulation`` engine.
	"""

	from __future__ import annotations

	import os
	import sys
	from typing import Any, Optional
	from uuid import uuid4

	# Make project root importable when running as ``server.app:app`` from /app/env.
	# The root is computed as the parent of the directory that contains this file
	# and is put at the front of ``sys.path`` only when it is not already listed,
	# so repeated imports of this module do not add duplicate entries.
	_PKG_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
	if _PKG_ROOT not in sys.path:
	sys.path.insert(0, _PKG_ROOT)

	from openenv.core.env_server.interfaces import Environment

	from grader import grade_simulation
	from models import DispatchPulseAction, DispatchPulseObservation, DispatchPulseState
	from scenario_loader import VALID_TASKS, load_scenario
	from simulation import DispatchSimulation
	from text_view import render_dispatch_center

	# Re-export the task registry and grader symbols at module level so static
	# validators that scan server/environment.py for tasks can find them here
	# (same pattern as the SQL Repair passing submission where both TASKS and
	# grade_submission are accessible from server/environment.py).
	from task_definitions import ( # noqa: F401,E402
	TASKS,
	TaskDefinition,
	grade_submission,
	get_task,
	list_tasks,
	)

	# Task name and RNG seed used for the constructor's bootstrap episode and
	# whenever ``reset()`` is called without (or with an unrecognised) task / seed.
	DEFAULT_TASK = "easy"
	DEFAULT_SEED = 42


	class DispatchPulseEnvironment(
	Environment[DispatchPulseAction, DispatchPulseObservation, DispatchPulseState]
	):
	"""Emergency-dispatch OpenEnv environment.

	Each call to ``reset()`` starts a fresh episode for the chosen task.
	Calls to ``step(action)`` advance the simulation by one decision turn
	(which usually equals 1 minute of simulation time).

	Tasks: ``easy``, ``medium``, ``hard``.
	"""

	# Class-level flag that nothing inside this class reads.
	# NOTE(review): presumably consumed by the OpenEnv server framework to allow
	# several sessions at once -- confirm against the framework documentation.
	SUPPORTS_CONCURRENT_SESSIONS: bool = True

	# Set up per-episode bookkeeping (episode id, step counter, cumulative and
	# last-step reward) with default values, then immediately build a default
	# simulation via ``_bootstrap()``.
	def __init__(self) -> None:
	super().__init__()
	self.sim: Optional[DispatchSimulation] = None
	self.task_name: str = DEFAULT_TASK
	self.seed: int = DEFAULT_SEED
	self._episode_id: str = str(uuid4())
	self._step_count: int = 0
	self._cumulative_step_reward: float = 0.0
	self._last_step_reward: float = 0.0
	# Bootstrap so single-shot HTTP /step still works without an explicit reset
	self._bootstrap()

	# Build a simulation for DEFAULT_TASK / DEFAULT_SEED and zero the reward and
	# step counters. The episode id is left untouched. This is best-effort: any
	# exception is reported on stderr and leaves ``self.sim`` as None instead of
	# propagating to the caller.
	def _bootstrap(self) -> None:
	try:
	scenario = load_scenario(DEFAULT_TASK)
	self.sim = DispatchSimulation(scenario, seed=DEFAULT_SEED)
	self.task_name = DEFAULT_TASK
	self.seed = DEFAULT_SEED
	self._cumulative_step_reward = 0.0
	self._last_step_reward = 0.0
	self._step_count = 0
	except Exception as exc: # pragma: no cover
	print(f"[DispatchPulseEnvironment] bootstrap failed: {exc}", file=sys.stderr, flush=True)
	self.sim = None

	# ------------------------------------------------------------------
	# Environment API
	# ------------------------------------------------------------------

	# Start a new episode and return its first observation (info message "ready").
	#
	# seed:       RNG seed, coerced with ``int()``; DEFAULT_SEED when None.
	# episode_id: identifier to record; a fresh UUID4 string when None or empty.
	# task_name:  stripped and lower-cased; when missing or not in VALID_TASKS it
	#             silently falls back to DEFAULT_TASK (no error is reported).
	# **kwargs:   accepted but not used in this method.
	#
	# Unlike ``_bootstrap()``, exceptions from ``load_scenario`` or the
	# ``DispatchSimulation`` constructor are not caught here.
	def reset(
	self,
	seed: Optional[int] = None,
	episode_id: Optional[str] = None,
	task_name: Optional[str] = None,
	**kwargs: Any,
	) -> DispatchPulseObservation:
	chosen_task = (task_name or DEFAULT_TASK).strip().lower()
	if chosen_task not in VALID_TASKS:
	chosen_task = DEFAULT_TASK
	chosen_seed = int(seed) if seed is not None else DEFAULT_SEED

	scenario = load_scenario(chosen_task)
	self.sim = DispatchSimulation(scenario, seed=chosen_seed)
	self.task_name = chosen_task
	self.seed = chosen_seed
	self._episode_id = episode_id or str(uuid4())
	self._step_count = 0
	self._cumulative_step_reward = 0.0
	self._last_step_reward = 0.0
	return self._build_observation(info_message="ready", error=None)

	# Apply one action and return the resulting observation.
	#
	# Recognised action types: ``dispatch``, ``classify``, ``callback``, ``wait``
	# and ``view``. Anything else (including an empty type whose text could not
	# be parsed) yields a -0.05 reward and an "unknown action_type" error.
	# ``timeout_s`` and ``**kwargs`` are accepted but not used in this method.
	#
	# Problems are reported through the observation (``last_action_error`` and
	# ``metadata["error"]``) rather than raised.
	def step(
	self,
	action: DispatchPulseAction,
	timeout_s: Optional[float] = None,
	**kwargs: Any,
	) -> DispatchPulseObservation:
	# Lazily retry the default bootstrap if construction failed earlier.
	if self.sim is None:
	self._bootstrap()
	if self.sim is None:
	return self._build_observation(error="environment not initialised")

	# A finished episode is never advanced; the step counter is not touched.
	if self.sim.episode_done:
	return self._build_observation(error="episode already done")

	# From here on every call counts as a step, including invalid actions.
	self._step_count += 1
	action_type = (action.action_type or "").strip().lower()
	text_action = (action.text or "").strip()

	# Allow text-only actions: parse the text into structured fields
	# Parsed values only fill attributes that are currently None or "", so any
	# field the caller set explicitly wins. Note that this writes onto the
	# caller's ``action`` object.
	if not action_type and text_action:
	parsed = _parse_text_action(text_action)
	if parsed is not None:
	action_type, fields = parsed
	for key, value in fields.items():
	if getattr(action, key, None) in (None, ""):
	setattr(action, key, value)

	step_reward = 0.0
	info_message: Optional[str] = None
	error: Optional[str] = None

	# dispatch / classify / callback each validate their required fields, call
	# the matching simulation method for ``(reward, message)`` and are followed
	# by ``advance_time(1)``. A failed validation sets ``error`` and leaves the
	# reward at 0.0.
	# NOTE(review): confirm whether ``advance_time(1)`` also runs when the
	# validation failed, or only after a successful simulation call.
	try:
	if action_type == "dispatch":
	if not action.call_id or not action.unit_id:
	error = "dispatch requires call_id and unit_id"
	else:
	step_reward, info_message = self.sim.dispatch(
	call_id=action.call_id,
	unit_id=action.unit_id,
	hospital_id=action.hospital_id,
	)
	self.sim.advance_time(1)
	elif action_type == "classify":
	if not action.call_id or action.severity is None:
	error = "classify requires call_id and severity (1-5)"
	else:
	step_reward, info_message = self.sim.classify(
	call_id=action.call_id, severity=int(action.severity)
	)
	self.sim.advance_time(1)
	elif action_type == "callback":
	if not action.call_id:
	error = "callback requires call_id"
	else:
	step_reward, info_message = self.sim.callback(
	call_id=action.call_id, question=action.message or ""
	)
	self.sim.advance_time(1)
	elif action_type == "wait":
	# Missing or zero minutes means 1; the value is then clamped to
	# [1, config.max_wait_step_minutes].
	minutes = int(action.minutes or 1)
	minutes = max(1, min(minutes, self.sim.config.max_wait_step_minutes))
	# Count pending calls before the clock moves: the penalty is
	# -0.005 per minute waited per call that was pending at that point.
	pending_before = len(self.sim.get_pending_calls())
	self.sim.advance_time(minutes)
	step_reward = -0.005 * minutes * pending_before
	info_message = f"waited {minutes} minute(s)"
	elif action_type == "view":
	# Viewing neither advances the clock nor changes the reward.
	step_reward = 0.0
	info_message = "view (no time cost)"
	else:
	step_reward = -0.05
	error = f"unknown action_type: {action_type!r}"
	except Exception as exc: # pragma: no cover - defensive
	# Any exception raised above becomes an error string plus the same -0.05
	# penalty as an unknown action, instead of propagating.
	error = f"{type(exc).__name__}: {exc}"
	step_reward = -0.05

	self._cumulative_step_reward += step_reward
	self._last_step_reward = step_reward
	return self._build_observation(info_message=info_message, error=error)

	# Snapshot of the episode bookkeeping. Without a simulation only the episode
	# id, step count and task name are supplied; otherwise the call/unit counters
	# are read from the simulation and ``running_reward`` carries the cumulative
	# per-step reward.
	@property
	def state(self) -> DispatchPulseState:
	if self.sim is None:
	return DispatchPulseState(
	episode_id=self._episode_id,
	step_count=self._step_count,
	task_name=self.task_name,
	)
	return DispatchPulseState(
	episode_id=self._episode_id,
	step_count=self._step_count,
	current_time=self.sim.current_time,
	episode_done=self.sim.episode_done,
	total_calls=self.sim.total_calls(),
	calls_dispatched=len(self.sim.dispatches),
	calls_completed=len(self.sim.completed_calls),
	calls_timed_out=len(self.sim.timed_out_calls),
	calls_pending=len(self.sim.get_pending_calls()),
	units_available=len(self.sim.get_available_units()),
	running_reward=self._cumulative_step_reward,
	task_name=self.task_name,
	)

	# ------------------------------------------------------------------
	# Helpers
	# ------------------------------------------------------------------

	# Assemble an observation from the current simulation state.
	#
	# info_message: optional note, copied to ``info_message`` and metadata["info"].
	# error:        optional error text, copied to ``last_action_error`` and
	#               metadata["error"].
	#
	# Without a simulation a fixed "not initialised" observation with done=True
	# is returned and both arguments are ignored.
	def _build_observation(
	self,
	info_message: Optional[str] = None,
	error: Optional[str] = None,
	) -> DispatchPulseObservation:
	if self.sim is None:
	return DispatchPulseObservation(
	done=True,
	reward=0.0,
	text="ERROR: environment not initialised. Call reset first.",
	last_action_error="not_initialised",
	)

	text = render_dispatch_center(self.sim, self.task_name)
	done = bool(self.sim.episode_done)
	if done:
	# On a finished episode the reward is the grader's ``total`` and the whole
	# grade is exposed under metadata["final_reward"].
	final = grade_simulation(self.sim)
	reward_value: float = float(final.total)
	metadata = {
	"final_reward": final.model_dump(),
	"task": self.task_name,
	"cumulative_step_reward": float(self._cumulative_step_reward),
	}
	else:
	# Report the per-step delta, not the running cumulative. The
	# cumulative is still available via ``state.running_reward`` and
	# ``metadata``, but the observation's reward field matches the standard
	# Gym/OpenEnv semantics of "reward for this step only".
	reward_value = float(self._last_step_reward)
	metadata = {
	"task": self.task_name,
	"cumulative_step_reward": float(self._cumulative_step_reward),
	}

	# Only non-empty messages are mirrored into the metadata.
	if info_message:
	metadata["info"] = info_message
	if error:
	metadata["error"] = error

	return DispatchPulseObservation(
	done=done,
	reward=reward_value,
	text=text,
	current_time=self.sim.current_time,
	time_limit=self.sim.config.time_limit_minutes,
	calls_pending=len(self.sim.get_pending_calls()),
	units_available=len(self.sim.get_available_units()),
	calls_completed=len(self.sim.completed_calls),
	calls_timed_out=len(self.sim.timed_out_calls),
	total_calls=self.sim.total_calls(),
	last_action_error=error,
	info_message=info_message,
	metadata=metadata,
	)


	def _parse_text_action(text: str):
	"""Parse a text action like ``dispatch CALL-001 ALS-1 H1`` into fields.

	Returns ``(action_type, kwargs_dict)`` or None on parse failure.
	"""
	parts = text.strip().split(maxsplit=4)
	if not parts:
	return None
	head = parts[0].lower()
	if head == "dispatch" and len(parts) >= 3:
	out = {"call_id": parts[1], "unit_id": parts[2]}
	if len(parts) >= 4 and parts[3]:
	out["hospital_id"] = parts[3]
	return "dispatch", out
	if head == "classify" and len(parts) >= 3:
	try:
	sev = int(parts[2])
	except ValueError:
	return None
	return "classify", {"call_id": parts[1], "severity": sev}
	if head == "callback" and len(parts) >= 2:
	return "callback", {
	"call_id": parts[1],
	"message": " ".join(parts[2:]) if len(parts) > 2 else "",
	}
	if head == "wait":
	try:
	mins = int(parts[1]) if len(parts) > 1 else 1
	except ValueError:
	mins = 1
	return "wait", {"minutes": mins}
	if head in ("view", "view_dispatch_center"):
	return "view", {}
	return None