Spaces:

muskanp
/

dispatch-triage

Sleeping

App Files Files Community

dispatch-triage / server /environment.py

muskanp

Upload folder using huggingface_hub

86d3c2a verified about 2 months ago

raw

history blame contribute delete

27.6 kB

	"""
	server/environment.py — Enhanced 911 Dispatch Triage Environment v2

	WHAT IS HAPPENING HERE
	----------------------
	This is a genuine multi-step RL environment. Here is the full episode lifecycle:

	1. Episode starts → incidents arrive (with people counts, call descriptions)
	2. Agent observes the full board: who needs help, which units are free
	3. Agent dispatches ONE unit to ONE incident per step
	4. Time ticks every step:
	- Unresolved incidents accumulate steps_waiting (their reward contribution decays)
	- Fire incidents spread (severity increases every FIRE_SPREAD_INTERVAL steps)
	- En-route units count down their travel time → become available again
	5. When units return, agent dispatches them again
	6. Episode ends when all incidents resolved OR max_steps reached

	REWARD MATHEMATICS (always in [0, 1])
	--------------------------------------
	For each dispatched incident:
	contribution = severity
	× log(1 + people_count) ← people multiplier
	× e^(-DECAY_LAMBDA × wait) ← time decay
	× match_quality(type, unit) ← unit type effectiveness

	For each unresolved incident at episode end:
	penalty = severity × log(1 + people_count) × UNRESOLVED_PENALTY_FRACTION

	For cascade violations (hard mode):
	penalty += CASCADE_PENALTY per violation

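	final_score = min(1, max(0, sum(contributions) - sum(penalties)) / max_possible)
	∈ [0, 1] always (clamped at both ends)

	max_possible = sum(severity × log(1+people) × e^(-DECAY_LAMBDA × k)) over ALL incidents,
	sorted by severity × log(1+people) descending, where k = 0, 1, 2, … is the dispatch
	order and every unit is a perfect match; floored at 1.0. It is computed once at
	reset() from the initial severities, so a spreading fire can push the raw ratio
	above 1 — hence the upper clamp.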

	WHY THIS IS REAL RL (not just sorting)
	----------------------------------------
	- Agent must learn that people_count matters more than raw severity
	- Agent must learn optimal dispatch timing (units returning from medium priority
	calls might be better held for an incoming high priority call)
	- Agent must learn cascade dependencies (gas_leak before cardiac)
	- Agent must learn fire is a time bomb (severity grows each step)
	- None of this is told to the agent — it discovers it from reward signals
	"""

	import uuid
	import math
	from typing import List, Optional
	from copy import deepcopy

	try:
	from openenv.core.env_server import Environment
	except ModuleNotFoundError:
	import sys, os
	sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
	from openenv_stubs import Environment

	try:
	from models import (
	DispatchAction, DispatchObservation, DispatchState,
	Incident, Unit,
	DECAY_LAMBDA, FIRE_SPREAD_INTERVAL,
	UNRESOLVED_PENALTY_FRACTION, CASCADE_PENALTY,
	get_match_quality, effective_priority,
	)
	except ModuleNotFoundError:
	from ..models import (
	DispatchAction, DispatchObservation, DispatchState,
	Incident, Unit,
	DECAY_LAMBDA, FIRE_SPREAD_INTERVAL,
	UNRESOLVED_PENALTY_FRACTION, CASCADE_PENALTY,
	get_match_quality, effective_priority,
	)


	# ─────────────────────────────────────────────────────────────────────────────
	# Scenario definitions
	# ─────────────────────────────────────────────────────────────────────────────
	#
	# Each incident has:
	# call_description — raw 911 call text (LLM reads this to infer severity)
	# people_count — how many people at risk
	# fire_spreads — whether severity grows over time
	#
	# The agent sees severity + people_count as numbers and learns the weighting.
	# The LLM can additionally re-assess severity from call_description text.

	# Scenario table keyed by difficulty name. Each entry provides:
	#   "max_steps" — step budget for the episode
	#   "incidents" — template Incident objects (copied on reset, never mutated here)
	#   "units"     — template Unit objects (copied on reset, never mutated here)
	# "Effective priority" in the notes below means severity × log(1 + people_count),
	# with log being the natural logarithm.
	SCENARIOS = {

	    # ──────────────────────────────────────────────────────────────────
	    # EASY — 3 incidents, 3 units, travel_time=1, max_steps=6
	    #
	    # Lesson: dispatch highest effective priority first.
	    # cardiac + 1 person looks obvious, but agent must learn
	    # that crash (sev 5, 3 people) outranks fire (sev 3, 1 person):
	    # effective priority 5 × log(4) = 6.93 vs 3 × log(2) = 2.08.
	    # ──────────────────────────────────────────────────────────────────
	    "easy": {
	        "max_steps": 6,
	        "incidents": [
	            Incident(
	                id=0, type="cardiac_arrest", severity=9, location="Block 2A",
	                people_count=1, fire_spreads=False,
	                call_description=(
	                    "Caller reports elderly man collapsed, not breathing. "
	                    "CPR being attempted. Address: 12 Oak Street, Block 2A."
	                ),
	            ),
	            Incident(
	                id=1, type="car_crash", severity=5, location="Block 7C",
	                people_count=3, fire_spreads=False,
	                call_description=(
	                    "Three-car collision at Block 7C. Three people injured, "
	                    "one trapped in vehicle. No fire visible."
	                ),
	            ),
	            Incident(
	                id=2, type="fire", severity=3, location="Block 1B",
	                people_count=1, fire_spreads=True,
	                call_description=(
	                    "Small kitchen fire at Block 1B apartment. One resident "
	                    "evacuated. Fire contained to one room so far."
	                ),
	            ),
	        ],
	        "units": [
	            Unit(id=0, type="ambulance", travel_time=1),
	            Unit(id=1, type="police", travel_time=1),
	            Unit(id=2, type="fire_truck", travel_time=1),
	        ],
	    },

	    # ──────────────────────────────────────────────────────────────────
	    # MEDIUM — 5 incidents, 3 units, travel_time=2, max_steps=12
	    #
	    # Lessons:
	    # 1. Units return after 2 steps — agent must plan across waves
	    # 2. car_crash with 8 people (effective priority 5 × log(9) = 10.99)
	    #    beats gas_leak with 0 people (effective priority 0.0) despite
	    #    gas_leak having higher raw severity
	    # 3. Fire spreads — delay costs grow non-linearly
	    # ──────────────────────────────────────────────────────────────────
	    "medium": {
	        "max_steps": 12,
	        "incidents": [
	            Incident(
	                id=0, type="cardiac_arrest", severity=9, location="Block 3A",
	                people_count=2, fire_spreads=False,
	                call_description=(
	                    "Two people collapsed at Block 3A community centre, "
	                    "possible carbon monoxide poisoning. Both unresponsive."
	                ),
	            ),
	            Incident(
	                id=1, type="fire", severity=7, location="Block 5D",
	                people_count=6, fire_spreads=True,
	                call_description=(
	                    "Large building fire at Block 5D. Flames visible from "
	                    "third floor. At least 6 residents unable to evacuate."
	                ),
	            ),
	            Incident(
	                id=2, type="car_crash", severity=5, location="Block 9B",
	                people_count=8, fire_spreads=False,
	                call_description=(
	                    "Major crash on Block 9B expressway. Multiple vehicles. "
	                    "Caller reports 8 people injured, one vehicle on its side."
	                ),
	            ),
	            Incident(
	                id=3, type="gas_leak", severity=6, location="Block 2C",
	                people_count=0, fire_spreads=False,
	                call_description=(
	                    "Strong gas smell reported at Block 2C. Building evacuated. "
	                    "No injuries yet but area needs to be secured."
	                ),
	            ),
	            Incident(
	                id=4, type="car_crash", severity=3, location="Block 11E",
	                people_count=1, fire_spreads=False,
	                call_description=(
	                    "Minor fender-bender at Block 11E. One driver with "
	                    "minor cuts. No serious injuries reported."
	                ),
	            ),
	        ],
	        "units": [
	            Unit(id=0, type="ambulance", travel_time=2),
	            Unit(id=1, type="fire_truck", travel_time=2),
	            Unit(id=2, type="police", travel_time=2),
	        ],
	    },

	    # ──────────────────────────────────────────────────────────────────
	    # HARD — 7 incidents, 3 units, travel_time=2, max_steps=18
	    #
	    # Lessons:
	    # 1. Cascade: cardiac (id=0) depends on gas_leak (id=1).
	    #    Gas leak at Block 4B is adjacent. Dispatching cardiac first
	    #    without clearing the gas leak triggers CASCADE_PENALTY.
	    # 2. Fire at Block 9C has 12 people AND spreads.
	    #    Severity grows by 1 every FIRE_SPREAD_INTERVAL steps it is
	    #    ignored (sev=10 after 4 steps, assuming the interval is 2 —
	    #    TODO confirm against models.FIRE_SPREAD_INTERVAL).
	    # 3. Three waves of dispatch needed — agent must plan 6 steps ahead.
	    # 4. car_crash id=3 has 5 people. Despite sev=5, effective priority
	    #    = 5 × log(6) = 8.96 — well above the sev=7 cardiac's
	    #    7 × log(2) = 4.85.
	    # ──────────────────────────────────────────────────────────────────
	    "hard": {
	        "max_steps": 18,
	        "incidents": [
	            Incident(
	                id=0, type="cardiac_arrest", severity=7, location="Block 4A",
	                people_count=1, fire_spreads=False, depends_on=[1],
	                call_description=(
	                    "Man having heart attack at Block 4A, next to the building "
	                    "with the reported gas leak. Caller is panicking. "
	                    "Address same block as the gas incident."
	                ),
	            ),
	            Incident(
	                id=1, type="gas_leak", severity=6, location="Block 4B",
	                people_count=0, fire_spreads=False,
	                call_description=(
	                    "Major gas leak at Block 4B, adjacent to Block 4A. "
	                    "Strong smell reported. Area not yet evacuated. "
	                    "Risk of explosion if ignition source present."
	                ),
	            ),
	            Incident(
	                id=2, type="fire", severity=8, location="Block 9C",
	                people_count=12, fire_spreads=True,
	                call_description=(
	                    "Warehouse fire at Block 9C, spreading rapidly. "
	                    "Night shift workers trapped inside, approximately 12 people. "
	                    "Flames visible from street."
	                ),
	            ),
	            Incident(
	                id=3, type="car_crash", severity=5, location="Block 2D",
	                people_count=5, fire_spreads=False,
	                call_description=(
	                    "Head-on collision at Block 2D intersection. Five occupants, "
	                    "multiple injuries. One child among the injured. "
	                    "Vehicles blocking traffic."
	                ),
	            ),
	            Incident(
	                id=4, type="car_crash", severity=4, location="Block 6E",
	                people_count=2, fire_spreads=False,
	                call_description=(
	                    "Vehicle hit a lamp post at Block 6E. Driver and passenger "
	                    "injured. Airbags deployed. Both conscious."
	                ),
	            ),
	            Incident(
	                id=5, type="fire", severity=3, location="Block 1F",
	                people_count=0, fire_spreads=True,
	                call_description=(
	                    "Bin fire at Block 1F alley. No people involved. "
	                    "Risk of spreading to nearby building if not contained."
	                ),
	            ),
	            Incident(
	                id=6, type="cardiac_arrest", severity=2, location="Block 12G",
	                people_count=1, fire_spreads=False,
	                call_description=(
	                    "Elderly woman feeling chest pains at Block 12G. "
	                    "Conscious and breathing. Not a confirmed cardiac event yet."
	                ),
	            ),
	        ],
	        "units": [
	            Unit(id=0, type="ambulance", travel_time=2),
	            Unit(id=1, type="fire_truck", travel_time=2),
	            Unit(id=2, type="police", travel_time=2),
	        ],
	    },
	}


	# ─────────────────────────────────────────────────────────────────────────────
	# Helpers
	# ─────────────────────────────────────────────────────────────────────────────

	def _compute_max_possible(incidents: List[Incident]) -> float:
	    """
	    Return the reference score used to normalise an episode.

	    Each incident's effective priority is severity × log(1 + people_count).
	    The priorities are ranked from largest to smallest and the one at rank
	    k (0-based) is discounted by e^(-DECAY_LAMBDA × k): with one dispatch
	    per step, the k-th dispatch cannot happen before k ticks have elapsed.
	    A perfect unit match (quality 1.0) is assumed throughout.

	    The result is never below 1.0 so it is always safe to divide by —
	    including for an empty list or when every priority is zero.
	    """
	    # Only the priority values matter here, so rank the numbers directly.
	    ranked_priorities = sorted(
	        (inc.severity * math.log(1 + inc.people_count) for inc in incidents),
	        reverse=True,
	    )
	    best_case = sum(
	        priority * math.exp(-DECAY_LAMBDA * rank)
	        for rank, priority in enumerate(ranked_priorities)
	    )
	    # Floor at 1.0: the caller divides by this value.
	    return max(best_case, 1.0)


	def _compute_contribution(inc: Incident, unit_type: str) -> float:
	    """
	    Score earned by sending a unit of ``unit_type`` to ``inc`` right now.

	    The value is the product of three factors, read from the incident's
	    current state:
	      - effective priority: severity × log(1 + people_count)
	      - time decay:         e^(-DECAY_LAMBDA × steps_waiting)
	      - match quality:      get_match_quality(inc.type, unit_type)
	    """
	    return (
	        (inc.severity * math.log(1 + inc.people_count))
	        * math.exp(-DECAY_LAMBDA * inc.steps_waiting)
	        * get_match_quality(inc.type, unit_type)
	    )


	# ─────────────────────────────────────────────────────────────────────────────
	# Environment
	# ─────────────────────────────────────────────────────────────────────────────

	class DispatchEnvironment(Environment):
	    """
	    Multi-step 911 dispatch triage environment.

	    One step = one dispatch action + one time tick.
	    One episode = multiple steps until all resolved or max_steps reached.

	    The agent interacts via:
	        obs = env.reset(difficulty="easy"|"medium"|"hard")
	        obs = env.step(DispatchAction(incident_id=X, unit_id=Y))
	        state = env.state

	    Terminal handling: whichever step ends the episode — a valid dispatch
	    or a rejected action — applies the unresolved-incident penalties exactly
	    once. Further step() calls are no-ops until reset() is called.
	    """

	    SUPPORTS_CONCURRENT_SESSIONS = True

	    def __init__(self):
	        # NOTE(review): the base-class initialiser is not invoked here; confirm
	        # that Environment does not rely on state set up in its __init__.
	        self._incidents: List[Incident] = []
	        self._units: List[Unit] = []
	        self._step_count: int = 0
	        self._max_steps: int = 10
	        self._dispatch_count: int = 0
	        self._raw_score: float = 0.0      # sum of dispatch contributions
	        self._penalties: float = 0.0      # cascade + unresolved penalties
	        self._max_possible: float = 1.0   # normaliser, always >= 1.0
	        self._episode_closed: bool = False  # terminal penalties already applied
	        self._state = DispatchState()

	    # ──────────────────────────────────────────────────────────────────
	    # reset
	    # ──────────────────────────────────────────────────────────────────

	    def reset(
	        self,
	        seed=None,
	        episode_id=None,
	        difficulty: str = "easy",
	        **kwargs,
	    ) -> DispatchObservation:
	        """
	        Start a new episode from the scenario named ``difficulty``.

	        An unknown difficulty falls back to "easy". ``seed`` and extra
	        keyword arguments are accepted but not used by this method. A random
	        UUID is generated when ``episode_id`` is not supplied.

	        Returns the initial observation (done=False).
	        """
	        if difficulty not in SCENARIOS:
	            difficulty = "easy"

	        scenario = SCENARIOS[difficulty]
	        # Deep copies keep the module-level scenario templates pristine.
	        self._incidents = [i.model_copy(deep=True) for i in scenario["incidents"]]
	        self._units = [u.model_copy(deep=True) for u in scenario["units"]]
	        self._max_steps = scenario["max_steps"]

	        self._step_count = 0
	        self._dispatch_count = 0
	        self._raw_score = 0.0
	        self._penalties = 0.0
	        self._episode_closed = False
	        self._max_possible = _compute_max_possible(self._incidents)

	        self._state = DispatchState(
	            episode_id=episode_id or str(uuid.uuid4()),
	            step_count=0,
	            difficulty=difficulty,
	            total_incidents=len(self._incidents),
	            total_units=len(self._units),
	            max_steps=self._max_steps,
	            max_possible_score=self._max_possible,
	        )

	        return self._make_obs(
	            done=False,
	            message=(
	                f"[{difficulty.upper()}] {len(self._incidents)} incidents, "
	                f"{len(self._units)} units available, "
	                f"{self._max_steps} steps budget. "
	                f"Dispatch wisely — people count and time decay matter."
	            ),
	        )

	    # ──────────────────────────────────────────────────────────────────
	    # step
	    # ──────────────────────────────────────────────────────────────────

	    def step(
	        self,
	        action: DispatchAction,
	        timeout_s=None,
	        **kwargs,
	    ) -> DispatchObservation:
	        """
	        Apply one dispatch action and advance time by one tick.

	        A rejected action (unknown incident/unit id, unit en route, incident
	        already resolved) still consumes the step and ticks time. When the
	        step ends the episode, unresolved-incident penalties are applied and
	        the observation carries the final score as ``reward``; otherwise
	        ``reward`` is None.

	        Once the episode has ended, further calls change nothing and return
	        a terminal observation until reset() is called.
	        """
	        if self._episode_closed:
	            return self._make_obs(
	                done=True,
	                message="Episode already finished. Call reset() to start a new one.",
	            )

	        self._step_count += 1
	        self._state.step_count = self._step_count

	        # ── Validate ─────────────────────────────────────────────────
	        incident = self._find_incident(action.incident_id)
	        unit = self._find_unit(action.unit_id)

	        if incident is None:
	            self._tick_time()
	            return self._close_step(
	                [f"Invalid incident id {action.incident_id}. Time still ticked."]
	            )
	        if unit is None:
	            self._tick_time()
	            return self._close_step(
	                [f"Invalid unit id {action.unit_id}. Time still ticked."]
	            )
	        if not unit.available:
	            # Tick first: the message reports the countdown after this step.
	            self._tick_time()
	            return self._close_step(
	                [
	                    f"Unit {action.unit_id} ({unit.type}) is en route — "
	                    f"returns in {unit.steps_until_free} step(s). Time ticked."
	                ]
	            )
	        if incident.resolved:
	            self._tick_time()
	            return self._close_step(
	                [f"Incident {action.incident_id} already resolved. Time ticked."]
	            )

	        notes: List[str] = []

	        # ── Check cascade ─────────────────────────────────────────────
	        unresolved_deps = self._unresolved_dependencies(incident)
	        if unresolved_deps:
	            self._penalties += CASCADE_PENALTY
	            notes.append(
	                f"CASCADE PENALTY -{CASCADE_PENALTY}: "
	                f"dependency incident(s) {unresolved_deps} unresolved!"
	            )

	        # ── Dispatch ──────────────────────────────────────────────────
	        self._dispatch_count += 1
	        contribution = _compute_contribution(incident, unit.type)
	        self._raw_score += contribution

	        unit.available = False
	        unit.steps_until_free = unit.travel_time
	        incident.resolved = True
	        incident.assigned_unit_id = unit.id

	        # Break the contribution into its factors for the dispatch note.
	        ep = incident.severity * math.log(1 + incident.people_count)
	        decay = math.exp(-DECAY_LAMBDA * incident.steps_waiting)
	        match = get_match_quality(incident.type, unit.type)
	        notes.append(
	            f"Dispatched {unit.type} -> {incident.type} at {incident.location} "
	            f"[sev={incident.severity} people={incident.people_count} "
	            f"wait={incident.steps_waiting}s] "
	            f"contribution={contribution:.3f} "
	            f"(ep={ep:.2f} x decay={decay:.2f} x match={match:.1f})"
	        )
	        if match < 0.5:
	            notes.append(f"WRONG UNIT TYPE — match quality only {match:.1f}")

	        # ── Tick time ─────────────────────────────────────────────────
	        notes.extend(self._tick_time())

	        # ── Done? ─────────────────────────────────────────────────────
	        return self._close_step(notes)

	    # ──────────────────────────────────────────────────────────────────
	    # state
	    # ──────────────────────────────────────────────────────────────────

	    @property
	    def state(self) -> DispatchState:
	        """Episode-level state object created by the last reset()."""
	        return self._state

	    # ──────────────────────────────────────────────────────────────────
	    # Private helpers
	    # ──────────────────────────────────────────────────────────────────

	    def _close_step(self, notes: List[str]) -> DispatchObservation:
	        """
	        Build the observation that ends a step.

	        If the episode is over, the terminal penalties are applied first so
	        the reported reward is the same no matter which kind of step
	        (dispatch or rejected action) happened to be the last one.
	        """
	        done = self._is_done()
	        if done:
	            notes.extend(self._finalize_episode())
	        return self._make_obs(done=done, message=" | ".join(notes))

	    def _finalize_episode(self) -> List[str]:
	        """
	        Apply the unresolved-incident penalties once and mark the episode
	        closed. Returns the notes describing each penalty and the final score.
	        """
	        if self._episode_closed:
	            return []
	        self._episode_closed = True

	        notes: List[str] = []
	        for inc in self._incidents:
	            if inc.resolved:
	                continue
	            pen = (
	                inc.severity
	                * math.log(1 + inc.people_count)
	                * UNRESOLVED_PENALTY_FRACTION
	            )
	            self._penalties += pen
	            notes.append(
	                f"UNRESOLVED PENALTY -{pen:.3f}: "
	                f"{inc.type} at {inc.location} "
	                f"[sev={inc.severity} people={inc.people_count}]"
	            )
	        notes.append(f"EPISODE DONE. Final score: {self._current_score():.4f}")
	        return notes

	    def _unresolved_dependencies(self, incident: Incident) -> List[int]:
	        """
	        Ids from ``incident.depends_on`` whose incidents are still unresolved.

	        Ids that match no incident in this episode are skipped rather than
	        raising, so a scenario typo cannot crash a step.
	        """
	        pending: List[int] = []
	        for dep_id in incident.depends_on or []:
	            dependency = self._find_incident(dep_id)
	            if dependency is not None and not dependency.resolved:
	                pending.append(dep_id)
	        return pending

	    def _tick_time(self) -> List[str]:
	        """
	        Advance the world by one time step.
	        - Units en route: decrement countdown, return to base when 0
	        - Unresolved incidents: accumulate waiting time
	        - Unresolved fire incidents: spread (severity +1, capped at 10)
	          every FIRE_SPREAD_INTERVAL steps of waiting
	        Returns list of notable event strings.
	        """
	        notes: List[str] = []

	        # Units returning to base
	        for u in self._units:
	            if not u.available and u.steps_until_free > 0:
	                u.steps_until_free -= 1
	                if u.steps_until_free == 0:
	                    u.available = True
	                    notes.append(f"{u.type} (id={u.id}) returned to base — available.")

	        # Incidents waiting
	        for inc in self._incidents:
	            if inc.resolved:
	                continue
	            inc.steps_waiting += 1
	            # Fire spreads only while the incident is still unattended.
	            if (
	                inc.fire_spreads
	                and inc.steps_waiting > 0
	                and inc.steps_waiting % FIRE_SPREAD_INTERVAL == 0
	                and inc.severity < 10
	            ):
	                inc.severity += 1
	                notes.append(
	                    f"FIRE SPREAD at {inc.location}: "
	                    f"severity now {inc.severity}!"
	                )

	        return notes

	    def _is_done(self) -> bool:
	        """
	        True when every incident is resolved, the step budget is used up, or
	        unresolved incidents remain but no unit is available or returning.
	        """
	        all_resolved = all(i.resolved for i in self._incidents)
	        max_steps_hit = self._step_count >= self._max_steps
	        # No units available AND none returning AND unresolved incidents exist
	        no_help_possible = (
	            any(not i.resolved for i in self._incidents)
	            and not any(u.available for u in self._units)
	            and not any(u.steps_until_free > 0 for u in self._units)
	        )
	        return all_resolved or max_steps_hit or no_help_possible

	    def _current_score(self) -> float:
	        """Net score (contributions minus penalties) normalised into [0, 1]."""
	        net = max(0.0, self._raw_score - self._penalties)
	        return min(1.0, net / self._max_possible)

	    def _make_obs(self, *, done: bool, message: str) -> DispatchObservation:
	        """
	        Snapshot the current board as an observation.

	        Incidents and units are deep-copied so the caller cannot mutate the
	        environment's internal state. ``reward`` is only set on terminal
	        observations.
	        """
	        score = self._current_score()
	        return DispatchObservation(
	            done=done,
	            reward=score if done else None,
	            incidents=deepcopy(self._incidents),
	            units=deepcopy(self._units),
	            step_count=self._step_count,
	            max_steps=self._max_steps,
	            dispatch_count=self._dispatch_count,
	            score_so_far=score,
	            message=message,
	        )

	    def _find_incident(self, iid: int) -> Optional[Incident]:
	        """Return the incident with id ``iid``, or None if there is none."""
	        return next((i for i in self._incidents if i.id == iid), None)

	    def _find_unit(self, uid: int) -> Optional[Unit]:
	        """Return the unit with id ``uid``, or None if there is none."""
	        return next((u for u in self._units if u.id == uid), None)