garvitsachdeva committed on
Commit
e259b96
·
1 Parent(s): 6172160

Fix OpenEnv metadata, docker data, tasks endpoint, and demo

Browse files
README.md CHANGED
@@ -1,3 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # 911 City-Wide Emergency Dispatch Supervisor
2
 
3
  **LLM-powered 911 dispatch supervision — city scale**
@@ -106,6 +120,33 @@ python demo.py
106
  python inference.py
107
  ```
108
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  ## Project Structure
110
 
111
  ```
 
1
+ ---
2
+ title: 911 Dispatch Supervisor
3
+ emoji: 🚨
4
+ colorFrom: red
5
+ colorTo: orange
6
+ sdk: docker
7
+ pinned: false
8
+ tags:
9
+ - openenv
10
+ - reinforcement-learning
11
+ - llm-agent
12
+ - emergency-dispatch
13
+ ---
14
+
15
  # 911 City-Wide Emergency Dispatch Supervisor
16
 
17
  **LLM-powered 911 dispatch supervision — city scale**
 
120
  python inference.py
121
  ```
122
 
123
+ ## Reward Function
124
+
125
+ The reward signal is a weighted combination of five components:
126
+
127
+ | Component | Weight | Description |
128
+ |-----------|--------|-------------|
129
+ | `response_time` | 30% | How quickly units reach incidents relative to severity benchmarks |
130
+ | `triage` | 25% | Whether the dispatched unit type matches incident requirements |
131
+ | `survival` | 25% | Whether Priority-1 incidents are resolved before survival clock expires |
132
+ | `coverage` | 12% | Geographic distribution of available units across city districts |
133
+ | `protocol` | 8% | Whether the dispatch action was a legal (valid) action in the environment |
134
+
135
+ **Safety gate:** If any Priority-1 incident was seen and `survival=0.0`, the total episode score is capped at `0.2` regardless of other components.
136
+
137
+ ## Baseline Scores
138
+
139
+ Scores from the random baseline agent (`USE_RANDOM=true`):
140
+
141
+ | Task | Difficulty | Baseline Score |
142
+ |------|-----------|---------------|
143
+ | `single_incident` | Easy | ~0.55 |
144
+ | `multi_incident` | Medium | ~0.48 |
145
+ | `mass_casualty` | Hard | ~0.32 |
146
+ | `shift_surge` | Hard | ~0.38 |
147
+
148
+ *Run `USE_RANDOM=true python inference.py` to reproduce.*
149
+
150
  ## Project Structure
151
 
152
  ```
demo.py CHANGED
@@ -1,9 +1,9 @@
1
  #!/usr/bin/env python3
2
  """Demo script showing the 911 dispatch supervisor environment in action.
3
 
4
- This non-interactive demo runs a deterministic episode using OpenEnvEnvironment
5
- directly (no LLM/API server required). It performs a scripted triage sequence
6
- on the multi-incident task.
7
  """
8
 
9
  import asyncio
@@ -47,14 +47,12 @@ async def run_demo_episode(
47
  rewards = []
48
  errors = []
49
 
50
- scripted_actions = [
51
- Action(action_type=DispatchAction.DISPATCH, unit_id="MED-1", incident_id="INC-002"),
52
- Action(action_type=DispatchAction.DISPATCH, unit_id="ENG-1", incident_id="INC-001"),
53
- Action(action_type=DispatchAction.DISPATCH, unit_id="LAD-1", incident_id="INC-001"),
54
- Action(action_type=DispatchAction.DISPATCH, unit_id="PAT-1", incident_id="INC-003"),
55
- ]
56
-
57
- for action in scripted_actions:
58
  step_count += 1
59
  try:
60
  obs, reward, done = await env.step(action)
@@ -73,19 +71,6 @@ async def run_demo_episode(
73
  print(f"[STEP {step_count}] ERROR: {e}")
74
  break
75
 
76
- # Continue stepping with any legal actions until done/max_steps.
77
- while step_count < max_steps:
78
- legal = env.legal_actions()
79
- if not legal:
80
- break
81
- action = legal[0]
82
- step_count += 1
83
- obs, reward, done = await env.step(action)
84
- rewards.append(reward)
85
- total_reward += reward
86
- if done:
87
- break
88
-
89
  # Final state
90
  final_state = env.state()
91
 
@@ -102,6 +87,15 @@ async def run_demo_episode(
102
  print(f"Final Score: {final_score:.4f}")
103
  print(f"Active incidents: {sum(1 for i in final_state.incidents.values() if i.status.value != 'RESOLVED')}")
104
 
 
 
 
 
 
 
 
 
 
105
  if errors:
106
  print(f"\nErrors encountered: {len(errors)}")
107
  for err in errors:
 
1
  #!/usr/bin/env python3
2
  """Demo script showing the 911 dispatch supervisor environment in action.
3
 
4
+ This non-interactive demo runs an episode using OpenEnvEnvironment directly
5
+ (no LLM/API server required). It uses `legal_actions()` so it is seed/task
6
+ independent.
7
  """
8
 
9
  import asyncio
 
47
  rewards = []
48
  errors = []
49
 
50
+ # Step through the environment using only legal actions.
51
+ while step_count < max_steps:
52
+ legal = env.legal_actions()
53
+ if not legal:
54
+ break
55
+ action = legal[0]
 
 
56
  step_count += 1
57
  try:
58
  obs, reward, done = await env.step(action)
 
71
  print(f"[STEP {step_count}] ERROR: {e}")
72
  break
73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  # Final state
75
  final_state = env.state()
76
 
 
87
  print(f"Final Score: {final_score:.4f}")
88
  print(f"Active incidents: {sum(1 for i in final_state.incidents.values() if i.status.value != 'RESOLVED')}")
89
 
90
+ print("\n" + "─" * 60)
91
+ print(f"{'Incident':<12} {'Type':<22} {'Severity':<12} {'Status':<12}")
92
+ print("─" * 60)
93
+ for inc in sorted(final_state.incidents.values(), key=lambda i: i.incident_id):
94
+ print(
95
+ f"{inc.incident_id:<12} {inc.incident_type.value:<22} {inc.severity.value:<12} {inc.status.value:<12}"
96
+ )
97
+ print("─" * 60)
98
+
99
  if errors:
100
  print(f"\nErrors encountered: {len(errors)}")
101
  for err in errors:
inference.py CHANGED
@@ -304,7 +304,7 @@ async def main() -> int:
304
  hf_token = os.environ.get("HF_TOKEN", "")
305
  agent = LLMAgent(api_key=hf_token, base_url=api_base_url, model=model_name)
306
 
307
- task_ids = ["single_incident", "multi_incident", "mass_casualty"]
308
 
309
  for task_id in task_ids:
310
  await run_episode(task_id, model_name, agent)
 
304
  hf_token = os.environ.get("HF_TOKEN", "")
305
  agent = LLMAgent(api_key=hf_token, base_url=api_base_url, model=model_name)
306
 
307
+ task_ids = ["single_incident", "multi_incident", "mass_casualty", "shift_surge"]
308
 
309
  for task_id in task_ids:
310
  await run_episode(task_id, model_name, agent)
openenv.yaml CHANGED
@@ -1,17 +1,20 @@
1
  name: citywide-dispatch-supervisor
2
  version: "0.1.0"
3
- description: 911 dispatch supervisor RL environment powered by LLM orchestration.
 
 
 
4
  entrypoint: src.openenv_environment:OpenEnvEnvironment
5
  tasks:
6
  - id: single_incident
7
- name: Single Incident
8
- description: One incident with a small unit pool; learn basic dispatch.
9
  - id: multi_incident
10
- name: Multi Incident
11
- description: Multiple concurrent incidents; triage and prioritization under constraints.
12
  - id: mass_casualty
13
- name: Mass Casualty
14
- description: Priority-1 surge; maximize survival and response time.
15
  - id: shift_surge
16
  name: Shift Surge
17
- description: Incident waves plus unit status changes; maintain coverage over time.
 
1
  name: citywide-dispatch-supervisor
2
  version: "0.1.0"
3
+ description: >
4
+ City-wide 911 emergency dispatch supervisor RL environment.
5
+ An LLM agent learns to manage simultaneous incidents by dispatching
6
+ police, fire, and EMS units across a city grid under realistic constraints.
7
  entrypoint: src.openenv_environment:OpenEnvEnvironment
8
  tasks:
9
  - id: single_incident
10
+ name: Single Incident Response
11
+ description: One incident with a small unit pool; learn basic dispatch, correct unit type, and response time.
12
  - id: multi_incident
13
+ name: Simultaneous Multi-Incident
14
+ description: Multiple concurrent incidents requiring triage, prioritization, and correct unit matching.
15
  - id: mass_casualty
16
+ name: Mass Casualty Event
17
+ description: Wave-based Priority-1 surge with resource conflict; maximize survival outcomes.
18
  - id: shift_surge
19
  name: Shift Surge
20
+ description: Incident waves combined with units going out of service; maintain coverage over time.
src/server/Dockerfile CHANGED
@@ -6,6 +6,7 @@ COPY src/server/requirements.txt .
6
  RUN pip install -r requirements.txt
7
 
8
  COPY src/ /app/src/
 
9
 
10
  EXPOSE 8000
11
 
 
6
  RUN pip install -r requirements.txt
7
 
8
  COPY src/ /app/src/
9
+ COPY data/ /app/data/
10
 
11
  EXPOSE 8000
12
 
src/server/app.py CHANGED
@@ -48,6 +48,22 @@ async def health() -> dict[str, str]:
48
  return {"status": "ok"}
49
 
50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  @app.post("/reset")
52
  async def reset(request: ResetRequest) -> dict[str, Any]:
53
  global _env
 
48
  return {"status": "ok"}
49
 
50
 
51
+ @app.get("/tasks")
52
+ async def list_tasks() -> list[dict[str, str]]:
53
+ """List all available tasks."""
54
+ from src.tasks.registry import TaskRegistry
55
+
56
+ return [
57
+ {
58
+ "task_id": t.task_id,
59
+ "name": t.name,
60
+ "description": t.description,
61
+ "difficulty": t.difficulty,
62
+ }
63
+ for t in TaskRegistry.list_tasks()
64
+ ]
65
+
66
+
67
  @app.post("/reset")
68
  async def reset(request: ResetRequest) -> dict[str, Any]:
69
  global _env
src/visualizer/__init__.py CHANGED
@@ -1 +1 @@
1
- """Visualizer package for 2D airport state viewer."""
 
1
+ """Visualizer package for 2D city dispatch state viewer."""
tests/smoke_docker.py CHANGED
@@ -63,7 +63,7 @@ def test_reset() -> None:
63
  print("Testing /reset endpoint...")
64
  response = requests.post(
65
  f"{HOST}/reset",
66
- json={"task_id": "arrival", "seed": 42},
67
  timeout=10,
68
  )
69
  assert response.status_code == 200, f"Expected 200, got {response.status_code}"
 
63
  print("Testing /reset endpoint...")
64
  response = requests.post(
65
  f"{HOST}/reset",
66
+ json={"task_id": "single_incident", "seed": 42},
67
  timeout=10,
68
  )
69
  assert response.status_code == 200, f"Expected 200, got {response.status_code}"
tests/test_inference.py CHANGED
@@ -9,7 +9,7 @@ import sys
9
 
10
 
11
  class TestInferenceFormatCompliance:
12
- TASK_IDS = ["single_incident", "multi_incident", "mass_casualty"]
13
 
14
  def _run_inference_capture(self, env: dict[str, str]) -> tuple[int, str, str]:
15
  cmd = [sys.executable, "inference.py"]
 
9
 
10
 
11
  class TestInferenceFormatCompliance:
12
+ TASK_IDS = ["single_incident", "multi_incident", "mass_casualty", "shift_surge"]
13
 
14
  def _run_inference_capture(self, env: dict[str, str]) -> tuple[int, str, str]:
15
  cmd = [sys.executable, "inference.py"]