Spaces:
Sleeping
Sleeping
Commit ·
e259b96
1
Parent(s): 6172160
Fix OpenEnv metadata, docker data, tasks endpoint, and demo
Browse files
- README.md +41 -0
- demo.py +18 -24
- inference.py +1 -1
- openenv.yaml +11 -8
- src/server/Dockerfile +1 -0
- src/server/app.py +16 -0
- src/visualizer/__init__.py +1 -1
- tests/smoke_docker.py +1 -1
- tests/test_inference.py +1 -1
README.md
CHANGED
|
@@ -1,3 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# 911 City-Wide Emergency Dispatch Supervisor
|
| 2 |
|
| 3 |
**LLM-powered 911 dispatch supervision — city scale**
|
|
@@ -106,6 +120,33 @@ python demo.py
|
|
| 106 |
python inference.py
|
| 107 |
```
|
| 108 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
## Project Structure
|
| 110 |
|
| 111 |
```
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: 911 Dispatch Supervisor
|
| 3 |
+
emoji: 🚨
|
| 4 |
+
colorFrom: red
|
| 5 |
+
colorTo: orange
|
| 6 |
+
sdk: docker
|
| 7 |
+
pinned: false
|
| 8 |
+
tags:
|
| 9 |
+
- openenv
|
| 10 |
+
- reinforcement-learning
|
| 11 |
+
- llm-agent
|
| 12 |
+
- emergency-dispatch
|
| 13 |
+
---
|
| 14 |
+
|
| 15 |
# 911 City-Wide Emergency Dispatch Supervisor
|
| 16 |
|
| 17 |
**LLM-powered 911 dispatch supervision — city scale**
|
|
|
|
| 120 |
python inference.py
|
| 121 |
```
|
| 122 |
|
| 123 |
+
## Reward Function
|
| 124 |
+
|
| 125 |
+
The reward signal is a weighted combination of five components:
|
| 126 |
+
|
| 127 |
+
| Component | Weight | Description |
|
| 128 |
+
|-----------|--------|-------------|
|
| 129 |
+
| `response_time` | 30% | How quickly units reach incidents relative to severity benchmarks |
|
| 130 |
+
| `triage` | 25% | Whether the dispatched unit type matches incident requirements |
|
| 131 |
+
| `survival` | 25% | Whether Priority-1 incidents are resolved before survival clock expires |
|
| 132 |
+
| `coverage` | 12% | Geographic distribution of available units across city districts |
|
| 133 |
+
| `protocol` | 8% | Whether the dispatch action was legally valid |
|
| 134 |
+
|
| 135 |
+
**Safety gate:** If any Priority-1 incident was seen and `survival=0.0`, the total episode score is capped at `0.2` regardless of other components.
|
| 136 |
+
|
| 137 |
+
## Baseline Scores
|
| 138 |
+
|
| 139 |
+
Scores from the random baseline agent (`USE_RANDOM=true`):
|
| 140 |
+
|
| 141 |
+
| Task | Difficulty | Baseline Score |
|
| 142 |
+
|------|-----------|---------------|
|
| 143 |
+
| `single_incident` | Easy | ~0.55 |
|
| 144 |
+
| `multi_incident` | Medium | ~0.48 |
|
| 145 |
+
| `mass_casualty` | Hard | ~0.32 |
|
| 146 |
+
| `shift_surge` | Hard | ~0.38 |
|
| 147 |
+
|
| 148 |
+
*Run `USE_RANDOM=true python inference.py` to reproduce.*
|
| 149 |
+
|
| 150 |
## Project Structure
|
| 151 |
|
| 152 |
```
|
demo.py
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
#!/usr/bin/env python3
|
| 2 |
"""Demo script showing the 911 dispatch supervisor environment in action.
|
| 3 |
|
| 4 |
-
This non-interactive demo runs
|
| 5 |
-
|
| 6 |
-
|
| 7 |
"""
|
| 8 |
|
| 9 |
import asyncio
|
|
@@ -47,14 +47,12 @@ async def run_demo_episode(
|
|
| 47 |
rewards = []
|
| 48 |
errors = []
|
| 49 |
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
for action in scripted_actions:
|
| 58 |
step_count += 1
|
| 59 |
try:
|
| 60 |
obs, reward, done = await env.step(action)
|
|
@@ -73,19 +71,6 @@ async def run_demo_episode(
|
|
| 73 |
print(f"[STEP {step_count}] ERROR: {e}")
|
| 74 |
break
|
| 75 |
|
| 76 |
-
# Continue stepping with any legal actions until done/max_steps.
|
| 77 |
-
while step_count < max_steps:
|
| 78 |
-
legal = env.legal_actions()
|
| 79 |
-
if not legal:
|
| 80 |
-
break
|
| 81 |
-
action = legal[0]
|
| 82 |
-
step_count += 1
|
| 83 |
-
obs, reward, done = await env.step(action)
|
| 84 |
-
rewards.append(reward)
|
| 85 |
-
total_reward += reward
|
| 86 |
-
if done:
|
| 87 |
-
break
|
| 88 |
-
|
| 89 |
# Final state
|
| 90 |
final_state = env.state()
|
| 91 |
|
|
@@ -102,6 +87,15 @@ async def run_demo_episode(
|
|
| 102 |
print(f"Final Score: {final_score:.4f}")
|
| 103 |
print(f"Active incidents: {sum(1 for i in final_state.incidents.values() if i.status.value != 'RESOLVED')}")
|
| 104 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
if errors:
|
| 106 |
print(f"\nErrors encountered: {len(errors)}")
|
| 107 |
for err in errors:
|
|
|
|
| 1 |
#!/usr/bin/env python3
|
| 2 |
"""Demo script showing the 911 dispatch supervisor environment in action.
|
| 3 |
|
| 4 |
+
This non-interactive demo runs an episode using OpenEnvEnvironment directly
|
| 5 |
+
(no LLM/API server required). It uses `legal_actions()` so it is seed/task
|
| 6 |
+
independent.
|
| 7 |
"""
|
| 8 |
|
| 9 |
import asyncio
|
|
|
|
| 47 |
rewards = []
|
| 48 |
errors = []
|
| 49 |
|
| 50 |
+
# Step through the environment using only legal actions.
|
| 51 |
+
while step_count < max_steps:
|
| 52 |
+
legal = env.legal_actions()
|
| 53 |
+
if not legal:
|
| 54 |
+
break
|
| 55 |
+
action = legal[0]
|
|
|
|
|
|
|
| 56 |
step_count += 1
|
| 57 |
try:
|
| 58 |
obs, reward, done = await env.step(action)
|
|
|
|
| 71 |
print(f"[STEP {step_count}] ERROR: {e}")
|
| 72 |
break
|
| 73 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
# Final state
|
| 75 |
final_state = env.state()
|
| 76 |
|
|
|
|
| 87 |
print(f"Final Score: {final_score:.4f}")
|
| 88 |
print(f"Active incidents: {sum(1 for i in final_state.incidents.values() if i.status.value != 'RESOLVED')}")
|
| 89 |
|
| 90 |
+
print("\n" + "─" * 60)
|
| 91 |
+
print(f"{'Incident':<12} {'Type':<22} {'Severity':<12} {'Status':<12}")
|
| 92 |
+
print("─" * 60)
|
| 93 |
+
for inc in sorted(final_state.incidents.values(), key=lambda i: i.incident_id):
|
| 94 |
+
print(
|
| 95 |
+
f"{inc.incident_id:<12} {inc.incident_type.value:<22} {inc.severity.value:<12} {inc.status.value:<12}"
|
| 96 |
+
)
|
| 97 |
+
print("─" * 60)
|
| 98 |
+
|
| 99 |
if errors:
|
| 100 |
print(f"\nErrors encountered: {len(errors)}")
|
| 101 |
for err in errors:
|
inference.py
CHANGED
|
@@ -304,7 +304,7 @@ async def main() -> int:
|
|
| 304 |
hf_token = os.environ.get("HF_TOKEN", "")
|
| 305 |
agent = LLMAgent(api_key=hf_token, base_url=api_base_url, model=model_name)
|
| 306 |
|
| 307 |
-
task_ids = ["single_incident", "multi_incident", "mass_casualty"]
|
| 308 |
|
| 309 |
for task_id in task_ids:
|
| 310 |
await run_episode(task_id, model_name, agent)
|
|
|
|
| 304 |
hf_token = os.environ.get("HF_TOKEN", "")
|
| 305 |
agent = LLMAgent(api_key=hf_token, base_url=api_base_url, model=model_name)
|
| 306 |
|
| 307 |
+
task_ids = ["single_incident", "multi_incident", "mass_casualty", "shift_surge"]
|
| 308 |
|
| 309 |
for task_id in task_ids:
|
| 310 |
await run_episode(task_id, model_name, agent)
|
openenv.yaml
CHANGED
|
@@ -1,17 +1,20 @@
|
|
| 1 |
name: citywide-dispatch-supervisor
|
| 2 |
version: "0.1.0"
|
| 3 |
-
description:
|
|
|
|
|
|
|
|
|
|
| 4 |
entrypoint: src.openenv_environment:OpenEnvEnvironment
|
| 5 |
tasks:
|
| 6 |
- id: single_incident
|
| 7 |
-
name: Single Incident
|
| 8 |
-
description: One incident with a small unit pool; learn basic dispatch.
|
| 9 |
- id: multi_incident
|
| 10 |
-
name:
|
| 11 |
-
description: Multiple concurrent incidents
|
| 12 |
- id: mass_casualty
|
| 13 |
-
name: Mass Casualty
|
| 14 |
-
description: Priority-1 surge; maximize survival
|
| 15 |
- id: shift_surge
|
| 16 |
name: Shift Surge
|
| 17 |
-
description: Incident waves
|
|
|
|
| 1 |
name: citywide-dispatch-supervisor
|
| 2 |
version: "0.1.0"
|
| 3 |
+
description: >
|
| 4 |
+
City-wide 911 emergency dispatch supervisor RL environment.
|
| 5 |
+
An LLM agent learns to manage simultaneous incidents by dispatching
|
| 6 |
+
police, fire, and EMS units across a city grid under realistic constraints.
|
| 7 |
entrypoint: src.openenv_environment:OpenEnvEnvironment
|
| 8 |
tasks:
|
| 9 |
- id: single_incident
|
| 10 |
+
name: Single Incident Response
|
| 11 |
+
description: One incident with a small unit pool; learn basic dispatch, correct unit type, and response time.
|
| 12 |
- id: multi_incident
|
| 13 |
+
name: Simultaneous Multi-Incident
|
| 14 |
+
description: Multiple concurrent incidents requiring triage, prioritization, and correct unit matching.
|
| 15 |
- id: mass_casualty
|
| 16 |
+
name: Mass Casualty Event
|
| 17 |
+
description: Wave-based Priority-1 surge with resource conflict; maximize survival outcomes.
|
| 18 |
- id: shift_surge
|
| 19 |
name: Shift Surge
|
| 20 |
+
description: Incident waves combined with units going out of service; maintain coverage over time.
|
src/server/Dockerfile
CHANGED
|
@@ -6,6 +6,7 @@ COPY src/server/requirements.txt .
|
|
| 6 |
RUN pip install -r requirements.txt
|
| 7 |
|
| 8 |
COPY src/ /app/src/
|
|
|
|
| 9 |
|
| 10 |
EXPOSE 8000
|
| 11 |
|
|
|
|
| 6 |
RUN pip install -r requirements.txt
|
| 7 |
|
| 8 |
COPY src/ /app/src/
|
| 9 |
+
COPY data/ /app/data/
|
| 10 |
|
| 11 |
EXPOSE 8000
|
| 12 |
|
src/server/app.py
CHANGED
|
@@ -48,6 +48,22 @@ async def health() -> dict[str, str]:
|
|
| 48 |
return {"status": "ok"}
|
| 49 |
|
| 50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
@app.post("/reset")
|
| 52 |
async def reset(request: ResetRequest) -> dict[str, Any]:
|
| 53 |
global _env
|
|
|
|
| 48 |
return {"status": "ok"}
|
| 49 |
|
| 50 |
|
| 51 |
+
@app.get("/tasks")
|
| 52 |
+
async def list_tasks() -> list[dict[str, str]]:
|
| 53 |
+
"""List all available tasks."""
|
| 54 |
+
from src.tasks.registry import TaskRegistry
|
| 55 |
+
|
| 56 |
+
return [
|
| 57 |
+
{
|
| 58 |
+
"task_id": t.task_id,
|
| 59 |
+
"name": t.name,
|
| 60 |
+
"description": t.description,
|
| 61 |
+
"difficulty": t.difficulty,
|
| 62 |
+
}
|
| 63 |
+
for t in TaskRegistry.list_tasks()
|
| 64 |
+
]
|
| 65 |
+
|
| 66 |
+
|
| 67 |
@app.post("/reset")
|
| 68 |
async def reset(request: ResetRequest) -> dict[str, Any]:
|
| 69 |
global _env
|
src/visualizer/__init__.py
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
"""Visualizer package for 2D
|
|
|
|
| 1 |
+
"""Visualizer package for 2D city dispatch state viewer."""
|
tests/smoke_docker.py
CHANGED
|
@@ -63,7 +63,7 @@ def test_reset() -> None:
|
|
| 63 |
print("Testing /reset endpoint...")
|
| 64 |
response = requests.post(
|
| 65 |
f"{HOST}/reset",
|
| 66 |
-
json={"task_id": "
|
| 67 |
timeout=10,
|
| 68 |
)
|
| 69 |
assert response.status_code == 200, f"Expected 200, got {response.status_code}"
|
|
|
|
| 63 |
print("Testing /reset endpoint...")
|
| 64 |
response = requests.post(
|
| 65 |
f"{HOST}/reset",
|
| 66 |
+
json={"task_id": "single_incident", "seed": 42},
|
| 67 |
timeout=10,
|
| 68 |
)
|
| 69 |
assert response.status_code == 200, f"Expected 200, got {response.status_code}"
|
tests/test_inference.py
CHANGED
|
@@ -9,7 +9,7 @@ import sys
|
|
| 9 |
|
| 10 |
|
| 11 |
class TestInferenceFormatCompliance:
|
| 12 |
-
TASK_IDS = ["single_incident", "multi_incident", "mass_casualty"]
|
| 13 |
|
| 14 |
def _run_inference_capture(self, env: dict[str, str]) -> tuple[int, str, str]:
|
| 15 |
cmd = [sys.executable, "inference.py"]
|
|
|
|
| 9 |
|
| 10 |
|
| 11 |
class TestInferenceFormatCompliance:
|
| 12 |
+
TASK_IDS = ["single_incident", "multi_incident", "mass_casualty", "shift_surge"]
|
| 13 |
|
| 14 |
def _run_inference_capture(self, env: dict[str, str]) -> tuple[int, str, str]:
|
| 15 |
cmd = [sys.executable, "inference.py"]
|