open_ENV / openenv.yaml
arrow072's picture
Update openenv.yaml
36c3c58 verified
# Top-level metadata for this environment submission.
# `version` is quoted so it stays the string "1.0" instead of being read as a float.
version: "1.0"
name: "TrafficSignalOptimization-v1"
# Folded scalar: the indented lines below are joined into a single paragraph.
description: >
  AI-driven Traffic Signal Optimization for a 4-way urban intersection.
  A reinforcement-learning environment that challenges agents to minimise
  congestion, reduce average waiting time, respond to emergency vehicles,
  and maintain signal stability across three difficulty tiers.
author: "OpenEnv Submission"
tags:
  - Reinforcement Learning
  - Traffic Control
  - Smart Cities
  - Safety-Critical
  - Emergency Vehicle Priority
# NOTE(review): the key uses the British spelling "licence"; confirm the
# consuming validator does not expect "license".
licence: MIT
# ─────────────────────────────────────────────────────────────────────
# Environment specification
# ─────────────────────────────────────────────────────────────────────
# Environment definition: where the implementation lives, plus the state and
# action spaces exposed to agents.
environment:
  # Both entries name TrafficEnv in module `env` (dotted path and module:attr form).
  class: "env.TrafficEnv"
  entry_point: "env:TrafficEnv"

  # Dictionary-style state. Each entry under `keys` is one state field.
  state_space:
    type: Dict
    keys:
      # Per-lane queue lengths.
      # NOTE(review): `max_queue` is a symbolic bound, presumably the per-task
      # `params.max_queue` value — confirm against the environment code.
      north_cars:
        type: Discrete
        description: "Queued vehicles in the North lane"
        range: [0, max_queue]
      south_cars:
        type: Discrete
        description: "Queued vehicles in the South lane"
        range: [0, max_queue]
      east_cars:
        type: Discrete
        description: "Queued vehicles in the East lane"
        range: [0, max_queue]
      west_cars:
        type: Discrete
        description: "Queued vehicles in the West lane"
        range: [0, max_queue]
      waiting_times:
        type: "Dict[str, float]"
        description: "Cumulative waiting-time pressure per lane (north/south/east/west)"
      phase:
        type: Discrete
        values: [0, 1]
        description: "Current green signal: 0 = NS green, 1 = EW green"
      emergency_flags:
        type: "Dict[str, bool]"
        description: "True if an emergency vehicle is present in that lane"
      # NOTE(review): `max_steps` is symbolic, presumably the per-task
      # `max_steps` value — confirm.
      step_count:
        type: Discrete
        description: "Current step within the episode"
        range: [0, max_steps]

  # Two discrete actions, keyed by their integer action id.
  action_space:
    type: Discrete
    n: 2
    actions:
      0: "Keep current signal phase"
      1: "Switch signal phase (NS ↔ EW)"

  # 4 queues + 4 waits + 4 EV flags + phase + step = 14 entries.
  observation_vector_dim: 14
  # Layout: [N, S, E, W queues | N, S, E, W waits | N, S, E, W EV flags | phase, step]
# ─────────────────────────────────────────────────────────────────────
# Tasks (3 required — validator enumerates and scores each one)
# ─────────────────────────────────────────────────────────────────────
# One entry per difficulty tier. Every task shares the same field set:
# id, description, config_key, max_steps, score_range and a `params` mapping.
# NOTE(review): `arrival_rate` and `discharge_rate` are two-element lists,
# presumably [min, max] bounds — confirm against tasks.py.
tasks:
  - id: easy
    description: "Stable, balanced traffic. Minimal emergencies. Ideal for learning."
    config_key: easy
    max_steps: 50
    score_range: [0.001, 0.999]
    params:
      arrival_rate: [0, 1]
      discharge_rate: [4, 5]
      max_queue: 15
      emergency_prob: 0.01
      burst_prob: 0.0

  - id: medium
    description: "Random traffic bursts, moderate congestion, occasional emergencies."
    config_key: medium
    max_steps: 100
    score_range: [0.001, 0.999]
    params:
      arrival_rate: [1, 3]
      discharge_rate: [3, 5]
      max_queue: 25
      emergency_prob: 0.05
      burst_prob: 0.10

  - id: hard
    description: "High-intensity traffic, frequent emergencies, strict fairness constraints."
    config_key: hard
    max_steps: 200
    score_range: [0.001, 0.999]
    params:
      arrival_rate: [2, 5]
      discharge_rate: [2, 4]
      max_queue: 40
      emergency_prob: 0.15
      burst_prob: 0.20
# ─────────────────────────────────────────────────────────────────────
# Reward design (multi-component, clipped to (-0.999, +0.999))
# Score = (reward + 1) / 2, always in open interval (0, 1)
# ─────────────────────────────────────────────────────────────────────
# Reward specification: overall range, the reward-to-score mapping, and the
# individual components. Each component carries a `sign` ("+" for a bonus term,
# "-" for a penalty term) and a human-readable description.
reward:
  range: [-0.999, 0.999]
  score_normalisation: "(reward + 1) / 2, clamped to [0.001, 0.999]"
  components:
    efficiency:
      sign: "+"
      description: "Vehicles cleared this step (throughput reward)"
    congestion:
      sign: "-"
      description: "Normalised total queue density"
    max_queue_penalty:
      sign: "-"
      description: "Penalty for extreme bottlenecks in any single lane"
    switch_penalty:
      sign: "-"
      description: "Stability constraint to prevent oscillatory signal toggling"
    improvement_bonus:
      sign: "+"
      description: "Bonus for active decongestion progress"
    fairness_bonus:
      sign: "+"
      description: "Reward for maintaining balanced waiting times across all lanes"
    starvation_penalty:
      sign: "-"
      description: "Penalty for phase-duration exceeding starvation limit"
    emergency_golden_window:
      sign: "+"
      description: "Full bonus for clearing EV within golden window steps"
    emergency_delay:
      sign: "-"
      description: "Exponential penalty for delaying life-saving vehicles"
# ─────────────────────────────────────────────────────────────────────
# Evaluation metrics (returned in info dict on every step)
# ─────────────────────────────────────────────────────────────────────
# Evaluation metrics. Each entry declares the metric's value type and a
# description; bounded metrics additionally declare a `range`.
metrics:
  total_cleared:
    type: int
    description: "Total vehicles discharged from the intersection (episode)"
  avg_waiting_time:
    type: float
    description: "Cumulative wait pressure divided by vehicles cleared"
  max_queue_length:
    type: int
    description: "Peak queue length observed in any lane (episode)"
  signal_switch_count:
    type: int
    description: "Total signal changes (lower = more stable)"
  congestion_score:
    type: float
    range: [0.001, 0.999]
    description: "Current normalised total queue depth"
  avg_ev_clear_time:
    type: float
    description: "Average steps taken to clear an emergency vehicle"
  fairness_score:
    type: float
    range: [0.001, 0.999]
    description: "Index representing lane-level service balance"
# ─────────────────────────────────────────────────────────────────────
# Baseline agent
# ─────────────────────────────────────────────────────────────────────
# Reference agent shipped with the environment.
baseline:
  class: "baseline_agent.RuleBasedAgent"
  # Folded scalar: the indented lines below are joined into a single paragraph.
  description: >
    Deterministic rule-based agent. Switches based on queue imbalance,
    minimum green time, starvation guard, and emergency preemption.
  # NOTE(review): presumably these tune the rules named in the description
  # above; units look like environment steps — confirm in baseline_agent.py.
  parameters:
    min_green_time: 5
    imbalance_threshold: 5
    max_green_time: 15
    emergency_min_green: 2
# ─────────────────────────────────────────────────────────────────────
# HTTP API (OpenEnv spec: reset / step / state)
# ─────────────────────────────────────────────────────────────────────
# HTTP endpoints, keyed by operation name. Each entry gives the HTTP method,
# the URL path and a short description.
api:
  reset:
    method: POST
    path: /reset
    description: "Start a new episode"
  step:
    method: POST
    path: /step
    description: "Advance one step"
  state:
    method: GET
    path: /state
    description: "Get current state"
  tasks:
    method: GET
    path: /tasks
    description: "List all tasks"
  grader:
    method: POST
    path: /grader
    description: "Run baseline grader"
  health:
    method: GET
    path: /health
    description: "Liveness probe"
# ─────────────────────────────────────────────────────────────────────
# Project files
# ─────────────────────────────────────────────────────────────────────
# File inventory: a list of single-entry mappings, filename -> purpose.
project_structure:
  - env.py: "Core TrafficEnv class"
  - tasks.py: "Easy / Medium / Hard configuration dicts"
  - baseline_agent.py: "Rule-based baseline agent"
  - inference.py: "FastAPI server + LLM agent + CLI validator script"
  - test_env.py: "Simulation runner and correctness checks"
  - openenv.yaml: "This file — environment specification"
  - README.md: "Full documentation"