# Spaces:
#
# arrow072
# /
#
# open_ENV
#
# Sleeping
#
# File size: 9,256 Bytes

# Package metadata. `version` is quoted so it stays a string rather than
# being parsed as the float 1.0.
version: '1.0'
name: 'TrafficSignalOptimization-v1'
# Folded scalar: the wrapped lines are joined with single spaces into one
# paragraph that ends with a single newline.
description: >
  AI-driven Traffic Signal Optimization for a 4-way urban intersection. A
  reinforcement-learning environment that challenges agents to minimise
  congestion, reduce average waiting time, respond to emergency vehicles, and
  maintain signal stability across three difficulty tiers.

author: 'OpenEnv Submission'
tags:
  - 'Reinforcement Learning'
  - 'Traffic Control'
  - 'Smart Cities'
  - 'Safety-Critical'
  - 'Emergency Vehicle Priority'
# NOTE(review): key uses the British spelling `licence`; confirm the
# consumer does not expect `license`.
licence: 'MIT'

# ─────────────────────────────────────────────────────────────────────
# Environment specification
# ─────────────────────────────────────────────────────────────────────
# Environment contract: where the implementation lives, plus the declared
# state space, action space, and flat observation layout.
environment:
  class: 'env.TrafficEnv'
  entry_point: 'env:TrafficEnv'

  # Structured state, declared as a Dict of named fields.
  state_space:
    type: Dict
    keys:
      # Per-lane queue lengths. `max_queue` is a symbolic upper bound here —
      # presumably the per-task `max_queue` parameter; confirm with consumer.
      north_cars:
        type: Discrete
        description: 'Queued vehicles in the North lane'
        range: [0, max_queue]
      south_cars:
        type: Discrete
        description: 'Queued vehicles in the South lane'
        range: [0, max_queue]
      east_cars:
        type: Discrete
        description: 'Queued vehicles in the East lane'
        range: [0, max_queue]
      west_cars:
        type: Discrete
        description: 'Queued vehicles in the West lane'
        range: [0, max_queue]
      waiting_times:
        type: 'Dict[str, float]'
        description: 'Cumulative waiting-time pressure per lane (north/south/east/west)'
      phase:
        type: Discrete
        values: [0, 1]
        description: 'Current green signal: 0 = NS green, 1 = EW green'
      emergency_flags:
        type: 'Dict[str, bool]'
        description: 'True if an emergency vehicle is present in that lane'
      # `max_steps` is likewise symbolic — presumably the per-task step
      # budget; confirm with consumer.
      step_count:
        type: Discrete
        description: 'Current step within the episode'
        range: [0, max_steps]

  # Two discrete actions; the keys 0 and 1 are parsed as integers.
  action_space:
    type: Discrete
    n: 2
    actions:
      0: 'Keep current signal phase'
      1: 'Switch signal phase (NS ↔ EW)'

  # 4 queues + 4 waits + 4 emergency flags + phase + step = 14 entries.
  observation_vector_dim: 14
  # Layout: [N, S, E, W queues | N, S, E, W waits | N, S, E, W EV flags | phase, step]

# ─────────────────────────────────────────────────────────────────────
# Tasks  (3 required — validator enumerates and scores each one)
# ─────────────────────────────────────────────────────────────────────
# Three difficulty tiers. Each entry carries its own step budget, score
# bounds, and traffic-generation parameters.
tasks:
  # Tier 1: light arrivals, fast discharge, no bursts.
  - id: easy
    description: 'Stable, balanced traffic. Minimal emergencies. Ideal for learning.'
    config_key: easy
    max_steps: 50
    score_range: [0.001, 0.999]
    params:
      arrival_rate: [0, 1]
      discharge_rate: [4, 5]
      max_queue: 15
      emergency_prob: 0.01
      burst_prob: 0.0

  # Tier 2: heavier arrivals with occasional bursts and emergencies.
  - id: medium
    description: 'Random traffic bursts, moderate congestion, occasional emergencies.'
    config_key: medium
    max_steps: 100
    score_range: [0.001, 0.999]
    params:
      arrival_rate: [1, 3]
      discharge_rate: [3, 5]
      max_queue: 25
      emergency_prob: 0.05
      burst_prob: 0.10

  # Tier 3: arrival range can exceed the discharge range; highest
  # emergency and burst probabilities.
  - id: hard
    description: 'High-intensity traffic, frequent emergencies, strict fairness constraints.'
    config_key: hard
    max_steps: 200
    score_range: [0.001, 0.999]
    params:
      arrival_rate: [2, 5]
      discharge_rate: [2, 4]
      max_queue: 40
      emergency_prob: 0.15
      burst_prob: 0.20

# ─────────────────────────────────────────────────────────────────────
# Reward design (multi-component, clipped to (-0.999, +0.999))
# Score = (reward + 1) / 2, always in open interval (0, 1)
# ─────────────────────────────────────────────────────────────────────
# Reward specification: the clipped reward range, the reward-to-score
# mapping, and the named components with the sign of their contribution.
reward:
  range: [-0.999, 0.999]
  # Human-readable formula (a string, not evaluated by YAML). This key must
  # sit at the same two-space indent as `range` and `components`; any other
  # column makes the document unparseable.
  score_normalisation: "(reward + 1) / 2, clamped to [0.001, 0.999]"
  # `sign` values stay quoted: a bare `-` or `+` would not be read as a
  # plain string.
  components:
    efficiency:
      sign: "+"
      description: "Vehicles cleared this step (throughput reward)"
    congestion:
      sign: "-"
      description: "Normalised total queue density"
    max_queue_penalty:
      sign: "-"
      description: "Penalty for extreme bottlenecks in any single lane"
    switch_penalty:
      sign: "-"
      description: "Stability constraint to prevent oscillatory signal toggling"
    improvement_bonus:
      sign: "+"
      description: "Bonus for active decongestion progress"
    fairness_bonus:
      sign: "+"
      description: "Reward for maintaining balanced waiting times across all lanes"
    starvation_penalty:
      sign: "-"
      description: "Penalty for phase-duration exceeding starvation limit"
    emergency_golden_window:
      sign: "+"
      description: "Full bonus for clearing EV within golden window steps"
    emergency_delay:
      sign: "-"
      description: "Exponential penalty for delaying life-saving vehicles"

# ─────────────────────────────────────────────────────────────────────
# Evaluation metrics (returned in info dict on every step)
# ─────────────────────────────────────────────────────────────────────
# Metric catalogue: each entry declares a value type, a description, and
# (for the two bounded scores) a range.
metrics:
  total_cleared:
    type: int
    description: 'Total vehicles discharged from the intersection (episode)'
  avg_waiting_time:
    type: float
    description: 'Cumulative wait pressure divided by vehicles cleared'
  max_queue_length:
    type: int
    description: 'Peak queue length observed in any lane (episode)'
  signal_switch_count:
    type: int
    description: 'Total signal changes (lower = more stable)'
  congestion_score:
    type: float
    range: [0.001, 0.999]
    description: 'Current normalised total queue depth'
  avg_ev_clear_time:
    type: float
    description: 'Average steps taken to clear an emergency vehicle'
  fairness_score:
    type: float
    range: [0.001, 0.999]
    description: 'Index representing lane-level service balance'

# ─────────────────────────────────────────────────────────────────────
# Baseline agent
# ─────────────────────────────────────────────────────────────────────
# Reference agent: its import path, a short description, and tunables.
baseline:
  class: 'baseline_agent.RuleBasedAgent'
  # Folded scalar: the wrapped lines are joined with single spaces.
  description: >
    Deterministic rule-based agent.
    Switches based on queue imbalance, minimum green time,
    starvation guard, and emergency preemption.
  # NOTE(review): units are presumably simulation steps — confirm against
  # the agent implementation.
  parameters:
    min_green_time: 5
    imbalance_threshold: 5
    max_green_time: 15
    emergency_min_green: 2

# ─────────────────────────────────────────────────────────────────────
# HTTP API (OpenEnv spec: reset / step / state)
# ─────────────────────────────────────────────────────────────────────
# HTTP endpoints, one mapping per operation: verb, route, and summary.
api:
  reset:
    method: POST
    path: /reset
    description: 'Start a new episode'
  step:
    method: POST
    path: /step
    description: 'Advance one step'
  state:
    method: GET
    path: /state
    description: 'Get current state'
  tasks:
    method: GET
    path: /tasks
    description: 'List all tasks'
  grader:
    method: POST
    path: /grader
    description: 'Run baseline grader'
  health:
    method: GET
    path: /health
    description: 'Liveness probe'

# ─────────────────────────────────────────────────────────────────────
# Project files
# ─────────────────────────────────────────────────────────────────────
# File inventory: a sequence of single-key mappings (filename -> purpose).
project_structure:
  - env.py: 'Core TrafficEnv class'
  - tasks.py: 'Easy / Medium / Hard configuration dicts'
  - baseline_agent.py: 'Rule-based baseline agent'
  - inference.py: 'FastAPI server + LLM agent + CLI validator script'
  - test_env.py: 'Simulation runner and correctness checks'
  - openenv.yaml: 'This file — environment specification'
  - README.md: 'Full documentation'