sumo_rl_env-v2-1-0 / models.py
burtenshaw's picture
burtenshaw HF Staff
Upload folder using huggingface_hub
f101be3 verified
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
"""
Data models for SUMO-RL Environment.
This module defines the Action, Observation, and State types for traffic
signal control using SUMO (Simulation of Urban MObility).
"""
from pydantic import Field
from typing import Dict, List, Optional
from openenv.core.env_server import Action, Observation, State
class SumoAction(Action):
    """
    Command sent to the SUMO traffic-signal environment.

    An action picks the traffic-light phase that should become active next.

    Attributes:
        phase_id: Index of the green phase to switch to; expected range is
            0 to num_phases-1.
        ts_id: ID of the traffic signal being addressed. Defaults to "0";
            present for multi-agent support.
    """

    # No default is given, so a phase must always be supplied by the caller.
    phase_id: int
    # Which traffic signal the phase selection applies to.
    ts_id: str = "0"
class SumoObservation(Observation):
    """
    Snapshot of traffic metrics returned by the SUMO environment.

    Carries everything an agent needs to choose its next signal phase.

    Attributes:
        observation: Flat feature vector made up of, in order:
            - the current phase, one-hot encoded
            - a binary min-green flag
            - normalized lane densities
            - normalized lane queues
        observation_shape: Shape to use when reshaping ``observation``.
        action_mask: Indices of the actions that are currently valid.
        sim_time: Simulation clock, in seconds.
        done: True once the episode has finished.
        reward: Reward earned by the previous action; None after a reset.
        metadata: Free-form extra information (system metrics, etc.).
    """

    # Sequence-valued fields use a factory so each instance gets its own list.
    observation: List[float] = Field(default_factory=list)
    observation_shape: List[int] = Field(default_factory=list)
    action_mask: List[int] = Field(default_factory=list)

    # Scalar fields; Field(default=...) is equivalent to a plain "= value".
    sim_time: float = Field(default=0.0)
    done: bool = Field(default=False)
    reward: Optional[float] = Field(default=None)

    # Untyped dict: keys and values are not constrained here.
    metadata: Dict = Field(default_factory=dict)
class SumoState(State):
    """
    Full state record for the SUMO traffic-signal environment.

    Combines the static simulation configuration with values that change
    while an episode runs.

    Configuration attributes:
        net_file: Path to the SUMO network file (.net.xml).
        route_file: Path to the SUMO route file (.rou.xml).
        num_seconds: Total length of the simulation, in seconds.
        delta_time: Number of seconds between consecutive agent actions.
        yellow_time: Length of the yellow phase, in seconds.
        min_green: Shortest allowed green time per phase, in seconds.
        max_green: Longest allowed green time per phase, in seconds.
        reward_fn: Name of the reward function in use.

    Runtime attributes:
        episode_id: Unique identifier of the episode.
        step_count: How many steps have been taken in the episode.
        sim_time: Simulation clock, in seconds.
        total_vehicles: Total count of vehicles in the simulation.
        total_waiting_time: Waiting time accumulated over all vehicles.
        mean_waiting_time: Mean waiting time (presumably averaged per
            vehicle, in seconds -- confirm against the environment code).
        mean_speed: Mean vehicle speed (units not shown in this module --
            confirm against the environment code).
    """

    # --- Episode tracking ---
    episode_id: str = ""
    step_count: int = 0

    # --- SUMO configuration ---
    net_file: str = ""
    route_file: str = ""
    num_seconds: int = 20000
    delta_time: int = 5
    yellow_time: int = 2
    min_green: int = 5
    max_green: int = 50
    reward_fn: str = "diff-waiting-time"

    # --- Runtime metrics ---
    sim_time: float = 0.0
    total_vehicles: int = 0
    total_waiting_time: float = 0.0
    mean_waiting_time: float = 0.0
    mean_speed: float = 0.0