# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""
Data models for SUMO-RL Environment.

This module defines the Action, Observation, and State types for traffic
signal control using SUMO (Simulation of Urban MObility).
"""

from pydantic import Field
from typing import Dict, List, Optional

from openenv.core.env_server import Action, Observation, State


class SumoAction(Action):
    """
    Phase-selection command for the SUMO traffic signal control environment.

    Each action names the traffic light phase that should be activated next.

    Attributes:
        phase_id: Index of the green phase to activate, expected to lie in
            the range 0 to num_phases-1. Required; it has no default.
        ts_id: ID of the traffic signal the action applies to. Present for
            multi-agent support; defaults to "0".
    """

    # Mandatory: every action must state which phase it requests.
    phase_id: int
    # Optional: signal "0" is used when no ID is supplied.
    ts_id: str = Field(default="0")


class SumoObservation(Observation):
    """
    Snapshot returned by the SUMO traffic signal environment.

    Bundles the traffic metrics an agent uses to pick its next action.

    Attributes:
        observation: Flattened observation vector. Its contents are:
                    - One-hot encoded current phase
                    - Min green flag (binary)
                    - Lane densities (normalized)
                    - Lane queues (normalized)
        observation_shape: Shape to use when reshaping ``observation``
        action_mask: List of valid action indices
        sim_time: Current simulation time in seconds (defaults to 0.0)
        done: Whether the episode is complete (defaults to False)
        reward: Reward from the last action; None on reset
        metadata: Additional info (system metrics, etc.)
    """

    # Sequence fields: default_factory builds a fresh empty list per instance.
    observation: List[float] = Field(default_factory=list)
    observation_shape: List[int] = Field(default_factory=list)
    action_mask: List[int] = Field(default_factory=list)

    # Scalar fields with plain immutable defaults.
    sim_time: float = Field(default=0.0)
    done: bool = Field(default=False)
    reward: Optional[float] = Field(default=None)

    # Free-form extras: default_factory builds a fresh empty dict per instance.
    metadata: Dict = Field(default_factory=dict)


class SumoState(State):
    """
    State of SUMO traffic signal environment.

    Tracks both configuration and runtime state. Every field has a default,
    so an instance can be created without arguments.

    Episode tracking attributes:
        episode_id: Unique episode identifier (default: empty string)
        step_count: Number of steps taken in episode (default: 0)

    Configuration attributes:
        net_file: Path to SUMO network file (.net.xml) (default: empty string)
        route_file: Path to SUMO route file (.rou.xml) (default: empty string)
        num_seconds: Total simulation duration in seconds (default: 20000)
        delta_time: Seconds between agent actions (default: 5)
        yellow_time: Duration of yellow phase in seconds (default: 2)
        min_green: Minimum green time per phase in seconds (default: 5)
        max_green: Maximum green time per phase in seconds (default: 50)
        reward_fn: Name of reward function used
            (default: "diff-waiting-time")

    Runtime attributes:
        sim_time: Current simulation time in seconds (default: 0.0)
        total_vehicles: Total number of vehicles in simulation (default: 0)
        total_waiting_time: Cumulative waiting time across all vehicles
            (default: 0.0)
        mean_waiting_time: Mean waiting time (default: 0.0). Presumably
            averaged per vehicle and in seconds -- confirm against the
            environment code that populates it.
        mean_speed: Mean speed (default: 0.0). Presumably averaged per
            vehicle; units are not stated in this module -- confirm against
            the environment code that populates it.
    """

    # Episode tracking
    episode_id: str = ""
    step_count: int = 0

    # SUMO configuration
    net_file: str = ""
    route_file: str = ""
    num_seconds: int = 20000
    delta_time: int = 5
    yellow_time: int = 2
    min_green: int = 5
    max_green: int = 50
    reward_fn: str = "diff-waiting-time"

    # Runtime metrics
    sim_time: float = 0.0
    total_vehicles: int = 0
    total_waiting_time: float = 0.0
    mean_waiting_time: float = 0.0
    mean_speed: float = 0.0