"""
Configuration file for RL Traffic Signal Control project.

Contains all hyperparameters and settings for the environment,
agents, and training process.
"""

import os
from pathlib import Path

# ============================================================================
# PROJECT PATHS
# ============================================================================

# Directory that contains this config file; the output folders hang off it.
ROOT_DIR = Path(__file__).parent
MODELS_DIR = ROOT_DIR.joinpath("models")
RESULTS_DIR = ROOT_DIR.joinpath("results")

# Create the output folders at import time. exist_ok=True makes repeated
# imports harmless; no parents=True is needed because both folders sit
# directly inside the directory holding this file.
for _output_dir in (MODELS_DIR, RESULTS_DIR):
    _output_dir.mkdir(exist_ok=True)
del _output_dir  # keep the loop variable out of the module namespace

# ============================================================================
# ENVIRONMENT CONFIGURATION
# ============================================================================

# Intersection geometry and episode clock.
NUM_LANES = 2                # lanes per direction at each intersection
TIME_STEP = 1                # simulated seconds advanced per step
EPISODE_LENGTH = 60 * 60     # steps per episode (one simulated hour)

# Traffic generation.
# The density is deliberately small: it was reduced so that one episode
# produces a throughput of roughly 1000-5000.
TRAFFIC_DENSITY = 0.02
# Morning and evening rush hours.
# NOTE(review): presumably (start_hour, end_hour) pairs on a 24-hour clock;
# confirm against the traffic generator.
PEAK_HOURS = [(7, 9), (17, 19)]
PEAK_MULTIPLIER = 1.5        # factor applied to the density during peak hours

# Signal timing constraints, all in seconds.
MIN_GREEN_TIME = 10          # shortest allowed green phase
MAX_GREEN_TIME = 60          # longest allowed green phase
YELLOW_TIME = 3              # yellow phase duration
ALL_RED_TIME = 2             # all-red clearance duration

# ============================================================================
# AGENT CONFIGURATION
# ============================================================================

# Which agent implementation to use: "dqn" or "q_learning".
AGENT_TYPE = "dqn"

# Length of the observation vector:
# [N_SR, N_L, E_SR, E_L, S_SR, S_L, W_SR, W_L, phase]
STATE_SIZE = 9

# Number of discrete actions: 0 = keep the current phase, 1 = switch phase.
ACTION_SIZE = 2

# Hyperparameters for the Deep Q-Network (DQN) agent.
# NOTE(review): if epsilon is decayed once per episode, 0.998 ** 1000 is
# about 0.135, so epsilon_end would not be reached within NUM_EPISODES;
# confirm the decay cadence in the agent.
DQN_CONFIG = dict(
    learning_rate=1e-4,        # kept low for stability
    gamma=0.99,                # discount factor
    epsilon_start=1.0,         # exploration rate at the start of training
    epsilon_end=0.01,          # floor for the exploration rate
    epsilon_decay=0.998,       # slow decay for thorough exploration
    memory_size=50_000,        # replay buffer capacity
    batch_size=256,            # larger batches give better GPU utilisation
    target_update=10,          # target network update frequency (episodes)
    hidden_layers=[256, 256],  # slightly larger network for the 9D state
    train_frequency=4,         # train every N env steps (less CPU-GPU overhead)
)

# Hyperparameters for the tabular Q-learning agent.
Q_LEARNING_CONFIG = dict(
    learning_rate=0.1,     # alpha
    gamma=0.99,            # discount factor
    epsilon_start=1.0,     # exploration rate at the start of training
    epsilon_end=0.01,      # floor for the exploration rate
    epsilon_decay=0.995,   # exploration decay rate
    num_bins=10,           # bins per state dimension when discretizing
)

# ============================================================================
# TRAINING CONFIGURATION
# ============================================================================

# Training schedule; every frequency below is counted in episodes.
NUM_EPISODES = 1_000             # total number of training episodes
EVAL_FREQUENCY = 50              # run an evaluation every N episodes
SAVE_FREQUENCY = 100             # write a checkpoint every N episodes

# Stopping criteria.
EARLY_STOPPING_PATIENCE = 100    # stop after N episodes without improvement
MIN_REWARD_THRESHOLD = -1_000    # minimum average reward threshold

# Logging.
LOG_FREQUENCY = 10               # log metrics every N episodes
USE_TENSORBOARD = False          # off by default so no extra deps are needed

# ============================================================================
# EVALUATION CONFIGURATION
# ============================================================================

# Number of episodes run for each evaluation.
NUM_EVAL_EPISODES = 10

# Whether the environment is rendered while evaluating.
RENDER_EVAL = False

# ============================================================================
# VISUALIZATION
# ============================================================================

# Plot dimensions and resolution.
# NOTE(review): presumably (width, height) in inches and dots per inch, as
# matplotlib expects; confirm against the plotting code.
FIGURE_SIZE = (12, 6)
DPI = 100

# Names of the per-episode metrics to plot.
METRICS = [
    "episode_reward", "average_waiting_time",
    "average_queue_length", "throughput",
]

# ============================================================================
# RANDOM SEED
# ============================================================================

# Seed value for reproducible runs. This module only defines it; whatever
# code consumes the config is responsible for actually seeding its RNGs.
RANDOM_SEED = 42

# ============================================================================
# DEVICE
# ============================================================================

# Choose the compute device once, at import time, and report the choice on
# stdout. PyTorch is optional: if it cannot be imported the config falls
# back to the CPU instead of failing.
try:
    import torch

    if torch.cuda.is_available():
        DEVICE = "cuda"
    else:
        DEVICE = "cpu"
    print(f"[Config] Device: {DEVICE}")
except ImportError:
    DEVICE = "cpu"
    print("[Config] PyTorch not found, using CPU")