Spaces:

arrow072
/

open_ENV

Sleeping

App Files Files Community

open_ENV / env.py

arrow072

Upload 14 files

4bbf0fc verified 30 days ago

raw

history blame contribute delete

21.5 kB

	"""
	env.py — TrafficEnv: 4-Way Intersection RL Environment
	=======================================================
	Meta × PyTorch OpenEnv Hackathon Submission

	A production-quality reinforcement learning environment for optimising
	traffic signals at a 4-way urban intersection.

	Key design principles:
	- Realistic stochastic vehicle dynamics (arrivals, discharge, congestion)
	- Multi-component, shaped reward function
	- Emergency vehicle priority logic
	- Lane-starvation fairness penalty
	- Three difficulty tiers: Easy / Medium / Hard
	- Rich evaluation metrics exposed via info dict
	"""

	from __future__ import annotations

	import random
	from typing import Any, Dict, List, Tuple

	import numpy as np


	# ---------------------------------------------------------------------------
	# Constants
	# ---------------------------------------------------------------------------

	LANES: List[str] = ["north", "south", "east", "west"]
	NS_LANES: List[str] = ["north", "south"]
	EW_LANES: List[str] = ["east", "west"]

	PHASE_NS = 0 # North-South green
	PHASE_EW = 1 # East-West green


	# ---------------------------------------------------------------------------
	# Helper: observation vector for gym-compatible flat representation
	# ---------------------------------------------------------------------------

	def _state_to_vector(state: Dict[str, Any]) -> np.ndarray:
	"""Convert structured state dict → flat float32 numpy array."""
	queues = [state["north_cars"], state["south_cars"],
	state["east_cars"], state["west_cars"]]
	waits = list(state["waiting_times"].values())
	flags = [float(f) for f in state["emergency_flags"].values()]
	extras = [float(state["phase"]), float(state["step_count"])]
	return np.array(queues + waits + flags + extras, dtype=np.float32)


	# ---------------------------------------------------------------------------
	# TrafficEnv
	# ---------------------------------------------------------------------------

	class TrafficEnv:
	"""
	Reinforcement-learning environment simulating a 4-way traffic intersection.

	Parameters
	----------
	config : dict
	Configuration dictionary (see tasks.py for ready-made configs).

	Environment interface
	--------------------
	reset() → state_dict
	step(action: int) → (next_state, reward, done, info)
	get_state() → state_dict
	state_vector() → np.ndarray (flat observation for RL frameworks)

	Actions
	-------
	0 : Keep current signal phase
	1 : Switch signal phase (NS ↔ EW)

	State dictionary keys
	---------------------
	north_cars, south_cars, east_cars, west_cars : int queue sizes
	waiting_times : dict cumulative wait per lane
	phase : int 0=NS green, 1=EW green
	emergency_flags : dict bool per lane
	step_count : int
	"""

	# ------------------------------------------------------------------
	# Initialisation
	# ------------------------------------------------------------------

	def __init__(self, config: Dict[str, Any]) -> None:
	# --- Core parameters ---
	self.max_steps = int(config.get("max_steps", 100))
	self.max_queue = int(config.get("max_queue", 20))
	self.arrival_rate = tuple(config.get("arrival_rate", (0, 3)))
	self.discharge_rate = tuple(config.get("discharge_rate", (3, 5)))
	self.emergency_prob = float(config.get("emergency_prob", 0.05))
	self.switch_penalty_val = float(config.get("switch_penalty", 0.2))
	self.starvation_threshold= int(config.get("starvation_threshold", 10))

	# --- Burst traffic (Medium / Hard) ---
	self.burst_prob = float(config.get("burst_prob", 0.0))
	self.burst_multiplier = float(config.get("burst_multiplier", 1.0))

	# --- Reward scaling knobs (overridable) ---
	self.r_efficiency_scale = float(config.get("r_efficiency_scale", 0.20))
	self.p_congestion_scale = float(config.get("p_congestion_scale", 0.40))
	self.p_max_q_scale = float(config.get("p_max_q_scale", 0.15))
	self.p_starvation_scale = float(config.get("p_starvation_scale", 0.15))
	self.r_fairness_bonus = float(config.get("r_fairness_bonus", 0.10))
	self.r_improvement_bonus = float(config.get("r_improvement_bonus",0.20))
	self.p_emergency_scale = float(config.get("p_emergency_scale", 0.40))
	self.r_ev_bonus_scale = float(config.get("r_ev_bonus_scale", 0.25))

	# --- Difficulty-specific thresholds ---
	self.ev_golden_window = int(config.get("ev_golden_window", 5))
	self.ev_max_delay = int(config.get("ev_max_delay", 15))
	self.starvation_limit = int(config.get("starvation_threshold", 10))

	# --- Observation dimensionality ---
	# 4 queues + 4 waits + 4 emergency flags + 2 extras = 14
	self.obs_dim = 14

	self.reset()

	# ------------------------------------------------------------------
	# Core API
	# ------------------------------------------------------------------

	def reset(self) -> Dict[str, Any]:
	"""Reset the environment for a new episode. Returns the initial state."""
	self.queues: Dict[str, int] = {lane: 0 for lane in LANES}

	# Cumulative waiting-time pressure per lane
	self.waiting_times: Dict[str, float] = {lane: 0.0 for lane in LANES}

	# Binary emergency-vehicle flags
	self.emergency_flags: Dict[str, bool] = {lane: False for lane in LANES}

	# Signal phase (0 = NS green, 1 = EW green)
	self.phase: int = PHASE_NS

	self.step_count: int = 0
	self.total_cleared: int = 0
	self.last_action: int = -1 # -1 means "no previous action"
	self.consecutive_green: int = 0 # steps without a switch

	# Track previous total queue for improvement bonus
	self._prev_total_queue: int = 0

	# Detailed metrics for hackathon evaluation
	self._metrics: Dict[str, Any] = {
	"total_cleared": 0,
	"avg_waiting_time": 0.0,
	"max_queue_length": 0,
	"signal_switch_count": 0,
	"congestion_score": 0.001,
	"avg_ev_clear_time": 0.0,
	"total_ev_cleared": 0,
	"total_ev_penalty": 0.0,
	"fairness_score": 0.999,
	}

	# Track waiting steps for emergency vehicles and phase stability
	self.ev_timers: Dict[str, List[int]] = {lane: [] for lane in LANES}
	self.phase_duration: int = 0
	self._ev_clear_times: List[int] = []

	return self.get_state()

	# ------------------------------------------------------------------

	def get_state(self) -> Dict[str, Any]:
	"""Return the current environment state as a structured dictionary."""
	return {
	"north_cars": self.queues["north"],
	"south_cars": self.queues["south"],
	"east_cars": self.queues["east"],
	"west_cars": self.queues["west"],
	"waiting_times": dict(self.waiting_times), # copy
	"phase": self.phase,
	"emergency_flags": dict(self.emergency_flags), # copy
	"step_count": self.step_count,
	}

	# ------------------------------------------------------------------

	def state_vector(self) -> np.ndarray:
	"""Return the current state as a flat float32 numpy array (gym-friendly)."""
	return _state_to_vector(self.get_state())

	# ------------------------------------------------------------------

	def step(
	self, action: int
	) -> Tuple[Dict[str, Any], float, bool, Dict[str, Any]]:
	"""
	Advance the simulation by one step.

	Parameters
	----------
	action : int
	0 → Keep current phase
	1 → Switch phase

	Returns
	-------
	next_state : dict
	reward : float (approximately in [-1, +1])
	done : bool
	info : dict (evaluation metrics)
	"""
	if action not in (0, 1):
	raise ValueError(f"Invalid action {action}. Must be 0 or 1.")

	self.step_count += 1

	# ── 1. Record pre-step total queue for improvement bonus ──────
	pre_total_queue = sum(self.queues.values())

	# ── 2. Apply signal switch ────────────────────────────────────
	did_switch = False
	if action == 1:
	self.phase = 1 - self.phase
	self._metrics["signal_switch_count"] += 1
	did_switch = True
	self.phase_duration = 0
	else:
	self.phase_duration += 1
	self.last_action = action

	# ── 3. Increment EV timers BEFORE discharge so clear times are
	# accurate (t=1 on the step it was cleared, not t=0).
	for lane in LANES:
	self.ev_timers[lane] = [t + 1 for t in self.ev_timers[lane]]

	# ── 4. Discharge vehicles from green lanes ────────────────────
	cleared_this_step = self._discharge_traffic()
	self.total_cleared += cleared_this_step
	self._metrics["total_cleared"] = self.total_cleared

	# ── 5. Stochastic vehicle arrivals ────────────────────────────
	self._add_arrivals()

	# ── 6. Update waiting-time pressure ───────────────────────────
	self._update_waiting_times()

	# ── 6. Update scalar metrics ──────────────────────────────────
	current_max_q = max(self.queues.values())
	self._metrics["max_queue_length"] = max(
	self._metrics["max_queue_length"], current_max_q
	)
	total_wait_sum = sum(self.waiting_times.values())
	denom = max(1, self.total_cleared)
	self._metrics["avg_waiting_time"] = total_wait_sum / denom
	self._metrics["congestion_score"] = float(np.clip(
	sum(self.queues.values()) / (self.max_queue * len(LANES)),
	0.001, 0.999
	))

	# ── 7. Calculate reward ───────────────────────────────────────
	post_total_queue = sum(self.queues.values())
	reward = self._calculate_reward(
	cleared=cleared_this_step,
	did_switch=did_switch,
	pre_total=pre_total_queue,
	post_total=post_total_queue,
	current_max_q=current_max_q
	)

	# ── 8. Update fairness index ──────────────────────────────────
	# Simple fairness: (1 - variance of wait times / threshold)
	wait_vals = list(self.waiting_times.values())
	if max(wait_vals) > 0:
	self._metrics["fairness_score"] = float(np.clip(
	1.0 - (np.std(wait_vals) / self.starvation_limit),
	0.001, 0.999
	))
	else:
	self._metrics["fairness_score"] = 0.999

	# ── 9. Termination ────────────────────────────────────────────
	done = self.step_count >= self.max_steps
	self._prev_total_queue = post_total_queue

	return self.get_state(), float(reward), done, dict(self._metrics)

	# ------------------------------------------------------------------
	# Internal dynamics
	# ------------------------------------------------------------------

	def _discharge_traffic(self) -> int:
	"""
	Allow vehicles to pass through green lanes.

	Discharge is stochastic: between discharge_rate[0] and
	discharge_rate[1] vehicles leave per green lane per step.
	"""
	cleared = 0
	low, high = self.discharge_rate
	green_lanes = NS_LANES if self.phase == PHASE_NS else EW_LANES

	for lane in green_lanes:
	num_to_clear = random.randint(low, high)
	actual = min(self.queues[lane], num_to_clear)
	self.queues[lane] -= actual
	cleared += actual

	# Reduce waiting-time pressure proportionally
	if self.queues[lane] == 0:
	self.waiting_times[lane] = 0.0
	else:
	# Each departing vehicle relieves ~2 units of wait pressure
	self.waiting_times[lane] = max(
	0.0, self.waiting_times[lane] - actual * 2.0
	)

	# Clear emergency flag once queue nearly drained
	if self.queues[lane] < 2:
	if self.emergency_flags[lane]:
	# Record clearance time for metrics
	if self.ev_timers[lane]:
	clear_time = self.ev_timers[lane].pop(0)
	self._ev_clear_times.append(clear_time)
	self._metrics["total_ev_cleared"] += 1
	self._metrics["avg_ev_clear_time"] = np.mean(self._ev_clear_times)
	self.emergency_flags[lane] = False

	return cleared

	# ------------------------------------------------------------------

	def _add_arrivals(self) -> None:
	"""
	Add stochastic vehicle arrivals to every lane.

	In burst mode (Medium/Hard), random lanes occasionally
	receive additional vehicles to simulate rush-hour spikes.
	"""
	low, high = self.arrival_rate

	for lane in LANES:
	arrivals = random.randint(low, high)

	# Burst traffic event
	if random.random() < self.burst_prob:
	arrivals = int(arrivals * self.burst_multiplier)

	# Emergency vehicle appearance
	if random.random() < self.emergency_prob:
	self.emergency_flags[lane] = True
	self.ev_timers[lane].append(0) # Start timing from age 0
	arrivals += random.randint(1, 2) # EVs usually have follow-on traffic

	self.queues[lane] = min(
	self.max_queue, self.queues[lane] + arrivals
	)

	# ------------------------------------------------------------------

	def _update_waiting_times(self) -> None:
	"""
	Increment lane-level waiting-time pressure.

	Red lanes accumulate pressure faster (proportional to queue),
	while green lanes still accumulate a smaller residual penalty.
	"""
	green_lanes = NS_LANES if self.phase == PHASE_NS else EW_LANES

	for lane in LANES:
	q = self.queues[lane]
	if q == 0:
	continue
	if lane in green_lanes:
	self.waiting_times[lane] += 0.2 * q # reduced residual pressure
	else:
	self.waiting_times[lane] += 1.0 * q # full waiting pressure



	# ------------------------------------------------------------------
	# Reward function
	# ------------------------------------------------------------------

	def _calculate_reward(
	self,
	cleared: int,
	did_switch: bool,
	pre_total: int,
	post_total: int,
	current_max_q: int,
	) -> float:
	"""
	Premium multi-component shaped reward function for Hackathon Judges.

	Reward Philosphy:
	- CLEAR & CONTINUOUS: Each component scales linearly or exponentially
	to provide a smooth gradient for the RL agent.
	- COMPETING PRESSURES: Efficiency (+) vs. Stability (-) vs. Fairness (-).
	- SAFETY-CRITICAL: Emergency response is heavily weighted.
	"""

	# ── (1) Efficiency: Reward for high throughput ───────────────
	r_efficiency = self.r_efficiency_scale * cleared

	# ── (2) Congestion: Penalty for total density ─────────────────
	congestion_ratio = post_total / (self.max_queue * len(LANES))
	p_congestion = -self.p_congestion_scale * congestion_ratio

	# ── (3) Max Queue Penalty: Discourage extreme bottlenecks ─────
	# Critical for realistic urban flow to avoid total gridlock in one lane.
	p_max_queue = -self.p_max_q_scale * (current_max_q / self.max_queue)

	# ── (4) Switch Penalty: Stability constraint ──────────────────
	p_switch = -self.switch_penalty_val if did_switch else 0.0

	# ── (5) Improvement Bonus: Reward active decongestion ──────────
	r_improvement = 0.0
	if post_total < pre_total:
	delta_ratio = (pre_total - post_total) / max(1, pre_total)
	r_improvement = self.r_improvement_bonus * delta_ratio

	# ── (6) Starvation & Fairness: Temporal constraints ───────────
	# Wait-time penalty + bonus for staying in fair bounds.
	p_starvation = 0.0
	r_fairness = 0.0
	starvation_limit_scaled = self.starvation_limit * 5.0
	max_wait = max(self.waiting_times.values()) if self.waiting_times else 0

	if max_wait > starvation_limit_scaled:
	p_starvation = -self.p_starvation_scale * (max_wait / starvation_limit_scaled)
	elif max_wait < (starvation_limit_scaled * 0.5):
	r_fairness = self.r_fairness_bonus # Bonus for keeping system balanced

	# ── (7) Emergency Vehicle Priority ────────────────────────────
	# "Golden Window" bonus for fast clearance + exponential delay penalty.
	p_emergency = 0.0
	r_ev_bonus = 0.0
	green_lanes = NS_LANES if self.phase == PHASE_NS else EW_LANES
	red_lanes = EW_LANES if self.phase == PHASE_NS else NS_LANES

	for lane in LANES:
	if self.emergency_flags[lane]:
	if lane in red_lanes:
	# Ongoing penalty proportional to queue depth while blocked
	block_ratio = self.queues[lane] / max(1, self.max_queue)
	p_emergency -= self.p_emergency_scale * block_ratio

	# Exponential penalty once past max-delay threshold
	for t in self.ev_timers[lane]:
	if t > self.ev_max_delay:
	p_emergency -= self.p_emergency_scale * 0.5
	else:
	# EV is in a green lane — check Golden Window
	for t in self.ev_timers[lane]:
	if t <= self.ev_golden_window:
	# Cleared within the golden window → full bonus
	r_ev_bonus += self.r_ev_bonus_scale
	else:
	# Being served but took too long → partial bonus
	r_ev_bonus += self.r_ev_bonus_scale * 0.2

	# Record EV penalty for metrics
	self._metrics["total_ev_penalty"] += abs(p_emergency)

	# ── Aggregate & clip ──────────────────────────────────────────
	# Clip to open interval (-0.999, 0.999) so the validator's
	# normalisation score = (reward + 1) / 2 always lands strictly
	# inside (0, 1) — never exactly 0.0 or 1.0.
	total = (
	r_efficiency
	+ p_congestion
	+ p_max_queue
	+ p_switch
	+ r_improvement
	+ p_starvation
	+ r_fairness
	+ p_emergency
	+ r_ev_bonus
	)
	return float(np.clip(total, -0.999, 0.999))

	# ------------------------------------------------------------------
	# Rendering
	# ------------------------------------------------------------------

	def render(self) -> str:
	"""Return a human-readable ASCII snapshot of the intersection."""
	phase_str = "NS 🟢 \| EW 🔴" if self.phase == PHASE_NS else "NS 🔴 \| EW 🟢"
	ev_lanes = [lane for lane, f in self.emergency_flags.items() if f]
	ev_str = ", ".join(ev_lanes) or "none"

	# Calculate some quick stats for the render
	total_q = sum(self.queues.values())
	fairness = self._metrics.get("fairness_score", 1.0)

	lines = [
	f"Step {self.step_count:>4} / {self.max_steps} Phase: {phase_str} ({self.phase_duration} steps)",
	f" North: {self.queues['north']:>3} cars \| South: {self.queues['south']:>3} cars",
	f" East: {self.queues['east']:>3} cars \| West: {self.queues['west']:>3} cars",
	f" Emergency: {ev_str:<15} \| Fairness: {fairness:.2f}",
	f" Total Q: {total_q:>3} \| Cleared: {self.total_cleared:>4} \| EV Clear Avg: {self._metrics['avg_ev_clear_time']:.1f}",
	]
	return "\n".join(lines)