tokev commited on
Commit
38d40b2
·
verified ·
1 Parent(s): 9c55eea

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +5 -0
  2. Dockerfile +49 -0
  3. README.md +186 -6
  4. __init__.py +1 -0
  5. agents/README.md +20 -0
  6. agents/__init__.py +15 -0
  7. agents/district_controller.py +187 -0
  8. agents/district_coordinator.py +11 -0
  9. agents/heuristic_controller.py +91 -0
  10. agents/local_policy.py +110 -0
  11. agents/message_protocol.py +116 -0
  12. artifacts/district_llm_adapter_v3/main_run/adapter/README.md +210 -0
  13. artifacts/district_llm_adapter_v3/main_run/adapter/adapter_config.json +50 -0
  14. artifacts/district_llm_adapter_v3/main_run/adapter/adapter_model.safetensors +3 -0
  15. artifacts/district_llm_adapter_v3/main_run/adapter/tokenizer.json +3 -0
  16. artifacts/district_llm_adapter_v3/main_run/adapter/tokenizer_config.json +18 -0
  17. artifacts/dqn_shared/best_validation.pt +3 -0
  18. client.py +52 -0
  19. data/splits/README.md +21 -0
  20. data/splits/test_cities.txt +15 -0
  21. data/splits/train_cities.txt +70 -0
  22. data/splits/val_cities.txt +15 -0
  23. district_llm/FINAL_ABLATION_RUNBOOK.md +79 -0
  24. district_llm/RL_GUIDANCE_EVAL_RUNBOOK.md +126 -0
  25. district_llm/RL_LLM_WRAPPER_SWEEP_RUNBOOK.md +121 -0
  26. district_llm/__init__.py +18 -0
  27. district_llm/data.py +27 -0
  28. district_llm/derivation.py +228 -0
  29. district_llm/eval.py +436 -0
  30. district_llm/generate_dataset.py +390 -0
  31. district_llm/guided_control.py +67 -0
  32. district_llm/heuristic_guidance.py +73 -0
  33. district_llm/inference.py +223 -0
  34. district_llm/metrics.py +97 -0
  35. district_llm/prompting.py +66 -0
  36. district_llm/repair.py +392 -0
  37. district_llm/rl_guidance_wrapper.py +1004 -0
  38. district_llm/schema.py +429 -0
  39. district_llm/summary_builder.py +413 -0
  40. district_llm/teachers.py +227 -0
  41. district_llm/train_unsloth.py +129 -0
  42. env/README.md +43 -0
  43. env/__init__.py +18 -0
  44. env/cityflow_adapter.py +102 -0
  45. env/district_summary.py +9 -0
  46. env/intersection_config.py +49 -0
  47. env/observation_builder.py +224 -0
  48. env/reward.py +244 -0
  49. env/scenarios.py +10 -0
  50. env/traffic_env.py +356 -0
.gitattributes CHANGED
@@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ artifacts/district_llm_adapter_v3/main_run/adapter/tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
+ third_party/CityFlow/examples/replay.txt filter=lfs diff=lfs merge=lfs -text
38
+ third_party/CityFlow/extern/pybind11/.git.bak/objects/pack/pack-0b2353194187af7e228cfabd32025bb4d3af8551.idx filter=lfs diff=lfs merge=lfs -text
39
+ third_party/CityFlow/extern/pybind11/.git.bak/objects/pack/pack-0b2353194187af7e228cfabd32025bb4d3af8551.pack filter=lfs diff=lfs merge=lfs -text
40
+ third_party/CityFlow/extern/pybind11/.git.bak/objects/pack/pack-0b2353194187af7e228cfabd32025bb4d3af8551.rev filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.12-slim AS builder
2
+
3
+ RUN apt-get update && apt-get install -y --no-install-recommends \
4
+ build-essential \
5
+ cmake \
6
+ libboost-all-dev \
7
+ && rm -rf /var/lib/apt/lists/*
8
+
9
+ WORKDIR /build
10
+
11
+ COPY third_party/CityFlow ./CityFlow
12
+ RUN rm -rf ./CityFlow/build
13
+ RUN pip install --no-cache-dir ./CityFlow
14
+
15
+
16
+ FROM python:3.12-slim AS runtime
17
+
18
+ WORKDIR /app
19
+
20
+ COPY --from=builder /usr/local/lib/python3.12/site-packages/cityflow* \
21
+ /usr/local/lib/python3.12/site-packages/
22
+
23
+ COPY openenv_app/requirements.txt ./requirements.txt
24
+ RUN pip install --no-cache-dir -r requirements.txt
25
+
26
+ COPY __init__.py ./__init__.py
27
+ COPY client.py ./client.py
28
+ COPY models.py ./models.py
29
+ COPY agents/ ./agents/
30
+ COPY district_llm/ ./district_llm/
31
+ COPY env/ ./env/
32
+ COPY openenv_app/ ./openenv_app/
33
+ COPY server/ ./server/
34
+ COPY training/ ./training/
35
+ COPY data/splits/ ./data/splits/
36
+ COPY artifacts/dqn_shared/best_validation.pt ./artifacts/dqn_shared/best_validation.pt
37
+ COPY artifacts/district_llm_adapter_v3/main_run/adapter/ ./artifacts/district_llm_adapter_v3/main_run/adapter/
38
+
39
+ RUN mkdir -p /app/data/generated /app/data/splits
40
+
41
+ ENV DATA_DIR=/app/data/generated
42
+ ENV SPLITS_DIR=/app/data/splits
43
+ ENV CHECKPOINT_PATH=/app/artifacts/dqn_shared/best_validation.pt
44
+ ENV DISTRICT_LLM_ADAPTER_PATH=/app/artifacts/district_llm_adapter_v3/main_run/adapter
45
+
46
+ EXPOSE 7860
47
+
48
+ ENV ENABLE_WEB_INTERFACE=true
49
+ CMD ["sh", "-c", "uvicorn server.app:app --host 0.0.0.0 --port ${PORT:-7860}"]
README.md CHANGED
@@ -1,11 +1,191 @@
1
  ---
2
- title: Traffic Visualizer
3
- emoji: 📈
4
- colorFrom: yellow
5
- colorTo: pink
6
  sdk: docker
7
  pinned: false
8
- short_description: test
 
 
9
  ---
10
 
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Agentic Traffic
3
+ emoji: 🏢
4
+ colorFrom: green
5
+ colorTo: purple
6
  sdk: docker
7
  pinned: false
8
+ short_description: Agentic AI to control traffic lights
9
+ app_port: 7860
10
+ base_path: /web
11
  ---
12
 
13
+ # traffic-llm
14
+
15
+ CityFlow-based traffic-control project with intersection-level multi-agent DQN training and district-aware policy variants.
16
+
17
+ ## Training
18
+
19
+ The default local-policy trainer now uses parameter-shared dueling Double DQN with prioritized replay and n-step returns:
20
+
21
+ ```bash
22
+ python3 -m training.train_local_policy train
23
+ ```
24
+
25
+ That trains against `data/generated`, uses `data/splits`, writes checkpoints to `artifacts/dqn_shared`, enables TensorBoard logging, uses parallel CPU rollout workers by default, shows `tqdm` progress bars, and now validates and checkpoints every 40 updates by default.
26
+
27
+ For a broader but still manageable validation pass:
28
+
29
+ ```bash
30
+ python3 -m training.train_local_policy train --max-val-cities 3 --val-scenarios-per-city 7
31
+ ```
32
+
33
+ That evaluates 3 validation cities across all 7 scenario types. This gives 21 learned-policy validation episodes per eval, or 63 total episodes if random and fixed baselines are also enabled.
34
+
35
+ Phase-3-style full training with the same 40-update eval/checkpoint cadence:
36
+
37
+ ```bash
38
+ python3 -m training.train_local_policy train \
39
+ --max-train-cities 70 \
40
+ --max-val-cities 3 \
41
+ --val-scenarios-per-city 7 \
42
+ --policy-arch single_head_with_district_feature \
43
+ --reward-variant wait_queue_throughput
44
+ ```
45
+
46
+ Useful ablations:
47
+
48
+ ```bash
49
+ python3 -m training.train_local_policy train --policy-arch multi_head --reward-variant current
50
+ python3 -m training.train_local_policy train --policy-arch single_head --reward-variant current
51
+ python3 -m training.train_local_policy train --policy-arch single_head_with_district_feature --reward-variant wait_queue_throughput
52
+ ```
53
+
54
+ For a fast phase-1 overfit run on one fixed world:
55
+
56
+ ```bash
57
+ python3 -m training.train_local_policy train \
58
+ --total-updates 25 \
59
+ --train-city-id city_0072 \
60
+ --train-scenario-name normal \
61
+ --overfit-val-on-train-scenario \
62
+ --fast-overfit \
63
+ --policy-arch single_head_with_district_feature \
64
+ --reward-variant wait_queue_throughput
65
+ ```
66
+
67
+ To create or refresh dataset splits:
68
+
69
+ ```bash
70
+ python3 -m training.train_local_policy make-splits
71
+ ```
72
+
73
+ To evaluate the best checkpoint:
74
+
75
+ ```bash
76
+ python3 -m training.train_local_policy evaluate \
77
+ --checkpoint artifacts/dqn_shared/best_validation.pt \
78
+ --split val
79
+ ```
80
+
81
+ To evaluate a heuristic baseline directly:
82
+
83
+ ```bash
84
+ python3 -m training.train_local_policy evaluate --baseline queue_greedy --split val
85
+ ```
86
+
87
+ ## TensorBoard
88
+
89
+ TensorBoard logs are written to `artifacts/dqn_shared/tensorboard` by default.
90
+
91
+ ```bash
92
+ tensorboard --logdir artifacts/dqn_shared/tensorboard
93
+ ```
94
+
95
+ ## District LLM
96
+
97
+ The district LLM stack lives under `district_llm/`. It treats the learned DQN local controller as the low-level executor, derives district-scale SFT labels automatically from DQN rollout windows, and defaults district-model fine-tuning to DQN-derived rows only.
98
+
99
+ Generate district-LLM data from a learned checkpoint:
100
+
101
+ ```bash
102
+ python3 -m district_llm.generate_dataset \
103
+ --controller rl_checkpoint \
104
+ --checkpoint artifacts/dqn_shared/best_validation.pt \
105
+ --episodes 100 \
106
+ --decision-interval 10 \
107
+ --use-checkpoint-env-config \
108
+ --output data/district_llm_train.jsonl
109
+ ```
110
+
111
+ Generate from fixed or heuristic baselines:
112
+
113
+ ```bash
114
+ python3 -m district_llm.generate_dataset --controller fixed --episodes 50 --decision-interval 10 --output data/district_llm_fixed.jsonl
115
+ python3 -m district_llm.generate_dataset --controller queue_greedy --episodes 50 --decision-interval 10 --output data/district_llm_heuristic.jsonl
116
+ python3 -m district_llm.generate_dataset --teacher-spec fixed --teacher-spec random --episodes 50 --decision-interval 10 --output data/district_llm_multi_teacher.jsonl
117
+ ```
118
+
119
+ Train a first-pass district model with Unsloth/QLoRA:
120
+
121
+ ```bash
122
+ python3 -m training.train_district_llm \
123
+ --dataset data/district_llm_train.jsonl \
124
+ --output-dir artifacts/district_llm_qwen \
125
+ --model-name Qwen/Qwen2.5-7B-Instruct \
126
+ --load-in-4bit \
127
+ --lora-rank 16 \
128
+ --max-seq-length 1024 \
129
+ --max-steps 1000
130
+ ```
131
+
132
+ Run single-sample inference:
133
+
134
+ ```bash
135
+ python3 -m district_llm.inference \
136
+ --model artifacts/district_llm_qwen \
137
+ --city-id city_0006 \
138
+ --scenario-name accident \
139
+ --district-id d_00
140
+ ```
141
+
142
+ Run the OpenEnv-compatible district wrapper on top of the current DQN stack:
143
+
144
+ ```bash
145
+ uvicorn openenv_app.app:app --reload
146
+ ```
147
+
148
+ ## Algorithm
149
+
150
+ - Training algorithm: parameter-shared dueling Double DQN.
151
+ - Replay: prioritized replay over per-intersection transitions gathered from full CityFlow worlds.
152
+ - Return target: n-step bootstrap target with target-network updates.
153
+ - Execution: all controllable intersections act simultaneously every RL decision interval.
154
+ - Action space: `0 = hold current phase`, `1 = switch to next green phase`.
155
+ - Safety: `min_green_time` is enforced in the environment and exposed through action masking.
156
+
157
+ Policy architecture modes:
158
+
159
+ - `multi_head`: shared trunk with district-type-specific Q heads.
160
+ - `single_head`: one shared Q head for all intersections, with district type removed from the observation.
161
+ - `single_head_with_district_feature`: one shared Q head for all intersections, with district type left in the observation as an explicit feature.
162
+
163
+ Reward variants:
164
+
165
+ - `current`: backward-compatible waiting and queue penalty.
166
+ - `normalized_wait_queue`: normalized queue and waiting reduction reward.
167
+ - `wait_queue_throughput`: normalized queue/wait reduction plus throughput bonus and imbalance penalty.
168
+
169
+ ## Smoke Test
170
+
171
+ To sanity-check one generated scenario with the real CityFlow environment:
172
+
173
+ ```bash
174
+ python3 scripts/smoke_test_env.py --city-id city_0001 --scenario-name normal --policy random
175
+ ```
176
+
177
+ ## Project layout
178
+
179
+ - `agents/`: heuristic local policies and simple baselines.
180
+ - `env/`: CityFlow environment, topology parsing, observation building, and reward logic.
181
+ - `training/`: dataset utilities, replay-based DQN training, evaluation helpers, TensorBoard logging, and CLIs.
182
+ - `data/`: generated synthetic cities, split files, and dataset generation utilities.
183
+ - `scripts/`: utility scripts, including the CityFlow smoke test.
184
+ - `third_party/`: vendored dependencies, including CityFlow source.
185
+
186
+ ## Notes
187
+
188
+ - The generated dataset is assumed to already exist under `data/generated`.
189
+ - District membership comes from `district_map.json`.
190
+ - District types come from `metadata.json`.
191
+ - Runtime training and evaluation require the `cityflow` Python module to be installed in the active environment.
__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """OpenEnv package root for the lean agentic traffic environment."""
agents/README.md ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # agents
2
+
3
+ Local traffic-control policies and compatibility shims.
4
+
5
+ ## Main files
6
+
7
+ - [local_policy.py](local_policy.py)
8
+ Active v1 policy interfaces and simple baselines:
9
+ - `HoldPhasePolicy`
10
+ - `FixedCyclePolicy`
11
+ - `QueueGreedyPolicy`
12
+ - [district_controller.py](district_controller.py)
13
+ Older district-level prototype logic kept for compatibility.
14
+ - [district_coordinator.py](district_coordinator.py)
15
+ Import shim for older code paths.
16
+
17
+ ## Notes
18
+
19
+ - The learned local-policy network itself lives in [training/models.py](../training/models.py), not here.
20
+ - For active training, use the parameter-shared DQN path in `training/`, not the district-controller prototypes.
agents/__init__.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from agents.local_policy import (
2
+ BaseLocalPolicy,
3
+ FixedCyclePolicy,
4
+ HoldPhasePolicy,
5
+ QueueGreedyPolicy,
6
+ SharedHeuristicLocalPolicy,
7
+ )
8
+
9
+ __all__ = [
10
+ "BaseLocalPolicy",
11
+ "FixedCyclePolicy",
12
+ "HoldPhasePolicy",
13
+ "QueueGreedyPolicy",
14
+ "SharedHeuristicLocalPolicy",
15
+ ]
agents/district_controller.py ADDED
@@ -0,0 +1,187 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from abc import ABC, abstractmethod
4
+ from typing import Any, Callable
5
+
6
+ from agents.message_protocol import DistrictDirective, parse_district_directive
7
+
8
+
9
class BaseDistrictCoordinator(ABC):
    """Interface for district-level strategic coordinators."""

    @abstractmethod
    def decide(self, district_summary: dict[str, Any]) -> dict[str, Any]:
        raise NotImplementedError


class RuleBasedDistrictCoordinator(BaseDistrictCoordinator):
    """
    Fast, deterministic, and robust.
    Good first coordinator and good fallback if the LLM output fails.
    """

    def __init__(
        self,
        imbalance_threshold: float = 0.15,
        border_pressure_threshold: float = 0.65,
        default_duration: int = 2,
    ):
        self.imbalance_threshold = imbalance_threshold
        self.border_pressure_threshold = border_pressure_threshold
        self.default_duration = default_duration

    @staticmethod
    def _emit(directive: DistrictDirective) -> dict[str, Any]:
        """Validate a directive and return its serializable dict form."""
        return directive.validate().to_dict()

    def decide(self, district_summary: dict[str, Any]) -> dict[str, Any]:
        district_id = district_summary.get("district_id", "unknown")
        members = district_summary.get("intersection_ids", [])

        # Emergency vehicles override every other consideration.
        emergency = district_summary.get("emergency_vehicle", {})
        if emergency.get("present", False):
            return self._emit(
                DistrictDirective(
                    mode="emergency_route",
                    target_intersections=emergency.get("route", members),
                    duration=2,
                    rationale=f"Emergency vehicle detected in district {district_id}.",
                    corridor=emergency.get("corridor"),
                    district_weight=1.0,
                )
            )

        loads = district_summary.get("corridor_loads", {})
        ns_load = float(loads.get("ns", loads.get("north_south", 0.0)))
        ew_load = float(loads.get("ew", loads.get("east_west", 0.0)))

        pressures = district_summary.get("border_pressure", {})
        peak_border = 0.0
        if isinstance(pressures, dict) and pressures:
            peak_border = max(float(v) for v in pressures.values())

        # A strongly pressured border raises the directive's weight.
        weight = 0.7 if peak_border < self.border_pressure_threshold else 0.9

        if ew_load - ns_load > self.imbalance_threshold:
            return self._emit(
                DistrictDirective(
                    mode="prioritize_ew",
                    target_intersections=members,
                    duration=self.default_duration,
                    rationale="East-west corridor is currently more congested than north-south.",
                    corridor="ew",
                    district_weight=weight,
                )
            )

        if ns_load - ew_load > self.imbalance_threshold:
            return self._emit(
                DistrictDirective(
                    mode="prioritize_ns",
                    target_intersections=members,
                    duration=self.default_duration,
                    rationale="North-south corridor is currently more congested than east-west.",
                    corridor="ns",
                    district_weight=weight,
                )
            )

        if peak_border >= self.border_pressure_threshold:
            return self._emit(
                DistrictDirective(
                    mode="damp_border_inflow",
                    target_intersections=members,
                    duration=2,
                    rationale="Border pressure is high; reduce spill-in and smooth cross-district flow.",
                    district_weight=0.8,
                )
            )

        # Nothing stands out: issue an explicit no-op directive.
        return self._emit(
            DistrictDirective(
                mode="none",
                target_intersections=[],
                duration=1,
                rationale="District is reasonably balanced.",
                district_weight=0.5,
            )
        )


class LLMDistrictCoordinator(BaseDistrictCoordinator):
    """
    LLM-backed coordinator.

    `generator_fn` should accept a prompt string and return either:
    - a JSON string, or
    - a dict

    Example:
        coordinator = LLMDistrictCoordinator(generator_fn=my_model_call)
    """

    def __init__(
        self,
        generator_fn: Callable[[str], str | dict[str, Any]],
        fallback: BaseDistrictCoordinator | None = None,
        max_prompt_chars: int = 4000,
    ):
        self.generator_fn = generator_fn
        self.fallback = fallback or RuleBasedDistrictCoordinator()
        self.max_prompt_chars = max_prompt_chars

    def decide(self, district_summary: dict[str, Any]) -> dict[str, Any]:
        # Prompt construction happens outside the try-block on purpose:
        # only generator/parse failures fall back to the rule-based path.
        prompt = self.build_prompt(district_summary)
        try:
            raw = self.generator_fn(prompt)
            # The parser coerces malformed or no-op output into a safe
            # directive, so its result can be returned as-is.
            return parse_district_directive(raw).to_dict()
        except Exception:
            return self.fallback.decide(district_summary)

    def build_prompt(self, district_summary: dict[str, Any]) -> str:
        """Render the district summary into the coordinator prompt text."""
        summary_text = repr(district_summary)
        if len(summary_text) > self.max_prompt_chars:
            summary_text = summary_text[: self.max_prompt_chars] + " ...[truncated]"

        return f"""You are a district-level traffic coordinator.

Your job is to choose a single strategic directive for the next few cycles.

Allowed modes:
- none
- prioritize_ns
- prioritize_ew
- green_wave
- emergency_route
- damp_border_inflow

Return ONLY valid JSON with these fields:
{{
"mode": string,
"target_intersections": list[string],
"duration": int,
"rationale": string,
"corridor": string or null,
"district_weight": float
}}

Guidelines:
- Use emergency_route if an emergency vehicle is present.
- Use prioritize_ns or prioritize_ew when one corridor is clearly more congested.
- Use damp_border_inflow when cross-district border pressure is high.
- Keep duration between 1 and 5.
- district_weight should be between 0.0 and 1.0.

District summary:
{summary_text}
"""
agents/district_coordinator.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from agents.district_controller import (
2
+ BaseDistrictCoordinator,
3
+ LLMDistrictCoordinator,
4
+ RuleBasedDistrictCoordinator,
5
+ )
6
+
7
+ __all__ = [
8
+ "BaseDistrictCoordinator",
9
+ "LLMDistrictCoordinator",
10
+ "RuleBasedDistrictCoordinator",
11
+ ]
agents/heuristic_controller.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from typing import Any
4
+
5
+
6
class HeuristicController:
    """
    Rule-based controller for a single intersection.

    Action space:
        0 -> choose NS green
        1 -> choose EW green

    Assumes:
        queue_lengths = [N, S, E, W]
        waiting_counts = [N, S, E, W]
    """

    def __init__(
        self,
        min_green_steps: int = 5,
        switch_margin: float = 1.0,
        district_bonus_scale: float = 3.0,
        neighbor_pressure_scale: float = 0.25,
    ):
        self.min_green_steps = min_green_steps
        self.switch_margin = switch_margin
        self.district_bonus_scale = district_bonus_scale
        self.neighbor_pressure_scale = neighbor_pressure_scale

    def act(self, obs: dict[str, Any]) -> int:
        """Return the next phase (0=NS green, 1=EW green) for this step."""
        ns_score, ew_score = self._scored_directions(obs)

        phase_now = int(obs.get("current_phase", 0))
        elapsed = int(obs.get("time_since_switch", 0))

        # Ties favor NS green.
        preferred = 0 if ns_score >= ew_score else 1

        # Respect minimum green time to avoid rapid toggling.
        if elapsed < self.min_green_steps:
            return phase_now

        # Require a clear advantage before switching phases.
        score_of = (ns_score, ew_score)
        if (
            preferred != phase_now
            and score_of[preferred] < score_of[phase_now] + self.switch_margin
        ):
            return phase_now

        return preferred

    def _scored_directions(self, obs: dict[str, Any]) -> tuple[float, float]:
        """Return (ns_score, ew_score) including neighbor and district biases."""
        queues = obs.get("queue_lengths", [0, 0, 0, 0])
        waits = obs.get("waiting_counts", [0, 0, 0, 0])

        ns = queues[0] + queues[1] + 1.5 * (waits[0] + waits[1])
        ew = queues[2] + queues[3] + 1.5 * (waits[2] + waits[3])

        # Small bias from neighboring intersections, when provided as [ns, ew].
        pressure = obs.get("neighbor_pressure", [0.0, 0.0])
        if isinstance(pressure, list) and len(pressure) >= 2:
            ns += self.neighbor_pressure_scale * float(pressure[0])
            ew += self.neighbor_pressure_scale * float(pressure[1])

        # Strategic bias handed down from the district coordinator.
        mode = obs.get("district_mode", "none")
        bonus = self.district_bonus_scale * float(obs.get("district_weight", 0.5))
        if mode == "prioritize_ns":
            ns += bonus
        elif mode == "prioritize_ew":
            ew += bonus
        elif mode == "green_wave":
            corridor = obs.get("district_corridor")
            if corridor == "ns":
                ns += bonus
            elif corridor == "ew":
                ew += bonus
        elif mode == "emergency_route":
            # Emergency traffic gets an extra-strong push along its corridor.
            corridor = obs.get("district_corridor")
            if corridor in {"north_to_south", "south_to_north", "ns"}:
                ns += bonus * 1.5
            elif corridor in {"west_to_east", "east_to_west", "ew"}:
                ew += bonus * 1.5

        return ns, ew
agents/local_policy.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from abc import ABC, abstractmethod
4
+
5
+ import numpy as np
6
+
7
+
8
class BaseLocalPolicy(ABC):
    """Interface for batched per-intersection phase policies."""

    @abstractmethod
    def act(self, observation_batch: dict[str, np.ndarray]) -> np.ndarray:
        raise NotImplementedError


class HoldPhasePolicy(BaseLocalPolicy):
    """Always keep the current phase (action 0 everywhere)."""

    def act(self, observation_batch: dict[str, np.ndarray]) -> np.ndarray:
        batch_size = len(observation_batch["intersection_ids"])
        return np.zeros(batch_size, dtype=np.int64)


class RandomPhasePolicy(BaseLocalPolicy):
    """Uniformly sample one of the currently valid actions per intersection."""

    def __init__(self, seed: int = 7):
        self.rng = np.random.default_rng(seed)

    def act(self, observation_batch: dict[str, np.ndarray]) -> np.ndarray:
        mask = observation_batch["action_mask"]
        chosen = np.zeros(mask.shape[0], dtype=np.int64)
        for idx in range(mask.shape[0]):
            legal = np.flatnonzero(mask[idx] > 0.0)
            chosen[idx] = int(self.rng.choice(legal))
        return chosen


class FixedCyclePolicy(BaseLocalPolicy):
    """Switch phase every `green_time` steps, whenever switching is allowed."""

    def __init__(self, green_time: int = 20):
        self.green_time = int(green_time)

    def act(self, observation_batch: dict[str, np.ndarray]) -> np.ndarray:
        due = observation_batch["phase_elapsed"] >= self.green_time
        can_switch = observation_batch["action_mask"][:, 1] > 0.0
        return (due & can_switch).astype(np.int64)


class QueueGreedyPolicy(BaseLocalPolicy):
    """Switch toward the direction with the larger queue-plus-wait pressure."""

    def __init__(self, switch_margin: float = 1.0):
        self.switch_margin = float(switch_margin)

    def act(self, observation_batch: dict[str, np.ndarray]) -> np.ndarray:
        counts = observation_batch["incoming_counts"]
        waiting = observation_batch["incoming_waiting"]
        lane_mask = observation_batch["lane_mask"]
        phase = observation_batch["current_phase"]
        action_mask = observation_batch["action_mask"]

        # First half of the lane axis is treated as NS, second half as EW.
        half = counts.shape[1] // 2
        ns = counts[:, :half].sum(axis=1) + 1.5 * waiting[:, :half].sum(axis=1)
        ew = counts[:, half:].sum(axis=1) + 1.5 * waiting[:, half:].sum(axis=1)

        # NOTE(review): only the NS half is zeroed when it has no live lanes;
        # the EW half is left as-is. Preserved from the original — confirm
        # whether the asymmetry is intentional.
        has_ns_lanes = lane_mask[:, :half].sum(axis=1) > 0
        ns = np.where(has_ns_lanes, ns, 0.0)

        wants_switch = np.where(
            phase == 0,
            ew > ns + self.switch_margin,
            ns > ew + self.switch_margin,
        )
        # Switching must also be legal per the action mask.
        wants_switch = wants_switch & (action_mask[:, 1] > 0.0)
        return wants_switch.astype(np.int64)


class SharedHeuristicLocalPolicy(QueueGreedyPolicy):
    """QueueGreedy variant that also accepts per-intersection dict payloads."""

    def __init__(
        self,
        min_green_steps: int = 5,
        switch_margin: float = 1.0,
        district_bonus_scale: float = 0.0,
        neighbor_pressure_scale: float = 0.0,
    ):
        self.min_green_steps = int(min_green_steps)
        # District/neighbor scales are accepted for interface parity but unused.
        del district_bonus_scale, neighbor_pressure_scale
        super().__init__(switch_margin=switch_margin)

    def act_batch(self, observation_batch):
        # Batched numpy observations take the vectorized parent path.
        if "intersection_ids" in observation_batch:
            return self.act(observation_batch)

        chosen: dict[str, int] = {}
        for node_id, payload in observation_batch.items():
            waits = payload.get("waiting_counts", [0, 0, 0, 0])
            queues = payload.get("queue_lengths", [0, 0, 0, 0])
            phase = int(payload.get("current_phase", 0))
            elapsed = int(payload.get("time_since_switch", 0))

            ns = float(sum(queues[:2]) + 1.5 * sum(waits[:2]))
            ew = float(sum(queues[2:4]) + 1.5 * sum(waits[2:4]))
            wanted = 0 if ns >= ew else 1

            if elapsed < self.min_green_steps:
                chosen[node_id] = phase
            elif wanted != phase and abs(ns - ew) <= self.switch_margin:
                chosen[node_id] = phase
            else:
                chosen[node_id] = wanted
        return chosen
agents/message_protocol.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from dataclasses import asdict, dataclass, field
5
+ from typing import Any
6
+
7
+
8
VALID_MODES = {
    "none",
    "prioritize_ns",
    "prioritize_ew",
    "green_wave",
    "emergency_route",
    "damp_border_inflow",
}

# Corridor labels accepted on a directive; anything else is dropped to None.
_VALID_CORRIDORS = {
    "ns",
    "ew",
    "west_to_east",
    "east_to_west",
    "north_to_south",
    "south_to_north",
}


@dataclass
class NeighborMessage:
    """Congestion report exchanged between adjacent intersections."""

    sender_intersection: str
    receiver_intersection: str
    congestion_level: float
    spillback_risk: bool
    dominant_direction: str  # "ns", "ew", or "balanced"
    queue_total: int

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict view suitable for JSON serialization."""
        return asdict(self)


@dataclass
class DistrictDirective:
    """Strategic instruction a district coordinator hands to local policies."""

    mode: str = "none"
    target_intersections: list[str] = field(default_factory=list)
    duration: int = 1
    rationale: str = ""
    corridor: str | None = None
    district_weight: float = 0.5

    def validate(self) -> "DistrictDirective":
        """Coerce every field into its safe domain, in place, and return self.

        Designed to absorb arbitrary LLM/JSON output: unknown modes become
        "none", non-list targets are dropped, numeric fields are coerced and
        clamped (duration to [1, 10], district_weight to [0.0, 1.0]).
        """
        if self.mode not in VALID_MODES:
            self.mode = "none"

        if not isinstance(self.target_intersections, list):
            self.target_intersections = []

        # JSON from an LLM often carries integers as floats (e.g. 3.0) or
        # numeric strings; coerce them instead of silently resetting to 1.
        # bool is excluded explicitly because it is a subclass of int.
        if isinstance(self.duration, bool) or not isinstance(self.duration, int):
            try:
                self.duration = int(self.duration)
            except (TypeError, ValueError):
                self.duration = 1
        self.duration = max(1, min(self.duration, 10))

        if not isinstance(self.rationale, str):
            self.rationale = ""

        if self.corridor is not None and self.corridor not in _VALID_CORRIDORS:
            self.corridor = None

        # Same coercion policy for the weight (accepts int/float/numeric str).
        if isinstance(self.district_weight, bool) or not isinstance(
            self.district_weight, (int, float)
        ):
            try:
                self.district_weight = float(self.district_weight)
            except (TypeError, ValueError):
                self.district_weight = 0.5
        self.district_weight = float(max(0.0, min(1.0, self.district_weight)))

        return self

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict view suitable for JSON serialization."""
        return asdict(self)


def parse_district_directive(payload: str | dict[str, Any]) -> DistrictDirective:
    """
    Accept either raw JSON text or a dict and return a validated DistrictDirective.

    Falls back safely to a validated no-op directive on any parse error.
    """
    try:
        if isinstance(payload, str):
            payload = payload.strip()
            if not payload:
                return DistrictDirective().validate()

            try:
                data = json.loads(payload)
            except json.JSONDecodeError:
                # The model may wrap the JSON object in prose; extract the
                # outermost {...} span and retry.
                start = payload.find("{")
                end = payload.rfind("}")
                if start == -1 or end == -1 or end <= start:
                    return DistrictDirective().validate()
                data = json.loads(payload[start : end + 1])
        elif isinstance(payload, dict):
            data = payload
        else:
            return DistrictDirective().validate()

        return DistrictDirective(
            mode=data.get("mode", "none"),
            target_intersections=data.get("target_intersections", []),
            duration=data.get("duration", 1),
            rationale=data.get("rationale", ""),
            corridor=data.get("corridor"),
            district_weight=data.get("district_weight", 0.5),
        ).validate()
    except Exception:
        # Any unexpected shape (non-dict JSON, attribute errors, ...) degrades
        # to the safe no-op directive rather than propagating.
        return DistrictDirective().validate()


def safe_directive_dict(payload: str | dict[str, Any] | None) -> dict[str, Any]:
    """Like parse_district_directive, but None-tolerant and dict-returning."""
    if payload is None:
        return DistrictDirective().validate().to_dict()
    return parse_district_directive(payload).to_dict()
artifacts/district_llm_adapter_v3/main_run/adapter/README.md ADDED
@@ -0,0 +1,210 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: unsloth/llama-3.1-8b-unsloth-bnb-4bit
3
+ library_name: peft
4
+ pipeline_tag: text-generation
5
+ tags:
6
+ - base_model:adapter:unsloth/llama-3.1-8b-unsloth-bnb-4bit
7
+ - lora
8
+ - sft
9
+ - transformers
10
+ - trl
11
+ - unsloth
12
+ ---
13
+
14
+ # Model Card for Model ID
15
+
16
+ <!-- Provide a quick summary of what the model is/does. -->
17
+
18
+
19
+
20
+ ## Model Details
21
+
22
+ ### Model Description
23
+
24
+ <!-- Provide a longer summary of what this model is. -->
25
+
26
+
27
+
28
+ - **Developed by:** [More Information Needed]
29
+ - **Funded by [optional]:** [More Information Needed]
30
+ - **Shared by [optional]:** [More Information Needed]
31
+ - **Model type:** [More Information Needed]
32
+ - **Language(s) (NLP):** [More Information Needed]
33
+ - **License:** [More Information Needed]
34
+ - **Finetuned from model [optional]:** [More Information Needed]
35
+
36
+ ### Model Sources [optional]
37
+
38
+ <!-- Provide the basic links for the model. -->
39
+
40
+ - **Repository:** [More Information Needed]
41
+ - **Paper [optional]:** [More Information Needed]
42
+ - **Demo [optional]:** [More Information Needed]
43
+
44
+ ## Uses
45
+
46
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
47
+
48
+ ### Direct Use
49
+
50
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
51
+
52
+ [More Information Needed]
53
+
54
+ ### Downstream Use [optional]
55
+
56
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
57
+
58
+ [More Information Needed]
59
+
60
+ ### Out-of-Scope Use
61
+
62
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
63
+
64
+ [More Information Needed]
65
+
66
+ ## Bias, Risks, and Limitations
67
+
68
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
69
+
70
+ [More Information Needed]
71
+
72
+ ### Recommendations
73
+
74
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
75
+
76
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
77
+
78
+ ## How to Get Started with the Model
79
+
80
+ Use the code below to get started with the model.
81
+
82
+ [More Information Needed]
83
+
84
+ ## Training Details
85
+
86
+ ### Training Data
87
+
88
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
89
+
90
+ [More Information Needed]
91
+
92
+ ### Training Procedure
93
+
94
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
95
+
96
+ #### Preprocessing [optional]
97
+
98
+ [More Information Needed]
99
+
100
+
101
+ #### Training Hyperparameters
102
+
103
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
104
+
105
+ #### Speeds, Sizes, Times [optional]
106
+
107
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
108
+
109
+ [More Information Needed]
110
+
111
+ ## Evaluation
112
+
113
+ <!-- This section describes the evaluation protocols and provides the results. -->
114
+
115
+ ### Testing Data, Factors & Metrics
116
+
117
+ #### Testing Data
118
+
119
+ <!-- This should link to a Dataset Card if possible. -->
120
+
121
+ [More Information Needed]
122
+
123
+ #### Factors
124
+
125
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
126
+
127
+ [More Information Needed]
128
+
129
+ #### Metrics
130
+
131
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
132
+
133
+ [More Information Needed]
134
+
135
+ ### Results
136
+
137
+ [More Information Needed]
138
+
139
+ #### Summary
140
+
141
+
142
+
143
+ ## Model Examination [optional]
144
+
145
+ <!-- Relevant interpretability work for the model goes here -->
146
+
147
+ [More Information Needed]
148
+
149
+ ## Environmental Impact
150
+
151
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
152
+
153
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
154
+
155
+ - **Hardware Type:** [More Information Needed]
156
+ - **Hours used:** [More Information Needed]
157
+ - **Cloud Provider:** [More Information Needed]
158
+ - **Compute Region:** [More Information Needed]
159
+ - **Carbon Emitted:** [More Information Needed]
160
+
161
+ ## Technical Specifications [optional]
162
+
163
+ ### Model Architecture and Objective
164
+
165
+ [More Information Needed]
166
+
167
+ ### Compute Infrastructure
168
+
169
+ [More Information Needed]
170
+
171
+ #### Hardware
172
+
173
+ [More Information Needed]
174
+
175
+ #### Software
176
+
177
+ [More Information Needed]
178
+
179
+ ## Citation [optional]
180
+
181
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
182
+
183
+ **BibTeX:**
184
+
185
+ [More Information Needed]
186
+
187
+ **APA:**
188
+
189
+ [More Information Needed]
190
+
191
+ ## Glossary [optional]
192
+
193
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
194
+
195
+ [More Information Needed]
196
+
197
+ ## More Information [optional]
198
+
199
+ [More Information Needed]
200
+
201
+ ## Model Card Authors [optional]
202
+
203
+ [More Information Needed]
204
+
205
+ ## Model Card Contact
206
+
207
+ [More Information Needed]
208
+ ### Framework versions
209
+
210
+ - PEFT 0.18.1
artifacts/district_llm_adapter_v3/main_run/adapter/adapter_config.json ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alora_invocation_tokens": null,
3
+ "alpha_pattern": {},
4
+ "arrow_config": null,
5
+ "auto_mapping": {
6
+ "base_model_class": "LlamaForCausalLM",
7
+ "parent_library": "transformers.models.llama.modeling_llama",
8
+ "unsloth_fixed": true
9
+ },
10
+ "base_model_name_or_path": "unsloth/llama-3.1-8b-unsloth-bnb-4bit",
11
+ "bias": "none",
12
+ "corda_config": null,
13
+ "ensure_weight_tying": false,
14
+ "eva_config": null,
15
+ "exclude_modules": null,
16
+ "fan_in_fan_out": false,
17
+ "inference_mode": true,
18
+ "init_lora_weights": true,
19
+ "layer_replication": null,
20
+ "layers_pattern": null,
21
+ "layers_to_transform": null,
22
+ "loftq_config": {},
23
+ "lora_alpha": 16,
24
+ "lora_bias": false,
25
+ "lora_dropout": 0.05,
26
+ "megatron_config": null,
27
+ "megatron_core": "megatron.core",
28
+ "modules_to_save": null,
29
+ "peft_type": "LORA",
30
+ "peft_version": "0.18.1",
31
+ "qalora_group_size": 16,
32
+ "r": 16,
33
+ "rank_pattern": {},
34
+ "revision": null,
35
+ "target_modules": [
36
+ "down_proj",
37
+ "q_proj",
38
+ "up_proj",
39
+ "gate_proj",
40
+ "v_proj",
41
+ "k_proj",
42
+ "o_proj"
43
+ ],
44
+ "target_parameters": null,
45
+ "task_type": "CAUSAL_LM",
46
+ "trainable_token_indices": null,
47
+ "use_dora": false,
48
+ "use_qalora": false,
49
+ "use_rslora": false
50
+ }
artifacts/district_llm_adapter_v3/main_run/adapter/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69163c214c0ad5462dc64dfb110bea79e50a2c4d5affdf084a8b351352231777
3
+ size 167832240
artifacts/district_llm_adapter_v3/main_run/adapter/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b
3
+ size 17209920
artifacts/district_llm_adapter_v3/main_run/adapter/tokenizer_config.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "bos_token": "<|begin_of_text|>",
4
+ "clean_up_tokenization_spaces": true,
5
+ "eos_token": "<|end_of_text|>",
6
+ "from_slow": true,
7
+ "is_local": false,
8
+ "legacy": false,
9
+ "model_input_names": [
10
+ "input_ids",
11
+ "attention_mask"
12
+ ],
13
+ "model_max_length": 131072,
14
+ "pad_token": "<|finetune_right_pad_id|>",
15
+ "padding_side": "left",
16
+ "tokenizer_class": "TokenizersBackend",
17
+ "unk_token": null
18
+ }
artifacts/dqn_shared/best_validation.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a478cd3149c74ef0f0a57e5510bccd35262272a0a25bfb76b6aac2e8417af85
3
+ size 1320091
client.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from typing import Any
4
+
5
+ import requests
6
+
7
+ from models import (
8
+ AgenticTrafficAction,
9
+ AgenticTrafficObservation,
10
+ AgenticTrafficState,
11
+ )
12
+
13
+
14
class AgenticTrafficClient:
    """Thin HTTP client for the DistrictFlow OpenEnv server."""

    def __init__(self, base_url: str):
        # Normalize so endpoint paths can always be joined with a single "/".
        self.base_url = base_url.rstrip("/")

    def _post(self, endpoint: str, body: dict[str, Any]) -> dict[str, Any]:
        # Shared POST helper: raises for HTTP errors, returns the decoded JSON body.
        response = requests.post(
            f"{self.base_url}/{endpoint}",
            json=body,
            timeout=60,
        )
        response.raise_for_status()
        return response.json()

    def reset(self, seed: int | None = None) -> AgenticTrafficObservation:
        """Reset the environment and return the initial observation."""
        payload = self._post("reset", {"seed": seed})
        return AgenticTrafficObservation.model_validate(payload["observation"])

    def step(self, action: AgenticTrafficAction) -> AgenticTrafficObservation:
        """Apply one action and return the resulting observation.

        The server's ``done``/``reward`` fields are copied onto the
        observation, defaulting to ``False`` / ``0.0`` when absent.
        """
        payload = self._post("step", {"action": action.model_dump()})
        observation = AgenticTrafficObservation.model_validate(payload["observation"])
        observation.done = bool(payload.get("done", False))
        observation.reward = float(payload.get("reward", 0.0))
        return observation

    def state(self) -> AgenticTrafficState:
        """Fetch and validate the full environment state."""
        response = requests.get(f"{self.base_url}/state", timeout=60)
        response.raise_for_status()
        payload = response.json()
        return AgenticTrafficState.model_validate(payload["state"])

    def health(self) -> dict[str, Any]:
        """Return the server health payload as-is."""
        response = requests.get(f"{self.base_url}/health", timeout=30)
        response.raise_for_status()
        return response.json()
data/splits/README.md ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # data/splits
2
+
3
+ City-level train/validation/test splits for the generated dataset.
4
+
5
+ ## Files
6
+
7
+ - [train_cities.txt](train_cities.txt)
8
+ - [val_cities.txt](val_cities.txt)
9
+ - [test_cities.txt](test_cities.txt)
10
+
11
+ ## Important rule
12
+
13
+ Splits are by city only. All scenarios for a given city belong to the same split.
14
+
15
+ ## Regeneration
16
+
17
+ Use:
18
+
19
+ `python3 -m training.train_local_policy make-splits`
20
+
21
+ The split logic is implemented in [training/dataset.py](../../training/dataset.py).
data/splits/test_cities.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ city_0005
2
+ city_0007
3
+ city_0008
4
+ city_0010
5
+ city_0012
6
+ city_0013
7
+ city_0020
8
+ city_0028
9
+ city_0042
10
+ city_0047
11
+ city_0051
12
+ city_0065
13
+ city_0069
14
+ city_0075
15
+ city_0084
data/splits/train_cities.txt ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ city_0001
2
+ city_0002
3
+ city_0003
4
+ city_0004
5
+ city_0006
6
+ city_0011
7
+ city_0014
8
+ city_0015
9
+ city_0017
10
+ city_0018
11
+ city_0019
12
+ city_0021
13
+ city_0022
14
+ city_0023
15
+ city_0024
16
+ city_0025
17
+ city_0026
18
+ city_0027
19
+ city_0030
20
+ city_0032
21
+ city_0033
22
+ city_0034
23
+ city_0035
24
+ city_0036
25
+ city_0037
26
+ city_0038
27
+ city_0039
28
+ city_0040
29
+ city_0041
30
+ city_0043
31
+ city_0044
32
+ city_0045
33
+ city_0046
34
+ city_0048
35
+ city_0049
36
+ city_0050
37
+ city_0052
38
+ city_0053
39
+ city_0057
40
+ city_0058
41
+ city_0059
42
+ city_0060
43
+ city_0061
44
+ city_0062
45
+ city_0063
46
+ city_0064
47
+ city_0066
48
+ city_0067
49
+ city_0068
50
+ city_0070
51
+ city_0072
52
+ city_0074
53
+ city_0076
54
+ city_0077
55
+ city_0079
56
+ city_0080
57
+ city_0081
58
+ city_0082
59
+ city_0083
60
+ city_0085
61
+ city_0087
62
+ city_0088
63
+ city_0089
64
+ city_0092
65
+ city_0093
66
+ city_0094
67
+ city_0095
68
+ city_0097
69
+ city_0099
70
+ city_0100
data/splits/val_cities.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ city_0009
2
+ city_0016
3
+ city_0029
4
+ city_0031
5
+ city_0054
6
+ city_0055
7
+ city_0056
8
+ city_0071
9
+ city_0073
10
+ city_0078
11
+ city_0086
12
+ city_0090
13
+ city_0091
14
+ city_0096
15
+ city_0098
district_llm/FINAL_ABLATION_RUNBOOK.md ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Final Ablation Runbook
2
+
3
+ ## Dataset
4
+
5
+ Generate the constrained v3 dataset:
6
+
7
+ ```bash
8
+ python scripts/generate_large_district_dataset.py \
9
+ --num-train 10000 \
10
+ --num-val 2500 \
11
+ --output-dir data/district_llm_dataset_v3 \
12
+ --checkpoint artifacts/dqn_shared/best_validation.pt \
13
+ --max-candidate-intersections 6 \
14
+ --max-target-intersections 3
15
+ ```
16
+
17
+ Defaults:
18
+
19
+ - candidate pool is visible in the prompt via `candidate_intersections`
20
+ - labels are constrained to visible candidates
21
+ - DQN teacher sources are preferred by default
22
+
23
+ ## Notebook
24
+
25
+ Use [notebooks/llama_finetune.ipynb](../notebooks/llama_finetune.ipynb).
26
+
27
+ Recommended defaults for the A100 main run:
28
+
29
+ - `RUN_MODE = "main_run"`
30
+ - `num_train_epochs = 2`
31
+ - `per_device_train_batch_size = 8`
32
+ - `gradient_accumulation_steps = 4`
33
+ - effective batch size = 32
34
+ - `learning_rate = 1e-4`
35
+ - `warmup_ratio = 0.05`
36
+ - `eval_steps = 100`
37
+ - `save_steps = 100`
38
+
39
+ Smoke test mode:
40
+
41
+ - `RUN_MODE = "smoke_test"`
42
+ - short `max_steps`
43
+ - verifies formatting, checkpointing, and eval wiring
44
+
45
+ Optional max-step override:
46
+
47
+ - set `MAX_STEPS_OVERRIDE = 5000` only for explicit experimentation
48
+ - do not use it as the default main run
49
+
50
+ Artifacts:
51
+
52
+ - checkpoints: `artifacts/district_llm_adapter_v3/<run_mode>/checkpoints`
53
+ - saved adapter: `artifacts/district_llm_adapter_v3/<run_mode>/adapter`
54
+
55
+ ## Evaluation
56
+
57
+ Run offline eval with repair enabled:
58
+
59
+ ```bash
60
+ python -m district_llm.eval \
61
+ --model-path artifacts/district_llm_adapter_v3/main_run/adapter \
62
+ --val-jsonl data/district_llm_dataset_v3/val.jsonl \
63
+ --generated-root data/generated \
64
+ --max-examples 250 \
65
+ --debug-examples 10 \
66
+ --allow-only-visible-candidates \
67
+ --max-target-intersections 3 \
68
+ --fallback-on-empty-targets \
69
+ --fallback-mode heuristic \
70
+ --restrict-targets-to-visible-summary \
71
+ --report-before-after-repair
72
+ ```
73
+
74
+ Key outputs:
75
+
76
+ - raw vs repaired target metrics
77
+ - invalid target-id rate before and after repair
78
+ - visible-candidate-restricted metrics
79
+ - target failure buckets and debug examples
district_llm/RL_GUIDANCE_EVAL_RUNBOOK.md ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # RL Guidance Eval Runbook
2
+
3
+ This ablation keeps the RL checkpoint fixed.
4
+
5
+ District guidance is only used at inference time through the wrapper in
6
+ `district_llm/rl_guidance_wrapper.py`. The safest default is
7
+ `target_only_soft`, which applies a small local Q-value bias only at
8
+ `target_intersections`.
9
+
10
+ ## Wrapper Modes
11
+
12
+ - `no_op`: guidance is computed and logged, but RL actions are unchanged.
13
+ - `target_only_soft`: weak local prior on target intersections. Default debug mode.
14
+ - `target_only_medium`: same scope, slightly stronger.
15
+ - `corridor_soft`: small corridor prior on targets plus a few aligned boundary intersections.
16
+ - `global_soft`: weak district-wide prior. Use only as an ablation.
17
+ - `current_legacy`: reference mode approximating the old strong/global wrapper.
18
+
19
+ ## Fast Debug Matrix
20
+
21
+ Use a short horizon first so the wrapper can be debugged quickly:
22
+
23
+ ```bash
24
+ python scripts/eval_rl_guidance_ablation.py \
25
+ --rl-checkpoint artifacts/dqn_shared/best_validation.pt \
26
+ --llm-model-path artifacts/district_llm_adapter_v3/main_run/adapter \
27
+ --modes rl_only rl_heuristic rl_llm \
28
+ --wrapper-modes no_op target_only_soft current_legacy \
29
+ --split val \
30
+ --cities city_0001 \
31
+ --scenarios normal \
32
+ --seeds 7 11 13 \
33
+ --num-episodes 1 \
34
+ --max-episode-seconds 300 \
35
+ --guidance-refresh-steps 10 \
36
+ --guidance-persistence-steps 3 \
37
+ --bias-strength 0.12 \
38
+ --target-only-bias-strength 0.18 \
39
+ --corridor-bias-strength 0.05 \
40
+ --max-intersections-affected 3 \
41
+ --fallback-policy hold_previous \
42
+ --save-guidance-traces \
43
+ --output-dir artifacts/rl_guidance_eval/debug_matrix_300s
44
+ ```
45
+
46
+ This expands into the paired comparison:
47
+
48
+ - `rl_only`
49
+ - `rl_heuristic+no_op`
50
+ - `rl_heuristic+target_only_soft`
51
+ - `rl_heuristic+current_legacy`
52
+ - `rl_llm+no_op`
53
+ - `rl_llm+target_only_soft`
54
+ - `rl_llm+current_legacy`
55
+
56
+ That command runs a superset of the exact smaller matrix from the wrapper audit prompt. Focus analysis on:
57
+
58
+ - `rl_only`
59
+ - `rl_heuristic+no_op`
60
+ - `rl_heuristic+target_only_soft`
61
+ - `rl_llm+no_op`
62
+ - `rl_llm+target_only_soft`
63
+ - `rl_llm+current_legacy`
64
+
65
+ ## What To Look At
66
+
67
+ Primary files:
68
+
69
+ - `summary.json`
70
+ - `episode_metrics.csv`
71
+ - `guidance_traces.jsonl`
72
+ - `config.json`
73
+
74
+ Key wrapper metrics in `episode_metrics.csv`:
75
+
76
+ - `wrapper_mode`
77
+ - `mean_bias_magnitude`
78
+ - `max_bias_magnitude`
79
+ - `avg_num_targeted_intersections`
80
+ - `avg_num_affected_intersections`
81
+ - `percent_steps_with_active_guidance`
82
+ - `num_guidance_refreshes`
83
+ - `num_noop_guidance_events`
84
+ - `fallback_policy_used_count`
85
+
86
+ Interpretation:
87
+
88
+ - If `rl_heuristic+no_op` and `rl_llm+no_op` match `rl_only`, the harness itself is fine.
89
+ - If `current_legacy` collapses while `target_only_soft` stays near `rl_only`, the wrapper was too strong/global.
90
+ - If `rl_llm+target_only_soft` diverges from `rl_heuristic+target_only_soft`, the LLM is adding signal under safe integration.
91
+ - If `avg_num_affected_intersections` is large or `percent_steps_with_active_guidance` is near `1.0`, the wrapper is still too persistent or too broad.
92
+ - If `fallback_policy_used_count` stays high in `rl_llm`, inspect `guidance_traces.jsonl` before trusting traffic metrics.
93
+
94
+ ## Cheap Follow-Up Ablations
95
+
96
+ Softer local prior:
97
+
98
+ ```bash
99
+ --wrapper-modes no_op target_only_soft target_only_medium
100
+ ```
101
+
102
+ Scope ablation:
103
+
104
+ ```bash
105
+ --wrapper-modes target_only_soft corridor_soft global_soft current_legacy
106
+ ```
107
+
108
+ More conservative persistence:
109
+
110
+ ```bash
111
+ --guidance-refresh-steps 8 --guidance-persistence-steps 2
112
+ ```
113
+
114
+ ## Output Layout
115
+
116
+ Outputs are saved under the requested directory, for example:
117
+
118
+ ```text
119
+ artifacts/rl_guidance_eval/debug_matrix_300s/
120
+ config.json
121
+ summary.json
122
+ episode_metrics.csv
123
+ episode_metrics.jsonl
124
+ guidance_traces.jsonl
125
+ seeded_configs/
126
+ ```
district_llm/RL_LLM_WRAPPER_SWEEP_RUNBOOK.md ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # RL+LLM Wrapper Sweep
2
+
3
+ This sweep keeps both checkpoints fixed:
4
+
5
+ - RL weights stay fixed.
6
+ - LLM weights stay fixed.
7
+ - Only the inference-time `target_only_soft` wrapper settings change.
8
+
9
+ ## Recommended First Sweep
10
+
11
+ Run the default cheap preset on one city, one scenario, and three seeds:
12
+
13
+ ```bash
14
+ python scripts/sweep_rl_llm_wrapper.py \
15
+ --rl-checkpoint artifacts/dqn_shared/best_validation.pt \
16
+ --llm-model-path artifacts/district_llm_adapter_v3/main_run/adapter \
17
+ --preset strength_targets_gating \
18
+ --split val \
19
+ --cities city_0001 \
20
+ --scenarios normal \
21
+ --seeds 7 11 13 \
22
+ --episodes-per-seed 1 \
23
+ --max-episode-seconds 300 \
24
+ --guidance-refresh-steps 10 \
25
+ --queue-threshold 150 \
26
+ --imbalance-threshold 20 \
27
+ --fallback-policy no_op \
28
+ --output-dir artifacts/rl_llm_wrapper_sweep/first_pass
29
+ ```
30
+
31
+ This preset sweeps a small curated grid over:
32
+
33
+ - `bias_strength` in `{0.025, 0.05, 0.075}`
34
+ - `max_intersections_affected` in `{1, 2}`
35
+ - `gating_mode` in `{always_on, incident_or_spillback, queue_or_imbalance}`
36
+ - `guidance_persistence_steps = 5`
37
+ - `enable_bias_decay = false`
38
+
39
+ It also includes `baseline_current_soft` as a reference row.
40
+
41
+ ## Cheaper Probe
42
+
43
+ If you only want the fastest possible read on strength sensitivity:
44
+
45
+ ```bash
46
+ python scripts/sweep_rl_llm_wrapper.py \
47
+ --rl-checkpoint artifacts/dqn_shared/best_validation.pt \
48
+ --llm-model-path artifacts/district_llm_adapter_v3/main_run/adapter \
49
+ --preset strength_only \
50
+ --cities city_0001 \
51
+ --scenarios normal \
52
+ --seeds 7 11 13 \
53
+ --episodes-per-seed 1 \
54
+ --max-episode-seconds 300 \
55
+ --output-dir artifacts/rl_llm_wrapper_sweep/strength_only
56
+ ```
57
+
58
+ ## Broader Conservative Follow-Up
59
+
60
+ After the first pass identifies a promising strength/gating region:
61
+
62
+ ```bash
63
+ python scripts/sweep_rl_llm_wrapper.py \
64
+ --rl-checkpoint artifacts/dqn_shared/best_validation.pt \
65
+ --llm-model-path artifacts/district_llm_adapter_v3/main_run/adapter \
66
+ --preset full_conservative \
67
+ --cities city_0001 \
68
+ --scenarios normal \
69
+ --seeds 7 11 13 \
70
+ --episodes-per-seed 1 \
71
+ --max-episode-seconds 300 \
72
+ --output-dir artifacts/rl_llm_wrapper_sweep/full_conservative
73
+ ```
74
+
75
+ ## Outputs
76
+
77
+ Each sweep writes:
78
+
79
+ - `config.json`
80
+ - `sweep_results.csv`
81
+ - `sweep_results.parquet` when parquet support is available
82
+ - `paired_episode_metrics.csv`
83
+ - `ranking.json`
84
+ - `summary_report.json`
85
+ - optional `step_metrics.*`
86
+ - optional `guidance_traces.jsonl`
87
+
88
+ ## What To Inspect
89
+
90
+ Start with:
91
+
92
+ - `summary_report.json`
93
+ - `ranking.json`
94
+ - `paired_episode_metrics.csv`
95
+
96
+ Key fields:
97
+
98
+ - `mean_return_delta_vs_rl_only`
99
+ - `mean_throughput_delta_vs_rl_only`
100
+ - `mean_avg_queue_delta_vs_rl_only`
101
+ - `mean_avg_wait_delta_vs_rl_only`
102
+ - `mean_percent_steps_with_active_guidance`
103
+ - `mean_avg_num_affected_intersections`
104
+ - `mean_num_steps_guidance_blocked_by_gate`
105
+
106
+ ## Interpretation
107
+
108
+ The most promising configs should usually look like:
109
+
110
+ - small negative or positive `mean_return_delta_vs_rl_only`
111
+ - low `mean_avg_num_affected_intersections`
112
+ - moderate or low `mean_percent_steps_with_active_guidance`
113
+ - low fallback / invalid guidance counts
114
+
115
+ If the best configs cluster around:
116
+
117
+ - lower `bias_strength`
118
+ - `max_intersections_affected = 1`
119
+ - gated modes like `incident_or_spillback` or `queue_or_imbalance`
120
+
121
+ then the wrapper was still too active and guidance needs to remain a rare local prior.
district_llm/__init__.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from district_llm.derivation import DistrictWindowData, LocalIntersectionAction, derive_district_action
2
+ from district_llm.prompting import build_system_prompt, format_district_prompt, format_sft_text
3
+ from district_llm.schema import CandidateIntersection, CongestedIntersection, DistrictAction, DistrictStateSummary
4
+ from district_llm.summary_builder import DistrictStateSummaryBuilder
5
+
6
+ __all__ = [
7
+ "CandidateIntersection",
8
+ "CongestedIntersection",
9
+ "DistrictAction",
10
+ "DistrictStateSummary",
11
+ "DistrictStateSummaryBuilder",
12
+ "DistrictWindowData",
13
+ "LocalIntersectionAction",
14
+ "derive_district_action",
15
+ "build_system_prompt",
16
+ "format_district_prompt",
17
+ "format_sft_text",
18
+ ]
district_llm/data.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+
5
+ def load_jsonl_text_dataset(
6
+ path: str | Path,
7
+ controller_families: list[str] | None = None,
8
+ controller_types: list[str] | None = None,
9
+ ):
10
+ from datasets import load_dataset
11
+
12
+ dataset = load_dataset("json", data_files=str(Path(path)), split="train")
13
+ if "text" not in dataset.column_names:
14
+ raise ValueError("Expected a JSONL dataset with a 'text' field.")
15
+ if controller_families:
16
+ allowed_families = set(controller_families)
17
+ dataset = dataset.filter(
18
+ lambda row: row.get("controller_family") in allowed_families
19
+ )
20
+ if controller_types:
21
+ allowed_types = set(controller_types)
22
+ dataset = dataset.filter(
23
+ lambda row: row.get("controller_type") in allowed_types
24
+ )
25
+ if len(dataset) == 0:
26
+ raise ValueError("No dataset rows remain after applying the requested filters.")
27
+ return dataset
district_llm/derivation.py ADDED
@@ -0,0 +1,228 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass, field
4
+ from typing import Any
5
+
6
+ from district_llm.repair import fallback_target_intersections
7
+ from district_llm.schema import DistrictAction, DistrictStateSummary
8
+
9
+
10
@dataclass
class LocalIntersectionAction:
    """One local-controller decision at a single intersection within a window."""

    intersection_id: str
    district_id: str
    # Raw controller action; 1 is treated as a phase-switch request.
    action: int
    current_phase: int
    next_phase: int
    queue_total: float
    wait_total: float
    outgoing_load: float
    # True when the intersection lies on the district boundary.
    is_boundary: bool

    @property
    def switched(self) -> bool:
        """True when a switch was requested and the phase actually changed."""
        return int(self.action) == 1 and self.next_phase != self.current_phase
25
+
26
+
27
@dataclass
class DistrictWindowData:
    """Aggregated district data for one decision window (start/end summaries)."""

    district_id: str
    start_summary: DistrictStateSummary
    end_summary: DistrictStateSummary
    controller_actions: list[LocalIntersectionAction] = field(default_factory=list)
    step_count: int = 0

    def to_dict(self) -> dict[str, Any]:
        """Summarize the window as rounded end-minus-start deltas."""
        start = self.start_summary
        end = self.end_summary

        def _delta(end_value: float, start_value: float) -> float:
            # Rounded to 3 decimals to keep serialized summaries compact.
            return round(end_value - start_value, 3)

        return {
            "district_id": self.district_id,
            "step_count": int(self.step_count),
            "queue_delta": _delta(end.total_queue, start.total_queue),
            "wait_delta": _delta(end.total_wait, start.total_wait),
            "throughput_delta": _delta(end.recent_throughput, start.recent_throughput),
        }
46
+
47
+
48
+ def derive_district_action(
49
+ window_data: DistrictWindowData,
50
+ controller_actions: list[LocalIntersectionAction] | None = None,
51
+ district_state: DistrictStateSummary | None = None,
52
+ max_target_intersections: int = 3,
53
+ ) -> DistrictAction:
54
+ """
55
+ Deterministic first-pass label extraction from local-controller behavior.
56
+
57
+ Heuristic order:
58
+ 1. Incident-heavy windows map to `incident_response`.
59
+ 2. Strong spillback / boundary pressure maps to `clear_spillback`.
60
+ 3. Rising boundary demand maps to `drain_inbound`.
61
+ 4. Persistently high outgoing pressure maps to `drain_outbound`.
62
+ 5. Boundary-heavy rush windows map to `arterial_priority`.
63
+ 6. Clear NS/EW directional dominance maps to `favor_NS` / `favor_EW`.
64
+ 7. Otherwise emit `hold`.
65
+ """
66
+ actions = controller_actions if controller_actions is not None else window_data.controller_actions
67
+ state = district_state if district_state is not None else window_data.start_summary
68
+ end_state = window_data.end_summary
69
+
70
+ duration_steps = max(1, min(int(window_data.step_count or 1), 20))
71
+ phase_counts = {"NS": 0, "EW": 0}
72
+ focus_scores: dict[str, float] = {}
73
+ boundary_focus = 0
74
+ switch_count = 0
75
+
76
+ for item in actions:
77
+ phase_key = "NS" if int(item.next_phase) == 0 else "EW"
78
+ phase_counts[phase_key] += 1
79
+ switch_count += int(item.switched)
80
+ if item.is_boundary:
81
+ boundary_focus += 1
82
+ focus_scores[item.intersection_id] = focus_scores.get(item.intersection_id, 0.0) + (
83
+ item.queue_total + 1.5 * item.wait_total + 2.0 * float(item.switched)
84
+ )
85
+
86
+ total_action_records = max(1, len(actions))
87
+ ns_phase_ratio = phase_counts["NS"] / float(total_action_records)
88
+ ew_phase_ratio = phase_counts["EW"] / float(total_action_records)
89
+ boundary_focus_ratio = boundary_focus / float(total_action_records)
90
+ queue_delta = end_state.total_queue - state.total_queue
91
+ wait_delta = end_state.total_wait - state.total_wait
92
+ boundary_share = state.boundary_queue_total / max(1.0, state.total_queue)
93
+ outgoing_pressure = end_state.total_outgoing_load / max(1.0, end_state.total_queue)
94
+
95
+ if ns_phase_ratio > ew_phase_ratio + 0.1:
96
+ phase_bias = "NS"
97
+ elif ew_phase_ratio > ns_phase_ratio + 0.1:
98
+ phase_bias = "EW"
99
+ else:
100
+ phase_bias = "NONE"
101
+
102
+ if phase_bias == "NONE" and state.dominant_flow in {"NS", "EW"}:
103
+ phase_bias = state.dominant_flow
104
+
105
+ def select_targets(
106
+ strategy: str,
107
+ priority_corridor: str | None,
108
+ selected_phase_bias: str,
109
+ ) -> list[str]:
110
+ return fallback_target_intersections(
111
+ summary=state,
112
+ max_target_intersections=max_target_intersections,
113
+ strategy=strategy,
114
+ priority_corridor=priority_corridor,
115
+ phase_bias=selected_phase_bias,
116
+ focus_scores=focus_scores,
117
+ )
118
+
119
+ if state.incident_flag or end_state.incident_flag:
120
+ target_intersections = select_targets(
121
+ strategy="incident_response",
122
+ priority_corridor=phase_bias if phase_bias in {"NS", "EW"} else "arterial",
123
+ selected_phase_bias=phase_bias,
124
+ )
125
+ return DistrictAction(
126
+ strategy="incident_response",
127
+ priority_corridor=phase_bias if phase_bias in {"NS", "EW"} else "arterial",
128
+ target_intersections=target_intersections,
129
+ phase_bias=phase_bias,
130
+ duration_steps=duration_steps,
131
+ ).validate()
132
+
133
+ if state.spillback_risk or end_state.spillback_risk or (boundary_share >= 0.55 and outgoing_pressure >= 0.45):
134
+ priority_corridor = "inbound" if boundary_share >= 0.55 else phase_bias if phase_bias in {"NS", "EW"} else None
135
+ target_intersections = select_targets(
136
+ strategy="clear_spillback",
137
+ priority_corridor=priority_corridor,
138
+ selected_phase_bias=phase_bias,
139
+ )
140
+ return DistrictAction(
141
+ strategy="clear_spillback",
142
+ priority_corridor=priority_corridor,
143
+ target_intersections=target_intersections,
144
+ phase_bias=phase_bias,
145
+ duration_steps=duration_steps,
146
+ ).validate()
147
+
148
+ if boundary_share >= 0.55 and (queue_delta >= 0.0 or wait_delta >= 0.0):
149
+ target_intersections = select_targets(
150
+ strategy="drain_inbound",
151
+ priority_corridor="inbound",
152
+ selected_phase_bias=phase_bias,
153
+ )
154
+ return DistrictAction(
155
+ strategy="drain_inbound",
156
+ priority_corridor="inbound",
157
+ target_intersections=target_intersections,
158
+ phase_bias=phase_bias,
159
+ duration_steps=duration_steps,
160
+ ).validate()
161
+
162
+ if outgoing_pressure >= 0.65 and end_state.total_queue >= state.total_queue * 0.9:
163
+ target_intersections = select_targets(
164
+ strategy="drain_outbound",
165
+ priority_corridor="outbound",
166
+ selected_phase_bias=phase_bias,
167
+ )
168
+ return DistrictAction(
169
+ strategy="drain_outbound",
170
+ priority_corridor="outbound",
171
+ target_intersections=target_intersections,
172
+ phase_bias=phase_bias,
173
+ duration_steps=duration_steps,
174
+ ).validate()
175
+
176
+ if (
177
+ state.event_flag
178
+ or state.overload_flag
179
+ or end_state.overload_flag
180
+ or (boundary_focus_ratio >= 0.6 and switch_count >= max(2, duration_steps))
181
+ ):
182
+ priority_corridor = phase_bias if phase_bias in {"NS", "EW"} else "arterial"
183
+ target_intersections = select_targets(
184
+ strategy="arterial_priority",
185
+ priority_corridor=priority_corridor,
186
+ selected_phase_bias=phase_bias,
187
+ )
188
+ return DistrictAction(
189
+ strategy="arterial_priority",
190
+ priority_corridor=priority_corridor,
191
+ target_intersections=target_intersections,
192
+ phase_bias=phase_bias,
193
+ duration_steps=duration_steps,
194
+ ).validate()
195
+
196
+ ns_pressure = state.ns_queue + 1.5 * state.ns_wait
197
+ ew_pressure = state.ew_queue + 1.5 * state.ew_wait
198
+ imbalance_threshold = max(5.0, 0.15 * max(1.0, ns_pressure + ew_pressure))
199
+
200
+ if ns_pressure - ew_pressure >= imbalance_threshold:
201
+ target_intersections = select_targets(
202
+ strategy="favor_NS",
203
+ priority_corridor="NS",
204
+ selected_phase_bias="NS",
205
+ )
206
+ return DistrictAction(
207
+ strategy="favor_NS",
208
+ priority_corridor="NS",
209
+ target_intersections=target_intersections,
210
+ phase_bias="NS",
211
+ duration_steps=duration_steps,
212
+ ).validate()
213
+
214
+ if ew_pressure - ns_pressure >= imbalance_threshold:
215
+ target_intersections = select_targets(
216
+ strategy="favor_EW",
217
+ priority_corridor="EW",
218
+ selected_phase_bias="EW",
219
+ )
220
+ return DistrictAction(
221
+ strategy="favor_EW",
222
+ priority_corridor="EW",
223
+ target_intersections=target_intersections,
224
+ phase_bias="EW",
225
+ duration_steps=duration_steps,
226
+ ).validate()
227
+
228
+ return DistrictAction.default_hold(duration_steps=duration_steps)
district_llm/eval.py ADDED
@@ -0,0 +1,436 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import json
5
+ from collections import Counter
6
+ from pathlib import Path
7
+ from typing import Any
8
+
9
+ from district_llm.metrics import aggregate_target_metrics, compute_target_metrics, safe_ratio, target_failure_buckets
10
+ from district_llm.repair import RepairConfig, extract_visible_candidate_ids, sanitize_action_payload
11
+ from district_llm.schema import DistrictAction
12
+ from env.utils import build_topology
13
+
14
+ try:
15
+ from tqdm.auto import tqdm
16
+ except ImportError: # pragma: no cover
17
+ tqdm = None
18
+
19
def parse_args() -> argparse.Namespace:
    """Parse CLI arguments for offline district-LLM output evaluation.

    Returns:
        argparse.Namespace with model/data paths, generation limits, and
        repair/reporting switches consumed by ``main``.
    """
    parser = argparse.ArgumentParser(
        description="Offline evaluation for district-LLM outputs."
    )
    # Model under evaluation (full model dir or LoRA adapter dir) and eval data.
    parser.add_argument("--model-path", required=True)
    parser.add_argument("--val-jsonl", required=True)
    parser.add_argument("--max-examples", type=int, default=200)
    parser.add_argument("--debug-examples", type=int, default=10)
    parser.add_argument("--max-new-tokens", type=int, default=128)
    parser.add_argument("--device", default=None)
    parser.add_argument("--generated-root", default="data/generated")
    parser.add_argument("--restrict-targets-to-visible-summary", action="store_true")
    # BooleanOptionalAction generates paired --flag / --no-flag switches.
    parser.add_argument(
        "--allow-only-visible-candidates",
        action=argparse.BooleanOptionalAction,
        default=True,
    )
    parser.add_argument("--max-target-intersections", type=int, default=3)
    parser.add_argument(
        "--fallback-on-empty-targets",
        action=argparse.BooleanOptionalAction,
        default=True,
    )
    parser.add_argument(
        "--fallback-mode",
        choices=("heuristic", "hold", "none"),
        default="heuristic",
    )
    parser.add_argument(
        "--report-before-after-repair",
        action=argparse.BooleanOptionalAction,
        default=True,
    )
    return parser.parse_args()
53
+
54
+
55
+ def load_rows(path: str | Path, max_examples: int | None = None) -> list[dict[str, Any]]:
56
+ rows = []
57
+ with Path(path).open("r", encoding="utf-8") as handle:
58
+ for line in handle:
59
+ if not line.strip():
60
+ continue
61
+ rows.append(json.loads(line))
62
+ if max_examples is not None and len(rows) >= max_examples:
63
+ break
64
+ return rows
65
+
66
+
67
def extract_json_object(payload: str) -> str:
    """Return the substring spanning the first '{' through the last '}'.

    Raises:
        ValueError: when no such brace-delimited span exists in *payload*.
    """
    opening = payload.find("{")
    closing = payload.rfind("}")
    if opening < 0 or closing <= opening:
        raise ValueError("No JSON object found.")
    return payload[opening : closing + 1]
73
+
74
+
75
def load_model_and_tokenizer(model_path: str, device: str | None = None):
    """Load a causal LM (full weights or a PEFT/LoRA adapter) and its tokenizer.

    Args:
        model_path: directory containing either a full model or a LoRA adapter
            (detected by the presence of ``adapter_config.json``).
        device: explicit device string; defaults to CUDA when available.

    Returns:
        ``(model, tokenizer)`` with the model in eval mode on *device*.

    Raises:
        ImportError: when an adapter is given but 'peft' is not installed.
    """
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    model_dir = Path(model_path)
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    # Greedy generation needs a pad token; reuse EOS when none is configured.
    if tokenizer.pad_token_id is None and tokenizer.eos_token_id is not None:
        tokenizer.pad_token = tokenizer.eos_token

    target_device = device or ("cuda" if torch.cuda.is_available() else "cpu")
    if (model_dir / "adapter_config.json").exists():
        try:
            from peft import AutoPeftModelForCausalLM
        except ImportError as exc:
            raise ImportError(
                "Evaluating a LoRA adapter requires the 'peft' package."
            ) from exc
        # Bug fix: the adapter branch previously ignored `device` and left the
        # model wherever from_pretrained placed it, unlike the plain branch.
        model = AutoPeftModelForCausalLM.from_pretrained(model_path).to(target_device)
    else:
        model = AutoModelForCausalLM.from_pretrained(model_path).to(target_device)
    model.eval()
    return model, tokenizer
97
+
98
+
99
def build_generation_prompt(tokenizer, messages: list[dict[str, str]]) -> str:
    """Render chat *messages* into a single generation prompt.

    Uses the tokenizer's chat template when one is configured; otherwise falls
    back to a plain "role: content" transcript ending with an assistant cue.
    """
    template = getattr(tokenizer, "chat_template", None)
    if template:
        return tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )
    transcript = [f"{turn['role']}: {turn['content']}" for turn in messages]
    return "\n".join(transcript) + "\nassistant:"
107
+
108
+
109
def generate_response(model, tokenizer, messages: list[dict[str, str]], max_new_tokens: int) -> str:
    """Greedily generate and decode the assistant completion for *messages*."""
    import torch

    prompt_text = build_generation_prompt(tokenizer, messages)
    model_device = getattr(model, "device", None)
    encoded = tokenizer(prompt_text, return_tensors="pt")
    if model_device is not None:
        # Move every input tensor onto the model's device before generation.
        encoded = {name: tensor.to(model_device) for name, tensor in encoded.items()}
    with torch.no_grad():
        generation = model.generate(
            **encoded,
            max_new_tokens=max_new_tokens,
            do_sample=False,
            pad_token_id=tokenizer.pad_token_id or tokenizer.eos_token_id,
        )
    # Strip the prompt tokens so only the newly generated continuation remains.
    prompt_length = encoded["input_ids"].shape[1]
    completion_ids = generation[0][prompt_length:]
    return tokenizer.decode(completion_ids, skip_special_tokens=True)
126
+
127
+
128
def parse_prediction(payload: str) -> tuple[bool, bool, dict[str, Any] | None]:
    """Parse raw model text into ``(json_valid, schema_valid, payload_dict)``."""
    try:
        decoded = json.loads(extract_json_object(payload))
    except Exception:
        # No parseable JSON object at all.
        return False, False, None
    try:
        validated = DistrictAction.from_dict(decoded)
    except Exception:
        # JSON parsed but failed schema validation; surface the raw dict.
        return True, False, decoded
    return True, True, validated.to_dict()
138
+
139
+
140
class DistrictTopologyIndex:
    """Lazily loads per-city district topology and caches intersection-id sets."""

    def __init__(self, generated_root: str | Path):
        self.generated_root = Path(generated_root)
        # city_id -> {district_id -> set of intersection ids}
        self._cache: dict[str, dict[str, set[str]]] = {}

    def district_intersections(self, city_id: str, district_id: str) -> set[str]:
        """Return the intersection ids of a district (empty set when unknown)."""
        cached = self._cache.get(city_id)
        if cached is None:
            city_dir = self.generated_root / city_id
            _, districts = build_topology(
                roadnet_path=city_dir / "roadnet.json",
                district_map_path=city_dir / "district_map.json",
                metadata_path=city_dir / "metadata.json",
            )
            cached = {
                name: set(info.intersection_ids) for name, info in districts.items()
            }
            self._cache[city_id] = cached
        return cached.get(district_id, set())
160
+
161
+
162
+ def field_accuracy(pred: dict[str, Any] | None, gt: dict[str, Any], field: str) -> float:
163
+ if pred is None:
164
+ return 0.0
165
+ return float(pred.get(field) == gt.get(field))
166
+
167
+
168
def invalid_target_fraction(pred_targets: list[str], district_candidates: set[str]) -> float:
    """Fraction of predicted target ids that are not valid district intersections."""
    if not pred_targets:
        return 0.0
    invalid_total = sum(target not in district_candidates for target in pred_targets)
    return safe_ratio(invalid_total, len(pred_targets))
173
+
174
+
175
def evaluate_rows(
    rows: list[dict[str, Any]],
    model,
    tokenizer,
    max_new_tokens: int,
    topology_index: DistrictTopologyIndex,
    restrict_targets_to_visible_summary: bool,
    debug_examples: int,
    repair_config: RepairConfig,
    report_before_after_repair: bool,
) -> dict[str, Any]:
    """Generate a prediction per row and score it before and after repair.

    Each row is expected to carry a 3-message chat (system, user prompt,
    assistant ground-truth JSON) under ``messages``, plus ``city_id`` /
    ``district_id`` used for topology lookups. Returns a dict of aggregate
    metrics; the first *debug_examples* rows are kept verbatim under
    ``debug_examples``.
    """
    # Validity / accuracy accumulators: "*_before" is the raw model output,
    # "*_after" is the output after sanitize_action_payload repair.
    json_valid_count = 0
    schema_valid_count = 0
    field_totals_before = Counter()
    field_totals_after = Counter()
    full_object_correct_before = 0
    full_object_correct_after = 0
    target_rows_before: list[dict[str, float]] = []
    target_rows_after: list[dict[str, float]] = []
    restricted_target_rows_before: list[dict[str, float]] = []
    restricted_target_rows_after: list[dict[str, float]] = []
    invalid_rates_before: list[float] = []
    invalid_rates_after: list[float] = []
    fallback_used_count = 0
    failure_buckets = Counter()
    debug_rows = []

    # Progress bar is optional; tqdm may be None when the import failed.
    progress = (
        tqdm(total=len(rows), desc="eval", dynamic_ncols=True)
        if tqdm is not None
        else None
    )

    try:
        for row in rows:
            messages = row["messages"]
            # messages[2] is the assistant turn containing the gold JSON action.
            ground_truth = json.loads(messages[2]["content"])
            raw_prediction = generate_response(
                model=model,
                tokenizer=tokenizer,
                messages=messages[:2],
                max_new_tokens=max_new_tokens,
            )
            json_valid, schema_valid, prediction_before = parse_prediction(raw_prediction)
            # Repair always runs; when the output was not even JSON the repair
            # starts from None. NOTE(review): `summary=row` passes the whole
            # example row as the summary — confirm sanitize_action_payload
            # expects this shape.
            repaired_action, repair_report = sanitize_action_payload(
                payload=prediction_before if json_valid else None,
                summary=row,
                prompt_text=messages[1]["content"],
                config=repair_config,
            )
            prediction_after = repaired_action.to_dict()
            json_valid_count += int(json_valid)
            schema_valid_count += int(schema_valid)
            fallback_used_count += int(repair_report.fallback_used)

            # Scalar-field accuracy, pre-repair.
            field_totals_before["strategy"] += field_accuracy(prediction_before, ground_truth, "strategy")
            field_totals_before["priority_corridor"] += field_accuracy(prediction_before, ground_truth, "priority_corridor")
            field_totals_before["phase_bias"] += field_accuracy(prediction_before, ground_truth, "phase_bias")
            field_totals_before["duration_steps"] += field_accuracy(prediction_before, ground_truth, "duration_steps")

            # Scalar-field accuracy, post-repair.
            field_totals_after["strategy"] += field_accuracy(prediction_after, ground_truth, "strategy")
            field_totals_after["priority_corridor"] += field_accuracy(prediction_after, ground_truth, "priority_corridor")
            field_totals_after["phase_bias"] += field_accuracy(prediction_after, ground_truth, "phase_bias")
            field_totals_after["duration_steps"] += field_accuracy(prediction_after, ground_truth, "duration_steps")

            if prediction_before == ground_truth:
                full_object_correct_before += 1
            if prediction_after == ground_truth:
                full_object_correct_after += 1

            pred_targets_before = [] if prediction_before is None else list(prediction_before.get("target_intersections", []))
            pred_targets_after = list(prediction_after.get("target_intersections", []))
            gt_targets = list(ground_truth.get("target_intersections", []))
            # Candidates literally visible in the prompt vs. all intersections
            # that exist in the district topology.
            visible_candidates = set(
                extract_visible_candidate_ids(summary=row, prompt_text=messages[1]["content"])
            )
            district_candidates = topology_index.district_intersections(
                city_id=row["city_id"],
                district_id=row["district_id"],
            )
            invalid_before = [item for item in pred_targets_before if item not in district_candidates]
            invalid_after = [item for item in pred_targets_after if item not in district_candidates]
            # Only meaningful when the prompt exposed a candidate list.
            non_visible_before = [
                item for item in pred_targets_before
                if visible_candidates and item not in visible_candidates
            ]

            metrics_before = compute_target_metrics(pred_targets_before, gt_targets)
            metrics_after = compute_target_metrics(pred_targets_after, gt_targets)
            target_rows_before.append(metrics_before)
            target_rows_after.append(metrics_after)
            invalid_rates_before.append(invalid_target_fraction(pred_targets_before, district_candidates))
            invalid_rates_after.append(invalid_target_fraction(pred_targets_after, district_candidates))

            if restrict_targets_to_visible_summary:
                # Re-score with both prediction and ground truth filtered to
                # the prompt-visible candidate set.
                filtered_pred_before = [item for item in pred_targets_before if item in visible_candidates]
                filtered_pred_after = [item for item in pred_targets_after if item in visible_candidates]
                filtered_gt = [item for item in gt_targets if item in visible_candidates]
                restricted_target_rows_before.append(
                    compute_target_metrics(filtered_pred_before, filtered_gt)
                )
                restricted_target_rows_after.append(
                    compute_target_metrics(filtered_pred_after, filtered_gt)
                )

            # Each bucket counts at most once per row (hence set()).
            for failure_bucket in set(
                target_failure_buckets(
                    pred_list=pred_targets_before,
                    gt_list=gt_targets,
                    visible_candidates=visible_candidates,
                    invalid_ids=invalid_before,
                    non_visible_ids=non_visible_before,
                    repaired_targets=pred_targets_after,
                    fallback_used=repair_report.fallback_used,
                )
            ):
                failure_buckets[failure_bucket] += 1

            if len(debug_rows) < debug_examples:
                debug_rows.append(
                    {
                        "district_summary": messages[1]["content"],
                        "predicted_json_raw": raw_prediction,
                        "predicted_json_parsed_before_repair": prediction_before,
                        "predicted_json_parsed_after_repair": prediction_after,
                        "ground_truth_json": ground_truth,
                        "target_intersections_metrics_before_repair": metrics_before,
                        "target_intersections_metrics_after_repair": metrics_after,
                        "repair_report": repair_report.to_dict(),
                        "visible_candidate_ids": sorted(visible_candidates),
                        "failure_buckets": sorted(
                            set(
                                target_failure_buckets(
                                    pred_list=pred_targets_before,
                                    gt_list=gt_targets,
                                    visible_candidates=visible_candidates,
                                    invalid_ids=invalid_before,
                                    non_visible_ids=non_visible_before,
                                    repaired_targets=pred_targets_after,
                                    fallback_used=repair_report.fallback_used,
                                )
                            )
                        ),
                    }
                )
            if progress is not None:
                progress.update(1)
    finally:
        if progress is not None:
            progress.close()

    # Guard against division by zero when rows is empty.
    total_rows = max(1, len(rows))
    results = {
        "num_examples": len(rows),
        "json_validity_rate": float(json_valid_count) / total_rows,
        "schema_validity_rate": float(schema_valid_count) / total_rows,
        "field_accuracy": {
            "strategy": float(field_totals_before["strategy"]) / total_rows,
            "priority_corridor": float(field_totals_before["priority_corridor"]) / total_rows,
            "phase_bias": float(field_totals_before["phase_bias"]) / total_rows,
            "duration_steps": float(field_totals_before["duration_steps"]) / total_rows,
        },
        "field_accuracy_after_repair": {
            "strategy": float(field_totals_after["strategy"]) / total_rows,
            "priority_corridor": float(field_totals_after["priority_corridor"]) / total_rows,
            "phase_bias": float(field_totals_after["phase_bias"]) / total_rows,
            "duration_steps": float(field_totals_after["duration_steps"]) / total_rows,
        },
        "target_intersections_before_repair": aggregate_target_metrics(target_rows_before),
        "target_intersections_after_repair": aggregate_target_metrics(target_rows_after),
        # Unsuffixed key mirrors the after-repair numbers (headline metric).
        "target_intersections": aggregate_target_metrics(target_rows_after),
        "target_intersections_failure_buckets": dict(sorted(failure_buckets.items())),
        "exact_full_object_accuracy": float(full_object_correct_before) / total_rows,
        "exact_full_object_accuracy_after_repair": float(full_object_correct_after) / total_rows,
        "debug_examples": debug_rows,
    }
    if restrict_targets_to_visible_summary:
        results["target_intersections_restricted_to_visible_summary_before_repair"] = aggregate_target_metrics(
            restricted_target_rows_before
        )
        results["target_intersections_restricted_to_visible_summary_after_repair"] = aggregate_target_metrics(
            restricted_target_rows_after
        )
        results["target_intersections_restricted_to_visible_summary"] = aggregate_target_metrics(
            restricted_target_rows_after
        )
    if report_before_after_repair:
        results["target_intersections_before_after_repair"] = {
            "invalid_id_rate_before_repair": float(sum(invalid_rates_before) / total_rows),
            "invalid_id_rate_after_repair": float(sum(invalid_rates_after) / total_rows),
            "exact_set_match_before_repair": aggregate_target_metrics(target_rows_before).get("exact_set_match", 0.0),
            "exact_set_match_after_repair": aggregate_target_metrics(target_rows_after).get("exact_set_match", 0.0),
            "jaccard_before_repair": aggregate_target_metrics(target_rows_before).get("jaccard", 0.0),
            "jaccard_after_repair": aggregate_target_metrics(target_rows_after).get("jaccard", 0.0),
            "fallback_used_rate": float(fallback_used_count) / total_rows,
        }
    return results
372
+
373
+
374
def print_debug_examples(debug_rows: list[dict[str, Any]]) -> None:
    """Pretty-print per-example debug records collected by evaluate_rows."""
    sorted_json_fields = (
        "predicted_json_parsed_before_repair",
        "predicted_json_parsed_after_repair",
        "ground_truth_json",
        "target_intersections_metrics_before_repair",
        "target_intersections_metrics_after_repair",
        "repair_report",
        "visible_candidate_ids",
    )
    for position, record in enumerate(debug_rows, start=1):
        prefix = f"[debug {position}]"
        print(f"{prefix} district_summary:")
        print(record["district_summary"])
        print(f"{prefix} predicted_json_raw={record['predicted_json_raw']}")
        for field_name in sorted_json_fields:
            print(f"{prefix} {field_name}={json.dumps(record[field_name], sort_keys=True)}")
        # failure_buckets is already a sorted list; dumped without sort_keys.
        print(f"{prefix} failure_buckets={json.dumps(record['failure_buckets'])}")
408
+
409
+
410
def main() -> None:
    """Entry point: load data and model, run the evaluation, print a report.

    Prints the aggregate metrics as sorted JSON (debug rows excluded), then
    the per-example debug dump.
    """
    args = parse_args()
    rows = load_rows(args.val_jsonl, max_examples=args.max_examples)
    model, tokenizer = load_model_and_tokenizer(args.model_path, device=args.device)
    topology_index = DistrictTopologyIndex(args.generated_root)
    results = evaluate_rows(
        rows=rows,
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=args.max_new_tokens,
        topology_index=topology_index,
        restrict_targets_to_visible_summary=args.restrict_targets_to_visible_summary,
        debug_examples=args.debug_examples,
        repair_config=RepairConfig(
            allow_only_visible_candidates=args.allow_only_visible_candidates,
            max_target_intersections=args.max_target_intersections,
            fallback_on_empty_targets=args.fallback_on_empty_targets,
            fallback_mode=args.fallback_mode,
        ),
        report_before_after_repair=args.report_before_after_repair,
    )
    # Keep the machine-readable summary separate from the verbose debug dump.
    print(json.dumps({k: v for k, v in results.items() if k != "debug_examples"}, indent=2, sort_keys=True))
    print_debug_examples(results["debug_examples"])
433
+
434
+
435
+ if __name__ == "__main__":
436
+ main()
district_llm/generate_dataset.py ADDED
@@ -0,0 +1,390 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import json
5
+ import random
6
+ from dataclasses import dataclass, field
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
+ import numpy as np
11
+
12
+ from district_llm.derivation import DistrictWindowData, LocalIntersectionAction, derive_district_action
13
+ from district_llm.prompting import format_district_prompt, format_sft_text
14
+ from district_llm.summary_builder import DistrictStateSummaryBuilder
15
+ from district_llm.teachers import BaseTeacher, build_teacher, parse_teacher_spec
16
+ from env.observation_builder import ObservationConfig
17
+ from env.reward import RewardConfig
18
+ from env.traffic_env import EnvConfig, TrafficEnv
19
+ from training.cityflow_dataset import CityFlowDataset, ScenarioSpec
20
+
21
+
22
@dataclass
class _WindowBuffer:
    """Accumulates one district-decision window during a rollout."""

    # District state summary captured at the start of the window.
    start_summary: Any
    # Local controller actions recorded over the window, one per intersection-step.
    controller_actions: list[LocalIntersectionAction] = field(default_factory=list)
    # Number of local decision steps accumulated so far.
    step_count: int = 0
27
+
28
+
29
def parse_args() -> argparse.Namespace:
    """Parse CLI arguments for district-LLM SFT dataset generation.

    Returns:
        argparse.Namespace covering teacher/controller selection, rollout
        sizing, output paths, and the environment/reward configuration.
    """
    parser = argparse.ArgumentParser(
        description="Generate district-LLM SFT data from CityFlow rollouts."
    )
    # Teacher/controller selection.
    parser.add_argument(
        "--controller",
        default="queue_greedy",
        choices=("rl_checkpoint", "hold", "fixed", "random", "queue_greedy"),
        help="Single controller source used when --teacher-spec is not provided.",
    )
    parser.add_argument("--checkpoint", default=None)
    parser.add_argument(
        "--teacher-spec",
        action="append",
        default=[],
        help="Repeatable source spec, e.g. rl_checkpoint=artifacts/dqn_shared/best_validation.pt or fixed.",
    )
    # Rollout sizing and district-level decision cadence.
    parser.add_argument("--episodes", type=int, default=10)
    parser.add_argument(
        "--decision-interval",
        "--district-decision-interval",
        dest="district_decision_interval",
        type=int,
        default=10,
        help="District-LLM decision interval in local-controller decision steps.",
    )
    # Output and scenario selection.
    parser.add_argument("--output", required=True)
    parser.add_argument("--generated-root", default="data/generated")
    parser.add_argument("--splits-root", default="data/splits")
    parser.add_argument("--split", default="train", choices=("train", "val", "test"))
    parser.add_argument("--city-id", default=None)
    parser.add_argument("--scenario-name", default=None)
    parser.add_argument("--seed", type=int, default=7)
    parser.add_argument("--fixed-green-time", type=int, default=20)
    parser.add_argument("--device", default=None)
    parser.add_argument("--append", action="store_true")
    parser.add_argument("--top-k-congested", type=int, default=3)
    parser.add_argument("--max-candidate-intersections", type=int, default=6)
    parser.add_argument("--max-target-intersections", type=int, default=3)
    parser.add_argument("--use-checkpoint-env-config", action="store_true")

    # Environment / observation / reward configuration (mirrors EnvConfig).
    parser.add_argument("--env-decision-interval", type=int, default=5)
    parser.add_argument("--simulator-interval", type=int, default=1)
    parser.add_argument("--min-green-time", type=int, default=10)
    parser.add_argument("--thread-num", type=int, default=1)
    parser.add_argument("--max-episode-seconds", type=int, default=None)
    parser.add_argument("--max-incoming-lanes", type=int, default=16)
    parser.add_argument("--count-scale", type=float, default=20.0)
    parser.add_argument("--elapsed-time-scale", type=float, default=60.0)
    parser.add_argument("--disable-district-context", action="store_true")
    parser.add_argument("--disable-outgoing-congestion", action="store_true")
    parser.add_argument("--reward-variant", default="wait_queue_throughput")
    parser.add_argument("--waiting-weight", type=float, default=1.0)
    parser.add_argument("--vehicle-weight", type=float, default=0.1)
    parser.add_argument("--pressure-weight", type=float, default=0.0)
    parser.add_argument("--reward-scale", type=float, default=0.1)
    parser.add_argument("--disable-lane-reward-normalization", action="store_true")
    parser.add_argument("--reward-clip", type=float, default=5.0)
    parser.add_argument("--queue-delta-weight", type=float, default=2.0)
    parser.add_argument("--wait-delta-weight", type=float, default=4.0)
    parser.add_argument("--queue-level-weight", type=float, default=0.5)
    parser.add_argument("--wait-level-weight", type=float, default=1.0)
    parser.add_argument("--throughput-weight", type=float, default=0.1)
    parser.add_argument("--imbalance-weight", type=float, default=0.1)
    parser.add_argument("--reward-delta-clip", type=float, default=2.0)
    parser.add_argument("--reward-level-normalizer", type=float, default=10.0)
    parser.add_argument("--throughput-normalizer", type=float, default=2.0)
    return parser.parse_args()
97
+
98
+
99
def build_env_config(args: argparse.Namespace) -> EnvConfig:
    """Translate parsed CLI arguments into an EnvConfig.

    Maps the flat argparse namespace onto the nested EnvConfig /
    ObservationConfig / RewardConfig structure; ``--disable-*`` flags are
    inverted into the corresponding ``include_*`` / ``normalize_*`` options.
    """
    return EnvConfig(
        simulator_interval=args.simulator_interval,
        decision_interval=args.env_decision_interval,
        min_green_time=args.min_green_time,
        thread_num=args.thread_num,
        max_episode_seconds=args.max_episode_seconds,
        observation=ObservationConfig(
            max_incoming_lanes=args.max_incoming_lanes,
            count_scale=args.count_scale,
            elapsed_time_scale=args.elapsed_time_scale,
            include_outgoing_congestion=not args.disable_outgoing_congestion,
            include_district_context=not args.disable_district_context,
            include_district_type_feature=True,
        ),
        reward=RewardConfig(
            variant=args.reward_variant,
            waiting_weight=args.waiting_weight,
            vehicle_weight=args.vehicle_weight,
            pressure_weight=args.pressure_weight,
            reward_scale=args.reward_scale,
            normalize_by_lane_count=not args.disable_lane_reward_normalization,
            clip_reward=args.reward_clip,
            queue_delta_weight=args.queue_delta_weight,
            wait_delta_weight=args.wait_delta_weight,
            queue_level_weight=args.queue_level_weight,
            wait_level_weight=args.wait_level_weight,
            throughput_weight=args.throughput_weight,
            imbalance_weight=args.imbalance_weight,
            delta_clip=args.reward_delta_clip,
            level_normalizer=args.reward_level_normalizer,
            throughput_normalizer=args.throughput_normalizer,
        ),
    )
133
+
134
+
135
def build_env(env_config: EnvConfig, scenario_spec: ScenarioSpec) -> TrafficEnv:
    """Instantiate a TrafficEnv for *scenario_spec* under *env_config*."""
    scenario_fields = (
        "city_id",
        "scenario_name",
        "city_dir",
        "scenario_dir",
        "config_path",
        "roadnet_path",
        "district_map_path",
        "metadata_path",
    )
    # Forward each scenario attribute under its own keyword name.
    scenario_kwargs = {name: getattr(scenario_spec, name) for name in scenario_fields}
    return TrafficEnv(env_config=env_config, **scenario_kwargs)
147
+
148
+
149
def resolve_teachers(args: argparse.Namespace) -> list[BaseTeacher]:
    """Build the teacher controllers requested on the command line.

    When no --teacher-spec was given, falls back to the single --controller
    source (attaching --checkpoint for the rl_checkpoint case).
    """
    specs = list(args.teacher_spec)
    if not specs:
        default_spec = (
            f"rl_checkpoint={args.checkpoint}"
            if args.controller == "rl_checkpoint"
            else args.controller
        )
        specs = [default_spec]

    resolved: list[BaseTeacher] = []
    for spec in specs:
        controller_type, checkpoint = parse_teacher_spec(spec)
        if controller_type == "rl_checkpoint":
            # Fall back to the global --checkpoint when the spec carries none.
            checkpoint = checkpoint or args.checkpoint
        resolved.append(
            build_teacher(
                controller_type=controller_type,
                checkpoint=checkpoint,
                fixed_green_time=args.fixed_green_time,
                seed=args.seed,
                device=args.device,
            )
        )
    return resolved
169
+
170
+
171
def resolve_env_config(args: argparse.Namespace, teachers: list[BaseTeacher]) -> EnvConfig:
    """Choose the env config: CLI-derived, or a checkpoint-embedded one on request.

    Raises:
        ValueError: when multiple checkpoint teachers disagree on the config.
    """
    cli_config = build_env_config(args)
    if not args.use_checkpoint_env_config:
        return cli_config

    embedded = [
        teacher.env_config for teacher in teachers if teacher.env_config is not None
    ]
    if not embedded:
        # No teacher carries an embedded config; use the CLI-derived one.
        return cli_config

    reference = embedded[0]
    assert reference is not None  # guaranteed by the filter above
    if any(candidate != reference for candidate in embedded[1:]):
        raise ValueError("Checkpoint teachers use different env configs. Generate separate datasets.")
    return reference
188
+
189
+
190
def sample_scenario(
    dataset: CityFlowDataset,
    rng: random.Random,
    split: str,
    city_id: str | None,
    scenario_name: str | None,
) -> ScenarioSpec:
    """Resolve a scenario: exact lookup when fully specified, else sample.

    With both ``city_id`` and ``scenario_name`` given, the spec is built
    deterministically; otherwise the dataset samples from ``split``,
    honoring whichever constraint (if any) is present.
    """
    fully_specified = bool(city_id) and bool(scenario_name)
    if fully_specified:
        return dataset.build_scenario_spec(city_id, scenario_name)
    return dataset.sample_scenario(
        split_name=split,
        rng=rng,
        city_id=city_id,
        scenario_name=scenario_name,
    )
205
+
206
+
207
def extract_step_actions(
    env: TrafficEnv,
    observation_batch: dict[str, Any],
    next_observation_batch: dict[str, Any],
    actions: np.ndarray,
) -> dict[str, list[LocalIntersectionAction]]:
    """Group this step's per-intersection actions by district.

    For each controlled intersection, records the low-level action taken,
    the phase before and after the step, and simple congestion aggregates
    (queue, wait, outgoing load) read from the observation batches and the
    simulator's lane counts.
    """
    # One (possibly empty) bucket per district so downstream code can index
    # every district without key checks.
    grouped: dict[str, list[LocalIntersectionAction]] = {district_id: [] for district_id in env.districts}
    lane_vehicle_count = env.adapter.get_lane_vehicle_count()

    for index, intersection_id in enumerate(observation_batch["intersection_ids"]):
        district_id = observation_batch["district_ids"][index]
        grouped[district_id].append(
            LocalIntersectionAction(
                intersection_id=intersection_id,
                district_id=district_id,
                action=int(actions[index]),
                current_phase=int(observation_batch["current_phase"][index]),
                # Phase after env.step(), read from the *next* observation batch.
                next_phase=int(next_observation_batch["current_phase"][index]),
                queue_total=float(np.asarray(observation_batch["incoming_counts"][index], dtype=np.float32).sum()),
                wait_total=float(np.asarray(observation_batch["incoming_waiting"][index], dtype=np.float32).sum()),
                # Total vehicles currently on the intersection's outgoing lanes;
                # lanes missing from the simulator's count map contribute 0.
                outgoing_load=float(
                    sum(
                        float(lane_vehicle_count.get(lane_id, 0))
                        for lane_id in env.intersections[intersection_id].outgoing_lanes
                    )
                ),
                is_boundary=bool(env.intersections[intersection_id].is_boundary),
            )
        )
    return grouped
237
+
238
+
239
def generate_examples_for_episode(
    env: TrafficEnv,
    teacher: BaseTeacher,
    district_interval: int,
    top_k_congested: int,
    max_candidate_intersections: int,
    max_target_intersections: int,
    episode_index: int,
) -> list[dict[str, Any]]:
    """Roll out one teacher-controlled episode and emit district SFT samples.

    The episode is chopped into per-district windows of ``district_interval``
    env steps (the final window may be shorter when the episode terminates).
    For each closed window, a district-level action is derived from the
    controller's behavior inside that window and paired with a prompt built
    from the summary at the *start* of the window.

    Returns:
        One record per (district, window), carrying the SFT text, the prompt
        and JSON response, state/window summaries, and provenance metadata
        (teacher identity, episode/window indices, sim time).
    """
    summary_builder = DistrictStateSummaryBuilder(
        top_k=top_k_congested,
        candidate_limit=max_candidate_intersections,
    )
    observation_batch = env.reset()
    summary_builder.reset()
    current_summaries = summary_builder.build_all(env, observation_batch)
    # Each district accumulates actions into a window buffer anchored at the
    # summary taken when the window opened.
    windows = {
        district_id: _WindowBuffer(start_summary=summary)
        for district_id, summary in current_summaries.items()
    }
    samples: list[dict[str, Any]] = []
    done = False
    # Global counter over emitted windows (shared across districts).
    window_index = 0

    while not done:
        actions = teacher.act(observation_batch)
        next_observation_batch, rewards, done, info = env.step(actions)
        # Rewards/info are irrelevant for supervised data generation.
        del rewards, info
        step_actions = extract_step_actions(env, observation_batch, next_observation_batch, actions)
        next_summaries = summary_builder.build_all(env, next_observation_batch)

        for district_id, buffer in windows.items():
            buffer.controller_actions.extend(step_actions[district_id])
            buffer.step_count += 1
            # Emit when the window is full, or flush a partial window at
            # episode end.
            should_emit = buffer.step_count >= district_interval or done
            if not should_emit:
                continue

            end_summary = next_summaries[district_id]
            window_data = DistrictWindowData(
                district_id=district_id,
                start_summary=buffer.start_summary,
                end_summary=end_summary,
                controller_actions=list(buffer.controller_actions),
                step_count=buffer.step_count,
            )
            # Ground-truth district action is derived from what the
            # controller actually did across the window.
            action = derive_district_action(
                window_data=window_data,
                max_target_intersections=max_target_intersections,
            )
            # The prompt reflects only information available at window start.
            prompt = format_district_prompt(
                buffer.start_summary,
                max_target_intersections=max_target_intersections,
                allow_only_visible_candidates=True,
            )
            samples.append(
                {
                    "text": format_sft_text(
                        buffer.start_summary,
                        action,
                        max_target_intersections=max_target_intersections,
                        allow_only_visible_candidates=True,
                    ),
                    "prompt": prompt,
                    "response_json": action.to_dict(),
                    "state": buffer.start_summary.to_dict(),
                    "candidate_intersections": buffer.start_summary.to_dict().get("candidate_intersections", []),
                    "window_summary": window_data.to_dict(),
                    "city_id": env.city_id,
                    "district_id": district_id,
                    "district_type": env.districts[district_id].district_type,
                    "scenario": env.scenario_name,
                    "controller_type": teacher.metadata.controller_type,
                    "controller_id": teacher.metadata.controller_id,
                    "controller_family": teacher.metadata.controller_family,
                    "teacher_algorithm": teacher.metadata.teacher_algorithm,
                    "checkpoint_path": teacher.metadata.checkpoint_path,
                    "episode_index": int(episode_index),
                    "window_index": int(window_index),
                    "decision_interval": int(district_interval),
                    "sim_time": int(buffer.start_summary.sim_time),
                }
            )
            # Start the next window where this one ended.
            windows[district_id] = _WindowBuffer(start_summary=end_summary)
            window_index += 1

        observation_batch = next_observation_batch

    return samples
328
+
329
+
330
def append_jsonl(path: Path, records: list[dict[str, Any]], append: bool) -> None:
    """Write records as JSON Lines (sorted keys), appending or truncating.

    Parent directories are created on demand. ``append=False`` overwrites
    any existing file; ``append=True`` adds to it.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    if append:
        mode = "a"
    else:
        mode = "w"
    lines = [json.dumps(record, sort_keys=True) + "\n" for record in records]
    with path.open(mode, encoding="utf-8") as handle:
        handle.writelines(lines)
337
+
338
+
339
def main() -> None:
    """CLI entry point: roll out teacher policies and dump district SFT data.

    For each episode, samples (or fixes) a scenario, runs every resolved
    teacher on a fresh environment, and appends the resulting records to
    the JSONL file at ``--output``. Progress is reported as one JSON line
    per episode on stdout.
    """
    args = parse_args()
    dataset = CityFlowDataset(
        generated_root=args.generated_root,
        splits_root=args.splits_root,
    )
    dataset.generate_default_splits()
    rng = random.Random(args.seed)
    teachers = resolve_teachers(args)
    env_config = resolve_env_config(args, teachers)

    output_path = Path(args.output)
    # The first write honors --append; all subsequent episodes append, so a
    # multi-episode run accumulates into a single file.
    write_mode_append = bool(args.append)

    for episode_index in range(args.episodes):
        scenario_spec = sample_scenario(
            dataset=dataset,
            rng=rng,
            split=args.split,
            city_id=args.city_id,
            scenario_name=args.scenario_name,
        )
        episode_records: list[dict[str, Any]] = []
        for teacher in teachers:
            # Fresh env per teacher so rollouts never share simulator state.
            env = build_env(env_config=env_config, scenario_spec=scenario_spec)
            episode_records.extend(
                generate_examples_for_episode(
                    env=env,
                    teacher=teacher,
                    district_interval=args.district_decision_interval,
                    top_k_congested=args.top_k_congested,
                    max_candidate_intersections=args.max_candidate_intersections,
                    max_target_intersections=args.max_target_intersections,
                    episode_index=episode_index,
                )
            )
        append_jsonl(output_path, episode_records, append=write_mode_append)
        write_mode_append = True
        # Machine-readable per-episode progress line.
        print(
            json.dumps(
                {
                    "episode_index": episode_index,
                    "city_id": scenario_spec.city_id,
                    "scenario_name": scenario_spec.scenario_name,
                    "records_written": len(episode_records),
                }
            )
        )


if __name__ == "__main__":
    main()
district_llm/guided_control.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from typing import Any
4
+
5
+ import numpy as np
6
+
7
+ from district_llm.schema import DistrictAction
8
+
9
+
10
class DistrictGuidedLocalController:
    """
    Wrap a low-level controller and bias its actions with district directives.

    The shared DQN still produces the base per-intersection action, and the
    district plan only nudges hold/switch decisions toward the requested phase.
    """

    def __init__(self, base_teacher):
        self.base_teacher = base_teacher

    def act(
        self,
        observation_batch: dict[str, Any],
        district_actions: dict[str, DistrictAction] | None = None,
    ) -> np.ndarray:
        """Return base actions, overridden wherever a district directive applies."""
        base_actions = np.asarray(self.base_teacher.act(observation_batch), dtype=np.int64)
        if not district_actions:
            return base_actions

        guided = base_actions.copy()
        for index, district_id in enumerate(observation_batch["district_ids"]):
            directive = district_actions.get(district_id)
            if directive is not None:
                guided[index] = self._apply_directive(
                    observation_batch=observation_batch,
                    index=index,
                    base_action=int(base_actions[index]),
                    directive=directive,
                )
        return guided

    @staticmethod
    def _apply_directive(
        observation_batch: dict[str, Any],
        index: int,
        base_action: int,
        directive: DistrictAction,
    ) -> int:
        """Map one directive onto a hold(0)/switch(1) decision for one signal."""
        mask = observation_batch["action_mask"][index]
        phase = int(observation_batch["current_phase"][index])
        switch_allowed = bool(mask[1] > 0.0)

        # "hold" strategies and neutral bias defer to the base controller.
        if directive.strategy == "hold" or directive.phase_bias == "NONE":
            return int(base_action)

        if directive.phase_bias == "NS":
            # Presumably phase 0 serves NS (TODO confirm): hold it when
            # active, otherwise switch toward it if the mask allows.
            if phase == 0:
                return 0
            return 1 if switch_allowed else 0

        if directive.phase_bias == "EW":
            if phase != 0:
                return 0
            return 1 if switch_allowed else 0

        # Unknown bias value: leave the base action untouched.
        return int(base_action)
district_llm/heuristic_guidance.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+
5
+ from district_llm.repair import fallback_target_intersections
6
+ from district_llm.schema import DistrictAction, DistrictStateSummary
7
+
8
+
9
@dataclass(frozen=True)
class HeuristicGuidanceConfig:
    """Tunables for the rule-based district guidance generator."""

    # Maximum number of target intersections included in a directive.
    max_target_intersections: int = 3
    # Directive horizon (controller steps) for incident/construction response.
    incident_duration_steps: int = 12
    # Horizon for spillback-clearing (also reused for arterial-priority)
    # directives.
    spillback_duration_steps: int = 10
    # Horizon for ordinary flow-favoring or hold directives.
    default_duration_steps: int = 8
+
16
+
17
def generate_heuristic_guidance(
    summary: DistrictStateSummary,
    config: HeuristicGuidanceConfig | None = None,
) -> DistrictAction:
    """Derive a district directive from summary flags via fixed-priority rules.

    Rule precedence: incident/construction > spillback > event/overload >
    dominant-flow favoring > hold. Targets come from the repair module's
    fallback selection, and the final action is validated before return.
    """
    cfg = config or HeuristicGuidanceConfig()
    flow = summary.dominant_flow
    directional = flow in {"NS", "EW"}
    biased_flow = flow if directional else "NONE"

    if summary.incident_flag or summary.construction_flag:
        strategy = "incident_response"
        priority_corridor = flow if directional else "arterial"
        phase_bias = biased_flow
        duration_steps = cfg.incident_duration_steps
    elif summary.spillback_risk:
        strategy = "clear_spillback"
        # Prefer the inbound corridor when boundary queues dominate.
        boundary_share = summary.boundary_queue_total / max(1.0, summary.total_queue)
        if boundary_share >= 0.45:
            priority_corridor = "inbound"
        elif directional:
            priority_corridor = flow
        else:
            priority_corridor = None
        phase_bias = biased_flow
        duration_steps = cfg.spillback_duration_steps
    elif summary.event_flag or summary.overload_flag:
        strategy = "arterial_priority"
        priority_corridor = flow if directional else "arterial"
        phase_bias = biased_flow
        # Arterial priority shares the spillback horizon.
        duration_steps = cfg.spillback_duration_steps
    elif flow == "NS":
        strategy, priority_corridor, phase_bias = "favor_NS", "NS", "NS"
        duration_steps = cfg.default_duration_steps
    elif flow == "EW":
        strategy, priority_corridor, phase_bias = "favor_EW", "EW", "EW"
        duration_steps = cfg.default_duration_steps
    else:
        strategy, priority_corridor, phase_bias = "hold", None, "NONE"
        duration_steps = cfg.default_duration_steps

    targets = fallback_target_intersections(
        summary=summary,
        max_target_intersections=cfg.max_target_intersections,
        strategy=strategy,
        priority_corridor=priority_corridor,
        phase_bias=phase_bias,
    )
    return DistrictAction(
        strategy=strategy,
        priority_corridor=priority_corridor,
        target_intersections=targets,
        phase_bias=phase_bias,
        duration_steps=duration_steps,
    ).validate()
district_llm/inference.py ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import json
5
+ from dataclasses import dataclass
6
+ from pathlib import Path
7
+ from typing import Any, Callable
8
+
9
+ from district_llm.prompting import format_district_prompt
10
+ from district_llm.repair import RepairConfig, RepairReport, sanitize_action_payload
11
+ from district_llm.schema import DistrictAction, DistrictStateSummary
12
+ from district_llm.summary_builder import DistrictStateSummaryBuilder
13
+ from env.observation_builder import ObservationConfig
14
+ from env.reward import RewardConfig
15
+ from env.traffic_env import EnvConfig, TrafficEnv
16
+ from training.cityflow_dataset import CityFlowDataset
17
+
18
+
19
+ def _extract_json_object(payload: str) -> str:
20
+ start = payload.find("{")
21
+ end = payload.rfind("}")
22
+ if start == -1 or end == -1 or end <= start:
23
+ raise ValueError("No JSON object found in model output.")
24
+ return payload[start : end + 1]
25
+
26
+
27
@dataclass(frozen=True)
class DistrictLLMInferenceResult:
    """Full provenance of one LLM inference: final action plus diagnostics."""

    # The validated, possibly repaired action.
    action: DistrictAction
    # Raw model completion text before any JSON extraction.
    raw_text: str
    # Payload parsed from the completion (the fallback action's dict when
    # parsing failed).
    parsed_payload_before_repair: dict[str, Any]
    # What sanitize_action_payload changed (removed ids, truncation, fallback).
    repair_report: RepairReport
    # False when no JSON object could be extracted/parsed from raw_text.
    json_valid: bool
    # False when the payload was invalid before repair (set together with
    # json_valid in parse_action).
    schema_valid_before_repair: bool
35
+
36
+
37
class DistrictLLMInference:
    """Generate, parse, and repair district actions from a causal LM.

    Either a ready-made ``generator_fn`` (prompt -> completion text) is
    supplied, or a HuggingFace model — optionally a PEFT/LoRA adapter
    directory — is loaded from ``model_name_or_path``.
    """

    def __init__(
        self,
        generator_fn: Callable[[str], str] | None = None,
        model_name_or_path: str | None = None,
        device: str | None = None,
        fallback_action: DistrictAction | None = None,
        repair_config: RepairConfig | None = None,
    ):
        self.fallback_action = fallback_action or DistrictAction.default_hold()
        self.generator_fn = generator_fn
        self.repair_config = repair_config or RepairConfig()
        self.tokenizer = None
        self.model = None
        self.device = device or "cpu"

        if self.generator_fn is None:
            if not model_name_or_path:
                raise ValueError("Provide either generator_fn or model_name_or_path.")
            # Heavy deps are imported lazily so generator_fn-only usage does
            # not require torch/transformers.
            import torch
            from transformers import AutoModelForCausalLM, AutoTokenizer

            self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
            model_dir = Path(model_name_or_path)
            self.tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
            if self.tokenizer.pad_token_id is None and self.tokenizer.eos_token_id is not None:
                # generate() needs a pad token; reuse EOS when none is set.
                self.tokenizer.pad_token = self.tokenizer.eos_token

            if (model_dir / "adapter_config.json").exists():
                # The path is a LoRA adapter directory: load through PEFT.
                try:
                    from peft import AutoPeftModelForCausalLM
                except ImportError as exc:
                    raise ImportError("Loading a LoRA adapter requires the 'peft' package.") from exc
                self.model = AutoPeftModelForCausalLM.from_pretrained(model_name_or_path).to(self.device)
            else:
                self.model = AutoModelForCausalLM.from_pretrained(model_name_or_path).to(self.device)
            self.model.eval()

    def generate_raw(self, prompt: str, max_new_tokens: int = 128) -> str:
        """Return the raw greedy completion for ``prompt`` (new tokens only)."""
        if self.generator_fn is not None:
            return self.generator_fn(prompt)
        import torch

        assert self.model is not None and self.tokenizer is not None
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)
        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                do_sample=False,
                pad_token_id=self.tokenizer.eos_token_id,
            )
        # Strip the echoed prompt; decode only freshly generated tokens.
        generated = outputs[0][inputs["input_ids"].shape[1] :]
        return self.tokenizer.decode(generated, skip_special_tokens=True)

    def parse_action(
        self,
        payload: str,
        summary: DistrictStateSummary | None = None,
    ) -> tuple[DistrictAction, RepairReport, dict[str, Any], bool, bool]:
        """Parse model text into a sanitized action.

        Returns:
            ``(action, repair_report, parsed_payload, json_valid,
            schema_valid_before_repair)``. When no JSON object can be
            extracted, the fallback action's dict is sanitized instead and
            both validity flags are False.
        """
        json_valid = True
        schema_valid_before_repair = True
        try:
            parsed_payload = json.loads(_extract_json_object(payload))
        except Exception:
            json_valid = False
            schema_valid_before_repair = False
            parsed_payload = self.fallback_action.to_dict()
        action, repair_report = sanitize_action_payload(
            payload=parsed_payload,
            summary=summary,
            config=self.repair_config,
        )
        return action, repair_report, parsed_payload, json_valid, schema_valid_before_repair

    def predict_with_result(
        self,
        summary: DistrictStateSummary,
        max_new_tokens: int = 128,
    ) -> DistrictLLMInferenceResult:
        """Run prompt -> generate -> parse and return the full diagnostic result."""
        prompt = format_district_prompt(
            summary,
            max_target_intersections=self.repair_config.max_target_intersections,
            allow_only_visible_candidates=self.repair_config.allow_only_visible_candidates,
        )
        raw = self.generate_raw(prompt=prompt, max_new_tokens=max_new_tokens)
        action, repair_report, parsed_payload, json_valid, schema_valid_before_repair = self.parse_action(
            raw,
            summary=summary,
        )
        return DistrictLLMInferenceResult(
            action=action,
            raw_text=raw,
            parsed_payload_before_repair=parsed_payload,
            repair_report=repair_report,
            json_valid=json_valid,
            schema_valid_before_repair=schema_valid_before_repair,
        )

    def predict(self, summary: DistrictStateSummary, max_new_tokens: int = 128) -> DistrictAction:
        """Convenience wrapper returning only the final action."""
        return self.predict_with_result(summary=summary, max_new_tokens=max_new_tokens).action
138
+
139
+
140
def parse_args() -> argparse.Namespace:
    """Define and parse the CLI for single-sample district LLM inference."""
    parser = argparse.ArgumentParser(description="Run single-sample district LLM inference.")
    parser.add_argument("--model", required=True, help="Model name, local path, or LoRA adapter path.")
    parser.add_argument("--generated-root", default="data/generated")
    parser.add_argument("--splits-root", default="data/splits")
    parser.add_argument("--city-id", required=True)
    parser.add_argument("--scenario-name", required=True)
    parser.add_argument("--district-id", required=True)
    # None lets the inference wrapper auto-select cuda/cpu.
    parser.add_argument("--device", default=None)
    parser.add_argument("--max-new-tokens", type=int, default=128)
    # Restrict predicted targets to candidates visible in the prompt summary.
    parser.add_argument(
        "--allow-only-visible-candidates",
        action=argparse.BooleanOptionalAction,
        default=True,
    )
    parser.add_argument("--max-target-intersections", type=int, default=3)
    # When repair empties target_intersections, substitute fallback targets.
    parser.add_argument(
        "--fallback-on-empty-targets",
        action=argparse.BooleanOptionalAction,
        default=True,
    )
    parser.add_argument(
        "--fallback-mode",
        choices=("heuristic", "hold", "none"),
        default="heuristic",
    )
    return parser.parse_args()
167
+
168
+
169
def build_env(scenario_spec) -> TrafficEnv:
    """Create a TrafficEnv with the fixed inference-time environment settings."""
    env_config = EnvConfig(
        simulator_interval=1,
        decision_interval=5,
        min_green_time=10,
        thread_num=1,
        observation=ObservationConfig(),
        reward=RewardConfig(variant="wait_queue_throughput"),
    )
    # Forward the identity/path fields of the spec verbatim to the env.
    spec_fields = (
        "city_id",
        "scenario_name",
        "city_dir",
        "scenario_dir",
        "config_path",
        "roadnet_path",
        "district_map_path",
        "metadata_path",
    )
    kwargs = {name: getattr(scenario_spec, name) for name in spec_fields}
    return TrafficEnv(env_config=env_config, **kwargs)
189
+
190
+
191
def main() -> None:
    """CLI entry point: print one predicted district action as pretty JSON.

    Builds the requested scenario environment, summarizes its districts at
    reset time, and runs a single LLM inference for ``--district-id``.

    Raises:
        ValueError: when ``--district-id`` does not exist in the scenario.
    """
    args = parse_args()
    dataset = CityFlowDataset(
        generated_root=args.generated_root,
        splits_root=args.splits_root,
    )
    scenario_spec = dataset.build_scenario_spec(args.city_id, args.scenario_name)
    env = build_env(scenario_spec)
    # Keep at least 6 candidates visible so repair has room to choose targets.
    summary_builder = DistrictStateSummaryBuilder(candidate_limit=max(6, args.max_target_intersections))
    observation_batch = env.reset()
    summaries = summary_builder.build_all(env, observation_batch)
    if args.district_id not in summaries:
        raise ValueError(f"Unknown district_id '{args.district_id}' for {args.city_id}/{args.scenario_name}.")
    inference = DistrictLLMInference(
        model_name_or_path=args.model,
        device=args.device,
        fallback_action=DistrictAction.default_hold(),
        repair_config=RepairConfig(
            allow_only_visible_candidates=args.allow_only_visible_candidates,
            max_target_intersections=args.max_target_intersections,
            fallback_on_empty_targets=args.fallback_on_empty_targets,
            fallback_mode=args.fallback_mode,
        ),
    )
    action = inference.predict(
        summary=summaries[args.district_id],
        max_new_tokens=args.max_new_tokens,
    )
    print(action.to_pretty_json())


if __name__ == "__main__":
    main()
district_llm/metrics.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from typing import Any
4
+
5
+
6
def safe_ratio(numerator: int | float, denominator: int | float, default_if_empty: float = 0.0) -> float:
    """Divide, returning ``default_if_empty`` for zero/negative denominators."""
    if denominator > 0:
        return float(numerator) / float(denominator)
    return default_if_empty
10
+
11
+
12
def compute_target_metrics(pred_list: list[str], gt_list: list[str]) -> dict[str, float]:
    """Score predicted target ids against ground truth (set + order metrics).

    When both lists are empty, the ratio metrics default to 1.0 (perfect
    vacuous match); otherwise an empty side contributes 0.0 to the affected
    ratios.
    """
    predictions = list(pred_list)
    truth = list(gt_list)
    prediction_set = set(predictions)
    truth_set = set(truth)
    shared = prediction_set & truth_set
    combined = prediction_set | truth_set
    hits = len(shared)

    empty_default = 1.0 if not prediction_set and not truth_set else 0.0

    def _ratio(numerator: float, denominator: float) -> float:
        # Same contract as safe_ratio(...) with empty_default as the default.
        if denominator > 0:
            return float(numerator) / float(denominator)
        return empty_default

    return {
        "exact_list_match": float(predictions == truth),
        "exact_set_match": float(prediction_set == truth_set),
        "overlap_count": float(hits),
        "overlap_rate": _ratio(hits, len(truth_set)),
        "precision": _ratio(hits, len(prediction_set)),
        "recall": _ratio(hits, len(truth_set)),
        "jaccard": _ratio(hits, len(combined)),
        "hit_at_1": float(hits >= 1),
        "hit_at_2": float(hits >= 2),
        "hit_at_3": float(hits >= 3),
    }
39
+
40
+
41
def aggregate_target_metrics(metric_rows: list[dict[str, float]]) -> dict[str, float]:
    """Average each metric across rows; an empty input yields an empty dict.

    The key set is taken from the first row; every row is expected to carry
    the same keys.
    """
    if not metric_rows:
        return {}
    count = len(metric_rows)
    totals: dict[str, float] = {key: 0.0 for key in metric_rows[0]}
    for row in metric_rows:
        for key in totals:
            totals[key] += row[key]
    return {key: float(total / count) for key, total in totals.items()}
49
+
50
+
51
def target_failure_buckets(
    pred_list: list[str],
    gt_list: list[str],
    visible_candidates: set[str],
    invalid_ids: list[str] | None = None,
    non_visible_ids: list[str] | None = None,
    repaired_targets: list[str] | None = None,
    fallback_used: bool = False,
) -> list[str]:
    """Classify a prediction/ground-truth pair into diagnostic buckets.

    Args:
        pred_list: Predicted target intersection ids (order-sensitive).
        gt_list: Ground-truth target ids.
        visible_candidates: Ids that were visible in the prompt summary.
        invalid_ids: Ids removed during repair as invalid.
        non_visible_ids: Ids removed during repair as not visible.
        repaired_targets: Final targets after repair, when repair ran.
        fallback_used: Whether repair fell back to substitute targets.

    Returns:
        Ordered list of unique bucket labels. Previously the
        "prediction_contains_ids_not_visible_in_summary" label could appear
        twice (once via ``non_visible_ids`` and once via the direct
        visibility check); labels are now deduplicated while preserving
        first-occurrence order.
    """
    buckets: list[str] = []

    def add(bucket: str) -> None:
        # Each diagnostic label appears at most once.
        if bucket not in buckets:
            buckets.append(bucket)

    pred_set = set(pred_list)
    gt_set = set(gt_list)

    if not pred_list:
        add("prediction_empty")
    if not gt_list:
        add("ground_truth_empty")
    if pred_list and gt_list and pred_set == gt_set and pred_list != gt_list:
        add("same_set_different_order")
    elif pred_set & gt_set:
        add("partial_overlap")
    elif pred_list and gt_list:
        add("no_overlap")

    if invalid_ids:
        add("prediction_contains_invalid_ids")
    if non_visible_ids:
        add("prediction_contains_ids_not_visible_in_summary")
    if pred_list and visible_candidates and any(item not in visible_candidates for item in pred_list):
        add("prediction_contains_ids_not_visible_in_summary")
    if fallback_used:
        add("fallback_used")

    if repaired_targets is not None:
        repaired_set = set(repaired_targets)
        if repaired_set == gt_set and pred_set != gt_set:
            add("repaired_successfully")
        elif (invalid_ids or non_visible_ids or fallback_used) and repaired_set != gt_set:
            add("repair_failed")

    return buckets
92
+
93
+
94
def average_item_rate(values: list[list[Any]]) -> float:
    """Ratio of total items to total slots, counting each empty list as one slot.

    Returns 0.0 for an empty `values` (matching safe_ratio's default).
    """
    total_items = 0
    total_slots = 0
    for item in values:
        size = len(item)
        total_items += size
        total_slots += size if size else 1
    # Inlined safe_ratio(total_items, total_slots) with its 0.0 default.
    if total_slots > 0:
        return float(total_items) / float(total_slots)
    return 0.0
district_llm/prompting.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from district_llm.schema import DISTRICT_STRATEGIES, PHASE_BIASES, PRIORITY_CORRIDORS, DistrictAction, DistrictStateSummary
4
+
5
+
6
DEFAULT_MAX_TARGET_INTERSECTIONS = 3


def build_system_prompt(
    max_target_intersections: int = DEFAULT_MAX_TARGET_INTERSECTIONS,
    allow_only_visible_candidates: bool = True,
) -> str:
    """Compose the system prompt stating the JSON schema and target-id rules."""
    parts = [
        "You are a district traffic coordinator for RL traffic lights. ",
        "Return only valid JSON with exactly these keys: ",
        "strategy, priority_corridor, target_intersections, phase_bias, duration_steps. ",
        f"target_intersections must be a JSON array with at most {int(max_target_intersections)} unique ids.",
    ]
    if allow_only_visible_candidates:
        # Restrict target ids to the candidates shown in the prompt.
        parts.append(
            " If candidate_intersections is present, target_intersections must use only ids from that list."
        )
    parts.append(
        " Do not invent intersection ids. Deduplicate ids. If uncertain, prefer the most congested valid candidates."
    )
    return "".join(parts)
26
+
27
+
28
def format_district_prompt(
    summary: DistrictStateSummary,
    max_target_intersections: int = DEFAULT_MAX_TARGET_INTERSECTIONS,
    allow_only_visible_candidates: bool = True,
) -> str:
    """Render the schema header, district state, and decision cue as one prompt."""
    limit = int(max_target_intersections)
    if allow_only_visible_candidates:
        target_rule = f"target_intersections: up to {limit} ids from candidate_intersections only"
    else:
        target_rule = f"target_intersections: up to {limit} valid ids"
    lines = [
        "### DISTRICT ACTION SCHEMA",
        f"strategy: {'|'.join(DISTRICT_STRATEGIES)}",
        f"phase_bias: {'|'.join(PHASE_BIASES)}",
        f"priority_corridor: {'|'.join(PRIORITY_CORRIDORS)}|none",
        "duration_steps: integer 1..20",
        target_rule,
        "rules: return only valid JSON; do not invent ids; deduplicate target_intersections",
        "fallback: if uncertain, prefer the most congested visible candidates",
        "",
        "### DISTRICT STATE",
        summary.to_prompt_text(),
        "",
        "### DECISION",
    ]
    return "\n".join(lines)
55
+
56
+
57
def format_sft_text(
    summary: DistrictStateSummary,
    action: DistrictAction,
    max_target_intersections: int = DEFAULT_MAX_TARGET_INTERSECTIONS,
    allow_only_visible_candidates: bool = True,
) -> str:
    """Concatenate the prompt and the pretty-printed action JSON for SFT."""
    prompt = format_district_prompt(
        summary,
        max_target_intersections=max_target_intersections,
        allow_only_visible_candidates=allow_only_visible_candidates,
    )
    return f"{prompt}\n{action.to_pretty_json()}"
district_llm/repair.py ADDED
@@ -0,0 +1,392 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ from dataclasses import dataclass, field
5
+ from typing import Any
6
+
7
+ from district_llm.schema import (
8
+ DISTRICT_STRATEGIES,
9
+ PHASE_BIASES,
10
+ PRIORITY_CORRIDORS,
11
+ CandidateIntersection,
12
+ DistrictAction,
13
+ DistrictStateSummary,
14
+ candidate_priority_score,
15
+ canonicalize_target_intersections,
16
+ )
17
+
18
+
19
+ INTERSECTION_ID_PATTERN = re.compile(r"\bi_\d+\b")
20
+
21
+
22
@dataclass(frozen=True)
class RepairConfig:
    """Knobs controlling how model-emitted actions are sanitized."""

    # Drop target ids that are not among the summary's visible candidates.
    allow_only_visible_candidates: bool = True
    # Hard cap on target_intersections length after repair.
    max_target_intersections: int = 3
    # Substitute fallback targets when filtering empties the list.
    fallback_on_empty_targets: bool = True
    # Fallback selection strategy ("heuristic", "hold", or "none" per the
    # inference CLI's --fallback-mode choices).
    fallback_mode: str = "heuristic"
+
29
+
30
@dataclass
class RepairReport:
    """Record of every change repair made to a model-predicted action."""

    raw_targets: list[str] = field(default_factory=list)
    repaired_targets: list[str] = field(default_factory=list)
    invalid_ids_removed: list[str] = field(default_factory=list)
    non_visible_ids_removed: list[str] = field(default_factory=list)
    deduplicated: bool = False
    truncated: bool = False
    fallback_used: bool = False
    fallback_mode: str | None = None
    empty_after_filtering: bool = False

    def to_dict(self) -> dict[str, Any]:
        """Serialize as JSON-ready plain types (lists copied, flags coerced)."""
        payload: dict[str, Any] = {}
        payload["raw_targets"] = list(self.raw_targets)
        payload["repaired_targets"] = list(self.repaired_targets)
        payload["invalid_ids_removed"] = list(self.invalid_ids_removed)
        payload["non_visible_ids_removed"] = list(self.non_visible_ids_removed)
        payload["deduplicated"] = bool(self.deduplicated)
        payload["truncated"] = bool(self.truncated)
        payload["fallback_used"] = bool(self.fallback_used)
        payload["fallback_mode"] = self.fallback_mode
        payload["empty_after_filtering"] = bool(self.empty_after_filtering)
        return payload
54
+
55
+
56
def normalize_candidate_intersections(
    payload: list[CandidateIntersection | dict[str, Any]] | None,
) -> list[dict[str, Any]]:
    """Coerce candidates (dataclasses or dicts) into fresh plain dicts.

    Entries of any other type are silently dropped; ``None`` yields [].
    """
    result: list[dict[str, Any]] = []
    entries = payload if payload is not None else []
    for entry in entries:
        if isinstance(entry, CandidateIntersection):
            result.append(entry.to_dict())
            continue
        if isinstance(entry, dict):
            result.append(dict(entry))
    return result
66
+
67
+
68
def parse_candidate_intersections_from_text(text: str) -> list[dict[str, Any]]:
    """Recover candidate intersections from a rendered prompt's text block.

    Parses list items that follow a literal ``candidate_intersections:``
    line, e.g. ``- i_12 q=4 w=2.5 out=1 phase=0 boundary=1 reasons=a|b``
    (format inferred from the key handling below — confirm against the
    summary renderer). Parsing stops at the next ``something:`` header.
    Unknown ``key=value`` tokens are ignored; missing keys keep
    conservative defaults.
    """
    if "candidate_intersections:" not in text:
        return []

    candidates: list[dict[str, Any]] = []
    capture = False
    for line in text.splitlines():
        stripped = line.strip()
        if stripped == "candidate_intersections:":
            capture = True
            continue
        if not capture:
            continue
        if stripped == "- none":
            # Placeholder emitted when a district has no candidates.
            continue
        if not stripped.startswith("- "):
            if stripped.endswith(":"):
                # Next section header: the candidate block is finished.
                break
            continue
        fields = stripped[2:].split()
        if not fields:
            continue
        # First token is the id; the rest are key=value attribute pairs.
        candidate: dict[str, Any] = {
            "intersection_id": fields[0],
            "queue_total": 0.0,
            "wait_total": 0.0,
            "outgoing_load": 0.0,
            "current_phase": 0,
            "is_boundary": False,
            "spillback_risk": False,
            "incident_proximity": False,
            "overload_marker": False,
            "event_proximity": False,
            "corridor_alignment": "BALANCED",
            "selection_reasons": [],
        }
        for token in fields[1:]:
            if "=" not in token:
                continue
            key, value = token.split("=", 1)
            if key == "q":
                candidate["queue_total"] = float(value)
            elif key == "w":
                candidate["wait_total"] = float(value)
            elif key == "out":
                candidate["outgoing_load"] = float(value)
            elif key == "phase":
                candidate["current_phase"] = int(value)
            elif key == "boundary":
                candidate["is_boundary"] = value == "1"
            elif key == "spillback":
                candidate["spillback_risk"] = value == "1"
            elif key == "incident":
                candidate["incident_proximity"] = value == "1"
            elif key == "overload":
                candidate["overload_marker"] = value == "1"
            elif key == "event":
                candidate["event_proximity"] = value == "1"
            elif key == "align":
                candidate["corridor_alignment"] = value
            elif key == "reasons":
                # "none" encodes an empty reason list; otherwise '|'-separated.
                candidate["selection_reasons"] = [] if value == "none" else value.split("|")
        candidates.append(candidate)
    return normalized_candidate_intersections_from_dicts(candidates)
132
+
133
+
134
def normalized_candidate_intersections_from_dicts(
    payload: list[dict[str, Any]],
) -> list[dict[str, Any]]:
    """Round-trip raw dicts through CandidateIntersection to validate them.

    Best-effort: any entry that cannot be coerced (bad types, failed
    construction) is dropped rather than failing the whole batch.
    """
    normalized: list[dict[str, Any]] = []
    for raw in payload:
        try:
            candidate = CandidateIntersection(
                intersection_id=str(raw.get("intersection_id", "")).strip(),
                queue_total=float(raw.get("queue_total", 0.0)),
                wait_total=float(raw.get("wait_total", 0.0)),
                outgoing_load=float(raw.get("outgoing_load", 0.0)),
                current_phase=int(raw.get("current_phase", 0)),
                is_boundary=bool(raw.get("is_boundary", False)),
                spillback_risk=bool(raw.get("spillback_risk", False)),
                incident_proximity=bool(raw.get("incident_proximity", False)),
                overload_marker=bool(raw.get("overload_marker", False)),
                event_proximity=bool(raw.get("event_proximity", False)),
                corridor_alignment=str(raw.get("corridor_alignment", "BALANCED")),
                selection_reasons=list(raw.get("selection_reasons", [])),
            )
            normalized.append(candidate.to_dict())
        except Exception:
            # Deliberate best-effort filter over possibly LLM-produced data.
            continue
    return normalized
159
+
160
+
161
def candidate_intersections_from_context(
    summary: DistrictStateSummary | dict[str, Any] | None = None,
    prompt_text: str | None = None,
) -> list[dict[str, Any]]:
    """Extract normalized candidate intersections from whichever context exists.

    Preference order: typed summary -> summary dict (top level, then nested
    under "state") -> free-text prompt parsing. Returns [] when no source
    provides candidates.
    """
    if isinstance(summary, DistrictStateSummary):
        return normalize_candidate_intersections(summary.candidate_intersections)
    if isinstance(summary, dict):
        if "candidate_intersections" in summary:
            return normalize_candidate_intersections(summary.get("candidate_intersections"))
        nested = summary.get("state")
        if isinstance(nested, dict) and "candidate_intersections" in nested:
            return normalize_candidate_intersections(nested.get("candidate_intersections"))
    if prompt_text:
        return parse_candidate_intersections_from_text(prompt_text)
    return []
176
+
177
+
178
def fallback_target_intersections(
    summary: DistrictStateSummary | dict[str, Any] | None = None,
    prompt_text: str | None = None,
    max_target_intersections: int = 3,
    strategy: str | None = None,
    priority_corridor: str | None = None,
    phase_bias: str | None = None,
    focus_scores: dict[str, float] | None = None,
) -> list[str]:
    """Pick fallback target intersections when the model output is unusable.

    Preference order: ranked structured candidates (priority score plus
    focus/strategy bonuses; ties broken by queue, wait, outgoing load, then
    id) -> the summary's top-congested list -> ids scraped from the prompt.
    """
    candidates = candidate_intersections_from_context(summary=summary, prompt_text=prompt_text)
    if candidates:
        def _rank_key(entry: dict[str, Any]) -> tuple:
            combined_score = (
                candidate_priority_score(entry)
                + _focus_score_bonus(entry, focus_scores)
                + _strategy_target_bonus(
                    candidate=entry,
                    strategy=strategy,
                    priority_corridor=priority_corridor,
                    phase_bias=phase_bias,
                )
            )
            # Negate numeric components so sorted() ranks highest first;
            # the id gives a deterministic final tie-break.
            return (
                -combined_score,
                -float(entry.get("queue_total", 0.0)),
                -float(entry.get("wait_total", 0.0)),
                -float(entry.get("outgoing_load", 0.0)),
                str(entry.get("intersection_id", "")),
            )

        ranked = sorted(candidates, key=_rank_key)
        ordered_ids = canonicalize_target_intersections(
            [entry["intersection_id"] for entry in ranked],
            ranked,
            limit=max_target_intersections,
        )
        return ordered_ids[:max_target_intersections]

    if isinstance(summary, DistrictStateSummary):
        return [
            entry.intersection_id
            for entry in summary.top_congested_intersections[:max_target_intersections]
        ]
    if isinstance(summary, dict):
        top_congested = summary.get("top_congested_intersections") or summary.get("state", {}).get(
            "top_congested_intersections", []
        )
        return [
            str(entry.get("intersection_id"))
            for entry in top_congested[:max_target_intersections]
            if str(entry.get("intersection_id", "")).strip()
        ]
    if prompt_text:
        # De-duplicated in first-seen order via dict.fromkeys.
        return list(dict.fromkeys(INTERSECTION_ID_PATTERN.findall(prompt_text)))[:max_target_intersections]
    return []
227
+
228
+
229
+ def _focus_score_bonus(candidate: dict[str, Any], focus_scores: dict[str, float] | None) -> float:
230
+ if not focus_scores:
231
+ return 0.0
232
+ max_focus = max(max(focus_scores.values()), 1.0)
233
+ return 4.0 * float(focus_scores.get(str(candidate.get("intersection_id", "")), 0.0)) / max_focus
234
+
235
+
236
+ def _strategy_target_bonus(
237
+ candidate: dict[str, Any],
238
+ strategy: str | None,
239
+ priority_corridor: str | None,
240
+ phase_bias: str | None,
241
+ ) -> float:
242
+ reasons = set(candidate.get("selection_reasons", []))
243
+ corridor_alignment = str(candidate.get("corridor_alignment", "BALANCED"))
244
+ bonus = 0.0
245
+
246
+ if strategy == "incident_response":
247
+ bonus += 4.0 * float(bool(candidate.get("incident_proximity", False)))
248
+ elif strategy == "clear_spillback":
249
+ bonus += 4.0 * float(bool(candidate.get("spillback_risk", False)))
250
+ bonus += 1.0 * float(bool(candidate.get("is_boundary", False)))
251
+ elif strategy == "drain_inbound":
252
+ bonus += 4.0 * float(bool(candidate.get("is_boundary", False)))
253
+ bonus += 1.0 * float(bool(candidate.get("spillback_risk", False)))
254
+ elif strategy == "drain_outbound":
255
+ bonus += 4.0 * float("outgoing" in reasons)
256
+ bonus += 1.0 * float(bool(candidate.get("spillback_risk", False)))
257
+ elif strategy == "arterial_priority":
258
+ bonus += 2.0 * float(bool(candidate.get("is_boundary", False)))
259
+ bonus += 1.5 * float(bool(candidate.get("overload_marker", False)))
260
+ bonus += 1.5 * float(bool(candidate.get("event_proximity", False)))
261
+ elif strategy == "favor_NS":
262
+ bonus += 4.0 * float(corridor_alignment == "NS")
263
+ elif strategy == "favor_EW":
264
+ bonus += 4.0 * float(corridor_alignment == "EW")
265
+
266
+ if priority_corridor in {"NS", "EW"}:
267
+ bonus += 1.5 * float(corridor_alignment == priority_corridor)
268
+ elif priority_corridor == "inbound":
269
+ bonus += 1.5 * float(bool(candidate.get("is_boundary", False)))
270
+ elif priority_corridor == "outbound":
271
+ bonus += 1.5 * float("outgoing" in reasons)
272
+ elif priority_corridor == "arterial":
273
+ bonus += 0.75 * float(bool(candidate.get("is_boundary", False)))
274
+
275
+ if phase_bias in {"NS", "EW"}:
276
+ bonus += 0.5 * float(corridor_alignment == phase_bias)
277
+
278
+ return bonus
279
+
280
+
281
def extract_visible_candidate_ids(
    summary: DistrictStateSummary | dict[str, Any] | None = None,
    prompt_text: str | None = None,
) -> list[str]:
    """Return every intersection id visible to the model in this context.

    Uses structured candidate metadata when available; otherwise scrapes ids
    from the prompt text, de-duplicated in first-seen order.
    """
    candidates = candidate_intersections_from_context(summary=summary, prompt_text=prompt_text)
    if candidates:
        return [entry["intersection_id"] for entry in candidates]
    if prompt_text:
        return list(dict.fromkeys(INTERSECTION_ID_PATTERN.findall(prompt_text)))
    return []
291
+
292
+
293
def sanitize_action_payload(
    payload: dict[str, Any] | None,
    summary: DistrictStateSummary | dict[str, Any] | None = None,
    prompt_text: str | None = None,
    config: RepairConfig | None = None,
) -> tuple[DistrictAction, RepairReport]:
    """Validate and repair a raw model action payload into a DistrictAction.

    Pipeline: normalize the target list -> de-duplicate (order-preserving) ->
    drop malformed or non-visible ids -> truncate to the configured maximum ->
    optional fallback when nothing survives -> clamp enum-like fields.
    Returns the repaired action together with a RepairReport recording every
    change. Raises ValueError on an unknown config.fallback_mode.
    """
    config = config or RepairConfig()
    payload = dict(payload or {})
    candidate_intersections = candidate_intersections_from_context(summary=summary, prompt_text=prompt_text)
    visible_candidate_ids = [item["intersection_id"] for item in candidate_intersections]
    visible_candidate_set = set(visible_candidate_ids)

    # Targets may arrive as a string blob, a list/tuple, or anything else
    # (treated as empty).
    raw_target_payload = payload.get("target_intersections", [])
    if isinstance(raw_target_payload, str):
        raw_targets = INTERSECTION_ID_PATTERN.findall(raw_target_payload)
    elif isinstance(raw_target_payload, (list, tuple)):
        raw_targets = [str(item).strip() for item in raw_target_payload if str(item).strip()]
    else:
        raw_targets = []

    report = RepairReport(raw_targets=list(raw_targets))
    # Order-preserving de-duplication.
    deduped_targets: list[str] = []
    seen: set[str] = set()
    for item in raw_targets:
        if item in seen:
            report.deduplicated = True
            continue
        seen.add(item)
        deduped_targets.append(item)

    # Drop ids that don't match the id pattern, and (when configured and a
    # visibility set exists) ids the model could not have seen.
    filtered_targets: list[str] = []
    for item in deduped_targets:
        if not INTERSECTION_ID_PATTERN.fullmatch(item):
            report.invalid_ids_removed.append(item)
            continue
        if config.allow_only_visible_candidates and visible_candidate_set and item not in visible_candidate_set:
            report.non_visible_ids_removed.append(item)
            continue
        filtered_targets.append(item)

    if len(filtered_targets) > int(config.max_target_intersections):
        report.truncated = True
        filtered_targets = canonicalize_target_intersections(
            filtered_targets,
            candidate_intersections,
            limit=int(config.max_target_intersections),
        )

    if not filtered_targets:
        # Only flag empty_after_filtering when the model DID propose targets.
        report.empty_after_filtering = bool(raw_targets)
        if config.fallback_on_empty_targets:
            report.fallback_used = True
            report.fallback_mode = config.fallback_mode
            if config.fallback_mode == "heuristic":
                filtered_targets = fallback_target_intersections(
                    summary=summary,
                    prompt_text=prompt_text,
                    max_target_intersections=int(config.max_target_intersections),
                )
            elif config.fallback_mode == "hold":
                filtered_targets = []
            elif config.fallback_mode == "none":
                filtered_targets = []
            else:
                raise ValueError(f"Unsupported fallback_mode '{config.fallback_mode}'.")

    # Clamp enum-like fields to their allowed vocabularies.
    strategy = str(payload.get("strategy", "hold"))
    if strategy not in DISTRICT_STRATEGIES:
        strategy = "hold"

    priority_corridor = payload.get("priority_corridor")
    if priority_corridor is not None:
        priority_corridor = str(priority_corridor)
        if priority_corridor not in PRIORITY_CORRIDORS:
            priority_corridor = None

    phase_bias = str(payload.get("phase_bias", "NONE"))
    if phase_bias not in PHASE_BIASES:
        phase_bias = "NONE"

    duration_steps_raw = payload.get("duration_steps", 1)
    try:
        duration_steps = int(duration_steps_raw)
    except (TypeError, ValueError):
        duration_steps = 1
    # Clamp to the supported range [1, 20].
    duration_steps = max(1, min(duration_steps, 20))

    if config.fallback_mode == "hold" and report.fallback_used and not filtered_targets:
        # "hold" fallback overrides whatever the model asked for.
        action = DistrictAction.default_hold(duration_steps=duration_steps)
    else:
        action = DistrictAction(
            strategy=strategy,
            priority_corridor=priority_corridor,
            target_intersections=filtered_targets,
            phase_bias=phase_bias,
            duration_steps=duration_steps,
        ).validate()

    report.repaired_targets = list(action.target_intersections)
    return action, report
district_llm/rl_guidance_wrapper.py ADDED
@@ -0,0 +1,1004 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from abc import ABC, abstractmethod
4
+ from dataclasses import asdict, dataclass, field, replace
5
+ import hashlib
6
+ from time import perf_counter
7
+ from typing import Any
8
+
9
+ import numpy as np
10
+ import torch
11
+
12
+ from district_llm.heuristic_guidance import (
13
+ HeuristicGuidanceConfig,
14
+ generate_heuristic_guidance,
15
+ )
16
+ from district_llm.inference import DistrictLLMInference, DistrictLLMInferenceResult
17
+ from district_llm.repair import RepairReport
18
+ from district_llm.schema import CandidateIntersection, DistrictAction, DistrictStateSummary
19
+ from district_llm.summary_builder import DistrictStateSummaryBuilder
20
+ from district_llm.teachers import RLCheckpointTeacher
21
+
22
+
23
# Allowed vocabularies for GuidanceInfluenceConfig; validate() checks against
# these tuples, so adding a value here is how a new mode becomes legal.
WRAPPER_MODES: tuple[str, ...] = (
    "no_op",
    "target_only_soft",
    "target_only_medium",
    "corridor_soft",
    "global_soft",
    "current_legacy",
)
FALLBACK_POLICIES: tuple[str, ...] = (
    "no_op",
    "hold_previous",
    "heuristic_weak",
)
GATING_MODES: tuple[str, ...] = (
    "always_on",
    "incident_or_spillback",
    "queue_threshold",
    "imbalance_threshold",
    "queue_or_imbalance",
    "combined",
)
BIAS_DECAY_SCHEDULES: tuple[str, ...] = (
    "linear",
)
# Strategy-specific multipliers on the guidance bias strength; names suggest
# "hold" disables biasing entirely (usage is outside this chunk — verify).
STRATEGY_BIAS_MULTIPLIERS: dict[str, float] = {
    "hold": 0.0,
    "favor_NS": 1.0,
    "favor_EW": 1.0,
    "drain_inbound": 1.05,
    "drain_outbound": 1.05,
    "clear_spillback": 1.1,
    "incident_response": 1.15,
    "arterial_priority": 1.05,
}
57
+
58
+
59
@dataclass(frozen=True)
class GuidanceInfluenceConfig:
    """
    Conservative inference-time wrapper around the fixed DQN policy.

    The DQN checkpoint remains unchanged. Guidance is treated as a weak prior
    and only biases Q-values slightly before greedy action selection.
    """

    # Bias application mode; must be one of WRAPPER_MODES.
    wrapper_mode: str = "target_only_soft"
    # Bias magnitudes (exact application semantics live in the wrapper logic).
    bias_strength: float = 0.12
    target_only_bias_strength: float = 0.18
    corridor_bias_strength: float = 0.05
    max_intersections_affected: int = 3
    # Cadence: how often guidance is regenerated and how long it persists.
    guidance_refresh_steps: int = 5
    guidance_persistence_steps: int = 3
    max_guidance_duration: int = 6
    apply_global_bias: bool = False
    apply_target_only: bool = True
    # Gating: conditions under which guidance may influence the policy.
    gating_mode: str = "always_on"
    min_avg_queue_for_guidance: float = 150.0
    min_queue_imbalance_for_guidance: float = 20.0
    require_incident_or_spillback: bool = False
    allow_guidance_in_normal_conditions: bool = True
    enable_bias_decay: bool = True
    bias_decay_schedule: str = "linear"
    fallback_policy: str = "hold_previous"
    log_guidance_debug: bool = False
    max_debug_chars: int = 240

    def validate(self) -> "GuidanceInfluenceConfig":
        """Check enum fields against the allowed vocabularies and counters >= 1.

        Returns self for fluent use; raises ValueError on the first violation.
        """
        if self.wrapper_mode not in WRAPPER_MODES:
            raise ValueError(
                f"Unsupported wrapper_mode '{self.wrapper_mode}'. Expected one of {WRAPPER_MODES}."
            )
        if self.fallback_policy not in FALLBACK_POLICIES:
            raise ValueError(
                f"Unsupported fallback_policy '{self.fallback_policy}'. Expected one of {FALLBACK_POLICIES}."
            )
        if self.gating_mode not in GATING_MODES:
            raise ValueError(
                f"Unsupported gating_mode '{self.gating_mode}'. Expected one of {GATING_MODES}."
            )
        if self.bias_decay_schedule not in BIAS_DECAY_SCHEDULES:
            raise ValueError(
                f"Unsupported bias_decay_schedule '{self.bias_decay_schedule}'. "
                f"Expected one of {BIAS_DECAY_SCHEDULES}."
            )
        if self.guidance_refresh_steps < 1:
            raise ValueError("guidance_refresh_steps must be at least 1.")
        if self.guidance_persistence_steps < 1:
            raise ValueError("guidance_persistence_steps must be at least 1.")
        if self.max_guidance_duration < 1:
            raise ValueError("max_guidance_duration must be at least 1.")
        if self.max_intersections_affected < 1:
            raise ValueError("max_intersections_affected must be at least 1.")
        return self
116
+
117
+
118
@dataclass(frozen=True)
class RLPolicyDecision:
    """Raw output of the fixed RL policy for one observation batch."""

    # Per-row Q-values; actions are their greedy argmax
    # (see FixedRLPolicyAdapter.decide).
    q_values: np.ndarray
    actions: np.ndarray
122
+
123
+
124
@dataclass
class GuidanceDecision:
    """One guidance proposal (LLM or heuristic) plus validity/repair metadata."""

    source: str  # provider identifier, e.g. "llm" or "heuristic"
    action: DistrictAction
    runtime_seconds: float
    raw_text: str | None = None
    parsed_payload_before_repair: dict[str, Any] | None = None
    repair_report: RepairReport | None = None
    json_valid: bool = True
    schema_valid_before_repair: bool = True
    provider_error: str | None = None
    fallback_policy_applied: str | None = None

    @property
    def repair_applied(self) -> bool:
        """True when the repair pipeline changed the proposal in any way."""
        report = self.repair_report
        if report is None:
            return False
        return any(
            (
                report.invalid_ids_removed,
                report.non_visible_ids_removed,
                report.deduplicated,
                report.truncated,
                report.fallback_used,
                report.empty_after_filtering,
            )
        )

    @property
    def invalid_before_repair(self) -> bool:
        """True when the raw proposal was unusable before repair.

        Unusable means: the provider errored, the output was not valid
        JSON/schema, or targets contained invalid / non-visible ids or
        ended up empty after filtering.
        """
        report = self.repair_report
        if self.provider_error:
            return True
        if not self.json_valid or not self.schema_valid_before_repair:
            return True
        if report is None:
            return False
        return bool(
            report.invalid_ids_removed
            or report.non_visible_ids_removed
            or report.empty_after_filtering
        )

    def to_trace_payload(self) -> dict[str, Any]:
        """Serialize this decision (including derived flags) for trace logging."""
        return {
            "source": self.source,
            "runtime_seconds": float(self.runtime_seconds),
            "action": self.action.to_dict(),
            "raw_text": self.raw_text,
            "parsed_payload_before_repair": self.parsed_payload_before_repair,
            "repair_report": None if self.repair_report is None else self.repair_report.to_dict(),
            "json_valid": bool(self.json_valid),
            "schema_valid_before_repair": bool(self.schema_valid_before_repair),
            "repair_applied": bool(self.repair_applied),
            "invalid_before_repair": bool(self.invalid_before_repair),
            "provider_error": self.provider_error,
            "fallback_policy_applied": self.fallback_policy_applied,
        }
183
+
184
+
185
@dataclass(frozen=True)
class GuidanceApplicationPlan:
    """Concrete, resolved plan for how one guidance decision biases Q-values."""

    wrapper_mode: str
    scope: str
    affected_intersections: tuple[str, ...]
    targeted_intersections: tuple[str, ...]
    target_candidate_ids: tuple[str, ...]
    priority_direction: str | None
    strength_scale: float
    base_bias_strength: float
    target_bias_strength: float
    corridor_bias_strength: float
    apply_global_bias: bool
    apply_target_only: bool
    max_intersections_affected: int

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a JSON-friendly dict (tuples become lists)."""
        return {
            "wrapper_mode": self.wrapper_mode,
            "scope": self.scope,
            "affected_intersections": list(self.affected_intersections),
            "targeted_intersections": list(self.targeted_intersections),
            "target_candidate_ids": list(self.target_candidate_ids),
            "priority_direction": self.priority_direction,
            "strength_scale": float(self.strength_scale),
            "base_bias_strength": float(self.base_bias_strength),
            "target_bias_strength": float(self.target_bias_strength),
            "corridor_bias_strength": float(self.corridor_bias_strength),
            "apply_global_bias": bool(self.apply_global_bias),
            "apply_target_only": bool(self.apply_target_only),
            "max_intersections_affected": int(self.max_intersections_affected),
        }
217
+
218
+
219
@dataclass
class ActiveDistrictGuidance:
    """Guidance currently in force for one district, with its validity window."""

    district_id: str
    summary: DistrictStateSummary  # summary the guidance was generated from
    decision: GuidanceDecision
    application_plan: GuidanceApplicationPlan
    generated_step: int  # decision step at generation time
    # Step at which this guidance expires — inclusive/exclusive semantics are
    # decided by the consumer (outside this chunk); confirm before relying on it.
    expires_step: int
    fallback_used: bool = False
228
+
229
+
230
@dataclass(frozen=True)
class GuidanceGateDecision:
    """Outcome of the gating check deciding whether guidance may apply."""

    allowed: bool
    gating_mode: str
    triggered_conditions: tuple[str, ...]  # conditions that argued FOR guidance
    blocked_reasons: tuple[str, ...]       # reasons guidance was suppressed
    avg_queue: float
    queue_imbalance: float
    incident_flag: bool
    spillback_risk: bool
    overload_flag: bool

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a JSON-friendly dict (tuples become lists)."""
        return {
            "allowed": bool(self.allowed),
            "gating_mode": self.gating_mode,
            "triggered_conditions": list(self.triggered_conditions),
            "blocked_reasons": list(self.blocked_reasons),
            "avg_queue": float(self.avg_queue),
            "queue_imbalance": float(self.queue_imbalance),
            "incident_flag": bool(self.incident_flag),
            "spillback_risk": bool(self.spillback_risk),
            "overload_flag": bool(self.overload_flag),
        }
254
+
255
+
256
@dataclass
class GuidanceRefreshTrace:
    """Full record of one guidance refresh, suitable for JSON trace logs."""

    mode_source: str
    district_id: str
    decision_step: int
    summary_hash: str
    summary_excerpt: str
    summary_payload: dict[str, Any]
    guidance: dict[str, Any]  # raw guidance before repair
    repaired_guidance: dict[str, Any]
    fallback_used: bool
    fallback_policy: str
    application_plan: dict[str, Any]
    applied_biases: dict[str, float]
    gate_decision: dict[str, Any] | None = None

    def to_dict(self) -> dict[str, Any]:
        """Serialize for logging.

        Note the key renames: summary_payload -> "summary",
        guidance -> "raw_guidance".
        """
        return {
            "mode_source": self.mode_source,
            "district_id": self.district_id,
            "decision_step": int(self.decision_step),
            "summary_hash": self.summary_hash,
            "summary_excerpt": self.summary_excerpt,
            "summary": self.summary_payload,
            "raw_guidance": self.guidance,
            "repaired_guidance": self.repaired_guidance,
            "fallback_used": bool(self.fallback_used),
            "fallback_policy": self.fallback_policy,
            "application_plan": self.application_plan,
            "applied_biases": self.applied_biases,
            "gate_decision": self.gate_decision,
        }
288
+
289
+
290
@dataclass
class WrapperEpisodeStats:
    """Per-episode counters for how often and how strongly guidance applied."""

    step_count: int = 0
    steps_with_active_guidance: int = 0
    guidance_refresh_count: int = 0
    guidance_blocked_step_count: int = 0
    guidance_blocked_refresh_count: int = 0
    bias_application_count: int = 0
    noop_guidance_events: int = 0
    fallback_event_count: int = 0
    total_affected_intersections: int = 0
    total_targeted_intersections: int = 0
    total_bias_magnitude: float = 0.0
    max_bias_magnitude: float = 0.0

    def to_dict(self) -> dict[str, float]:
        """Summarize counters as floats; averages guard division by zero.

        NOTE(review): "percent_steps_with_active_guidance" is a fraction in
        [0, 1], not a percentage — the key name is kept for downstream
        compatibility.
        """
        refresh_count = max(1, self.guidance_refresh_count)
        return {
            "num_guidance_refreshes": float(self.guidance_refresh_count),
            "num_steps_guidance_blocked_by_gate": float(self.guidance_blocked_step_count),
            "num_guidance_refreshes_blocked_by_gate": float(self.guidance_blocked_refresh_count),
            "num_bias_applications": float(self.bias_application_count),
            "num_noop_guidance_events": float(self.noop_guidance_events),
            "fallback_policy_used_count": float(self.fallback_event_count),
            "avg_num_affected_intersections": float(self.total_affected_intersections) / float(refresh_count),
            "avg_num_targeted_intersections": float(self.total_targeted_intersections) / float(refresh_count),
            "mean_bias_magnitude": float(self.total_bias_magnitude) / float(max(1, self.bias_application_count)),
            "max_bias_magnitude": float(self.max_bias_magnitude),
            "percent_steps_with_active_guidance": float(self.steps_with_active_guidance)
            / float(max(1, self.step_count)),
        }
321
+
322
+
323
@dataclass
class GuidedActionBatch:
    """Result of one guided decision step: base vs. biased Q-values and actions."""

    actions: np.ndarray       # final (possibly guidance-biased) greedy actions
    base_actions: np.ndarray  # greedy actions of the unbiased policy
    base_q_values: np.ndarray
    guided_q_values: np.ndarray  # base_q_values + q_bias
    q_bias: np.ndarray
    refresh_traces: list[GuidanceRefreshTrace] = field(default_factory=list)
    runtime_seconds: float = 0.0
332
+
333
+
334
class BaseGuidanceProvider(ABC):
    """Interface for anything that turns a district summary into guidance."""

    # Short identifier stamped on every GuidanceDecision (e.g. "llm", "heuristic").
    source_name: str

    @abstractmethod
    def generate(self, summary: DistrictStateSummary) -> GuidanceDecision:
        """Produce a GuidanceDecision for the given district summary."""
        raise NotImplementedError
340
+
341
+
342
class HeuristicGuidanceProvider(BaseGuidanceProvider):
    """Guidance provider backed by the deterministic heuristic policy."""

    source_name = "heuristic"

    def __init__(self, config: HeuristicGuidanceConfig | None = None):
        self.config = config or HeuristicGuidanceConfig()

    def generate(self, summary: DistrictStateSummary) -> GuidanceDecision:
        """Produce a heuristic action for *summary*.

        No repair metadata is attached — the decision's validity flags keep
        their defaults (valid), since the heuristic output is constructed
        directly rather than parsed.
        """
        started = perf_counter()
        action = generate_heuristic_guidance(summary=summary, config=self.config)
        return GuidanceDecision(
            source=self.source_name,
            action=action,
            runtime_seconds=perf_counter() - started,
            parsed_payload_before_repair=action.to_dict(),
        )
357
+
358
+
359
class LLMGuidanceProvider(BaseGuidanceProvider):
    """Guidance provider backed by the fine-tuned district LLM."""

    source_name = "llm"

    def __init__(self, inference: DistrictLLMInference, max_new_tokens: int = 128):
        self.inference = inference
        self.max_new_tokens = int(max_new_tokens)

    def generate(self, summary: DistrictStateSummary) -> GuidanceDecision:
        """Run LLM inference on *summary* and wrap the result.

        Forwards the inference result's raw text, pre-repair payload, repair
        report, and JSON/schema validity flags onto the GuidanceDecision.
        """
        started = perf_counter()
        result: DistrictLLMInferenceResult = self.inference.predict_with_result(
            summary=summary,
            max_new_tokens=self.max_new_tokens,
        )
        return GuidanceDecision(
            source=self.source_name,
            action=result.action,
            runtime_seconds=perf_counter() - started,
            raw_text=result.raw_text,
            parsed_payload_before_repair=result.parsed_payload_before_repair,
            repair_report=result.repair_report,
            json_valid=result.json_valid,
            schema_valid_before_repair=result.schema_valid_before_repair,
        )
382
+
383
+
384
class FixedRLPolicyAdapter:
    """Thin adapter exposing a frozen RL checkpoint as a greedy Q-policy."""

    def __init__(self, checkpoint_path: str, device: str | None = None):
        # The teacher loads the checkpoint; weights are never updated here.
        self.teacher = RLCheckpointTeacher(checkpoint_path=checkpoint_path, device=device)
        self.device = self.teacher.device

    @property
    def env_config(self) -> Any | None:
        """Environment config stored alongside the checkpoint, if any."""
        return self.teacher.env_config

    def decide(self, observation_batch: dict[str, Any]) -> RLPolicyDecision:
        """Compute masked Q-values and greedy (argmax) actions for one batch.

        Expects "observations", "district_type_indices", and "action_mask"
        keys; observations are normalized when the teacher ships an
        obs_normalizer, otherwise used as-is.
        """
        raw_obs = observation_batch["observations"].astype(np.float32)
        normalized_obs = (
            self.teacher.obs_normalizer.normalize(raw_obs)
            if self.teacher.obs_normalizer is not None
            else raw_obs
        )
        obs_tensor = torch.as_tensor(normalized_obs, dtype=torch.float32, device=self.device)
        district_type_tensor = torch.as_tensor(
            observation_batch["district_type_indices"],
            dtype=torch.int64,
            device=self.device,
        )
        action_mask_tensor = torch.as_tensor(
            observation_batch["action_mask"],
            dtype=torch.float32,
            device=self.device,
        )
        # Inference only: no gradients needed for the frozen policy.
        with torch.no_grad():
            q_values = self.teacher.model.forward(
                observations=obs_tensor,
                district_type_indices=district_type_tensor,
                action_mask=action_mask_tensor,
            )
        q_values_np = q_values.detach().cpu().numpy().astype(np.float32)
        return RLPolicyDecision(q_values=q_values_np, actions=q_values_np.argmax(axis=1).astype(np.int64))
419
+
420
+
421
+ class DistrictGuidedRLController:
422
def __init__(
    self,
    policy: FixedRLPolicyAdapter,
    mode_source: str,
    summary_builder: DistrictStateSummaryBuilder | None = None,
    guidance_provider: BaseGuidanceProvider | None = None,
    influence_config: GuidanceInfluenceConfig | None = None,
    heuristic_provider: BaseGuidanceProvider | None = None,
):
    """Wrap *policy* with optional guidance biasing.

    When guidance_provider or summary_builder is None the controller
    degrades to the plain policy (see act). The influence config is
    validated eagerly so bad configs fail at construction time.
    """
    self.policy = policy
    self.mode_source = mode_source
    self.summary_builder = summary_builder
    self.guidance_provider = guidance_provider
    self.influence_config = (influence_config or GuidanceInfluenceConfig()).validate()
    self.heuristic_provider = heuristic_provider
    # Per-district active guidance and refresh schedule; reset each episode.
    self._active_guidance: dict[str, ActiveDistrictGuidance] = {}
    self._next_refresh_step_by_district: dict[str, int] = {}
    self._episode_stats = WrapperEpisodeStats()
440
+
441
def reset(self) -> None:
    """Clear all per-episode state ahead of a new rollout."""
    self._active_guidance = {}
    self._next_refresh_step_by_district = {}
    self._episode_stats = WrapperEpisodeStats()
    builder = self.summary_builder
    if builder is not None:
        builder.reset()
447
+
448
def active_guidance_snapshot(self) -> dict[str, dict[str, Any]]:
    """Return the active guidance actions keyed by district id, in sorted order."""
    snapshot: dict[str, dict[str, Any]] = {}
    for district_id in sorted(self._active_guidance):
        snapshot[district_id] = self._active_guidance[district_id].decision.action.to_dict()
    return snapshot
453
+
454
def episode_debug_summary(self) -> dict[str, Any]:
    """Episode statistics augmented with the wrapper-mode/fallback settings."""
    summary = dict(self._episode_stats.to_dict())
    summary["wrapper_mode"] = self.influence_config.wrapper_mode
    summary["fallback_policy"] = self.influence_config.fallback_policy
    return summary
463
+
464
def act(self, env, observation_batch: dict[str, Any]) -> GuidedActionBatch:
    """Run the fixed policy, then add guidance biases to Q-values before argmax.

    With no guidance provider configured the base greedy actions pass
    through untouched (bias stays zero). Otherwise each row whose district
    has active guidance gets a per-action bias added before greedy
    selection; episode statistics are updated along the way.
    """
    started = perf_counter()
    base_decision = self.policy.decide(observation_batch)
    base_q_values = base_decision.q_values
    guided_q_values = base_q_values.copy()
    q_bias = np.zeros_like(guided_q_values, dtype=np.float32)

    refresh_traces = self._refresh_guidance_if_needed(env=env, observation_batch=observation_batch)
    if self.guidance_provider is None:
        # Guidance disabled: return the base policy's actions unchanged.
        self._episode_stats.step_count += 1
        return GuidedActionBatch(
            actions=base_decision.actions.copy(),
            base_actions=base_decision.actions,
            base_q_values=base_q_values,
            guided_q_values=guided_q_values,
            q_bias=q_bias,
            refresh_traces=refresh_traces,
            runtime_seconds=perf_counter() - started,
        )

    active_any = False
    decision_step = int(observation_batch.get("decision_step", 0))
    # Pre-index candidate metadata per district for O(1) lookup per row.
    candidate_lookup_by_district = {
        district_id: {
            item.intersection_id: item
            for item in active.summary.candidate_intersections
        }
        for district_id, active in self._active_guidance.items()
    }
    for row_index, intersection_id in enumerate(observation_batch["intersection_ids"]):
        district_id = str(observation_batch["district_ids"][row_index])
        active = self._active_guidance.get(district_id)
        if active is None:
            continue
        active_any = True
        candidate = candidate_lookup_by_district[district_id].get(str(intersection_id))
        row_bias = self._row_action_bias(
            active=active,
            candidate=candidate,
            intersection_id=str(intersection_id),
            current_phase=int(observation_batch["current_phase"][row_index]),
            decision_step=decision_step,
        )
        # None means "no bias for this row" (e.g. out of scope) — skip stats.
        if row_bias is None:
            continue
        q_bias[row_index] = row_bias
        guided_q_values[row_index] = guided_q_values[row_index] + row_bias
        magnitude = float(np.abs(row_bias).max())
        self._episode_stats.bias_application_count += 1
        self._episode_stats.total_bias_magnitude += magnitude
        self._episode_stats.max_bias_magnitude = max(self._episode_stats.max_bias_magnitude, magnitude)

    self._episode_stats.step_count += 1
    if active_any:
        self._episode_stats.steps_with_active_guidance += 1
    # Greedy selection over the biased Q-values.
    actions = guided_q_values.argmax(axis=1).astype(np.int64)
    return GuidedActionBatch(
        actions=actions,
        base_actions=base_decision.actions,
        base_q_values=base_q_values,
        guided_q_values=guided_q_values,
        q_bias=q_bias,
        refresh_traces=refresh_traces,
        runtime_seconds=perf_counter() - started,
    )
529
+
530
    def _refresh_guidance_if_needed(
        self,
        env,
        observation_batch: dict[str, Any],
    ) -> list[GuidanceRefreshTrace]:
        """Regenerate guidance for every district whose refresh step has arrived.

        For each due district the gate is evaluated first; blocked districts
        have any active guidance cleared, are rescheduled, and get a synthetic
        hold/no-op trace.  Allowed districts get a fresh decision from the
        provider (with fallback handling), an application plan, and are
        rescheduled at the plan's expiry step.  Returns one trace per
        refreshed district (possibly empty).
        """
        if self.guidance_provider is None or self.summary_builder is None:
            return []

        decision_step = int(observation_batch.get("decision_step", 0))
        due_districts = [
            district_id
            for district_id in tuple(sorted(env.districts))
            if self._district_requires_refresh(district_id=district_id, decision_step=decision_step)
        ]
        if not due_districts:
            return []

        summaries = self.summary_builder.build_all(env, observation_batch)
        refresh_traces: list[GuidanceRefreshTrace] = []
        gate_blocked_this_step = False
        for district_id in due_districts:
            summary = summaries[district_id]
            previous_active = self._active_guidance.get(district_id)
            gate_decision = _evaluate_guidance_gate(summary=summary, config=self.influence_config)
            if not gate_decision.allowed:
                # Gate blocked: drop active guidance, schedule a re-check, and
                # emit a hold/no-op trace for observability.
                gate_blocked_this_step = True
                self._active_guidance.pop(district_id, None)
                self._next_refresh_step_by_district[district_id] = (
                    decision_step + self._resolve_blocked_refresh_horizon()
                )
                self._episode_stats.guidance_refresh_count += 1
                self._episode_stats.guidance_blocked_refresh_count += 1
                self._episode_stats.noop_guidance_events += 1
                decision = GuidanceDecision(
                    source=f"{self.mode_source}_gate_blocked",
                    action=DistrictAction.default_hold(),
                    runtime_seconds=0.0,
                    fallback_policy_applied="gate_blocked",
                )
                # Force a no-op plan regardless of the configured wrapper mode.
                application_plan = _build_application_plan(
                    summary=summary,
                    action=decision.action,
                    config=replace(self.influence_config, wrapper_mode="no_op"),
                    district_intersection_ids=tuple(env.districts[district_id].intersection_ids),
                )
                trace = GuidanceRefreshTrace(
                    mode_source=self.mode_source,
                    district_id=district_id,
                    decision_step=decision_step,
                    summary_hash=_summary_hash(summary),
                    summary_excerpt=summary.to_prompt_text()[:240],
                    summary_payload=summary.to_dict(),
                    guidance=decision.to_trace_payload(),
                    repaired_guidance=decision.action.to_dict(),
                    fallback_used=False,
                    fallback_policy="gate_blocked",
                    application_plan=application_plan.to_dict(),
                    applied_biases={
                        "base": 0.0,
                        "target": 0.0,
                        "corridor": 0.0,
                        "strength_scale": 0.0,
                    },
                    gate_decision=gate_decision.to_dict(),
                )
                refresh_traces.append(trace)
                if self.influence_config.log_guidance_debug:
                    _log_guidance_debug(trace)
                continue

            decision, fallback_used = self._generate_guidance(
                district_id=district_id,
                summary=summary,
                previous_active=previous_active,
            )
            application_plan = _build_application_plan(
                summary=summary,
                action=decision.action,
                config=self.influence_config,
                district_intersection_ids=tuple(env.districts[district_id].intersection_ids),
            )
            expires_step = decision_step + self._resolve_refresh_horizon(decision.action)
            active = ActiveDistrictGuidance(
                district_id=district_id,
                summary=summary,
                decision=decision,
                application_plan=application_plan,
                generated_step=decision_step,
                expires_step=expires_step,
                fallback_used=fallback_used,
            )
            self._active_guidance[district_id] = active
            # The guidance expiry step doubles as the next refresh step.
            self._next_refresh_step_by_district[district_id] = int(expires_step)

            self._episode_stats.guidance_refresh_count += 1
            self._episode_stats.total_affected_intersections += len(application_plan.affected_intersections)
            self._episode_stats.total_targeted_intersections += len(application_plan.targeted_intersections)
            if application_plan.wrapper_mode == "no_op" or not application_plan.affected_intersections:
                self._episode_stats.noop_guidance_events += 1
            if fallback_used:
                self._episode_stats.fallback_event_count += 1

            trace = GuidanceRefreshTrace(
                mode_source=self.mode_source,
                district_id=district_id,
                decision_step=decision_step,
                summary_hash=_summary_hash(summary),
                summary_excerpt=summary.to_prompt_text()[:240],
                summary_payload=summary.to_dict(),
                guidance=decision.to_trace_payload(),
                repaired_guidance=decision.action.to_dict(),
                fallback_used=fallback_used,
                fallback_policy=self.influence_config.fallback_policy if fallback_used else "none",
                application_plan=application_plan.to_dict(),
                applied_biases={
                    "base": float(application_plan.base_bias_strength),
                    "target": float(application_plan.target_bias_strength),
                    "corridor": float(application_plan.corridor_bias_strength),
                    "strength_scale": float(application_plan.strength_scale),
                },
                gate_decision=gate_decision.to_dict(),
            )
            refresh_traces.append(trace)
            if self.influence_config.log_guidance_debug:
                _log_guidance_debug(trace)
        if gate_blocked_this_step:
            self._episode_stats.guidance_blocked_step_count += 1
        return refresh_traces
658
+
659
    def _generate_guidance(
        self,
        district_id: str,
        summary: DistrictStateSummary,
        previous_active: ActiveDistrictGuidance | None,
    ) -> tuple[GuidanceDecision, bool]:
        """Ask the guidance provider for a decision, applying the configured fallback.

        Returns ``(decision, fallback_used)``.  Provider exceptions are turned
        into an invalid hold decision, which then triggers the fallback ladder:
        ``hold_previous`` (reuse the last action), ``heuristic_weak`` (delegate
        to the heuristic provider), or the default no-op hold.
        """
        fallback_used = False
        try:
            decision = self.guidance_provider.generate(summary)
        except Exception as exc:
            # Broad catch is deliberate: a provider failure must never crash
            # the control loop; the error is kept on the decision for tracing.
            decision = GuidanceDecision(
                source=self.guidance_provider.source_name,
                action=DistrictAction.default_hold(),
                runtime_seconds=0.0,
                provider_error=str(exc),
                json_valid=False,
                schema_valid_before_repair=False,
            )
        if not _should_fallback(decision):
            return decision, fallback_used

        fallback_used = True
        fallback_policy = self.influence_config.fallback_policy
        if fallback_policy == "hold_previous" and previous_active is not None:
            # Reuse the previous action but keep the failed decision's
            # diagnostics (raw text, repair report, errors) for the trace.
            fallback_decision = GuidanceDecision(
                source=f"{decision.source}_fallback_hold_previous",
                action=previous_active.decision.action,
                runtime_seconds=decision.runtime_seconds,
                raw_text=decision.raw_text,
                parsed_payload_before_repair=decision.parsed_payload_before_repair,
                repair_report=decision.repair_report,
                json_valid=decision.json_valid,
                schema_valid_before_repair=decision.schema_valid_before_repair,
                provider_error=decision.provider_error,
                fallback_policy_applied=fallback_policy,
            )
            return fallback_decision, fallback_used

        if fallback_policy == "heuristic_weak" and self.heuristic_provider is not None:
            fallback_decision = self.heuristic_provider.generate(summary)
            fallback_decision.fallback_policy_applied = fallback_policy
            return fallback_decision, fallback_used

        # Default fallback: a no-op hold that still carries the diagnostics.
        fallback_decision = GuidanceDecision(
            source=f"{decision.source}_fallback_no_op",
            action=DistrictAction.default_hold(),
            runtime_seconds=decision.runtime_seconds,
            raw_text=decision.raw_text,
            parsed_payload_before_repair=decision.parsed_payload_before_repair,
            repair_report=decision.repair_report,
            json_valid=decision.json_valid,
            schema_valid_before_repair=decision.schema_valid_before_repair,
            provider_error=decision.provider_error,
            fallback_policy_applied=fallback_policy,
        )
        return fallback_decision, fallback_used
715
+
716
+ def _district_requires_refresh(self, district_id: str, decision_step: int) -> bool:
717
+ next_refresh_step = self._next_refresh_step_by_district.get(district_id)
718
+ if next_refresh_step is None:
719
+ return True
720
+ return decision_step >= int(next_refresh_step)
721
+
722
+ def _resolve_refresh_horizon(self, action: DistrictAction) -> int:
723
+ requested = max(1, min(int(action.duration_steps), self.influence_config.max_guidance_duration))
724
+ return min(
725
+ requested,
726
+ int(self.influence_config.guidance_refresh_steps),
727
+ int(self.influence_config.guidance_persistence_steps),
728
+ )
729
+
730
+ def _resolve_blocked_refresh_horizon(self) -> int:
731
+ return max(
732
+ 1,
733
+ min(
734
+ int(self.influence_config.guidance_refresh_steps),
735
+ int(self.influence_config.guidance_persistence_steps),
736
+ ),
737
+ )
738
+
739
    def _row_action_bias(
        self,
        active: ActiveDistrictGuidance,
        candidate: CandidateIntersection | None,
        intersection_id: str,
        current_phase: int,
        decision_step: int,
    ) -> np.ndarray | None:
        """Compute the additive Q-value bias for one intersection row, or None.

        Returns None when guidance does not apply to this row (no-op plan, row
        outside the plan's affected set, no resolvable direction, or a final
        magnitude of zero).  Otherwise returns a length-2 float32 vector with
        the magnitude placed on the preferred action.
        """
        plan = active.application_plan
        if plan.wrapper_mode == "no_op":
            return None
        if intersection_id not in set(plan.affected_intersections):
            return None

        preferred_action = _preferred_action_for_direction(
            direction=plan.priority_direction,
            current_phase=current_phase,
        )
        if preferred_action is None:
            return None

        # Optional linear decay over the guidance lifetime, floored at 0.25 so
        # aging guidance never fully vanishes before expiry.
        decay = 1.0
        if self.influence_config.enable_bias_decay:
            horizon = max(1, active.expires_step - active.generated_step)
            age = max(0, decision_step - active.generated_step)
            if self.influence_config.bias_decay_schedule == "linear":
                decay = max(0.25, 1.0 - (float(age) / float(horizon)))

        # Stack bias components: base, explicit target bonus, corridor-alignment
        # bonus, and a half-strength boundary bonus for corridor/global scopes.
        magnitude = plan.base_bias_strength * plan.strength_scale * decay
        if intersection_id in set(plan.targeted_intersections):
            magnitude += plan.target_bias_strength * plan.strength_scale * decay
        if candidate is not None and plan.priority_direction in {"NS", "EW"}:
            if candidate.corridor_alignment == plan.priority_direction:
                magnitude += plan.corridor_bias_strength * plan.strength_scale * decay
            if candidate.is_boundary and plan.scope in {"corridor_local", "global"}:
                magnitude += 0.5 * plan.corridor_bias_strength * plan.strength_scale * decay

        # Strategy-specific scaling of the final magnitude (table lookup,
        # defaulting to 1.0 for unlisted strategies).
        strategy_multiplier = STRATEGY_BIAS_MULTIPLIERS.get(active.decision.action.strategy, 1.0)
        magnitude *= strategy_multiplier
        if magnitude <= 0.0:
            return None

        # NOTE(review): assumes a 2-action space per intersection — confirm
        # against the policy's Q-value width.
        bias = np.zeros(2, dtype=np.float32)
        bias[preferred_action] += float(magnitude)
        return bias
784
+
785
+
786
def _build_application_plan(
    summary: DistrictStateSummary,
    action: DistrictAction,
    config: GuidanceInfluenceConfig,
    district_intersection_ids: tuple[str, ...],
) -> GuidanceApplicationPlan:
    """Translate a district action into a concrete bias-application plan.

    The wrapper mode in ``config`` selects scope and strengths:

    - ``no_op``: nothing applied (zero strengths, empty affected set).
    - ``current_legacy``: district-wide bias with floored strengths.
    - ``target_only_soft`` / ``target_only_medium``: only the action's
      candidate-validated targets, at half / full strength scale.
    - ``corridor_soft``: targets plus aligned boundary candidates.
    - any other value: a weak district-wide ``global_soft`` plan.
    """
    wrapper_mode = config.wrapper_mode
    # Keep only requested targets that are actual candidates in the summary.
    target_ids = tuple(
        intersection_id
        for intersection_id in action.target_intersections
        if intersection_id in {item.intersection_id for item in summary.candidate_intersections}
    )
    candidate_lookup = {
        item.intersection_id: item
        for item in summary.candidate_intersections
    }
    priority_direction = _resolve_guidance_direction(action=action, summary=summary)
    if wrapper_mode == "no_op":
        return GuidanceApplicationPlan(
            wrapper_mode=wrapper_mode,
            scope="none",
            affected_intersections=(),
            targeted_intersections=target_ids,
            target_candidate_ids=tuple(candidate_lookup),
            priority_direction=priority_direction,
            strength_scale=0.0,
            base_bias_strength=0.0,
            target_bias_strength=0.0,
            corridor_bias_strength=0.0,
            apply_global_bias=False,
            apply_target_only=True,
            max_intersections_affected=0,
        )

    if wrapper_mode == "current_legacy":
        # Legacy behaviour: bias every intersection in the district, with
        # minimum-floor strengths.
        affected = tuple(district_intersection_ids)
        return GuidanceApplicationPlan(
            wrapper_mode=wrapper_mode,
            scope="global",
            affected_intersections=affected,
            targeted_intersections=target_ids,
            target_candidate_ids=tuple(candidate_lookup),
            priority_direction=priority_direction,
            strength_scale=1.0,
            base_bias_strength=float(max(config.bias_strength, 0.75)),
            target_bias_strength=float(max(config.target_only_bias_strength, 1.25)),
            corridor_bias_strength=float(max(config.corridor_bias_strength, 0.5)),
            apply_global_bias=True,
            apply_target_only=False,
            max_intersections_affected=max(len(affected), config.max_intersections_affected),
        )

    if wrapper_mode in {"target_only_soft", "target_only_medium"}:
        strength_scale = 0.5 if wrapper_mode == "target_only_soft" else 1.0
        affected = target_ids[: config.max_intersections_affected]
        return GuidanceApplicationPlan(
            wrapper_mode=wrapper_mode,
            scope="targeted",
            affected_intersections=affected,
            targeted_intersections=target_ids,
            target_candidate_ids=tuple(candidate_lookup),
            priority_direction=priority_direction,
            strength_scale=strength_scale,
            base_bias_strength=float(config.bias_strength),
            target_bias_strength=float(config.target_only_bias_strength),
            corridor_bias_strength=float(config.corridor_bias_strength),
            apply_global_bias=False,
            apply_target_only=True,
            max_intersections_affected=config.max_intersections_affected,
        )

    if wrapper_mode == "corridor_soft":
        # Extend the target list with boundary candidates aligned to the
        # priority direction, then cap the affected count.
        ranked = list(target_ids)
        extras = [
            item.intersection_id
            for item in summary.candidate_intersections
            if item.intersection_id not in ranked
            and item.is_boundary
            and (priority_direction is None or item.corridor_alignment == priority_direction)
        ]
        affected = tuple((ranked + extras)[: config.max_intersections_affected])
        return GuidanceApplicationPlan(
            wrapper_mode=wrapper_mode,
            scope="corridor_local",
            affected_intersections=affected,
            targeted_intersections=target_ids,
            target_candidate_ids=tuple(candidate_lookup),
            priority_direction=priority_direction,
            strength_scale=0.6,
            base_bias_strength=float(config.bias_strength),
            target_bias_strength=float(config.target_only_bias_strength),
            corridor_bias_strength=float(config.corridor_bias_strength),
            apply_global_bias=False,
            apply_target_only=False,
            max_intersections_affected=config.max_intersections_affected,
        )

    # Fallback for any unrecognized mode: weak district-wide bias.
    affected_global = tuple(district_intersection_ids)
    return GuidanceApplicationPlan(
        wrapper_mode="global_soft",
        scope="global",
        affected_intersections=affected_global,
        targeted_intersections=target_ids,
        target_candidate_ids=tuple(candidate_lookup),
        priority_direction=priority_direction,
        strength_scale=0.35,
        base_bias_strength=float(config.bias_strength),
        target_bias_strength=float(config.target_only_bias_strength),
        corridor_bias_strength=float(config.corridor_bias_strength),
        apply_global_bias=True,
        apply_target_only=False,
        max_intersections_affected=config.max_intersections_affected,
    )
899
+
900
+
901
+ def _should_fallback(decision: GuidanceDecision) -> bool:
902
+ if decision.provider_error is not None:
903
+ return True
904
+ if not decision.json_valid or not decision.schema_valid_before_repair:
905
+ return True
906
+ report = decision.repair_report
907
+ if report is None:
908
+ return False
909
+ return bool(
910
+ report.fallback_used
911
+ or report.empty_after_filtering
912
+ )
913
+
914
+
915
def _evaluate_guidance_gate(
    summary: DistrictStateSummary,
    config: GuidanceInfluenceConfig,
) -> GuidanceGateDecision:
    """Apply the configured gating mode to a district summary and explain the outcome."""
    imbalance = abs(float(summary.ns_queue) - float(summary.ew_queue))
    queue_hot = float(summary.avg_queue) >= float(config.min_avg_queue_for_guidance)
    imbalance_hot = imbalance >= float(config.min_queue_imbalance_for_guidance)
    disruption = bool(summary.incident_flag or summary.spillback_risk or summary.overload_flag)
    triggers = {
        "incident_or_spillback": disruption,
        "queue_threshold": queue_hot,
        "imbalance_threshold": imbalance_hot,
    }
    triggered = tuple(name for name, hit in triggers.items() if hit)

    mode = config.gating_mode
    # Dispatch table instead of an if/elif chain; unknown modes default to
    # "any trigger fires".
    mode_rules = {
        "always_on": True,
        "incident_or_spillback": disruption,
        "queue_threshold": queue_hot,
        "imbalance_threshold": imbalance_hot,
        "queue_or_imbalance": queue_hot or imbalance_hot,
    }
    allowed = mode_rules.get(mode, disruption or queue_hot or imbalance_hot)

    blocked: list[str] = []
    if config.require_incident_or_spillback and not disruption:
        allowed = False
        blocked.append("requires_incident_or_spillback")
    if not config.allow_guidance_in_normal_conditions and not triggered:
        allowed = False
        blocked.append("normal_conditions_blocked")
    if not allowed and not blocked:
        blocked.append(f"gating_mode:{mode}")

    return GuidanceGateDecision(
        allowed=allowed,
        gating_mode=mode,
        triggered_conditions=triggered,
        blocked_reasons=tuple(blocked),
        avg_queue=float(summary.avg_queue),
        queue_imbalance=float(imbalance),
        incident_flag=bool(summary.incident_flag),
        spillback_risk=bool(summary.spillback_risk),
        overload_flag=bool(summary.overload_flag),
    )
964
+
965
+
966
+ def _resolve_guidance_direction(action: DistrictAction, summary: DistrictStateSummary) -> str | None:
967
+ if action.phase_bias in {"NS", "EW"}:
968
+ return action.phase_bias
969
+ if action.priority_corridor in {"NS", "EW"}:
970
+ return action.priority_corridor
971
+ if summary.dominant_flow in {"NS", "EW"}:
972
+ return summary.dominant_flow
973
+ return None
974
+
975
+
976
+ def _preferred_action_for_direction(direction: str | None, current_phase: int) -> int | None:
977
+ if direction == "NS":
978
+ return 0 if current_phase == 0 else 1
979
+ if direction == "EW":
980
+ return 0 if current_phase != 0 else 1
981
+ return None
982
+
983
+
984
+ def _summary_hash(summary: DistrictStateSummary) -> str:
985
+ return hashlib.sha1(summary.to_json().encode("utf-8")).hexdigest()[:16]
986
+
987
+
988
def guidance_config_payload(config: GuidanceInfluenceConfig) -> dict[str, Any]:
    """Serialize the influence config to a plain dict, validating it first."""
    # Validate first so the payload always reflects a self-consistent config.
    return asdict(config.validate())
990
+
991
+
992
def _log_guidance_debug(trace: GuidanceRefreshTrace) -> None:
    """Print a one-line, human-readable record of a guidance refresh event."""
    gate = trace.gate_decision
    fields = " ".join(
        (
            f"mode={trace.mode_source}",
            f"district={trace.district_id}",
            f"wrapper_mode={trace.application_plan['wrapper_mode']}",
            f"gate_allowed={gate.get('allowed') if gate else True}",
            f"scope={trace.application_plan['scope']}",
            f"targets={trace.repaired_guidance.get('target_intersections', [])}",
            f"affected={trace.application_plan['affected_intersections']}",
            f"fallback_used={trace.fallback_used}",
            f"fallback_policy={trace.fallback_policy}",
        )
    )
    print(f"[guidance-debug] {fields}")
district_llm/schema.py ADDED
@@ -0,0 +1,429 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from dataclasses import dataclass, field
5
+ from typing import Any
6
+
7
+
8
# Closed vocabulary of district-level strategies a guidance source may emit.
DISTRICT_STRATEGIES: tuple[str, ...] = (
    "hold",
    "favor_NS",
    "favor_EW",
    "drain_inbound",
    "drain_outbound",
    "clear_spillback",
    "incident_response",
    "arterial_priority",
)
# Allowed phase-bias values ("NONE" = no directional preference).
PHASE_BIASES: tuple[str, ...] = ("NONE", "NS", "EW")
# Allowed priority-corridor labels for a DistrictAction.
PRIORITY_CORRIDORS: tuple[str, ...] = (
    "NS",
    "EW",
    "inbound",
    "outbound",
    "arterial",
)
# Allowed dominant-flow / corridor-alignment labels.
DOMINANT_FLOWS: tuple[str, ...] = ("NS", "EW", "BALANCED")
# Canonical ordering of candidate-selection reason tags; reason lists are
# normalized to this order so serialized output is deterministic.
CANDIDATE_REASON_TAGS: tuple[str, ...] = (
    "congested",
    "boundary",
    "spillback",
    "incident",
    "outgoing",
    "overload",
    "event",
)
36
+
37
+
38
+ def _round_float(value: float, digits: int = 3) -> float:
39
+ return round(float(value), digits)
40
+
41
+
42
+ def _dedupe_string_list(values: list[str] | tuple[str, ...] | None, limit: int | None = None) -> list[str]:
43
+ normalized: list[str] = []
44
+ seen: set[str] = set()
45
+ for item in values or []:
46
+ value = str(item).strip()
47
+ if not value or value in seen:
48
+ continue
49
+ normalized.append(value)
50
+ seen.add(value)
51
+ if limit is not None and len(normalized) >= limit:
52
+ break
53
+ return normalized
54
+
55
+
56
def _stable_reason_list(values: list[str] | tuple[str, ...] | None) -> list[str]:
    """Keep only known reason tags, emitted in canonical CANDIDATE_REASON_TAGS order."""
    wanted = {str(raw).strip() for raw in (values or ()) if str(raw).strip()}
    return [tag for tag in CANDIDATE_REASON_TAGS if tag in wanted]
59
+
60
+
61
def candidate_priority_score(candidate: "CandidateIntersection | dict[str, Any]") -> float:
    """Scalar urgency score: weighted queue/wait/outgoing load plus flat risk-marker bonuses."""
    item = candidate.to_dict() if hasattr(candidate, "to_dict") else dict(candidate)
    weighted_metrics = (
        (1.0, "queue_total"),
        (1.5, "wait_total"),
        (0.5, "outgoing_load"),
    )
    flag_bonuses = (
        (2.0, "spillback_risk"),
        (1.5, "incident_proximity"),
        (1.0, "is_boundary"),
        (0.75, "event_proximity"),
        (0.75, "overload_marker"),
    )
    score = 0.0
    for weight, key in weighted_metrics:
        score += weight * float(item.get(key, 0.0))
    for bonus, key in flag_bonuses:
        if item.get(key, False):
            score += bonus
    return score
73
+
74
+
75
def candidate_priority_tuple(candidate: "CandidateIntersection | dict[str, Any]") -> tuple[float, float, float, float, str]:
    """Deterministic sort key: (priority score, queue, wait, outgoing load, id)."""
    item = candidate.to_dict() if hasattr(candidate, "to_dict") else dict(candidate)
    score = candidate_priority_score(item)
    queue = float(item.get("queue_total", 0.0))
    wait = float(item.get("wait_total", 0.0))
    outgoing = float(item.get("outgoing_load", 0.0))
    identifier = str(item.get("intersection_id", ""))
    return (score, queue, wait, outgoing, identifier)
84
+
85
+
86
def canonicalize_target_intersections(
    targets: list[str] | tuple[str, ...] | None,
    candidates: list["CandidateIntersection | dict[str, Any]"] | None = None,
    limit: int | None = None,
) -> list[str]:
    """Normalize a target list and order it by descending candidate priority.

    Targets are stripped/deduped first.  When *candidates* are supplied, known
    targets sort by priority (highest first, id as tiebreaker); unknown targets
    sort after all known ones.  *limit* truncates the final list.
    """
    normalized = _dedupe_string_list(targets, limit=None)
    if not candidates:
        return normalized[:limit] if limit is not None else normalized

    # Compute each candidate's priority tuple ONCE (the previous version
    # recomputed it five times per candidate, re-deriving the score each time)
    # and negate the numeric components so ascending sort yields highest first.
    candidate_order: dict[str, tuple[float, float, float, float, str]] = {}
    for candidate in candidates:
        score, queue, wait, outgoing, identifier = candidate_priority_tuple(candidate)
        candidate_order[identifier] = (-score, -queue, -wait, -outgoing, identifier)
    # Unknown targets get a key that sorts after every known candidate.
    normalized.sort(key=lambda item: candidate_order.get(item, (1.0, 1.0, 1.0, 1.0, item)))
    if limit is not None:
        normalized = normalized[:limit]
    return normalized
109
+
110
+
111
@dataclass
class CongestedIntersection:
    """Compact row describing one congested intersection in a district summary."""

    intersection_id: str
    queue_total: float
    wait_total: float
    outgoing_load: float
    current_phase: int
    is_boundary: bool

    def to_dict(self) -> dict[str, Any]:
        """Serialize with floats rounded to 3 decimals."""
        return {
            "intersection_id": self.intersection_id,
            "queue_total": _round_float(self.queue_total),
            "wait_total": _round_float(self.wait_total),
            "outgoing_load": _round_float(self.outgoing_load),
            "current_phase": int(self.current_phase),
            "is_boundary": bool(self.is_boundary),
        }

    def to_prompt_line(self) -> str:
        """Render one compact bullet line for inclusion in a prompt."""
        parts = (
            f"- {self.intersection_id}",
            f"q={self.queue_total:.2f}",
            f"w={self.wait_total:.2f}",
            f"out={self.outgoing_load:.2f}",
            f"phase={self.current_phase}",
            f"boundary={int(self.is_boundary)}",
        )
        return " ".join(parts)
139
+
140
+
141
@dataclass
class CandidateIntersection:
    """Candidate row enriched with risk markers and the reasons it was selected."""

    intersection_id: str
    queue_total: float
    wait_total: float
    outgoing_load: float
    current_phase: int
    is_boundary: bool
    spillback_risk: bool = False
    incident_proximity: bool = False
    overload_marker: bool = False
    event_proximity: bool = False
    corridor_alignment: str = "BALANCED"
    selection_reasons: list[str] = field(default_factory=list)

    def validate(self) -> "CandidateIntersection":
        """Check corridor_alignment against the vocabulary and canonicalize reasons in place."""
        if self.corridor_alignment not in DOMINANT_FLOWS:
            raise ValueError(
                f"Invalid corridor_alignment '{self.corridor_alignment}'. Expected one of {DOMINANT_FLOWS}."
            )
        self.selection_reasons = _stable_reason_list(self.selection_reasons)
        return self

    def to_dict(self) -> dict[str, Any]:
        """Serialize (after validation) with floats rounded to 3 decimals."""
        self.validate()
        payload: dict[str, Any] = {
            "intersection_id": self.intersection_id,
            "queue_total": _round_float(self.queue_total),
            "wait_total": _round_float(self.wait_total),
            "outgoing_load": _round_float(self.outgoing_load),
            "current_phase": int(self.current_phase),
            "is_boundary": bool(self.is_boundary),
            "spillback_risk": bool(self.spillback_risk),
            "incident_proximity": bool(self.incident_proximity),
            "overload_marker": bool(self.overload_marker),
            "event_proximity": bool(self.event_proximity),
            "corridor_alignment": self.corridor_alignment,
            "selection_reasons": list(self.selection_reasons),
        }
        return payload

    def to_prompt_line(self) -> str:
        """Render one compact single-line form (after validation) for prompts."""
        self.validate()
        reason_text = "|".join(self.selection_reasons) or "none"
        parts = (
            f"- {self.intersection_id}",
            f"q={self.queue_total:.2f}",
            f"w={self.wait_total:.2f}",
            f"out={self.outgoing_load:.2f}",
            f"phase={self.current_phase}",
            f"boundary={int(self.is_boundary)}",
            f"spillback={int(self.spillback_risk)}",
            f"incident={int(self.incident_proximity)}",
            f"overload={int(self.overload_marker)}",
            f"event={int(self.event_proximity)}",
            f"align={self.corridor_alignment}",
            f"reasons={reason_text}",
        )
        return " ".join(parts)
198
+
199
+
200
@dataclass
class DistrictAction:
    """Validated district-level guidance action with a closed strategy vocabulary."""

    strategy: str = "hold"
    priority_corridor: str | None = None
    target_intersections: list[str] = field(default_factory=list)
    phase_bias: str = "NONE"
    duration_steps: int = 1

    def validate(self) -> "DistrictAction":
        """Raise ValueError on any out-of-vocabulary field; canonicalize targets in place."""
        if self.strategy not in DISTRICT_STRATEGIES:
            raise ValueError(
                f"Invalid strategy '{self.strategy}'. Expected one of {DISTRICT_STRATEGIES}."
            )
        if self.priority_corridor is not None and self.priority_corridor not in PRIORITY_CORRIDORS:
            raise ValueError(
                f"Invalid priority_corridor '{self.priority_corridor}'. "
                f"Expected one of {PRIORITY_CORRIDORS} or None."
            )
        if self.phase_bias not in PHASE_BIASES:
            raise ValueError(
                f"Invalid phase_bias '{self.phase_bias}'. Expected one of {PHASE_BIASES}."
            )
        if not isinstance(self.duration_steps, int):
            raise ValueError("duration_steps must be an integer.")
        if not 1 <= self.duration_steps <= 20:
            raise ValueError("duration_steps must be between 1 and 20.")
        # At most 8 distinct, non-empty target ids are kept.
        self.target_intersections = _dedupe_string_list(self.target_intersections, limit=8)
        return self

    @classmethod
    def default_hold(cls, duration_steps: int = 1) -> "DistrictAction":
        """Safe no-op action: hold with no corridor, targets, or phase bias."""
        clamped = max(1, min(int(duration_steps), 20))
        return cls(
            strategy="hold",
            priority_corridor=None,
            target_intersections=[],
            phase_bias="NONE",
            duration_steps=clamped,
        )

    @classmethod
    def from_dict(cls, payload: dict[str, Any]) -> "DistrictAction":
        """Build and validate an action from a plain dict, filling defaults."""
        action = cls(
            strategy=str(payload.get("strategy", "hold")),
            priority_corridor=payload.get("priority_corridor"),
            target_intersections=list(payload.get("target_intersections", [])),
            phase_bias=str(payload.get("phase_bias", "NONE")),
            duration_steps=int(payload.get("duration_steps", 1)),
        )
        return action.validate()

    @classmethod
    def from_json(cls, payload: str) -> "DistrictAction":
        """Build and validate an action from a JSON string."""
        return cls.from_dict(json.loads(payload))

    def to_dict(self) -> dict[str, Any]:
        """Serialize the validated action to a plain dict."""
        self.validate()
        return {
            "strategy": self.strategy,
            "priority_corridor": self.priority_corridor,
            "target_intersections": list(self.target_intersections),
            "phase_bias": self.phase_bias,
            "duration_steps": int(self.duration_steps),
        }

    def to_json(self) -> str:
        """Canonical compact JSON (sorted keys, no whitespace)."""
        return json.dumps(self.to_dict(), sort_keys=True, separators=(",", ":"))

    def to_pretty_json(self) -> str:
        """Human-readable JSON (sorted keys, 2-space indent)."""
        return json.dumps(self.to_dict(), sort_keys=True, indent=2)

    def to_rl_context(self) -> dict[str, Any]:
        """Dict form with keys renamed for consumption by the RL wrapper."""
        payload = self.to_dict()
        payload["district_strategy"] = payload.pop("strategy")
        payload["district_duration_steps"] = payload.pop("duration_steps")
        return payload
274
+
275
+
276
@dataclass
class DistrictStateSummary:
    """Aggregated, LLM-facing snapshot of a single district at one decision step.

    Produced by ``DistrictStateSummaryBuilder`` and serialized either as
    canonical JSON (``to_json``) for datasets or as a line-oriented prompt
    (``to_prompt_text``) for the district LLM. ``validate()`` enforces the
    dominant-flow enum and caps list lengths; serializers call it implicitly.
    """

    # Identity / provenance.
    city_id: str
    district_id: str
    district_type: str
    scenario_name: str
    scenario_type: str
    decision_step: int
    sim_time: int
    intersection_count: int
    # Queue statistics aggregated over the district's intersections.
    avg_queue: float
    max_queue: float
    total_queue: float
    # Waiting-vehicle statistics.
    avg_wait: float
    max_wait: float
    total_wait: float
    # Outgoing-lane load statistics.
    avg_outgoing_load: float
    max_outgoing_load: float
    total_outgoing_load: float
    # Trend features relative to the previous decision step.
    recent_throughput: float
    queue_change: float
    wait_change: float
    throughput_change: float
    # Directional pressure split (north-south vs east-west).
    ns_queue: float
    ew_queue: float
    ns_wait: float
    ew_wait: float
    # One of DOMINANT_FLOWS (checked in validate()).
    dominant_flow: str
    # Boundary-intersection aggregates and scenario condition flags.
    boundary_queue_total: float
    boundary_wait_total: float
    spillback_risk: bool
    incident_flag: bool
    construction_flag: bool
    overload_flag: bool
    event_flag: bool
    top_congested_intersections: list[CongestedIntersection] = field(default_factory=list)
    candidate_intersections: list[CandidateIntersection] = field(default_factory=list)

    def validate(self) -> "DistrictStateSummary":
        """Check enum fields and clamp list lengths; returns ``self`` for chaining.

        Raises:
            ValueError: if ``dominant_flow`` is not one of ``DOMINANT_FLOWS``.
        """
        if self.dominant_flow not in DOMINANT_FLOWS:
            raise ValueError(
                f"Invalid dominant_flow '{self.dominant_flow}'. Expected one of {DOMINANT_FLOWS}."
            )
        # Hard caps keep serialized summaries bounded: at most 5 congested and
        # 8 candidate intersections survive validation (lists are re-copied).
        self.top_congested_intersections = list(self.top_congested_intersections[:5])
        self.candidate_intersections = list(self.candidate_intersections[:8])
        return self

    def candidate_ids(self) -> list[str]:
        """Return the candidate intersection ids in their stored order."""
        self.validate()
        return [item.intersection_id for item in self.candidate_intersections]

    def candidate_lookup(self) -> dict[str, CandidateIntersection]:
        """Return candidate intersections keyed by intersection id."""
        self.validate()
        return {
            item.intersection_id: item
            for item in self.candidate_intersections
        }

    def to_dict(self) -> dict[str, Any]:
        """Return a JSON-serializable dict with floats rounded via _round_float."""
        self.validate()
        return {
            "city_id": self.city_id,
            "district_id": self.district_id,
            "district_type": self.district_type,
            "scenario_name": self.scenario_name,
            "scenario_type": self.scenario_type,
            "decision_step": int(self.decision_step),
            "sim_time": int(self.sim_time),
            "intersection_count": int(self.intersection_count),
            "avg_queue": _round_float(self.avg_queue),
            "max_queue": _round_float(self.max_queue),
            "total_queue": _round_float(self.total_queue),
            "avg_wait": _round_float(self.avg_wait),
            "max_wait": _round_float(self.max_wait),
            "total_wait": _round_float(self.total_wait),
            "avg_outgoing_load": _round_float(self.avg_outgoing_load),
            "max_outgoing_load": _round_float(self.max_outgoing_load),
            "total_outgoing_load": _round_float(self.total_outgoing_load),
            "recent_throughput": _round_float(self.recent_throughput),
            "queue_change": _round_float(self.queue_change),
            "wait_change": _round_float(self.wait_change),
            "throughput_change": _round_float(self.throughput_change),
            "ns_queue": _round_float(self.ns_queue),
            "ew_queue": _round_float(self.ew_queue),
            "ns_wait": _round_float(self.ns_wait),
            "ew_wait": _round_float(self.ew_wait),
            "dominant_flow": self.dominant_flow,
            "boundary_queue_total": _round_float(self.boundary_queue_total),
            "boundary_wait_total": _round_float(self.boundary_wait_total),
            "spillback_risk": bool(self.spillback_risk),
            "incident_flag": bool(self.incident_flag),
            "construction_flag": bool(self.construction_flag),
            "overload_flag": bool(self.overload_flag),
            "event_flag": bool(self.event_flag),
            "top_congested_intersections": [
                item.to_dict() for item in self.top_congested_intersections
            ],
            "candidate_intersections": [
                item.to_dict() for item in self.candidate_intersections
            ],
        }

    def to_json(self) -> str:
        """Serialize to a compact, key-sorted JSON string (canonical form)."""
        return json.dumps(self.to_dict(), sort_keys=True, separators=(",", ":"))

    def to_prompt_text(self) -> str:
        """Render the summary as the line-oriented text fed into LLM prompts.

        The exact field order and formatting is part of the prompt contract;
        do not reorder or reformat these lines.
        """
        self.validate()
        top_lines = [item.to_prompt_line() for item in self.top_congested_intersections]
        candidate_lines = [item.to_prompt_line() for item in self.candidate_intersections]
        # Placeholder keeps the section headers meaningful when lists are empty.
        if not top_lines:
            top_lines = ["- none"]
        if not candidate_lines:
            candidate_lines = ["- none"]
        return "\n".join(
            [
                f"city_id: {self.city_id}",
                f"district_id: {self.district_id}",
                f"district_type: {self.district_type}",
                f"scenario: {self.scenario_name}",
                f"scenario_type: {self.scenario_type}",
                f"decision_step: {self.decision_step}",
                f"sim_time: {self.sim_time}",
                f"intersection_count: {self.intersection_count}",
                f"avg_queue: {self.avg_queue:.2f}",
                f"max_queue: {self.max_queue:.2f}",
                f"total_queue: {self.total_queue:.2f}",
                f"avg_wait: {self.avg_wait:.2f}",
                f"max_wait: {self.max_wait:.2f}",
                f"total_wait: {self.total_wait:.2f}",
                f"avg_outgoing_load: {self.avg_outgoing_load:.2f}",
                f"max_outgoing_load: {self.max_outgoing_load:.2f}",
                f"total_outgoing_load: {self.total_outgoing_load:.2f}",
                f"recent_throughput: {self.recent_throughput:.2f}",
                f"queue_change: {self.queue_change:.2f}",
                f"wait_change: {self.wait_change:.2f}",
                f"throughput_change: {self.throughput_change:.2f}",
                f"ns_queue: {self.ns_queue:.2f}",
                f"ew_queue: {self.ew_queue:.2f}",
                f"ns_wait: {self.ns_wait:.2f}",
                f"ew_wait: {self.ew_wait:.2f}",
                f"dominant_flow: {self.dominant_flow}",
                f"boundary_queue_total: {self.boundary_queue_total:.2f}",
                f"boundary_wait_total: {self.boundary_wait_total:.2f}",
                f"spillback_risk: {int(self.spillback_risk)}",
                f"incident_flag: {int(self.incident_flag)}",
                f"construction_flag: {int(self.construction_flag)}",
                f"overload_flag: {int(self.overload_flag)}",
                f"event_flag: {int(self.event_flag)}",
                "top_congested_intersections:",
                *top_lines,
                "candidate_intersections:",
                *candidate_lines,
            ]
        )
district_llm/summary_builder.py ADDED
@@ -0,0 +1,413 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from pathlib import Path
5
+ from typing import Any
6
+
7
+ import numpy as np
8
+
9
+ from district_llm.schema import CandidateIntersection, CongestedIntersection, DistrictStateSummary, candidate_priority_score
10
+ from env.utils import load_json
11
+
12
+
13
@dataclass
class _SummaryContext:
    """Mutable per-episode state the builder carries between decision steps."""

    # Last step's summaries keyed by district id (for trend deltas).
    previous_summaries: dict[str, DistrictStateSummary]
    # City-wide finished-vehicle count at the previous step.
    previous_finished_vehicles: int
17
+
18
+
19
class DistrictStateSummaryBuilder:
    """Builds per-district ``DistrictStateSummary`` objects from a live env.

    The builder lazily caches scenario metadata and road topology on first use
    and remembers the previous step's summaries so trend features
    (queue_change, wait_change, throughput_change) can be derived.
    Call ``reset()`` when the env switches episodes.
    """

    def __init__(self, top_k: int = 3, candidate_limit: int = 6):
        # top_k: how many most-congested intersections to report per district.
        # candidate_limit: max candidate intersections exposed downstream.
        self.top_k = int(top_k)
        self.candidate_limit = int(candidate_limit)
        self._context = _SummaryContext(previous_summaries={}, previous_finished_vehicles=0)
        self._scenario_metadata: dict[str, Any] | None = None
        self._road_endpoints: dict[str, tuple[str, str]] | None = None
        self._incident_intersections: set[str] = set()

    def reset(self) -> None:
        """Drop all cached episode state (metadata, topology, trend context)."""
        self._context = _SummaryContext(previous_summaries={}, previous_finished_vehicles=0)
        self._scenario_metadata = None
        self._road_endpoints = None
        self._incident_intersections = set()

    def build_all(self, env, observation_batch: dict[str, Any]) -> dict[str, DistrictStateSummary]:
        """Build one summary per district for the current decision step."""
        # Load scenario metadata / roadnet topology once per episode.
        if self._scenario_metadata is None:
            metadata_path = Path(env.scenario_dir) / "scenario_metadata.json"
            self._scenario_metadata = load_json(metadata_path) if metadata_path.exists() else {}
            self._road_endpoints = self._load_road_endpoints(Path(env.roadnet_path))
            self._incident_intersections = self._derive_incident_intersections()

        lane_vehicle_count = env.adapter.get_lane_vehicle_count()
        finished_vehicles = int(env.adapter.get_finished_vehicle_count())
        district_summaries: dict[str, DistrictStateSummary] = {}

        for district_id in env.districts:
            district_summaries[district_id] = self._build_single(
                env=env,
                observation_batch=observation_batch,
                lane_vehicle_count=lane_vehicle_count,
                district_id=district_id,
                finished_vehicles=finished_vehicles,
            )

        # Remember this step so the next call can compute trend deltas.
        self._context.previous_summaries = district_summaries
        self._context.previous_finished_vehicles = finished_vehicles
        return district_summaries

    def _build_single(
        self,
        env,
        observation_batch: dict[str, Any],
        lane_vehicle_count: dict[str, int],
        district_id: str,
        finished_vehicles: int,
    ) -> DistrictStateSummary:
        """Aggregate per-intersection observations into one district summary."""
        district = env.districts[district_id]
        scenario_metadata = self._scenario_metadata or {}
        intersection_ids = observation_batch["intersection_ids"]
        district_ids = observation_batch["district_ids"]
        incoming_counts = observation_batch["incoming_counts"]
        incoming_waiting = observation_batch["incoming_waiting"]
        current_phase = observation_batch["current_phase"]

        queue_totals: list[float] = []
        wait_totals: list[float] = []
        outgoing_loads: list[float] = []
        ns_queue = 0.0
        ew_queue = 0.0
        ns_wait = 0.0
        ew_wait = 0.0
        boundary_queue_total = 0.0
        boundary_wait_total = 0.0
        congestion_items: list[CongestedIntersection] = []
        candidate_seed_items: list[dict[str, Any]] = []

        for index, intersection_id in enumerate(intersection_ids):
            # The batch covers the whole city; keep only this district's rows.
            if district_ids[index] != district_id:
                continue

            queue_total = float(np.asarray(incoming_counts[index], dtype=np.float32).sum())
            wait_total = float(np.asarray(incoming_waiting[index], dtype=np.float32).sum())
            outgoing_load = self._compute_outgoing_load(
                env=env,
                lane_vehicle_count=lane_vehicle_count,
                intersection_id=intersection_id,
            )
            queue_totals.append(queue_total)
            wait_totals.append(wait_total)
            outgoing_loads.append(outgoing_load)

            # Lane layout convention: first half of the padded lane slots are
            # treated as NS, second half as EW — TODO confirm against the
            # observation builder.
            midpoint = incoming_counts.shape[1] // 2
            ns_queue_local = float(np.asarray(incoming_counts[index][:midpoint], dtype=np.float32).sum())
            ew_queue_local = float(np.asarray(incoming_counts[index][midpoint:], dtype=np.float32).sum())
            ns_wait_local = float(np.asarray(incoming_waiting[index][:midpoint], dtype=np.float32).sum())
            ew_wait_local = float(np.asarray(incoming_waiting[index][midpoint:], dtype=np.float32).sum())
            ns_queue += ns_queue_local
            ew_queue += ew_queue_local
            ns_wait += ns_wait_local
            ew_wait += ew_wait_local

            intersection_config = env.intersections[intersection_id]
            if intersection_config.is_boundary:
                boundary_queue_total += queue_total
                boundary_wait_total += wait_total

            congestion_items.append(
                CongestedIntersection(
                    intersection_id=intersection_id,
                    queue_total=queue_total,
                    wait_total=wait_total,
                    outgoing_load=outgoing_load,
                    current_phase=int(current_phase[index]),
                    is_boundary=bool(intersection_config.is_boundary),
                )
            )
            candidate_seed_items.append(
                {
                    "intersection_id": intersection_id,
                    "queue_total": queue_total,
                    "wait_total": wait_total,
                    "outgoing_load": outgoing_load,
                    "current_phase": int(current_phase[index]),
                    "is_boundary": bool(intersection_config.is_boundary),
                    # Per-intersection spillback heuristic: heavy outgoing load
                    # relative to local queue (lower bar at boundaries).
                    "spillback_risk": bool(
                        outgoing_load >= max(6.0, queue_total * 0.6)
                        or (
                            intersection_config.is_boundary
                            and outgoing_load >= max(4.0, queue_total * 0.4)
                        )
                    ),
                    "incident_proximity": intersection_id in self._incident_intersections,
                    "corridor_alignment": self._compute_corridor_alignment(
                        ns_queue=ns_queue_local,
                        ew_queue=ew_queue_local,
                        ns_wait=ns_wait_local,
                        ew_wait=ew_wait_local,
                    ),
                }
            )

        # Fall back to a single zero so mean()/max() stay defined for an
        # empty district.
        queue_array = np.asarray(queue_totals or [0.0], dtype=np.float32)
        wait_array = np.asarray(wait_totals or [0.0], dtype=np.float32)
        outgoing_array = np.asarray(outgoing_loads or [0.0], dtype=np.float32)

        previous_summary = self._context.previous_summaries.get(district_id)
        # NOTE(review): finished_vehicles is a city-wide counter, so every
        # district reports the same recent_throughput; the first step (or a
        # legitimately-zero previous count) yields 0.0 — confirm intended.
        recent_throughput = float(
            finished_vehicles - self._context.previous_finished_vehicles
            if self._context.previous_finished_vehicles
            else 0.0
        )
        queue_change = 0.0 if previous_summary is None else float(queue_array.sum() - previous_summary.total_queue)
        wait_change = 0.0 if previous_summary is None else float(wait_array.sum() - previous_summary.total_wait)
        throughput_change = (
            0.0
            if previous_summary is None
            else recent_throughput - previous_summary.recent_throughput
        )

        # Directional pressure: waiting vehicles weighted 1.5x over raw queue;
        # a 10% margin is required before a direction is declared dominant.
        directional_ns = ns_queue + 1.5 * ns_wait
        directional_ew = ew_queue + 1.5 * ew_wait
        if directional_ns > directional_ew * 1.1:
            dominant_flow = "NS"
        elif directional_ew > directional_ns * 1.1:
            dominant_flow = "EW"
        else:
            dominant_flow = "BALANCED"

        boundary_share = boundary_queue_total / max(1.0, float(queue_array.sum()))
        spillback_risk = bool(
            outgoing_array.max() >= max(8.0, queue_array.max() * 0.5)
            or (boundary_share >= 0.6 and queue_change >= 0.0)
        )

        top_intersections = sorted(
            congestion_items,
            key=lambda item: (item.queue_total + 1.5 * item.wait_total + 0.5 * item.outgoing_load),
            reverse=True,
        )[: self.top_k]

        # Scenario flags derived from scenario_metadata.json.
        overload_flag = bool(
            scenario_metadata.get("overload_district") == district_id
            or (scenario_metadata.get("name") == "district_overload" and queue_array.sum() >= 25.0)
        )
        event_flag = bool(scenario_metadata.get("event_district") == district_id)
        incident_flag = bool(
            scenario_metadata.get("name") in {"accident", "construction"}
            or bool(scenario_metadata.get("blocked_roads"))
        )
        construction_flag = bool(scenario_metadata.get("name") == "construction")
        candidate_intersections = self._build_candidate_intersections(
            candidate_seed_items=candidate_seed_items,
            overload_flag=overload_flag,
            event_flag=event_flag,
        )

        return DistrictStateSummary(
            city_id=env.city_id,
            district_id=district_id,
            district_type=district.district_type,
            scenario_name=env.scenario_name,
            scenario_type=str(scenario_metadata.get("intensity", env.scenario_name)),
            decision_step=int(observation_batch["decision_step"]),
            sim_time=int(observation_batch["sim_time"]),
            intersection_count=int(len(district.intersection_ids)),
            avg_queue=float(queue_array.mean()),
            max_queue=float(queue_array.max()),
            total_queue=float(queue_array.sum()),
            avg_wait=float(wait_array.mean()),
            max_wait=float(wait_array.max()),
            total_wait=float(wait_array.sum()),
            avg_outgoing_load=float(outgoing_array.mean()),
            max_outgoing_load=float(outgoing_array.max()),
            total_outgoing_load=float(outgoing_array.sum()),
            recent_throughput=recent_throughput,
            queue_change=queue_change,
            wait_change=wait_change,
            throughput_change=throughput_change,
            ns_queue=ns_queue,
            ew_queue=ew_queue,
            ns_wait=ns_wait,
            ew_wait=ew_wait,
            dominant_flow=dominant_flow,
            boundary_queue_total=boundary_queue_total,
            boundary_wait_total=boundary_wait_total,
            spillback_risk=spillback_risk,
            incident_flag=incident_flag,
            construction_flag=construction_flag,
            overload_flag=overload_flag,
            event_flag=event_flag,
            top_congested_intersections=top_intersections,
            candidate_intersections=candidate_intersections,
        ).validate()

    @staticmethod
    def _compute_outgoing_load(env, lane_vehicle_count: dict[str, int], intersection_id: str) -> float:
        """Sum current vehicle counts over an intersection's outgoing lanes."""
        intersection_config = env.intersections[intersection_id]
        if not intersection_config.outgoing_lanes:
            return 0.0
        return float(
            sum(float(lane_vehicle_count.get(lane_id, 0)) for lane_id in intersection_config.outgoing_lanes)
        )

    @staticmethod
    def _compute_corridor_alignment(
        ns_queue: float,
        ew_queue: float,
        ns_wait: float,
        ew_wait: float,
    ) -> str:
        """Classify a single intersection's dominant direction (same weighting
        and 10% margin as the district-level dominant_flow)."""
        ns_pressure = ns_queue + 1.5 * ns_wait
        ew_pressure = ew_queue + 1.5 * ew_wait
        if ns_pressure > ew_pressure * 1.1:
            return "NS"
        if ew_pressure > ns_pressure * 1.1:
            return "EW"
        return "BALANCED"

    @staticmethod
    def _load_road_endpoints(roadnet_path: Path) -> dict[str, tuple[str, str]]:
        """Map road id -> (start intersection id, end intersection id)."""
        roadnet = load_json(roadnet_path)
        return {
            str(road["id"]): (
                str(road["startIntersection"]),
                str(road["endIntersection"]),
            )
            for road in roadnet.get("roads", [])
        }

    def _derive_incident_intersections(self) -> set[str]:
        """Collect intersections touching blocked/accident/construction roads.

        Falls back to 'penalized_roads' keys when no explicit incident road
        list is present in the scenario metadata.
        """
        if not self._road_endpoints:
            return set()
        scenario_metadata = self._scenario_metadata or {}
        details = scenario_metadata.get("details", {})
        incident_roads = list(scenario_metadata.get("blocked_roads", []))
        incident_roads.extend(details.get("accident_roads", []))
        incident_roads.extend(details.get("construction_roads", []))
        if not incident_roads:
            incident_roads.extend(list((scenario_metadata.get("penalized_roads") or {}).keys()))

        intersections: set[str] = set()
        for road_id in incident_roads:
            endpoints = self._road_endpoints.get(str(road_id))
            if endpoints is None:
                continue
            intersections.update(endpoints)
        return intersections

    def _build_candidate_intersections(
        self,
        candidate_seed_items: list[dict[str, Any]],
        overload_flag: bool,
        event_flag: bool,
    ) -> list[CandidateIntersection]:
        """Select up to candidate_limit intersections, tagged with the reasons
        (congested / boundary / spillback / incident / outgoing / overload /
        event) that earned them a slot; ordered by priority score."""
        if not candidate_seed_items or self.candidate_limit <= 0:
            return []

        def severity_key(item: dict[str, Any]) -> tuple[float, float, float, float, str]:
            # Build a throwaway CandidateIntersection only to reuse the shared
            # priority-score heuristic; reasons are filled in later.
            candidate = CandidateIntersection(
                intersection_id=str(item["intersection_id"]),
                queue_total=float(item["queue_total"]),
                wait_total=float(item["wait_total"]),
                outgoing_load=float(item["outgoing_load"]),
                current_phase=int(item["current_phase"]),
                is_boundary=bool(item["is_boundary"]),
                spillback_risk=bool(item["spillback_risk"]),
                incident_proximity=bool(item["incident_proximity"]),
                overload_marker=overload_flag,
                event_proximity=event_flag,
                corridor_alignment=str(item["corridor_alignment"]),
                selection_reasons=[],
            )
            return (
                candidate_priority_score(candidate),
                float(item["queue_total"]),
                float(item["wait_total"]),
                float(item["outgoing_load"]),
                str(item["intersection_id"]),
            )

        # FIX: compute the severity tuple once per item. The previous version
        # invoked severity_key() five times per element inside the sort-key
        # lambda, constructing and scoring a CandidateIntersection each time.
        # The resulting ordering is identical.
        def overall_sort_key(item: dict[str, Any]) -> tuple[float, float, float, float, str]:
            score, queue_total, wait_total, outgoing_load, intersection_id = severity_key(item)
            return (-score, -queue_total, -wait_total, -outgoing_load, intersection_id)

        overall_sorted = sorted(candidate_seed_items, key=overall_sort_key)
        boundary_sorted = [item for item in overall_sorted if item["is_boundary"]]
        spillback_sorted = [item for item in overall_sorted if item["spillback_risk"]]
        incident_sorted = [item for item in overall_sorted if item["incident_proximity"]]
        outgoing_sorted = sorted(
            candidate_seed_items,
            key=lambda item: (
                -float(item["outgoing_load"]),
                -float(item["queue_total"]),
                -float(item["wait_total"]),
                str(item["intersection_id"]),
            ),
        )

        reason_tags: dict[str, set[str]] = {}
        selected_ids: list[str] = []

        def mark(items: list[dict[str, Any]], tag: str, limit: int) -> None:
            # Tag the first `limit` items with `tag` and add new ids in order.
            for item in items[:limit]:
                intersection_id = str(item["intersection_id"])
                reason_tags.setdefault(intersection_id, set()).add(tag)
                if intersection_id not in selected_ids:
                    selected_ids.append(intersection_id)

        mark(overall_sorted, "congested", max(1, min(self.top_k, self.candidate_limit)))
        mark(boundary_sorted, "boundary", min(2, self.candidate_limit))
        mark(spillback_sorted, "spillback", min(2, self.candidate_limit))
        mark(incident_sorted, "incident", min(2, self.candidate_limit))
        mark(outgoing_sorted, "outgoing", min(2, self.candidate_limit))
        if overload_flag:
            mark(overall_sorted, "overload", min(2, self.candidate_limit))
        if event_flag:
            event_seed = boundary_sorted if boundary_sorted else outgoing_sorted
            mark(event_seed, "event", min(2, self.candidate_limit))

        # Top up with the overall severity ranking until the limit is reached.
        for item in overall_sorted:
            if len(selected_ids) >= self.candidate_limit:
                break
            intersection_id = str(item["intersection_id"])
            if intersection_id in selected_ids:
                continue
            selected_ids.append(intersection_id)
            reason_tags.setdefault(intersection_id, {"congested"})

        seed_lookup = {
            str(item["intersection_id"]): item
            for item in candidate_seed_items
        }
        candidates = [
            CandidateIntersection(
                intersection_id=intersection_id,
                queue_total=float(seed_lookup[intersection_id]["queue_total"]),
                wait_total=float(seed_lookup[intersection_id]["wait_total"]),
                outgoing_load=float(seed_lookup[intersection_id]["outgoing_load"]),
                current_phase=int(seed_lookup[intersection_id]["current_phase"]),
                is_boundary=bool(seed_lookup[intersection_id]["is_boundary"]),
                spillback_risk=bool(seed_lookup[intersection_id]["spillback_risk"]),
                incident_proximity=bool(seed_lookup[intersection_id]["incident_proximity"]),
                overload_marker=overload_flag,
                event_proximity=event_flag,
                corridor_alignment=str(seed_lookup[intersection_id]["corridor_alignment"]),
                selection_reasons=sorted(reason_tags.get(intersection_id, {"congested"})),
            ).validate()
            for intersection_id in selected_ids[: self.candidate_limit]
        ]
        return sorted(
            candidates,
            key=lambda item: (
                -candidate_priority_score(item),
                -item.queue_total,
                -item.wait_total,
                -item.outgoing_load,
                item.intersection_id,
            ),
        )
district_llm/teachers.py ADDED
@@ -0,0 +1,227 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from abc import ABC, abstractmethod
5
+ from dataclasses import dataclass
6
+ from pathlib import Path
7
+ from typing import Any
8
+
9
+ import numpy as np
10
+
11
+ from agents.local_policy import (
12
+ BaseLocalPolicy,
13
+ FixedCyclePolicy,
14
+ HoldPhasePolicy,
15
+ QueueGreedyPolicy,
16
+ RandomPhasePolicy,
17
+ )
18
+
19
+
20
# Names of the non-RL baseline controllers accepted by build_teacher().
BASELINE_TYPES: tuple[str, ...] = ("hold", "fixed", "random", "queue_greedy")
21
+
22
+
23
@dataclass(frozen=True)
class TeacherMetadata:
    """Immutable provenance record describing a teacher controller."""

    controller_type: str
    controller_id: str
    controller_family: str
    teacher_algorithm: str
    checkpoint_path: str | None = None

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict view of the record, in field-declaration order."""
        field_names = (
            "controller_type",
            "controller_id",
            "controller_family",
            "teacher_algorithm",
            "checkpoint_path",
        )
        return {name: getattr(self, name) for name in field_names}
39
+
40
+
41
class BaseTeacher(ABC):
    """Abstract interface shared by all teacher controllers.

    A teacher maps a batched observation dict to one action per controlled
    intersection. Concrete subclasses wrap heuristic baselines or trained RL
    checkpoints.
    """

    def __init__(self, metadata: TeacherMetadata):
        # Provenance metadata recorded alongside generated traces.
        self.metadata = metadata

    @property
    def env_config(self) -> Any | None:
        # Subclasses that carry a training-time env config override this.
        return None

    @abstractmethod
    def act(self, observation_batch: dict[str, Any]) -> np.ndarray:
        raise NotImplementedError
52
+
53
+
54
class BaselineTeacher(BaseTeacher):
    """Adapts a heuristic local policy to the teacher interface."""

    def __init__(self, policy: BaseLocalPolicy, metadata: TeacherMetadata):
        super().__init__(metadata=metadata)
        self.policy = policy

    def act(self, observation_batch: dict[str, Any]) -> np.ndarray:
        # Normalize whatever the wrapped policy returns to an int64 array.
        raw_actions = self.policy.act(observation_batch)
        return np.asarray(raw_actions, dtype=np.int64)
61
+
62
+
63
class RLCheckpointTeacher(BaseTeacher):
    """Teacher that replays a trained DQN checkpoint to produce actions."""

    def __init__(
        self,
        checkpoint_path: str | Path,
        device: str | None = None,
        deterministic: bool = True,
    ):
        # PyTorch is an optional dependency; fail with a clear message.
        try:
            import torch
        except ImportError as exc:
            raise ImportError(
                "RL checkpoint teachers require PyTorch to be installed."
            ) from exc

        from training.models import RunningNormalizer, TrafficControlQNetwork
        from training.train_local_policy import load_env_config

        checkpoint_path = Path(checkpoint_path)
        self._torch = torch
        self.device = torch.device(device or ("cuda" if torch.cuda.is_available() else "cpu"))
        # SECURITY NOTE: weights_only=False unpickles arbitrary objects from
        # the checkpoint file — only load checkpoints from trusted sources.
        self.checkpoint = torch.load(
            checkpoint_path,
            map_location=self.device,
            weights_only=False,
        )
        # Architecture may live under 'network_architecture' or the older
        # 'policy_architecture' key — presumably for backward compatibility
        # with earlier checkpoint formats; verify against the trainer.
        network_architecture = self.checkpoint.get("network_architecture") or self.checkpoint.get(
            "policy_architecture",
            {},
        )
        trainer_config = self.checkpoint.get("dqn_config", {})
        policy_arch = network_architecture.get(
            "policy_arch",
            trainer_config.get("policy_arch", "single_head_with_district_feature"),
        )
        # Rebuild the Q-network from the recorded architecture and config
        # (falling back to defaults where keys are absent).
        self.model = TrafficControlQNetwork(
            observation_dim=int(network_architecture["observation_dim"]),
            action_dim=int(network_architecture.get("action_dim", 2)),
            hidden_dim=int(trainer_config.get("hidden_dim", 256)),
            num_layers=int(trainer_config.get("hidden_layers", 2)),
            district_types=tuple(network_architecture.get("district_types", ())),
            policy_arch=policy_arch,
            dueling=bool(network_architecture.get("dueling", True)),
        ).to(self.device)
        # Weights key also has an old ('policy_state_dict') and new
        # ('q_network_state_dict') spelling.
        self.model.load_state_dict(
            self.checkpoint.get("q_network_state_dict") or self.checkpoint["policy_state_dict"]
        )
        self.model.eval()
        # Optional observation normalizer saved alongside the weights.
        self.obs_normalizer = None
        if self.checkpoint.get("obs_normalizer"):
            self.obs_normalizer = RunningNormalizer()
            self.obs_normalizer.load_state_dict(self.checkpoint["obs_normalizer"])

        checkpoint_id = checkpoint_path.stem
        super().__init__(
            metadata=TeacherMetadata(
                controller_type="rl_checkpoint",
                controller_id=checkpoint_id,
                controller_family="dqn",
                teacher_algorithm="dqn",
                checkpoint_path=str(checkpoint_path),
            )
        )
        self.deterministic = bool(deterministic)
        # Env config captured at training time, if the checkpoint stored one.
        self._env_config = (
            load_env_config(self.checkpoint["env_config"])
            if self.checkpoint.get("env_config")
            else None
        )

    @property
    def env_config(self) -> Any | None:
        return self._env_config

    def act(self, observation_batch: dict[str, Any]) -> np.ndarray:
        """Select one action per intersection from the batched observation.

        Applies the saved observation normalizer (when present) and queries
        the Q-network with epsilon=0 under no_grad.
        """
        torch = self._torch
        raw_obs = observation_batch["observations"].astype(np.float32)
        normalized_obs = self.obs_normalizer.normalize(raw_obs) if self.obs_normalizer else raw_obs
        obs_tensor = torch.as_tensor(normalized_obs, dtype=torch.float32, device=self.device)
        district_type_tensor = torch.as_tensor(
            observation_batch["district_type_indices"],
            dtype=torch.int64,
            device=self.device,
        )
        action_mask_tensor = torch.as_tensor(
            observation_batch["action_mask"],
            dtype=torch.float32,
            device=self.device,
        )
        with torch.no_grad():
            actions = self.model.act(
                observations=obs_tensor,
                district_type_indices=district_type_tensor,
                action_mask=action_mask_tensor,
                deterministic=self.deterministic,
                epsilon=0.0,
            )
        return actions.cpu().numpy().astype(np.int64)
160
+
161
+
162
def build_teacher(
    controller_type: str,
    checkpoint: str | None = None,
    fixed_green_time: int = 20,
    seed: int = 7,
    device: str | None = None,
) -> BaseTeacher:
    """Instantiate a teacher controller by name.

    Raises ValueError for an unknown controller type, or for
    'rl_checkpoint' without a checkpoint path.
    """
    if controller_type == "rl_checkpoint":
        if not checkpoint:
            raise ValueError("controller_type='rl_checkpoint' requires --checkpoint.")
        return RLCheckpointTeacher(checkpoint_path=checkpoint, device=device)

    # Baseline dispatch table: (policy factory, controller_id, algorithm).
    # Factories keep policy construction lazy — only the matched entry runs.
    baseline_specs = {
        "hold": (HoldPhasePolicy, "hold", "hold"),
        "fixed": (
            lambda: FixedCyclePolicy(green_time=fixed_green_time),
            f"fixed_{fixed_green_time}",
            "fixed_cycle",
        ),
        "random": (
            lambda: RandomPhasePolicy(seed=seed),
            f"random_{seed}",
            "random",
        ),
        "queue_greedy": (QueueGreedyPolicy, "queue_greedy", "queue_greedy"),
    }
    if controller_type in baseline_specs:
        policy_factory, controller_id, teacher_algorithm = baseline_specs[controller_type]
        return BaselineTeacher(
            policy=policy_factory(),
            metadata=TeacherMetadata(
                controller_type=controller_type,
                controller_id=controller_id,
                controller_family="baseline",
                teacher_algorithm=teacher_algorithm,
            ),
        )
    raise ValueError(
        f"Unsupported controller_type '{controller_type}'. "
        f"Expected rl_checkpoint or one of {BASELINE_TYPES}."
    )
217
+
218
+
219
def parse_teacher_spec(spec: str) -> tuple[str, str | None]:
    """Split a 'type' or 'type=checkpoint' spec into its two parts.

    An empty checkpoint segment ('type=') is normalized to None.
    """
    controller_type, separator, checkpoint_path = spec.partition("=")
    if not separator:
        return spec.strip(), None
    return controller_type.strip(), checkpoint_path.strip() or None
224
+
225
+
226
def teachers_metadata_json(teachers: list[BaseTeacher]) -> str:
    """Serialize each teacher's metadata to a key-sorted JSON array string."""
    metadata_records = [teacher.metadata.to_dict() for teacher in teachers]
    return json.dumps(metadata_records, sort_keys=True)
district_llm/train_unsloth.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ from pathlib import Path
5
+
6
+ from district_llm.data import load_jsonl_text_dataset
7
+
8
+
9
def parse_args() -> argparse.Namespace:
    """Parse command-line options for district-LLM SFT with Unsloth/QLoRA."""
    parser = argparse.ArgumentParser(
        description="Supervised fine-tune a district LLM on DQN-derived district traces with Unsloth/QLoRA."
    )
    add = parser.add_argument
    # Data and output locations.
    add("--dataset", required=True, help="JSONL dataset with a 'text' field.")
    add("--output-dir", required=True)
    # Base model and sequence settings.
    add("--model-name", default="Qwen/Qwen2.5-7B-Instruct")
    add("--max-seq-length", type=int, default=1024)
    add("--load-in-4bit", action="store_true")
    # LoRA adapter hyperparameters.
    add("--lora-rank", type=int, default=16)
    add("--lora-alpha", type=int, default=16)
    add("--lora-dropout", type=float, default=0.0)
    # Optimization schedule.
    add("--batch-size", type=int, default=2)
    add("--gradient-accumulation-steps", type=int, default=8)
    add("--learning-rate", type=float, default=2e-4)
    add("--warmup-steps", type=int, default=50)
    add("--max-steps", type=int, default=500)
    add("--logging-steps", type=int, default=10)
    add("--save-steps", type=int, default=100)
    add("--seed", type=int, default=7)
    add("--dataset-num-proc", type=int, default=2)
    # Optional evaluation dataset and checkpoint resumption.
    add("--eval-dataset", default=None)
    add("--resume-from-checkpoint", default=None)
    add(
        "--include-non-dqn-sources",
        action="store_true",
        help="By default the trainer keeps only DQN-derived rows (controller_family=dqn).",
    )
    return parser.parse_args()
38
+
39
+
40
def main() -> None:
    """Entry point: load the JSONL dataset, attach a LoRA adapter with
    Unsloth, run TRL's SFTTrainer, and save the adapter + tokenizer."""
    args = parse_args()

    # Heavy training deps are imported lazily so the CLI can report a clear
    # error instead of failing at module import time.
    try:
        import torch
        from trl import SFTTrainer
        from transformers import TrainingArguments
        from unsloth import FastLanguageModel
    except ImportError as exc:
        raise ImportError(
            "District LLM training requires 'unsloth' and 'trl'. "
            "Install them in the active environment before running this entry point."
        ) from exc

    # By default only DQN-derived rows are kept (controller_family == "dqn").
    controller_families = None if args.include_non_dqn_sources else ["dqn"]
    train_dataset = load_jsonl_text_dataset(
        args.dataset,
        controller_families=controller_families,
    )
    eval_dataset = (
        load_jsonl_text_dataset(
            args.eval_dataset,
            controller_families=controller_families,
        )
        if args.eval_dataset
        else None
    )
    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # dtype=None lets Unsloth pick the best dtype for the hardware.
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=args.model_name,
        max_seq_length=args.max_seq_length,
        dtype=None,
        load_in_4bit=bool(args.load_in_4bit),
    )
    # Attach LoRA adapters to all attention and MLP projection matrices.
    model = FastLanguageModel.get_peft_model(
        model,
        r=args.lora_rank,
        target_modules=[
            "q_proj",
            "k_proj",
            "v_proj",
            "o_proj",
            "gate_proj",
            "up_proj",
            "down_proj",
        ],
        lora_alpha=args.lora_alpha,
        lora_dropout=args.lora_dropout,
        bias="none",
        use_gradient_checkpointing="unsloth",
        random_state=args.seed,
    )

    # NOTE(review): newer TRL versions renamed 'tokenizer' to
    # 'processing_class', and newer transformers renamed
    # 'evaluation_strategy' to 'eval_strategy' — confirm both against the
    # pinned library versions before upgrading.
    trainer = SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        dataset_text_field="text",
        max_seq_length=args.max_seq_length,
        dataset_num_proc=args.dataset_num_proc,
        packing=False,
        args=TrainingArguments(
            output_dir=str(output_dir),
            per_device_train_batch_size=args.batch_size,
            gradient_accumulation_steps=args.gradient_accumulation_steps,
            warmup_steps=args.warmup_steps,
            max_steps=args.max_steps,
            learning_rate=args.learning_rate,
            logging_steps=args.logging_steps,
            save_steps=args.save_steps,
            # Prefer bf16 where supported, otherwise fp16 on CUDA.
            bf16=bool(torch.cuda.is_available() and torch.cuda.is_bf16_supported()),
            fp16=bool(torch.cuda.is_available() and not torch.cuda.is_bf16_supported()),
            optim="paged_adamw_8bit",
            lr_scheduler_type="cosine",
            seed=args.seed,
            report_to="none",
            # Evaluate on the save cadence only when an eval set is provided.
            evaluation_strategy="steps" if eval_dataset is not None else "no",
            eval_steps=args.save_steps if eval_dataset is not None else None,
        ),
    )
    trainer.train(resume_from_checkpoint=args.resume_from_checkpoint)
    # Persist the adapter weights and tokenizer next to the trainer output.
    model.save_pretrained(str(output_dir))
    tokenizer.save_pretrained(str(output_dir))
125
+ tokenizer.save_pretrained(str(output_dir))
126
+
127
+
128
# CLI entry point: run fine-tuning only when executed as a script.
if __name__ == "__main__":
    main()
env/README.md ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # env
2
+
3
+ CityFlow environment implementation for intersection-level RL with district-type metadata.
4
+
5
+ ## Main files
6
+
7
+ - [traffic_env.py](traffic_env.py)
8
+ Main environment. One episode corresponds to one `(city, scenario)` pair.
9
+ - [cityflow_adapter.py](cityflow_adapter.py)
10
+ Thin wrapper around the CityFlow Python engine.
11
+ - [observation_builder.py](observation_builder.py)
12
+ Converts variable city topology into fixed-size per-intersection tensors.
13
+ - [reward.py](reward.py)
14
+ Configurable local reward calculation.
15
+ - [utils.py](utils.py)
16
+ Topology parsing and helper functions.
17
+ - [intersection_config.py](intersection_config.py)
18
+ Internal topology dataclasses.
19
+
20
+ ## How it works
21
+
22
+ - Reads `roadnet.json`, `district_map.json`, and district types from `metadata.json`.
23
+ - Identifies non-virtual controllable intersections with at least two green phases.
24
+ - Uses one action per controllable intersection.
25
+ - Enforces `min_green_time` inside the environment.
26
+ - Advances CityFlow for `decision_interval` simulator steps between policy decisions.
27
+ - Returns a batched observation for all controlled intersections.
28
+
29
+ ## Observation model
30
+
31
+ Per intersection:
32
+
33
+ - padded incoming lane vehicle counts
34
+ - padded incoming lane waiting counts
35
+ - incoming lane mask
36
+ - current green phase index
37
+ - elapsed time in current phase
38
+ - optional outgoing congestion summary
39
+ - district-type one-hot features
40
+ - optional small district context
41
+ - boundary-intersection indicator
42
+
43
+ The observation dimension is exposed as `TrafficEnv.observation_dim`.
env/__init__.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from env.intersection_config import DistrictConfig, IntersectionConfig, PhaseConfig
2
+ from env.observation_builder import ObservationBuilder, ObservationConfig
3
+ from env.reward import RewardCalculator, RewardConfig
4
+ from env.traffic_env import EnvConfig, TrafficEnv
5
+ from env.utils import build_topology
6
+
7
+ __all__ = [
8
+ "DistrictConfig",
9
+ "EnvConfig",
10
+ "IntersectionConfig",
11
+ "ObservationBuilder",
12
+ "ObservationConfig",
13
+ "PhaseConfig",
14
+ "RewardCalculator",
15
+ "RewardConfig",
16
+ "TrafficEnv",
17
+ "build_topology",
18
+ ]
env/cityflow_adapter.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ from typing import Any
5
+
6
+
7
class CityFlowAdapter:
    """Thin wrapper around the CityFlow engine.

    Adds bookkeeping on top of the raw engine: the last phase explicitly set
    per intersection (fallback when the engine exposes no phase getter) and
    the set of active vehicle ids, from which finished-vehicle counts are
    derived when the engine lacks a dedicated accessor.
    """

    def __init__(self, config_path: str | Path, thread_num: int = 1):
        self.config_path = str(config_path)
        self.thread_num = int(thread_num)
        self.engine = None
        self._phase_cache: dict[str, int] = {}
        self._active_vehicle_ids: set[str] = set()
        self._finished_vehicle_ids: set[str] = set()

    def reset(self) -> None:
        """Create a fresh engine instance and clear all per-episode state."""
        try:
            import cityflow
        except ImportError as exc:
            raise RuntimeError(
                "CityFlow is not installed. Install the CityFlow Python bindings "
                "before running smoke tests, training, or evaluation."
            ) from exc

        self.engine = cityflow.Engine(self.config_path, thread_num=self.thread_num)
        self._phase_cache.clear()
        self._finished_vehicle_ids.clear()
        self._active_vehicle_ids = self._fetch_active_vehicle_ids()

    def step(self) -> None:
        """Advance the engine one tick and update vehicle bookkeeping."""
        self._require_engine()
        self.engine.next_step()
        now_active = self._fetch_active_vehicle_ids()
        # Anything that was active before but is absent now has finished.
        self._finished_vehicle_ids |= self._active_vehicle_ids - now_active
        self._active_vehicle_ids = now_active

    def set_tl_phase(self, intersection_id: str, phase: int) -> None:
        """Set the traffic-light phase and remember it in the local cache."""
        self._require_engine()
        phase_index = int(phase)
        self.engine.set_tl_phase(intersection_id, phase_index)
        self._phase_cache[intersection_id] = phase_index

    def get_tl_phase(self, intersection_id: str) -> int:
        """Return the current phase via the engine when possible, else the cache."""
        self._require_engine()
        if not hasattr(self.engine, "get_tl_phase"):
            return int(self._phase_cache.get(intersection_id, 0))
        phase = int(self.engine.get_tl_phase(intersection_id))
        self._phase_cache[intersection_id] = phase
        return phase

    def get_lane_vehicle_count(self) -> dict[str, int]:
        """Return per-lane vehicle counts coerced to plain ints."""
        self._require_engine()
        raw = self.engine.get_lane_vehicle_count()
        return {lane_id: int(count) for lane_id, count in raw.items()}

    def get_lane_waiting_vehicle_count(self) -> dict[str, int]:
        """Return per-lane waiting-vehicle counts coerced to plain ints."""
        self._require_engine()
        raw = self.engine.get_lane_waiting_vehicle_count()
        return {lane_id: int(count) for lane_id, count in raw.items()}

    def get_current_time(self) -> int:
        """Return the simulation clock, truncated to whole seconds."""
        self._require_engine()
        return int(self.engine.get_current_time())

    def get_vehicle_count(self) -> int:
        """Return the active-vehicle count (tracked id set as fallback)."""
        self._require_engine()
        if hasattr(self.engine, "get_vehicle_count"):
            return int(self.engine.get_vehicle_count())
        return len(self._active_vehicle_ids)

    def get_average_travel_time(self) -> float | None:
        """Return the engine's average travel time, or None if unsupported."""
        self._require_engine()
        if hasattr(self.engine, "get_average_travel_time"):
            return float(self.engine.get_average_travel_time())
        return None

    def get_finished_vehicle_count(self) -> int:
        """Return the finished-vehicle count (tracked id set as fallback)."""
        self._require_engine()
        if hasattr(self.engine, "get_finished_vehicle_count"):
            return int(self.engine.get_finished_vehicle_count())
        return len(self._finished_vehicle_ids)

    def get_active_vehicle_ids(self) -> set[str]:
        """Return a defensive copy of the active vehicle id set."""
        return set(self._active_vehicle_ids)

    def _fetch_active_vehicle_ids(self) -> set[str]:
        """Read active vehicle ids from the engine; empty set when unavailable."""
        engine = self.engine
        if engine is None or not hasattr(engine, "get_vehicles"):
            return set()
        vehicles = engine.get_vehicles()
        return set(vehicles.keys() if isinstance(vehicles, dict) else vehicles)

    def _require_engine(self) -> None:
        """Raise when an engine-backed method is called before reset()."""
        if self.engine is None:
            raise RuntimeError(
                "CityFlow engine has not been initialized. Call reset() before use."
            )
env/district_summary.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
class DistrictSummaryBuilder:
    """Builds a compact per-district summary from live simulator state."""

    def build(self, adapter, district_config):
        """Return a summary dict for one district.

        Parameters
        ----------
        adapter:
            Object exposing ``get_lane_waiting_vehicle_count() -> dict[str, int]``
            (e.g. ``CityFlowAdapter``).
        district_config:
            Object with ``id`` and ``intersection_ids`` attributes.
            NOTE(review): ``DistrictConfig`` in ``env.intersection_config``
            exposes ``district_id``, not ``id`` — confirm which type callers
            actually pass here.

        Returns
        -------
        dict with ``district_id``, ``intersection_ids`` and ``avg_wait`` keys.
        ``avg_wait`` is the mean waiting count over all lanes reported by the
        adapter; 0.0 when no lanes are reported (the original raised
        ZeroDivisionError in that case).
        """
        waiting = adapter.get_lane_waiting_vehicle_count()
        avg_wait = sum(waiting.values()) / len(waiting) if waiting else 0.0

        return {
            "district_id": district_config.id,
            "intersection_ids": district_config.intersection_ids,
            "avg_wait": avg_wait,
        }
env/intersection_config.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+
5
# Closed vocabulary of district types used throughout the env package.
DISTRICT_TYPES: tuple[str, ...] = (
    "residential",
    "commercial",
    "industrial",
    "mixed",
)
# Stable index per district type; ObservationBuilder uses these indices for
# its 4-wide one-hot district-type feature.
DISTRICT_TYPE_TO_INDEX: dict[str, int] = {
    district_type: index for index, district_type in enumerate(DISTRICT_TYPES)
}
# Fallback district type when none is specified.
DEFAULT_DISTRICT_TYPE = "mixed"
15
+
16
+
17
@dataclass(frozen=True)
class PhaseConfig:
    """One green phase of an intersection's signal plan."""

    engine_phase_index: int  # phase index used when calling the CityFlow engine
    available_road_links: tuple[int, ...]  # road-link indices active in this phase
    incoming_lanes_served: tuple[str, ...]  # incoming lane ids associated with this phase
    outgoing_lanes_served: tuple[str, ...]  # outgoing lane ids associated with this phase
23
+
24
+
25
@dataclass(frozen=True)
class IntersectionConfig:
    """Static topology and district metadata for one controllable intersection."""

    intersection_id: str
    district_id: str
    district_type: str  # expected to be one of DISTRICT_TYPES
    district_type_index: int  # index of ``district_type`` (see DISTRICT_TYPE_TO_INDEX)
    incoming_lanes: tuple[str, ...]
    outgoing_lanes: tuple[str, ...]
    is_boundary: bool  # boundary-intersection indicator, exposed as an observation feature
    green_phases: tuple[PhaseConfig, ...]  # selectable green phases, in position order
    all_phase_indices: tuple[int, ...]
    initial_engine_phase_index: int

    @property
    def num_green_phases(self) -> int:
        """Number of selectable green phases at this intersection."""
        return len(self.green_phases)
41
+
42
+
43
@dataclass(frozen=True)
class DistrictConfig:
    """Static metadata for one district (a named group of intersections)."""

    district_id: str
    district_type: str  # expected to be one of DISTRICT_TYPES
    district_type_index: int  # index of ``district_type`` (see DISTRICT_TYPE_TO_INDEX)
    intersection_ids: tuple[str, ...]
    neighbor_districts: tuple[str, ...]
env/observation_builder.py ADDED
@@ -0,0 +1,224 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+
5
+ import numpy as np
6
+
7
+ from env.intersection_config import DistrictConfig, IntersectionConfig
8
+ from env.utils import normalize_scalar
9
+
10
+
11
@dataclass(frozen=True)
class ObservationConfig:
    """Knobs controlling the layout and scaling of observation vectors."""

    max_incoming_lanes: int = 16  # per-lane feature blocks are zero-padded to this width
    count_scale: float = 20.0  # divisor used to normalize vehicle/waiting counts
    elapsed_time_scale: float = 60.0  # divisor used to normalize time-in-phase
    include_outgoing_congestion: bool = True  # mean outgoing-lane count meta feature
    include_district_context: bool = True  # adds 2 district-average features per row
    include_district_type_feature: bool = True  # adds a 4-wide district-type one-hot per row
+
20
+
21
class ObservationBuilder:
    """Converts raw per-lane simulator state into fixed-size observation rows.

    One row per controlled intersection, rows ordered by sorted intersection
    id. Row layout (see ``_compute_observation_dim``):
    ``[lane counts | lane waiting | lane mask | 7 meta features |
    optional 4-wide district-type one-hot | optional 2-wide district context]``.
    """

    def __init__(
        self,
        intersections: dict[str, IntersectionConfig],
        districts: dict[str, DistrictConfig],
        config: ObservationConfig | None = None,
    ):
        self.intersections = intersections
        self.districts = districts
        self.config = config or ObservationConfig()
        # Deterministic row order: intersection ids sorted lexicographically.
        self.intersection_ids = tuple(sorted(intersections))
        # NOTE(review): _district_lookup is currently unused within this class.
        self._district_lookup = {
            intersection_id: intersections[intersection_id].district_id
            for intersection_id in self.intersection_ids
        }
        # Guard against empty districts so per-district averages never divide by 0.
        self._district_sizes = {
            district_id: max(1, len(district.intersection_ids))
            for district_id, district in districts.items()
        }
        self.observation_dim = self._compute_observation_dim()

    def build(
        self,
        lane_vehicle_count: dict[str, int],
        lane_waiting_count: dict[str, int],
        phase_positions: dict[str, int],
        phase_elapsed_times: dict[str, int],
        switch_allowed: dict[str, bool],
    ) -> dict[str, np.ndarray | tuple[str, ...]]:
        """Build the batched observation for all controlled intersections.

        Args:
            lane_vehicle_count: lane id -> vehicle count (missing lanes read as 0).
            lane_waiting_count: lane id -> waiting count (missing lanes read as 0).
            phase_positions: intersection id -> current green-phase position.
            phase_elapsed_times: intersection id -> time spent in current phase.
            switch_allowed: intersection id -> whether a phase switch is permitted.

        Returns:
            Dict of batched arrays (first axis = intersections in
            ``self.intersection_ids`` order) plus id/type tuples.
        """
        district_context = self._compute_district_context(
            lane_vehicle_count=lane_vehicle_count,
            lane_waiting_count=lane_waiting_count,
        )

        num_intersections = len(self.intersection_ids)
        max_lanes = self.config.max_incoming_lanes

        observations = np.zeros(
            (num_intersections, self.observation_dim),
            dtype=np.float32,
        )
        incoming_counts = np.zeros((num_intersections, max_lanes), dtype=np.float32)
        incoming_waiting = np.zeros((num_intersections, max_lanes), dtype=np.float32)
        lane_mask = np.zeros((num_intersections, max_lanes), dtype=np.float32)
        action_mask = np.ones((num_intersections, 2), dtype=np.float32)
        current_phase = np.zeros(num_intersections, dtype=np.int64)
        phase_elapsed = np.zeros(num_intersections, dtype=np.float32)
        outgoing_congestion = np.zeros(num_intersections, dtype=np.float32)
        district_type_indices = np.zeros(num_intersections, dtype=np.int64)
        boundary_mask = np.zeros(num_intersections, dtype=np.float32)

        for row_index, intersection_id in enumerate(self.intersection_ids):
            config = self.intersections[intersection_id]
            lane_count_vector, waiting_vector, mask_vector = self._lane_vectors(
                config=config,
                lane_vehicle_count=lane_vehicle_count,
                lane_waiting_count=lane_waiting_count,
            )
            incoming_counts[row_index] = lane_count_vector
            incoming_waiting[row_index] = waiting_vector
            lane_mask[row_index] = mask_vector

            phase_index = int(phase_positions[intersection_id])
            phase_time = float(phase_elapsed_times[intersection_id])
            phase_count = max(1, config.num_green_phases)
            current_phase[row_index] = phase_index
            phase_elapsed[row_index] = phase_time
            district_type_indices[row_index] = config.district_type_index
            boundary_mask[row_index] = 1.0 if config.is_boundary else 0.0

            # Pack the row left-to-right; next_col tracks the write offset and
            # must stay consistent with _compute_observation_dim.
            next_col = 0
            observations[row_index, next_col : next_col + max_lanes] = (
                lane_count_vector / self.config.count_scale
            )
            next_col += max_lanes
            observations[row_index, next_col : next_col + max_lanes] = (
                waiting_vector / self.config.count_scale
            )
            next_col += max_lanes
            observations[row_index, next_col : next_col + max_lanes] = mask_vector
            next_col += max_lanes

            if self.config.include_outgoing_congestion:
                outgoing_congestion[row_index] = self._mean_outgoing_congestion(
                    config=config,
                    lane_vehicle_count=lane_vehicle_count,
                )

            # Fixed 7-element meta block: normalized phase position, time in
            # phase, outgoing congestion, total incoming count, phase count,
            # switch-allowed flag, boundary flag.
            meta_features = [
                normalize_scalar(phase_index, max(1, phase_count - 1))
                if phase_count > 1
                else 0.0,
                normalize_scalar(phase_time, self.config.elapsed_time_scale),
                normalize_scalar(float(outgoing_congestion[row_index]), self.config.count_scale),
                normalize_scalar(float(lane_count_vector.sum()), self.config.count_scale),
                normalize_scalar(float(phase_count), 4.0),
                1.0 if switch_allowed[intersection_id] else 0.0,
                boundary_mask[row_index],
            ]
            observations[row_index, next_col : next_col + len(meta_features)] = meta_features
            next_col += len(meta_features)

            if self.config.include_district_type_feature:
                # 4-wide one-hot indexed by district type.
                observations[row_index, next_col + config.district_type_index] = 1.0
                next_col += 4

            if self.config.include_district_context:
                # Tail of the row; next_col is not advanced after this write.
                district_values = district_context.get(
                    config.district_id,
                    (0.0, 0.0),
                )
                observations[row_index, next_col : next_col + len(district_values)] = district_values

            if not switch_allowed[intersection_id]:
                # Action column 1 (switch) is disabled while a switch is not allowed.
                action_mask[row_index, 1] = 0.0

        return {
            "observations": observations,
            "incoming_counts": incoming_counts,
            "incoming_waiting": incoming_waiting,
            "lane_mask": lane_mask,
            "action_mask": action_mask,
            "current_phase": current_phase,
            "phase_elapsed": phase_elapsed,
            "outgoing_congestion": outgoing_congestion,
            "boundary_mask": boundary_mask,
            "district_type_indices": district_type_indices,
            "district_types": tuple(
                self.intersections[intersection_id].district_type
                for intersection_id in self.intersection_ids
            ),
            "district_ids": tuple(
                self.intersections[intersection_id].district_id
                for intersection_id in self.intersection_ids
            ),
            "intersection_ids": self.intersection_ids,
        }

    def _compute_observation_dim(self) -> int:
        """Row width: 3 lane blocks + 7 meta features, plus optional extras."""
        base_dim = self.config.max_incoming_lanes * 3 + 7
        if self.config.include_district_type_feature:
            base_dim += 4
        if self.config.include_district_context:
            base_dim += 2
        return base_dim

    def _lane_vectors(
        self,
        config: IntersectionConfig,
        lane_vehicle_count: dict[str, int],
        lane_waiting_count: dict[str, int],
    ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
        """Return (counts, waiting, mask) padded/truncated to max_incoming_lanes."""
        max_lanes = self.config.max_incoming_lanes
        count_vector = np.zeros(max_lanes, dtype=np.float32)
        waiting_vector = np.zeros(max_lanes, dtype=np.float32)
        mask_vector = np.zeros(max_lanes, dtype=np.float32)

        # Lanes beyond max_lanes are silently dropped; missing counts read as 0.
        for lane_index, lane_id in enumerate(config.incoming_lanes[:max_lanes]):
            count_vector[lane_index] = float(lane_vehicle_count.get(lane_id, 0))
            waiting_vector[lane_index] = float(lane_waiting_count.get(lane_id, 0))
            mask_vector[lane_index] = 1.0

        return count_vector, waiting_vector, mask_vector

    def _mean_outgoing_congestion(
        self,
        config: IntersectionConfig,
        lane_vehicle_count: dict[str, int],
    ) -> float:
        """Mean vehicle count over the intersection's outgoing lanes (0.0 if none)."""
        if not config.outgoing_lanes:
            return 0.0
        total = sum(float(lane_vehicle_count.get(lane_id, 0)) for lane_id in config.outgoing_lanes)
        return total / float(len(config.outgoing_lanes))

    def _compute_district_context(
        self,
        lane_vehicle_count: dict[str, int],
        lane_waiting_count: dict[str, int],
    ) -> dict[str, tuple[float, float]]:
        """Per-district (avg count, avg waiting) over member intersections,
        normalized by count_scale; empty dict when the feature is disabled."""
        context: dict[str, tuple[float, float]] = {}
        if not self.config.include_district_context:
            return context

        for district_id, district in self.districts.items():
            total_count = 0.0
            total_waiting = 0.0
            for intersection_id in district.intersection_ids:
                config = self.intersections[intersection_id]
                total_count += sum(
                    float(lane_vehicle_count.get(lane_id, 0))
                    for lane_id in config.incoming_lanes
                )
                total_waiting += sum(
                    float(lane_waiting_count.get(lane_id, 0))
                    for lane_id in config.incoming_lanes
                )

            size = float(self._district_sizes[district_id])
            context[district_id] = (
                normalize_scalar(total_count / size, self.config.count_scale),
                normalize_scalar(total_waiting / size, self.config.count_scale),
            )

        return context
env/reward.py ADDED
@@ -0,0 +1,244 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+
5
+ import numpy as np
6
+
7
# Names of the supported reward formulations (validated by RewardCalculator).
REWARD_VARIANTS: tuple[str, ...] = (
    "current",
    "normalized_wait_queue",
    "wait_queue_throughput",
)
12
+
13
+
14
@dataclass(frozen=True)
class RewardConfig:
    """Weights and normalizers for RewardCalculator.

    The first group applies to the level-based "current" variant; the second
    group applies to the delta-based variants ("normalized_wait_queue",
    "wait_queue_throughput").
    """

    variant: str = "current"  # must be one of REWARD_VARIANTS
    # --- "current" variant ---
    waiting_weight: float = 1.0  # penalty weight on total waiting vehicles
    vehicle_weight: float = 0.1  # penalty weight on total queued vehicles
    pressure_weight: float = 0.0  # 0.0 disables the pressure term
    reward_scale: float = 0.1  # multiplies every component (all variants)
    normalize_by_lane_count: bool = True  # divide totals by incoming-lane count
    clip_reward: float | None = 5.0  # symmetric final clip; None disables
    # --- delta-based variants ---
    queue_delta_weight: float = 2.0  # reward for queue shrinking step-to-step
    wait_delta_weight: float = 4.0  # reward for waiting shrinking step-to-step
    queue_level_weight: float = 0.5  # penalty on absolute queue level
    wait_level_weight: float = 1.0  # penalty on absolute waiting level
    throughput_weight: float = 0.1  # only used by "wait_queue_throughput"
    imbalance_weight: float = 0.1  # only used by "wait_queue_throughput"
    delta_clip: float = 2.0  # clip on delta and level terms
    level_normalizer: float = 10.0  # divisor for level penalties
    throughput_normalizer: float = 2.0  # divisor for per-intersection throughput
32
+
33
+
34
@dataclass(frozen=True)
class RewardBreakdown:
    """Per-intersection reward plus the named components that sum to it."""

    reward: np.ndarray  # float32, one entry per intersection
    components: dict[str, np.ndarray]  # each array shaped like ``reward``
38
+
39
+
40
class RewardCalculator:
    """Computes per-intersection rewards under one of REWARD_VARIANTS.

    Stateful across steps: the delta-based variants compare the current
    normalized queue/wait levels (and finished-vehicle count) against those
    remembered from the previous call, so ``reset()`` should be called at
    episode start.
    """

    def __init__(self, config: RewardConfig | None = None):
        self.config = config or RewardConfig()
        if self.config.variant not in REWARD_VARIANTS:
            raise ValueError(
                f"Unsupported reward variant: {self.config.variant}. "
                f"Expected one of {REWARD_VARIANTS}."
            )
        # Previous-step normalized state; None until reset()/first compute.
        self._prev_queue_norm: np.ndarray | None = None
        self._prev_wait_norm: np.ndarray | None = None
        self._prev_finished_vehicle_count = 0.0

    def reset(
        self,
        incoming_waiting: np.ndarray,
        incoming_counts: np.ndarray,
        incoming_lane_counts: np.ndarray | None = None,
        finished_vehicle_count: float = 0.0,
    ) -> None:
        """Seed the previous-step state from the initial observation."""
        queue_norm, wait_norm, _ = self._normalized_state(
            incoming_waiting=incoming_waiting,
            incoming_counts=incoming_counts,
            incoming_lane_counts=incoming_lane_counts,
        )
        self._prev_queue_norm = queue_norm
        self._prev_wait_norm = wait_norm
        self._prev_finished_vehicle_count = float(finished_vehicle_count)

    def compute(
        self,
        incoming_waiting: np.ndarray,
        incoming_counts: np.ndarray,
        outgoing_counts: np.ndarray | None = None,
        incoming_lane_counts: np.ndarray | None = None,
        finished_vehicle_count: float = 0.0,
    ) -> np.ndarray:
        """Return only the reward array (see ``compute_breakdown``)."""
        return self.compute_breakdown(
            incoming_waiting=incoming_waiting,
            incoming_counts=incoming_counts,
            outgoing_counts=outgoing_counts,
            incoming_lane_counts=incoming_lane_counts,
            finished_vehicle_count=finished_vehicle_count,
        ).reward

    def compute_breakdown(
        self,
        incoming_waiting: np.ndarray,
        incoming_counts: np.ndarray,
        outgoing_counts: np.ndarray | None = None,
        incoming_lane_counts: np.ndarray | None = None,
        finished_vehicle_count: float = 0.0,
    ) -> RewardBreakdown:
        """Dispatch to the configured variant and return reward + components.

        Arrays are per-intersection 2-D (intersection x lane) inputs;
        ``incoming_lane_counts`` is the per-intersection number of real
        incoming lanes, used for normalization.
        """
        if self.config.variant == "current":
            return self._compute_current(
                incoming_waiting=incoming_waiting,
                incoming_counts=incoming_counts,
                outgoing_counts=outgoing_counts,
                incoming_lane_counts=incoming_lane_counts,
            )
        return self._compute_delta_based(
            incoming_waiting=incoming_waiting,
            incoming_counts=incoming_counts,
            incoming_lane_counts=incoming_lane_counts,
            finished_vehicle_count=finished_vehicle_count,
            include_throughput=self.config.variant == "wait_queue_throughput",
        )

    def _compute_current(
        self,
        incoming_waiting: np.ndarray,
        incoming_counts: np.ndarray,
        outgoing_counts: np.ndarray | None = None,
        incoming_lane_counts: np.ndarray | None = None,
    ) -> RewardBreakdown:
        """Stateless level-based reward: penalize waiting and queued totals,
        optionally adding a (outgoing - incoming) pressure term."""
        waiting_total = incoming_waiting.sum(axis=1)
        vehicle_total = incoming_counts.sum(axis=1)
        normalization = self._lane_normalization(waiting_total.shape[0], incoming_lane_counts)

        components = {
            "wait_term": (-self.config.waiting_weight * waiting_total / normalization).astype(np.float32),
            "queue_term": (-self.config.vehicle_weight * vehicle_total / normalization).astype(np.float32),
        }
        if outgoing_counts is not None and self.config.pressure_weight != 0.0:
            outgoing_total = outgoing_counts.sum(axis=1)
            # NOTE(review): rewards outgoing > incoming; confirm intended sign
            # relative to the usual (incoming - outgoing) pressure definition.
            components["pressure_term"] = (
                self.config.pressure_weight * (outgoing_total - vehicle_total) / normalization
            ).astype(np.float32)
        components = self._scale_components(components)
        reward = self._finalize_reward(components)
        return RewardBreakdown(reward=reward, components=components)

    def _compute_delta_based(
        self,
        incoming_waiting: np.ndarray,
        incoming_counts: np.ndarray,
        incoming_lane_counts: np.ndarray | None,
        finished_vehicle_count: float,
        include_throughput: bool,
    ) -> RewardBreakdown:
        """Stateful reward: positive for shrinking queues/waits since the last
        call, penalties on absolute levels, and (optionally) a throughput
        bonus plus a lane-imbalance penalty."""
        queue_norm, wait_norm, lane_norm = self._normalized_state(
            incoming_waiting=incoming_waiting,
            incoming_counts=incoming_counts,
            incoming_lane_counts=incoming_lane_counts,
        )

        # First call without reset(): fall back to zero deltas.
        if self._prev_queue_norm is None or self._prev_wait_norm is None:
            self._prev_queue_norm = queue_norm.copy()
            self._prev_wait_norm = wait_norm.copy()

        queue_delta = np.clip(
            self._prev_queue_norm - queue_norm,
            -self.config.delta_clip,
            self.config.delta_clip,
        ).astype(np.float32)
        wait_delta = np.clip(
            self._prev_wait_norm - wait_norm,
            -self.config.delta_clip,
            self.config.delta_clip,
        ).astype(np.float32)

        components: dict[str, np.ndarray] = {
            "queue_term": (self.config.queue_delta_weight * queue_delta).astype(np.float32),
            "wait_term": (self.config.wait_delta_weight * wait_delta).astype(np.float32),
            "queue_level_term": (
                -self.config.queue_level_weight
                * np.clip(queue_norm / self.config.level_normalizer, 0.0, self.config.delta_clip)
            ).astype(np.float32),
            "wait_level_term": (
                -self.config.wait_level_weight
                * np.clip(wait_norm / self.config.level_normalizer, 0.0, self.config.delta_clip)
            ).astype(np.float32),
        }

        if include_throughput:
            num_intersections = max(1, queue_norm.shape[0])
            # Vehicles finished since the previous call, never negative.
            finished_delta = max(
                0.0,
                float(finished_vehicle_count) - self._prev_finished_vehicle_count,
            )
            throughput_per_intersection = finished_delta / float(num_intersections)
            # Same (capped) bonus is broadcast to every intersection.
            throughput_term = np.full(
                queue_norm.shape,
                self.config.throughput_weight
                * min(1.0, throughput_per_intersection / self.config.throughput_normalizer),
                dtype=np.float32,
            )
            # Std of per-lane waiting penalizes uneven queues across lanes.
            imbalance = np.std(
                incoming_waiting / lane_norm[:, None],
                axis=1,
            ).astype(np.float32)
            components["throughput_term"] = throughput_term
            components["imbalance_term"] = (-self.config.imbalance_weight * imbalance).astype(
                np.float32
            )

        components = self._scale_components(components)
        reward = self._finalize_reward(components)
        # Remember current state only after the reward has been computed.
        self._prev_queue_norm = queue_norm
        self._prev_wait_norm = wait_norm
        self._prev_finished_vehicle_count = float(finished_vehicle_count)
        return RewardBreakdown(reward=reward, components=components)

    def _normalized_state(
        self,
        incoming_waiting: np.ndarray,
        incoming_counts: np.ndarray,
        incoming_lane_counts: np.ndarray | None,
    ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
        """Return (queue, wait, lane_norm): per-intersection totals divided by
        the lane normalization."""
        lane_norm = self._lane_normalization(incoming_counts.shape[0], incoming_lane_counts)
        queue_norm = (incoming_counts.sum(axis=1) / lane_norm).astype(np.float32)
        wait_norm = (incoming_waiting.sum(axis=1) / lane_norm).astype(np.float32)
        return queue_norm, wait_norm, lane_norm

    def _lane_normalization(
        self,
        batch_size: int,
        incoming_lane_counts: np.ndarray | None,
    ) -> np.ndarray:
        """Per-intersection divisor: lane counts (min 1) when enabled, else ones."""
        normalization = np.ones(batch_size, dtype=np.float32)
        if incoming_lane_counts is not None and self.config.normalize_by_lane_count:
            normalization = np.maximum(1.0, incoming_lane_counts.astype(np.float32))
        return normalization

    def _finalize_reward(self, components: dict[str, np.ndarray]) -> np.ndarray:
        """Sum all components and apply the optional symmetric clip."""
        reward = np.zeros_like(next(iter(components.values())), dtype=np.float32)
        for term in components.values():
            reward += term.astype(np.float32)

        if self.config.clip_reward is not None:
            reward = np.clip(
                reward,
                -float(self.config.clip_reward),
                float(self.config.clip_reward),
            )
        return reward.astype(np.float32)

    def _scale_components(
        self,
        components: dict[str, np.ndarray],
    ) -> dict[str, np.ndarray]:
        """Multiply every component by the global reward_scale."""
        scale = float(self.config.reward_scale)
        return {
            name: (values.astype(np.float32) * scale).astype(np.float32)
            for name, values in components.items()
        }
env/scenarios.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
class ScenarioGenerator:
    """Generates per-episode scenario settings from an integer seed."""

    def generate(self, seed):
        """Return a deterministic scenario dict for ``seed``.

        Uses a private ``random.Random`` instance instead of reseeding the
        module-global generator, so callers' use of the shared ``random``
        state is not perturbed as a side effect. The value stream is
        identical to the previous ``random.seed(seed)`` implementation.

        Returns a dict with:
        - ``traffic_bias``: one of "ns", "ew", "balanced"
        - ``emergency_vehicle``: bool, True with probability 0.2
        """
        import random

        rng = random.Random(seed)
        return {
            "traffic_bias": rng.choice(["ns", "ew", "balanced"]),
            "emergency_vehicle": rng.random() < 0.2,
        }
+ }
env/traffic_env.py ADDED
@@ -0,0 +1,356 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ from dataclasses import dataclass
3
+ from pathlib import Path
4
+ from typing import Any
5
+
6
+ import numpy as np
7
+
8
+ from env.cityflow_adapter import CityFlowAdapter
9
+ from env.intersection_config import DistrictConfig, IntersectionConfig
10
+ from env.observation_builder import ObservationBuilder, ObservationConfig
11
+ from env.reward import RewardCalculator, RewardConfig
12
+ from env.utils import build_topology, load_json
13
+
14
+
15
@dataclass(frozen=True)
class EnvConfig:
    """Top-level configuration for TrafficEnv.

    ``observation`` and ``reward`` defaults are shared frozen-dataclass
    instances; sharing is safe because both types are immutable.
    """

    simulator_interval: int = 1
    decision_interval: int = 5  # simulator steps advanced per policy decision
    min_green_time: int = 10  # minimum time a phase must hold before switching
    thread_num: int = 1  # passed to the CityFlow engine
    observation: ObservationConfig = ObservationConfig()
    reward: RewardConfig = RewardConfig()
    # Episode length in simulated time; when None, TrafficEnv falls back to
    # the CityFlow config's "step" value.
    max_episode_seconds: int | None = None
24
+
25
+
26
+ class TrafficEnv:
27
+ def __init__(
28
+ self,
29
+ city_id: str,
30
+ scenario_name: str,
31
+ city_dir: str | Path,
32
+ scenario_dir: str | Path,
33
+ config_path: str | Path,
34
+ roadnet_path: str | Path,
35
+ district_map_path: str | Path | None = None,
36
+ metadata_path: str | Path | None = None,
37
+ env_config: EnvConfig | None = None,
38
+ ):
39
+ self.city_id = city_id
40
+ self.scenario_name = scenario_name
41
+ self.city_dir = Path(city_dir)
42
+ self.scenario_dir = Path(scenario_dir)
43
+ self.original_config_path = Path(config_path)
44
+ self.roadnet_path = Path(roadnet_path)
45
+ self.district_map_path = Path(district_map_path) if district_map_path else None
46
+ self.metadata_path = Path(metadata_path) if metadata_path else None
47
+ self.env_config = env_config or EnvConfig()
48
+
49
+ self.intersections, self.districts = build_topology(
50
+ roadnet_path=self.roadnet_path,
51
+ district_map_path=self.district_map_path,
52
+ metadata_path=self.metadata_path,
53
+ )
54
+ if not self.intersections:
55
+ raise ValueError(
56
+ f"No controllable intersections found for {self.city_id}/{self.scenario_name}."
57
+ )
58
+
59
+ self.controlled_intersection_ids = tuple(sorted(self.intersections))
60
+ self.observation_builder = ObservationBuilder(
61
+ intersections=self.intersections,
62
+ districts=self.districts,
63
+ config=self.env_config.observation,
64
+ )
65
+ self.reward_calculator = RewardCalculator(self.env_config.reward)
66
+ self.adapter = CityFlowAdapter(
67
+ config_path=self.original_config_path,
68
+ thread_num=self.env_config.thread_num,
69
+ )
70
+
71
+ config_payload = load_json(self.original_config_path)
72
+ self.max_episode_seconds = int(
73
+ self.env_config.max_episode_seconds
74
+ or config_payload.get("step", 0)
75
+ )
76
+ self.metadata = load_json(self.metadata_path) if self.metadata_path else {}
77
+ self._district_type_labels = tuple(
78
+ self.intersections[intersection_id].district_type
79
+ for intersection_id in self.controlled_intersection_ids
80
+ )
81
+ self._incoming_lane_counts = np.asarray(
82
+ [
83
+ max(1, len(self.intersections[intersection_id].incoming_lanes))
84
+ for intersection_id in self.controlled_intersection_ids
85
+ ],
86
+ dtype=np.float32,
87
+ )
88
+
89
+ self.current_phase_positions: dict[str, int] = {}
90
+ self.phase_elapsed_times: dict[str, int] = {}
91
+ self.decision_step_count = 0
92
+ self.episode_return = 0.0
93
+ self.total_episode_return = 0.0
94
+ self.last_info: dict[str, Any] = {}
95
+ self.reward_component_sums: dict[str, float] = {}
96
+
97
+ @property
98
+ def observation_dim(self) -> int:
99
+ return self.observation_builder.observation_dim
100
+
101
+ def reset(self, seed: int | None = None) -> dict[str, Any]:
102
+ del seed
103
+ self.adapter.reset()
104
+ self.decision_step_count = 0
105
+ self.episode_return = 0.0
106
+ self.total_episode_return = 0.0
107
+ self.reward_component_sums = {}
108
+
109
+ self.current_phase_positions = {}
110
+ self.phase_elapsed_times = {}
111
+ for intersection_id in self.controlled_intersection_ids:
112
+ config = self.intersections[intersection_id]
113
+ initial_position = 0
114
+ initial_phase = config.green_phases[initial_position].engine_phase_index
115
+ self.current_phase_positions[intersection_id] = initial_position
116
+ self.phase_elapsed_times[intersection_id] = 0
117
+ self.adapter.set_tl_phase(intersection_id, initial_phase)
118
+
119
+ observation = self._build_observation()
120
+ self.reward_calculator.reset(
121
+ incoming_waiting=observation["incoming_waiting"],
122
+ incoming_counts=observation["incoming_counts"],
123
+ incoming_lane_counts=self._incoming_lane_counts,
124
+ finished_vehicle_count=self.adapter.get_finished_vehicle_count(),
125
+ )
126
+ self.last_info = self._build_info(
127
+ rewards=np.zeros(len(self.controlled_intersection_ids), dtype=np.float32),
128
+ avg_incoming_counts=observation["incoming_counts"],
129
+ avg_incoming_waiting=observation["incoming_waiting"],
130
+ reward_components={},
131
+ )
132
+ return observation
133
+
134
+ def step(
135
+ self,
136
+ actions: dict[str, int] | list[int] | np.ndarray,
137
+ ) -> tuple[dict[str, Any], np.ndarray, bool, dict[str, Any]]:
138
+ normalized_actions = self._normalize_actions(actions)
139
+ self._apply_actions(normalized_actions)
140
+
141
+ avg_incoming_counts, avg_incoming_waiting, avg_outgoing_counts = self._advance_simulator()
142
+ reward_breakdown = self.reward_calculator.compute_breakdown(
143
+ incoming_waiting=avg_incoming_waiting,
144
+ incoming_counts=avg_incoming_counts,
145
+ outgoing_counts=avg_outgoing_counts,
146
+ incoming_lane_counts=self._incoming_lane_counts,
147
+ finished_vehicle_count=self.adapter.get_finished_vehicle_count(),
148
+ )
149
+ rewards = reward_breakdown.reward
150
+ self.decision_step_count += 1
151
+ self.total_episode_return += float(rewards.sum())
152
+ self.episode_return = self._mean_step_intersection_reward()
153
+ self._accumulate_reward_components(reward_breakdown.components)
154
+
155
+ observation = self._build_observation()
156
+ done = self.adapter.get_current_time() >= self.max_episode_seconds
157
+ info = self._build_info(
158
+ rewards=rewards,
159
+ avg_incoming_counts=avg_incoming_counts,
160
+ avg_incoming_waiting=avg_incoming_waiting,
161
+ reward_components=reward_breakdown.components,
162
+ )
163
+ self.last_info = info
164
+ return observation, rewards, done, info
165
+
166
+ def _build_observation(self) -> dict[str, Any]:
167
+ lane_vehicle_count = self.adapter.get_lane_vehicle_count()
168
+ lane_waiting_count = self.adapter.get_lane_waiting_vehicle_count()
169
+ switch_allowed = {
170
+ intersection_id: (
171
+ self.phase_elapsed_times[intersection_id] >= self.env_config.min_green_time
172
+ )
173
+ for intersection_id in self.controlled_intersection_ids
174
+ }
175
+
176
+ observation = self.observation_builder.build(
177
+ lane_vehicle_count=lane_vehicle_count,
178
+ lane_waiting_count=lane_waiting_count,
179
+ phase_positions=self.current_phase_positions,
180
+ phase_elapsed_times=self.phase_elapsed_times,
181
+ switch_allowed=switch_allowed,
182
+ )
183
+ observation["city_id"] = self.city_id
184
+ observation["scenario_name"] = self.scenario_name
185
+ observation["decision_step"] = self.decision_step_count
186
+ observation["sim_time"] = self.adapter.get_current_time()
187
+ return observation
188
+
189
+ def _apply_actions(self, actions: np.ndarray) -> None:
190
+ for action_index, intersection_id in enumerate(self.controlled_intersection_ids):
191
+ config = self.intersections[intersection_id]
192
+ current_position = self.current_phase_positions[intersection_id]
193
+ can_switch = self.phase_elapsed_times[intersection_id] >= self.env_config.min_green_time
194
+ should_switch = int(actions[action_index]) == 1 and can_switch
195
+
196
+ if should_switch:
197
+ next_position = (current_position + 1) % config.num_green_phases
198
+ engine_phase = config.green_phases[next_position].engine_phase_index
199
+ self.adapter.set_tl_phase(intersection_id, engine_phase)
200
+ self.current_phase_positions[intersection_id] = next_position
201
+ self.phase_elapsed_times[intersection_id] = 0
202
+ else:
203
+ current_engine_phase = config.green_phases[current_position].engine_phase_index
204
+ self.adapter.set_tl_phase(intersection_id, current_engine_phase)
205
+
206
+ def _advance_simulator(self) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
207
+ num_intersections = len(self.controlled_intersection_ids)
208
+ max_lanes = self.env_config.observation.max_incoming_lanes
209
+ avg_incoming_counts = np.zeros((num_intersections, max_lanes), dtype=np.float32)
210
+ avg_incoming_waiting = np.zeros((num_intersections, max_lanes), dtype=np.float32)
211
+ avg_outgoing_counts = np.zeros((num_intersections, max_lanes), dtype=np.float32)
212
+
213
+ for _ in range(self.env_config.decision_interval):
214
+ self.adapter.step()
215
+ lane_vehicle_count = self.adapter.get_lane_vehicle_count()
216
+ lane_waiting_count = self.adapter.get_lane_waiting_vehicle_count()
217
+
218
+ for row_index, intersection_id in enumerate(self.controlled_intersection_ids):
219
+ config = self.intersections[intersection_id]
220
+ for lane_index, lane_id in enumerate(
221
+ config.incoming_lanes[: self.env_config.observation.max_incoming_lanes]
222
+ ):
223
+ avg_incoming_counts[row_index, lane_index] += float(
224
+ lane_vehicle_count.get(lane_id, 0)
225
+ )
226
+ avg_incoming_waiting[row_index, lane_index] += float(
227
+ lane_waiting_count.get(lane_id, 0)
228
+ )
229
+ for lane_index, lane_id in enumerate(
230
+ config.outgoing_lanes[: self.env_config.observation.max_incoming_lanes]
231
+ ):
232
+ avg_outgoing_counts[row_index, lane_index] += float(
233
+ lane_vehicle_count.get(lane_id, 0)
234
+ )
235
+
236
+ self.phase_elapsed_times[intersection_id] += self.env_config.simulator_interval
237
+
238
+ avg_incoming_counts /= float(self.env_config.decision_interval)
239
+ avg_incoming_waiting /= float(self.env_config.decision_interval)
240
+ avg_outgoing_counts /= float(self.env_config.decision_interval)
241
+ return avg_incoming_counts, avg_incoming_waiting, avg_outgoing_counts
242
+
243
def _build_info(
    self,
    rewards: np.ndarray,
    avg_incoming_counts: np.ndarray,
    avg_incoming_waiting: np.ndarray,
    reward_components: dict[str, np.ndarray],
) -> dict[str, Any]:
    """Build the diagnostics dict attached to every reset/step result."""
    waiting_per_intersection = avg_incoming_waiting.sum(axis=1)
    incoming_per_intersection = avg_incoming_counts.sum(axis=1)

    metrics: dict[str, Any] = {
        "num_controlled_intersections": len(self.controlled_intersection_ids),
        "mean_reward": float(rewards.mean()) if rewards.size else 0.0,
        "mean_step_intersection_reward": self._mean_step_intersection_reward(),
        "mean_waiting_vehicles": float(waiting_per_intersection.mean()),
        "mean_incoming_vehicles": float(incoming_per_intersection.mean()),
        "total_waiting_vehicles": float(avg_incoming_waiting.sum()),
        "total_incoming_vehicles": float(avg_incoming_counts.sum()),
        "running_vehicles": self.adapter.get_vehicle_count(),
        "throughput": self.adapter.get_finished_vehicle_count(),
        "average_travel_time": self.adapter.get_average_travel_time(),
        "reward_variant": self.env_config.reward.variant,
    }
    # Fold in per-component reward metrics and per-district-type breakdowns.
    metrics.update(self._reward_component_metrics(reward_components))
    metrics.update(
        per_district_type_metrics(
            district_types=self._district_type_labels,
            rewards=rewards,
            avg_incoming_counts=avg_incoming_counts,
            avg_incoming_waiting=avg_incoming_waiting,
        )
    )

    return {
        "city_id": self.city_id,
        "scenario_name": self.scenario_name,
        "decision_step": self.decision_step_count,
        "sim_time": self.adapter.get_current_time(),
        "episode_return": float(self.episode_return),
        "total_episode_return": float(self.total_episode_return),
        "intersection_ids": self.controlled_intersection_ids,
        "district_types": self._district_type_labels,
        "metrics": metrics,
    }
285
+
286
+ def _normalize_actions(
287
+ self,
288
+ actions: dict[str, int] | list[int] | np.ndarray,
289
+ ) -> np.ndarray:
290
+ if isinstance(actions, dict):
291
+ return np.asarray(
292
+ [int(actions.get(intersection_id, 0)) for intersection_id in self.controlled_intersection_ids],
293
+ dtype=np.int64,
294
+ )
295
+ array = np.asarray(actions, dtype=np.int64)
296
+ if array.shape != (len(self.controlled_intersection_ids),):
297
+ raise ValueError(
298
+ "Actions must provide exactly one action per controlled intersection."
299
+ )
300
+ return array
301
+
302
+ def _mean_step_intersection_reward(self) -> float:
303
+ denominator = max(
304
+ 1,
305
+ self.decision_step_count * len(self.controlled_intersection_ids),
306
+ )
307
+ return float(self.total_episode_return) / float(denominator)
308
+
309
+ def _accumulate_reward_components(self, components: dict[str, np.ndarray]) -> None:
310
+ for name, values in components.items():
311
+ self.reward_component_sums[name] = self.reward_component_sums.get(name, 0.0) + float(
312
+ np.asarray(values, dtype=np.float32).mean()
313
+ )
314
+
315
+ def _reward_component_metrics(
316
+ self,
317
+ reward_components: dict[str, np.ndarray],
318
+ ) -> dict[str, float]:
319
+ metrics: dict[str, float] = {}
320
+ for name, values in reward_components.items():
321
+ metrics[f"reward_component_step_{name}"] = float(
322
+ np.asarray(values, dtype=np.float32).mean()
323
+ )
324
+ if self.decision_step_count <= 0:
325
+ return metrics
326
+ for name, total in self.reward_component_sums.items():
327
+ metrics[f"reward_component_mean_{name}"] = float(total) / float(
328
+ self.decision_step_count
329
+ )
330
+ return metrics
331
+
332
+
333
def per_district_type_metrics(
    district_types: tuple[str, ...],
    rewards: np.ndarray,
    avg_incoming_counts: np.ndarray,
    avg_incoming_waiting: np.ndarray,
) -> dict[str, float]:
    """Aggregate per-intersection statistics by district type.

    Args:
        district_types: One label per controlled intersection, aligned with
            the rows of the array arguments.
        rewards: Per-intersection rewards, shape ``(num_intersections,)``.
        avg_incoming_counts: Per-intersection, per-lane average vehicle
            counts, shape ``(num_intersections, num_lanes)``.
        avg_incoming_waiting: Per-intersection, per-lane average waiting
            counts, same shape.

    Returns:
        For each distinct district type: the intersection count and the
        mean reward / waiting vehicles / incoming vehicles over the
        intersections of that type.
    """
    metrics: dict[str, float] = {}
    if not district_types:
        return metrics

    reward_vector = np.asarray(rewards, dtype=np.float32)
    # Build the label array once; vectorized equality replaces the original
    # per-type Python list comprehension.
    labels = np.asarray(district_types)
    incoming_totals = avg_incoming_counts.sum(axis=1)
    waiting_totals = avg_incoming_waiting.sum(axis=1)

    for district_type in sorted(set(district_types)):
        # Mask is never empty: district_type is drawn from district_types
        # itself, so the original `if not mask.any()` guard was dead code.
        mask = labels == district_type
        metrics[f"num_{district_type}_intersections"] = float(mask.sum())
        metrics[f"mean_reward_{district_type}"] = float(reward_vector[mask].mean())
        metrics[f"mean_waiting_vehicles_{district_type}"] = float(waiting_totals[mask].mean())
        metrics[f"mean_incoming_vehicles_{district_type}"] = float(incoming_totals[mask].mean())

    return metrics