“Namhyun-Kim” committed on
Commit
aebafe2
·
1 Parent(s): 24c4d80

Update demo with MoE centroid evaluation

Browse files
.gitattributes CHANGED
@@ -1,24 +1,2 @@
1
- # Git LFS configuration for large model files
2
- *.pth filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
  *.pt filter=lfs diff=lfs merge=lfs -text
5
- *.safetensors filter=lfs diff=lfs merge=lfs -text
6
- *.h5 filter=lfs diff=lfs merge=lfs -text
7
- *.onnx filter=lfs diff=lfs merge=lfs -text
8
- *.pkl filter=lfs diff=lfs merge=lfs -text
9
- *.pickle filter=lfs diff=lfs merge=lfs -text
10
- *.pb filter=lfs diff=lfs merge=lfs -text
11
- *.msgpack filter=lfs diff=lfs merge=lfs -text
12
- *.ckpt filter=lfs diff=lfs merge=lfs -text
13
 
14
- # Large data files
15
- *.zip filter=lfs diff=lfs merge=lfs -text
16
- *.tar filter=lfs diff=lfs merge=lfs -text
17
- *.tar.gz filter=lfs diff=lfs merge=lfs -text
18
- *.npy filter=lfs diff=lfs merge=lfs -text
19
- *.npz filter=lfs diff=lfs merge=lfs -text
20
-
21
- # Large image files (if needed)
22
- *.png filter=lfs diff=lfs merge=lfs -text
23
- *.jpg filter=lfs diff=lfs merge=lfs -text
24
- *.jpeg filter=lfs diff=lfs merge=lfs -text
 
 
 
 
1
  *.pt filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
2
 
 
 
 
 
 
 
 
 
 
 
 
README.md DELETED
@@ -1,10 +0,0 @@
1
- ---
2
- title: LWM Spectro Demo
3
- emoji: 🔬
4
- colorFrom: blue
5
- colorTo: indigo
6
- sdk: gradio
7
- sdk_version: 5.5.0
8
- app_file: app.py
9
- pinned: false
10
- ---
 
 
 
 
 
 
 
 
 
 
 
app.py CHANGED
@@ -1,412 +1,481 @@
1
 
2
- import inspect
3
- import random
4
  import sys
5
  from pathlib import Path
6
-
7
- import huggingface_hub as hf_hub
8
-
9
- # Gradio imports HfFolder; add shim before importing gradio.
10
- if not hasattr(hf_hub, "HfFolder"):
11
- class _HfFolderShim:
12
- @staticmethod
13
- def get_token():
14
- return None
15
-
16
- @staticmethod
17
- def save_token(token):
18
- return None
19
-
20
- hf_hub.HfFolder = _HfFolderShim # type: ignore[attr-defined]
21
 
22
  import gradio as gr
23
- import torch
24
  import numpy as np
25
  import pandas as pd
26
- from sklearn.manifold import TSNE
 
 
27
  from sklearn.decomposition import PCA
28
- from sklearn.preprocessing import StandardScaler
29
- from sklearn.metrics import confusion_matrix, f1_score
30
- import matplotlib.pyplot as plt
 
 
 
 
 
 
31
 
32
- # Repo root for local imports
33
- REPO_ROOT = Path(__file__).resolve().parent
34
  if str(REPO_ROOT) not in sys.path:
35
  sys.path.append(str(REPO_ROOT))
36
 
37
- from mixture.train_embedding_router import MoEPredictor # type: ignore
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
- # ------------------------------------------------------------------------------
40
- # Data loading (t-SNE + evaluation)
41
- # ------------------------------------------------------------------------------
42
 
43
- # Load data
44
- def load_data():
45
- print("Loading data...")
46
- data = torch.load("demo_data.pt")
 
 
 
 
 
 
 
47
  records = []
48
- for i, d in enumerate(data):
49
- records.append({
50
- "index": i,
51
- "tech": d['tech'],
52
- "snr": d['snr'],
53
- "mod": d['mod'],
54
- "mob": d['mob'],
55
- "embedding": d['embedding'].numpy(),
56
- "spectrogram": d['data'].numpy().flatten()
57
- })
58
- df = pd.DataFrame(records)
59
- print(f"Loaded {len(df)} samples.")
60
- return df, data
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
- df, raw_samples = load_data()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
- # Get unique values for filters
65
- tech_choices = sorted(list(df['tech'].unique()))
66
- snr_choices = sorted(list(df['snr'].unique()))
67
- mod_choices = sorted(list(df['mod'].unique()))
68
- mob_choices = sorted(list(df['mob'].unique()))
69
 
70
  def plot_tsne(tech_filter, snr_filter, mod_filter, mob_filter, representation, color_by, perplexity, n_iter):
71
- # Filter data
72
- filtered_df = df.copy()
73
- if not tech_filter:
74
- return None, "Select at least one technology."
75
-
76
- if tech_filter and len(tech_filter) > 0:
77
- filtered_df = filtered_df[filtered_df['tech'].isin(tech_filter)]
78
-
79
- if snr_filter and len(snr_filter) > 0:
80
- filtered_df = filtered_df[filtered_df['snr'].isin(snr_filter)]
81
-
82
- if mod_filter and len(mod_filter) > 0:
83
- filtered_df = filtered_df[filtered_df['mod'].isin(mod_filter)]
84
-
85
- if mob_filter and len(mob_filter) > 0:
86
- filtered_df = filtered_df[filtered_df['mob'].isin(mob_filter)]
87
-
88
  if len(filtered_df) < 5:
89
  return None, f"Not enough data points ({len(filtered_df)}). Need at least 5."
90
-
91
- # Select features
92
  if representation == "LWM Embedding":
93
- features = np.stack(filtered_df['embedding'].values)
94
  else:
95
- features = np.stack(filtered_df['spectrogram'].values)
96
- # PCA for raw spectrograms to speed up t-SNE
97
  if features.shape[1] > 50:
98
  pca = PCA(n_components=50, random_state=42)
99
  features = pca.fit_transform(features)
100
-
101
- # Clean up NaNs/Infs that can blank out t-SNE plots
102
- features = np.nan_to_num(features, copy=False)
103
- # Match task1/plot_tsne.py preprocessing: standardize, clamp, float32
104
- scaler = StandardScaler()
105
- features = scaler.fit_transform(features)
106
- features = np.nan_to_num(features, copy=False, nan=0.0, posinf=0.0, neginf=0.0)
107
- features = np.clip(features, -1e6, 1e6).astype(np.float32, copy=False)
108
-
109
- # Run t-SNE
110
- # Adjust perplexity if N is small; cap similarly to task1/plot_tsne.py
111
- max_perplexity = max(5, min(30, len(filtered_df) // 10 if len(filtered_df) > 10 else len(filtered_df) - 1))
112
- eff_perplexity = min(perplexity, len(filtered_df) - 1, max_perplexity)
113
- eff_perplexity = max(eff_perplexity, 5)
114
-
115
- tsne_kwargs = {"n_components": 2, "perplexity": eff_perplexity, "random_state": 42}
116
- sig = inspect.signature(TSNE.__init__)
117
- if "init" in sig.parameters:
118
- tsne_kwargs["init"] = "random"
119
- if "learning_rate" in sig.parameters:
120
- tsne_kwargs["learning_rate"] = "auto"
121
- if "n_iter" in sig.parameters:
122
- tsne_kwargs["n_iter"] = n_iter
123
- elif "max_iter" in sig.parameters:
124
- tsne_kwargs["max_iter"] = n_iter
125
 
126
- try:
127
- tsne = TSNE(**tsne_kwargs)
128
- projections = tsne.fit_transform(features)
129
- if not np.isfinite(projections).all():
130
- raise ValueError("t-SNE produced NaN/Inf projections")
131
- status_msg = f"t-SNE ok ({len(filtered_df)} samples)."
132
- except Exception as e:
133
- # Fallback to 2D PCA so we always show something
134
- pca_fallback = PCA(n_components=2, random_state=42, svd_solver="full")
135
- projections = pca_fallback.fit_transform(features)
136
- status_msg = f"t-SNE failed ({e}); showing 2D PCA instead. Samples: {len(filtered_df)}"
137
-
138
- filtered_df['x'] = projections[:, 0]
139
- filtered_df['y'] = projections[:, 1]
140
- # If t-SNE collapses to a line/point, add tiny jitter so points are visible.
141
- x_span = filtered_df['x'].max() - filtered_df['x'].min()
142
- y_span = filtered_df['y'].max() - filtered_df['y'].min()
143
- if x_span < 1e-6:
144
- filtered_df['x'] += np.random.normal(scale=1e-3, size=len(filtered_df))
145
- if y_span < 1e-6:
146
- filtered_df['y'] += np.random.normal(scale=1e-3, size=len(filtered_df))
147
- x_min, x_max = filtered_df['x'].min(), filtered_df['x'].max()
148
- y_min, y_max = filtered_df['y'].min(), filtered_df['y'].max()
149
- x_pad = max(1e-3, (x_max - x_min) * 0.05)
150
- y_pad = max(1e-3, (y_max - y_min) * 0.05)
151
-
152
- # Plot using matplotlib for maximum reliability in Spaces
153
- fig, ax = plt.subplots(figsize=(7, 6))
154
- colors = plt.cm.tab20(np.linspace(0, 1, len(filtered_df[color_by].unique())))
155
- for c, cls in zip(colors, sorted(filtered_df[color_by].unique())):
156
- mask = filtered_df[color_by] == cls
157
- ax.scatter(
158
- filtered_df.loc[mask, 'x'],
159
- filtered_df.loc[mask, 'y'],
160
- s=18,
161
- alpha=0.8,
162
- label=str(cls),
163
- color=c,
164
- edgecolors='none',
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
  )
166
- ax.set_xlim(x_min - x_pad, x_max + x_pad)
167
- ax.set_ylim(y_min - y_pad, y_max + y_pad)
168
- ax.set_xlabel("t-SNE x")
169
- ax.set_ylabel("t-SNE y")
170
- ax.set_title(f"t-SNE of {representation} ({len(filtered_df)} samples)")
171
- ax.grid(True, alpha=0.3)
172
- ax.legend(title=color_by, fontsize=9, title_fontsize=10, loc='best')
173
- fig.tight_layout()
174
-
175
- coord_info = f"x[{x_min:.3f},{x_max:.3f}] y[{y_min:.3f},{y_max:.3f}]"
176
- trace_info = f"traces: {len(filtered_df[color_by].unique())}"
177
- return fig, f"{status_msg} | filtered samples: {len(filtered_df)} | {coord_info} | {trace_info}"
178
-
179
- # ------------------------------------------------------------------------------
180
- # Evaluation utilities (confusion matrix, F1) using the MoE checkpoint
181
- # ------------------------------------------------------------------------------
182
-
183
- _predictor: MoEPredictor | None = None
184
-
185
-
186
- def load_predictor() -> MoEPredictor:
187
- global _predictor
188
- if _predictor is not None:
189
- return _predictor
190
-
191
- # Prefer local checkpoint if present; otherwise pull from Hub
192
- candidates = [
193
- REPO_ROOT / "mixture" / "runs" / "embedding_router" / "moe_checkpoint.pth",
194
- REPO_ROOT / "moe_checkpoint.pth",
195
- ]
196
- ckpt_path = None
197
- for cand in candidates:
198
- if cand.exists():
199
- ckpt_path = cand
200
- break
201
- if ckpt_path is None:
202
- ckpt_path = Path(
203
- hf_hub.hf_hub_download(repo_id="wi-lab/lwm-spectro", filename="moe_checkpoint.pth")
204
- )
205
-
206
- # Ensure expert checkpoints are resolvable in the Space (paths inside ckpt are absolute)
207
- def ensure_expert(name: str, comm: str) -> Path:
208
- """Return a local path to the expert checkpoint, downloading if needed."""
209
- fname = Path(name).name
210
- comm_tag = comm.replace("/", "_")
211
- local_candidates = [
212
- REPO_ROOT / "experts" / fname,
213
- REPO_ROOT / fname,
214
- REPO_ROOT / "experts" / f"{comm_tag}_expert.pth",
215
- REPO_ROOT / f"{comm_tag}_expert.pth",
216
- ]
217
- for cand in local_candidates:
218
- if cand.exists():
219
- return cand
220
- # Download from model repo with multiple filename guesses
221
- download_candidates = [
222
- f"experts/{fname}",
223
- f"experts/{comm_tag}_expert.pth",
224
- fname,
225
- ]
226
- last_err = None
227
- for rel in download_candidates:
228
- try:
229
- downloaded = hf_hub.hf_hub_download(
230
- repo_id="wi-lab/lwm-spectro",
231
- filename=rel,
232
- )
233
- return Path(downloaded)
234
- except Exception as exc: # pragma: no cover - network/permissions issues
235
- last_err = exc
236
- continue
237
- raise RuntimeError(f"Could not resolve expert checkpoint for {comm} ({fname}): {last_err}")
238
-
239
- # Rewrite expert paths into a temp checkpoint so MoEPredictor loads cleanly
240
- import torch # local import to keep top import list compact
241
-
242
- raw_ckpt = torch.load(ckpt_path, map_location="cpu")
243
- experts = raw_ckpt.get("experts", [])
244
- if experts:
245
- patched = False
246
- for expert in experts:
247
- ckpt_field = expert.get("checkpoint")
248
- if not ckpt_field:
249
- continue
250
- fname = Path(ckpt_field).name
251
- comm = expert.get("comm", "unknown")
252
- local_path = ensure_expert(fname, comm)
253
- if str(local_path) != ckpt_field:
254
- expert["checkpoint"] = str(local_path)
255
- patched = True
256
- if patched:
257
- tmp_path = Path("/tmp/moe_checkpoint_patched.pth")
258
- torch.save(raw_ckpt, tmp_path)
259
- ckpt_path = tmp_path
260
-
261
- _predictor = MoEPredictor.from_checkpoint(ckpt_path)
262
- return _predictor
263
-
264
-
265
- def _to_tensor(spec) -> torch.Tensor:
266
- t = spec
267
- if not isinstance(t, torch.Tensor):
268
- t = torch.as_tensor(t)
269
- if t.dim() == 2:
270
- t = t.unsqueeze(0)
271
- return t
272
-
273
-
274
- def _normalize_label(val):
275
- """Convert labels to a simple string for metrics."""
276
- if isinstance(val, (list, tuple)):
277
- return " | ".join(str(v) for v in val)
278
- return str(val)
279
-
280
-
281
- def compute_eval(task: str):
282
- """Compute confusion matrix + macro F1 with balanced sampling per class."""
283
- predictor = load_predictor()
284
- y_true, y_pred = [], []
285
-
286
- # Balanced sampling per class
287
- rng = random.Random(42)
288
- per_class_target = 100
289
-
290
- def class_key(sample):
291
- if task == "comm":
292
- return _normalize_label(sample["tech"])
293
- return _normalize_label((sample["snr"], sample["mob"]))
294
-
295
- buckets = {}
296
- for s in raw_samples:
297
- key = class_key(s)
298
- buckets.setdefault(key, []).append(s)
299
-
300
- selected = []
301
- for key, items in buckets.items():
302
- rng.shuffle(items)
303
- take = min(per_class_target, len(items))
304
- selected.extend(items[:take])
305
-
306
- rng.shuffle(selected)
307
-
308
- for sample in selected:
309
- spec = _to_tensor(sample["data"])
310
- try:
311
- res = predictor.predict(spec, return_routing=True)
312
- except Exception as exc:
313
- print(f"[WARN] predict failed: {exc}")
314
- continue
315
-
316
- if task == "comm":
317
- routing = res.get("routing") or []
318
- pred = _normalize_label(routing[0]["comm"]) if routing else "Unknown"
319
- true = _normalize_label(sample["tech"])
320
- else: # snr_mobility
321
- pred_raw = res.get("label", res["predicted_class"])
322
- pred = _normalize_label(pred_raw)
323
- true = _normalize_label((sample["snr"], sample["mob"]))
324
- y_true.append(true)
325
- y_pred.append(pred)
326
-
327
- if not y_true or not y_pred:
328
- raise RuntimeError("No samples were evaluated; check data or predictions.")
329
-
330
- labels = sorted(list({*y_true, *y_pred}))
331
- cm = confusion_matrix(y_true, y_pred, labels=labels)
332
- f1 = f1_score(y_true, y_pred, labels=labels, average="macro", zero_division=0)
333
- acc = (np.array(y_true) == np.array(y_pred)).mean()
334
- return cm, labels, f1, acc, len(y_true)
335
-
336
-
337
- def plot_confusion(cm: np.ndarray, labels):
338
- fig, ax = plt.subplots(figsize=(6, 5))
339
- im = ax.imshow(cm, cmap="Blues")
340
- ax.figure.colorbar(im, ax=ax, fraction=0.046, pad=0.04)
341
- ax.set_xticks(np.arange(len(labels)), labels=labels, rotation=45, ha="right")
342
- ax.set_yticks(np.arange(len(labels)), labels=labels)
343
- ax.set_xlabel("Predicted")
344
- ax.set_ylabel("True")
345
- for i in range(cm.shape[0]):
346
- for j in range(cm.shape[1]):
347
- ax.text(j, i, int(cm[i, j]), ha="center", va="center", color="black")
348
- fig.tight_layout()
349
  return fig
350
 
351
 
352
- def run_eval(task):
353
- cm, labels, f1, acc, n = compute_eval(task)
354
- fig = plot_confusion(cm, labels)
355
- summary = f"Task: {task} | Samples: {n} | Accuracy: {acc:.4f} | Macro F1: {f1:.4f}"
356
- return fig, summary
 
 
 
 
 
 
357
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
358
 
359
- # ------------------------------------------------------------------------------
360
- # UI
361
- # ------------------------------------------------------------------------------
362
  with gr.Blocks(title="LWM-Spectro Demo") as demo:
363
  gr.Markdown("# 🔬 LWM-Spectro Interactive Demo")
364
- gr.Markdown("Compare embeddings vs raw for t-SNE, and view quick metrics from the latest MoE checkpoint.")
365
-
366
- with gr.Tab("t-SNE"):
367
- with gr.Row():
368
- with gr.Column(scale=1, min_width=300):
369
- gr.Markdown("### Filters")
370
- tech_filter = gr.CheckboxGroup(choices=tech_choices, value=tech_choices[:1], label="Technology (default: single tech)")
371
- snr_filter = gr.Dropdown(choices=snr_choices, value=None, multiselect=True, label="SNR (Empty = All)")
372
- mod_filter = gr.Dropdown(choices=mod_choices, value=None, multiselect=True, label="Modulation (Empty = All)")
373
- mob_filter = gr.Dropdown(choices=mob_choices, value=None, multiselect=True, label="Mobility (Empty = All)")
374
-
375
- gr.Markdown("### Visualization Settings")
376
- representation = gr.Radio(choices=["LWM Embedding", "Raw Spectrogram"], value="LWM Embedding", label="Representation")
377
- color_by = gr.Dropdown(choices=["tech", "snr", "mod", "mob"], value="snr", label="Color By")
378
-
379
- with gr.Accordion("Advanced t-SNE Settings", open=False):
380
- perplexity = gr.Slider(minimum=5, maximum=50, value=10, step=1, label="Perplexity")
381
- n_iter = gr.Slider(minimum=250, maximum=2000, value=1000, step=50, label="Iterations")
382
-
383
- btn = gr.Button("Update Plot", variant="primary")
384
- status = gr.Textbox(label="Status", interactive=False)
385
-
386
- with gr.Column(scale=3):
387
- plot = gr.Plot(label="t-SNE Visualization")
388
-
389
- btn.click(plot_tsne, inputs=[tech_filter, snr_filter, mod_filter, mob_filter, representation, color_by, perplexity, n_iter], outputs=[plot, status])
390
-
391
- # Initial load
392
- demo.load(plot_tsne, inputs=[tech_filter, snr_filter, mod_filter, mob_filter, representation, color_by, perplexity, n_iter], outputs=[plot, status])
393
-
394
- with gr.Tab("Evaluation (MoE)"):
395
- gr.Markdown("Uses the latest MoE checkpoint to score the bundled demo set.\n\n- **comm**: predicts communication type (LTE/WiFi/5G) via router gating.\n- **snr_mobility**: predicts the SNR/Mobility class via the classifier head.")
396
- task_choice = gr.Radio(choices=["comm", "snr_mobility"], value="snr_mobility", label="Task")
397
- eval_btn = gr.Button("Run Evaluation", variant="primary")
398
- cm_plot = gr.Plot(label="Confusion Matrix")
399
- eval_summary = gr.Textbox(label="Metrics", interactive=False)
400
-
401
- def _safe_run(task):
402
- try:
403
- return run_eval(task)
404
- except Exception as exc:
405
- return None, f"Error during evaluation: {exc}"
406
-
407
- eval_btn.click(_safe_run, inputs=[task_choice], outputs=[cm_plot, eval_summary])
408
- # Run once on load for convenience
409
- demo.load(_safe_run, inputs=[task_choice], outputs=[cm_plot, eval_summary])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
410
 
411
  if __name__ == "__main__":
412
  demo.launch()
 
1
 
2
+ import json
 
3
  import sys
4
  from pathlib import Path
5
+ from typing import Dict, List, Optional, Sequence, Tuple
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
  import gradio as gr
 
8
  import numpy as np
9
  import pandas as pd
10
+ import plotly.express as px
11
+ import plotly.graph_objects as go
12
+ import torch
13
  from sklearn.decomposition import PCA
14
+ from sklearn.manifold import TSNE
15
+ from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
16
+
17
+ REPO_ROOT = Path(__file__).resolve().parents[1]
18
+ APP_DIR = Path(__file__).resolve().parent
19
+ DEMO_DATA_PATH = APP_DIR / "demo_data.pt"
20
+ MOE_DATA_PATH = APP_DIR / "demo_data_moe.pt"
21
+ MOE_CHECKPOINT = REPO_ROOT / "mixture" / "runs" / "embedding_router" / "moe_checkpoint.pth"
22
+ SNR_MOB_MAPPING_PATH = REPO_ROOT / "mixture" / "runs" / "embedding_router" / "snr_mobility_mapping.json"
23
 
 
 
24
  if str(REPO_ROOT) not in sys.path:
25
  sys.path.append(str(REPO_ROOT))
26
 
27
+ from mixture.train_embedding_router import ( # type: ignore
28
+ MoEPredictor,
29
+ compute_selected_expert_embeddings,
30
+ normalize_per_sample_tensor,
31
+ stack_expert_embeddings,
32
+ )
33
+
34
+
35
+ def load_joint_mapping() -> Optional[Dict[str, object]]:
36
+ if not SNR_MOB_MAPPING_PATH.exists():
37
+ print(f"[WARN] Mapping file not found at {SNR_MOB_MAPPING_PATH}")
38
+ return None
39
+ raw = json.loads(SNR_MOB_MAPPING_PATH.read_text())
40
+ ordered_pairs: List[Tuple[str, str]] = []
41
+ for key in sorted(raw.keys(), key=lambda k: int(k)):
42
+ snr, mob = raw[key]
43
+ ordered_pairs.append((snr, mob))
44
+ label_names = [f"{snr} | {mob}" for snr, mob in ordered_pairs]
45
+ pair_to_name = {pair: name for pair, name in zip(ordered_pairs, label_names)}
46
+ name_to_id = {name: idx for idx, name in enumerate(label_names)}
47
+ pair_to_id = {pair: idx for idx, pair in enumerate(ordered_pairs)}
48
+ return {
49
+ "pairs": ordered_pairs,
50
+ "label_names": label_names,
51
+ "pair_to_name": pair_to_name,
52
+ "name_to_id": name_to_id,
53
+ "pair_to_id": pair_to_id,
54
+ }
55
+
56
+
57
+ def compute_moe_embeddings(
58
+ samples: Sequence[Dict[str, object]],
59
+ predictor: MoEPredictor,
60
+ batch_size: int = 64,
61
+ ) -> torch.Tensor:
62
+ router = predictor.router
63
+ experts = predictor.experts
64
+ device = predictor.device
65
+ embeddings: List[torch.Tensor] = []
66
+
67
+ with torch.no_grad():
68
+ for start in range(0, len(samples), batch_size):
69
+ batch = samples[start : start + batch_size]
70
+ specs = torch.cat([sample["data"] for sample in batch], dim=0).to(device)
71
+ specs_norm = normalize_per_sample_tensor(specs)
72
+
73
+ if router is not None:
74
+ router_logits = router(specs_norm)
75
+ probs = torch.softmax(router_logits, dim=1)
76
+ topk_vals, topk_idx = probs.topk(k=predictor.topk, dim=1)
77
+ weights = topk_vals / torch.clamp(topk_vals.sum(dim=1, keepdim=True), min=1e-6)
78
+ selected_embeddings = compute_selected_expert_embeddings(
79
+ experts,
80
+ specs_norm,
81
+ topk_idx,
82
+ allow_grad=False,
83
+ )
84
+ weighted = (weights.unsqueeze(-1) * selected_embeddings).sum(dim=1)
85
+ else:
86
+ stacked = stack_expert_embeddings(experts, specs_norm)
87
+ weighted = stacked.mean(dim=1)
88
+
89
+ embeddings.append(weighted.cpu())
90
+
91
+ return torch.cat(embeddings, dim=0)
92
+
93
+
94
+ def ensure_moe_embeddings(samples: List[Dict[str, object]]) -> Tuple[List[Dict[str, object]], bool]:
95
+ if MOE_DATA_PATH.exists():
96
+ cached = torch.load(MOE_DATA_PATH)
97
+ if len(cached) == len(samples):
98
+ print(f"[INFO] Loaded cached MoE embeddings from {MOE_DATA_PATH}")
99
+ return cached, True
100
+ print("[WARN] Cached MoE embeddings length mismatch. Recomputing...")
101
+
102
+ if not MOE_CHECKPOINT.exists():
103
+ print(f"[WARN] MoE checkpoint not found at {MOE_CHECKPOINT}. Skipping MoE embeddings.")
104
+ return samples, False
105
+
106
+ print("[INFO] Computing MoE embeddings using router checkpoint...")
107
+ predictor = MoEPredictor.from_checkpoint(MOE_CHECKPOINT)
108
+ moe_embeddings = compute_moe_embeddings(samples, predictor)
109
+ for sample, emb in zip(samples, moe_embeddings):
110
+ sample["moe_embedding"] = emb.detach().cpu()
111
+
112
+ torch.save(samples, MOE_DATA_PATH)
113
+ print(f"[INFO] Saved MoE-augmented dataset to {MOE_DATA_PATH}")
114
+ return samples, True
115
 
 
 
 
116
 
117
+ def load_data(mapping: Optional[Dict[str, object]]):
118
+ if not DEMO_DATA_PATH.exists():
119
+ raise FileNotFoundError(f"Dataset not found at {DEMO_DATA_PATH}")
120
+
121
+ print(f"[INFO] Loading base dataset from {DEMO_DATA_PATH}")
122
+ data: List[Dict[str, object]] = torch.load(DEMO_DATA_PATH)
123
+ data, has_moe = ensure_moe_embeddings(data)
124
+
125
+ pair_to_name = mapping["pair_to_name"] if mapping else {}
126
+ pair_to_id = mapping["pair_to_id"] if mapping else {}
127
+
128
  records = []
129
+ for i, sample in enumerate(data):
130
+ embedding = sample["embedding"]
131
+ if isinstance(embedding, torch.Tensor):
132
+ base_embedding = embedding.detach().cpu().numpy()
133
+ else:
134
+ base_embedding = np.asarray(embedding)
135
+
136
+ spectrogram = sample["data"]
137
+ if isinstance(spectrogram, torch.Tensor):
138
+ flat_spec = spectrogram.numpy().flatten()
139
+ else:
140
+ flat_spec = np.asarray(spectrogram).flatten()
141
+
142
+ moe_embedding = sample.get("moe_embedding")
143
+ if isinstance(moe_embedding, torch.Tensor):
144
+ moe_embedding = moe_embedding.numpy()
145
+ elif moe_embedding is not None:
146
+ moe_embedding = np.asarray(moe_embedding)
147
+
148
+ pair = (sample["snr"], sample["mob"])
149
+ joint_label = pair_to_name.get(pair)
150
+ joint_label_id = pair_to_id.get(pair)
151
+
152
+ records.append(
153
+ {
154
+ "index": i,
155
+ "tech": sample["tech"],
156
+ "snr": sample["snr"],
157
+ "mod": sample["mod"],
158
+ "mob": sample["mob"],
159
+ "embedding": base_embedding,
160
+ "moe_embedding": moe_embedding,
161
+ "spectrogram": flat_spec,
162
+ "joint_label": joint_label,
163
+ "joint_label_id": joint_label_id,
164
+ }
165
+ )
166
 
167
+ df = pd.DataFrame(records)
168
+ print(f"[INFO] Loaded {len(df)} samples.")
169
+ return df, has_moe
170
+
171
+
172
+ def apply_filters(
173
+ dataframe: pd.DataFrame,
174
+ tech_filter,
175
+ snr_filter,
176
+ mod_filter,
177
+ mob_filter,
178
+ ) -> pd.DataFrame:
179
+ filtered = dataframe.copy()
180
+ if tech_filter:
181
+ filtered = filtered[filtered["tech"].isin(tech_filter)]
182
+ if snr_filter:
183
+ filtered = filtered[filtered["snr"].isin(snr_filter)]
184
+ if mod_filter:
185
+ filtered = filtered[filtered["mod"].isin(mod_filter)]
186
+ if mob_filter:
187
+ filtered = filtered[filtered["mob"].isin(mob_filter)]
188
+ return filtered
189
 
 
 
 
 
 
190
 
191
  def plot_tsne(tech_filter, snr_filter, mod_filter, mob_filter, representation, color_by, perplexity, n_iter):
192
+ filtered_df = apply_filters(df, tech_filter, snr_filter, mod_filter, mob_filter)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
  if len(filtered_df) < 5:
194
  return None, f"Not enough data points ({len(filtered_df)}). Need at least 5."
195
+
 
196
  if representation == "LWM Embedding":
197
+ features = np.stack(filtered_df["embedding"].values)
198
  else:
199
+ features = np.stack(filtered_df["spectrogram"].values)
 
200
  if features.shape[1] > 50:
201
  pca = PCA(n_components=50, random_state=42)
202
  features = pca.fit_transform(features)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
203
 
204
+ eff_perplexity = min(perplexity, len(filtered_df) - 1)
205
+ tsne = TSNE(
206
+ n_components=2,
207
+ perplexity=eff_perplexity,
208
+ n_iter=n_iter,
209
+ random_state=42,
210
+ init="pca",
211
+ learning_rate="auto",
212
+ )
213
+ projections = tsne.fit_transform(features)
214
+ filtered_df = filtered_df.copy()
215
+ filtered_df["x"] = projections[:, 0]
216
+ filtered_df["y"] = projections[:, 1]
217
+
218
+ fig = px.scatter(
219
+ filtered_df,
220
+ x="x",
221
+ y="y",
222
+ color=color_by,
223
+ hover_data=["tech", "snr", "mod", "mob"],
224
+ title=f"t-SNE of {representation} ({len(filtered_df)} samples)",
225
+ template="plotly_white",
226
+ )
227
+ fig.update_layout(legend_title_text=color_by.capitalize())
228
+ return fig, f"Displayed {len(filtered_df)} samples."
229
+
230
+
231
+ def stratified_split(filtered_df: pd.DataFrame, train_ratio: float, seed: int) -> Tuple[np.ndarray, np.ndarray]:
232
+ rng = np.random.default_rng(int(seed))
233
+ train_indices: List[int] = []
234
+ test_indices: List[int] = []
235
+
236
+ for label_id, group in filtered_df.groupby("joint_label_id"):
237
+ indices = group.index.to_numpy()
238
+ if indices.size < 2:
239
+ raise ValueError(f"Class '{CLASS_LABELS[int(label_id)]}' needs at least 2 samples for evaluation.")
240
+
241
+ rng.shuffle(indices)
242
+ split = int(round(indices.size * train_ratio))
243
+ split = max(1, min(indices.size - 1, split))
244
+ train_indices.extend(indices[:split])
245
+ test_indices.extend(indices[split:])
246
+
247
+ return np.array(train_indices), np.array(test_indices)
248
+
249
+
250
+ def compute_centroid_metrics(filtered_df: pd.DataFrame, train_idx: np.ndarray, test_idx: np.ndarray) -> Dict[str, object]:
251
+ train_subset = filtered_df.loc[train_idx]
252
+ test_subset = filtered_df.loc[test_idx]
253
+
254
+ train_embeddings = np.stack(train_subset["moe_embedding"].values)
255
+ test_embeddings = np.stack(test_subset["moe_embedding"].values)
256
+ train_labels = train_subset["joint_label_id"].to_numpy(dtype=int)
257
+ test_labels = test_subset["joint_label_id"].to_numpy(dtype=int)
258
+
259
+ unique_labels = np.unique(train_labels)
260
+ centroids = []
261
+ centroid_ids: List[int] = []
262
+ for label_id in unique_labels:
263
+ mask = train_labels == label_id
264
+ centroids.append(train_embeddings[mask].mean(axis=0))
265
+ centroid_ids.append(int(label_id))
266
+
267
+ centroids = np.stack(centroids)
268
+ centroid_ids = np.array(centroid_ids, dtype=int)
269
+
270
+ dists = ((test_embeddings[:, None, :] - centroids[None, :, :]) ** 2).sum(axis=-1)
271
+ preds = centroid_ids[np.argmin(dists, axis=1)]
272
+
273
+ accuracy = accuracy_score(test_labels, preds)
274
+ macro_f1 = f1_score(test_labels, preds, average="macro", labels=centroid_ids, zero_division=0)
275
+
276
+ active_ids = sorted(np.unique(np.concatenate([test_labels, preds])))
277
+ label_names = [CLASS_LABELS[i] for i in active_ids]
278
+ cm = confusion_matrix(test_labels, preds, labels=active_ids)
279
+
280
+ return {
281
+ "accuracy": accuracy,
282
+ "macro_f1": macro_f1,
283
+ "confusion": cm,
284
+ "label_names": label_names,
285
+ "train_size": len(train_idx),
286
+ "test_size": len(test_idx),
287
+ }
288
+
289
+
290
+ def plot_confusion_heatmap(confusion: np.ndarray, label_names: List[str]) -> go.Figure:
291
+ fig = go.Figure(
292
+ data=go.Heatmap(
293
+ z=confusion,
294
+ x=label_names,
295
+ y=label_names,
296
+ colorscale="Viridis",
297
+ hovertemplate="Predicted %{x}<br>True %{y}<br>Count %{z}<extra></extra>",
298
  )
299
+ )
300
+ fig.update_layout(
301
+ title="Prototype Classifier Confusion Matrix",
302
+ xaxis_title="Predicted",
303
+ yaxis_title="True",
304
+ xaxis=dict(tickangle=45),
305
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
306
  return fig
307
 
308
 
309
+ def run_joint_evaluation(train_pct, seed, tech_filter, snr_filter, mod_filter, mob_filter):
310
+ if joint_eval_df.empty:
311
+ fig = go.Figure()
312
+ fig.update_layout(title="MoE embeddings unavailable", xaxis=dict(visible=False), yaxis=dict(visible=False))
313
+ return fig, "MoE embeddings are not available for evaluation."
314
+
315
+ filtered = apply_filters(joint_eval_df, tech_filter, snr_filter, mod_filter, mob_filter)
316
+ if filtered.empty:
317
+ fig = go.Figure()
318
+ fig.update_layout(title="No samples after filtering", xaxis=dict(visible=False), yaxis=dict(visible=False))
319
+ return fig, "No samples match the selected filters."
320
 
321
+ if filtered["joint_label_id"].nunique() < 2:
322
+ fig = go.Figure()
323
+ fig.update_layout(title="Need at least two classes", xaxis=dict(visible=False), yaxis=dict(visible=False))
324
+ return fig, "Need at least two joint SNR/Doppler classes to evaluate."
325
+
326
+ try:
327
+ train_idx, test_idx = stratified_split(filtered, train_pct / 100.0, seed)
328
+ except ValueError as exc:
329
+ fig = go.Figure()
330
+ fig.update_layout(title="Unable to split dataset", xaxis=dict(visible=False), yaxis=dict(visible=False))
331
+ return fig, str(exc)
332
+
333
+ metrics = compute_centroid_metrics(filtered, train_idx, test_idx)
334
+ fig = plot_confusion_heatmap(metrics["confusion"], metrics["label_names"])
335
+ status = (
336
+ f"Train samples: {metrics['train_size']}\n"
337
+ f"Test samples: {metrics['test_size']}\n"
338
+ f"Accuracy: {metrics['accuracy'] * 100:.2f}%\n"
339
+ f"Macro F1: {metrics['macro_f1']:.3f}"
340
+ )
341
+ return fig, status
342
+
343
+
344
+ mapping_info = load_joint_mapping()
345
+ df, has_moe_embeddings = load_data(mapping_info)
346
+ CLASS_LABELS: List[str] = mapping_info["label_names"] if mapping_info else []
347
+
348
+ joint_eval_df = df.copy()
349
+ joint_eval_df = joint_eval_df[joint_eval_df["joint_label_id"].notna()]
350
+ joint_eval_df = joint_eval_df[joint_eval_df["moe_embedding"].notna()]
351
+
352
+ tech_choices = sorted(df["tech"].unique())
353
+ snr_choices = sorted(df["snr"].unique())
354
+ mod_choices = sorted(df["mod"].unique())
355
+ mob_choices = sorted(df["mob"].unique())
356
+
357
+ evaluation_disabled = joint_eval_df.empty
358
 
 
 
 
359
  with gr.Blocks(title="LWM-Spectro Demo") as demo:
360
  gr.Markdown("# 🔬 LWM-Spectro Interactive Demo")
361
+ gr.Markdown(
362
+ """
363
+ Compare **LWM embeddings** vs **Raw Spectrograms** for visualization, then evaluate **MoE embeddings**
364
+ with a lightweight prototype classifier for joint SNR/Doppler recognition.
365
+ """
366
+ )
367
+
368
+ with gr.Tabs():
369
+ with gr.Tab("Visualization"):
370
+ with gr.Row():
371
+ with gr.Column(scale=1, min_width=300):
372
+ gr.Markdown("### Filters")
373
+ tech_filter = gr.CheckboxGroup(choices=tech_choices, value=tech_choices, label="Technology")
374
+ snr_filter = gr.Dropdown(
375
+ choices=snr_choices, value=None, multiselect=True, label="SNR (Empty = All)"
376
+ )
377
+ mod_filter = gr.Dropdown(
378
+ choices=mod_choices, value=None, multiselect=True, label="Modulation (Empty = All)"
379
+ )
380
+ mob_filter = gr.Dropdown(
381
+ choices=mob_choices, value=None, multiselect=True, label="Mobility (Empty = All)"
382
+ )
383
+
384
+ gr.Markdown("### Visualization Settings")
385
+ representation = gr.Radio(
386
+ choices=["LWM Embedding", "Raw Spectrogram"],
387
+ value="LWM Embedding",
388
+ label="Representation",
389
+ )
390
+ color_by = gr.Dropdown(choices=["tech", "snr", "mod", "mob"], value="tech", label="Color By")
391
+
392
+ with gr.Accordion("Advanced t-SNE Settings", open=False):
393
+ perplexity = gr.Slider(minimum=5, maximum=50, value=30, step=1, label="Perplexity")
394
+ n_iter = gr.Slider(minimum=250, maximum=2000, value=1000, step=50, label="Iterations")
395
+
396
+ btn = gr.Button("Update Plot", variant="primary")
397
+ status = gr.Textbox(label="Status", interactive=False)
398
+
399
+ with gr.Column(scale=3):
400
+ plot = gr.Plot(label="t-SNE Visualization")
401
+
402
+ btn.click(
403
+ plot_tsne,
404
+ inputs=[tech_filter, snr_filter, mod_filter, mob_filter, representation, color_by, perplexity, n_iter],
405
+ outputs=[plot, status],
406
+ )
407
+
408
+ demo.load(
409
+ plot_tsne,
410
+ inputs=[tech_filter, snr_filter, mod_filter, mob_filter, representation, color_by, perplexity, n_iter],
411
+ outputs=[plot, status],
412
+ )
413
+
414
+ with gr.Tab("Evaluation (Joint SNR/Doppler)"):
415
+ if evaluation_disabled:
416
+ gr.Markdown(
417
+ "⚠️ MoE embeddings are unavailable. Ensure `demo_data_moe.pt` exists or the checkpoint is present."
418
+ )
419
+
420
+ with gr.Row():
421
+ with gr.Column(scale=1, min_width=320):
422
+ gr.Markdown("### Evaluation Filters")
423
+ eval_tech_filter = gr.CheckboxGroup(
424
+ choices=tech_choices,
425
+ value=tech_choices,
426
+ label="Technology",
427
+ interactive=not evaluation_disabled,
428
+ )
429
+ eval_snr_filter = gr.Dropdown(
430
+ choices=snr_choices,
431
+ value=None,
432
+ multiselect=True,
433
+ label="SNR (Empty = All)",
434
+ interactive=not evaluation_disabled,
435
+ )
436
+ eval_mod_filter = gr.Dropdown(
437
+ choices=mod_choices,
438
+ value=None,
439
+ multiselect=True,
440
+ label="Modulation (Empty = All)",
441
+ interactive=not evaluation_disabled,
442
+ )
443
+ eval_mob_filter = gr.Dropdown(
444
+ choices=mob_choices,
445
+ value=None,
446
+ multiselect=True,
447
+ label="Mobility (Empty = All)",
448
+ interactive=not evaluation_disabled,
449
+ )
450
+
451
+ gr.Markdown("### Prototype Settings")
452
+ train_pct = gr.Slider(
453
+ minimum=10,
454
+ maximum=80,
455
+ step=5,
456
+ value=60,
457
+ label="Training Percentage (%)",
458
+ interactive=not evaluation_disabled,
459
+ )
460
+ seed = gr.Slider(
461
+ minimum=0,
462
+ maximum=9999,
463
+ step=1,
464
+ value=42,
465
+ label="Random Seed",
466
+ interactive=not evaluation_disabled,
467
+ )
468
+ eval_btn = gr.Button("Run Evaluation", variant="primary", interactive=not evaluation_disabled)
469
+
470
+ with gr.Column(scale=3):
471
+ eval_plot = gr.Plot(label="Prototype Confusion Matrix")
472
+ eval_status = gr.Textbox(label="Metrics", interactive=False)
473
+
474
+ eval_btn.click(
475
+ run_joint_evaluation,
476
+ inputs=[train_pct, seed, eval_tech_filter, eval_snr_filter, eval_mod_filter, eval_mob_filter],
477
+ outputs=[eval_plot, eval_status],
478
+ )
479
 
480
  if __name__ == "__main__":
481
  demo.launch()
mixture/train_embedding_router.py DELETED
The diff for this file is too large to render. See raw diff
 
mixture/train_top1_router.py DELETED
@@ -1,1039 +0,0 @@
1
- #!/usr/bin/env python3
2
- """Train a communication-router with top-1 hard expert selection.
3
-
4
- The script builds a supervised mixture-of-experts pipeline:
5
-
6
- 1. Gather spectrogram samples for each communication profile (LTE/WiFi/5G).
7
- 2. Train a lightweight CNN router that predicts the communication label.
8
- 3. (Optional) Attach pre-trained experts and evaluate top-1 hard routing by
9
- running only the expert picked by the router's argmax for each sample.
10
-
11
- Expert checkpoints are expected to be LWM-based classifiers (for example those
12
- produced by `task2/train_joint_snr_mobility.py` or earlier mobility fine-tuning
13
- pipelines).
14
- Their architecture is inferred from the checkpoint to avoid manual plumbing.
15
-
16
- Example:
17
-
18
- ```bash
19
- python mixture/train_top1_router.py \
20
- --data-root spectrograms \
21
- --cities city_1_losangeles \
22
- --comm-types LTE WiFi 5G \
23
- --task snr_mobility \
24
- --mobilities vehicular pedestrian \
25
- --snrs SNR-5dB SNR0dB SNR5dB SNR10dB SNR15dB \
26
- --max-samples-per-comm 6000 \
27
- --max-per-combo 400 \
28
- --epochs 25 \
29
- --batch-size 128 \
30
- --lr 3e-4 \
31
- --output-dir mixture/runs/top1_router \
32
- --expert LTE=models/doppler_finetuned_binary/lte/lwm_lte_doppler_val90.67.pth \
33
- --expert WiFi=models/doppler_finetuned_binary/wifi/lwm_wifi_doppler_val95.01.pth \
34
- --expert 5G=models/doppler_finetuned_binary/5g/lwm_5g_doppler_val96.05.pth
35
- ```
36
- """
37
-
38
- from __future__ import annotations
39
-
40
- import argparse
41
- import json
42
- import random
43
- from collections import Counter, defaultdict
44
- from dataclasses import dataclass
45
- from pathlib import Path
46
- from typing import Dict, List, Mapping, MutableMapping, Optional, Sequence, Tuple
47
-
48
- import glob
49
- import numpy as np
50
- import torch
51
- import torch.nn as nn
52
- import torch.nn.functional as F
53
- from torch.amp import GradScaler, autocast
54
- from torch.utils.data import DataLoader, Dataset
55
-
56
-
57
- try:
58
- from task1.train_mcs_models import (
59
- MODULATION_LABELS,
60
- identify_modulation,
61
- load_all_samples,
62
- normalize_per_sample,
63
- _extract_metadata,
64
- )
65
- except ImportError as exc: # pragma: no cover - safety net
66
- raise ImportError(
67
- "Failed to import helpers from task1.train_mcs_models. "
68
- "Ensure the repository root is on PYTHONPATH."
69
- ) from exc
70
-
71
- try:
72
- from task2.train_joint_snr_mobility import snr_sort_key
73
- except ImportError: # pragma: no cover - fallback if task2 module is unavailable
74
-
75
- def snr_sort_key(snr: str) -> Tuple[int, str]:
76
- import re
77
-
78
- match = re.search(r"SNR(-?\d+)dB", snr)
79
- if match:
80
- return int(match.group(1)), snr
81
- return 0, snr
82
-
83
-
84
- from pretraining.pretrained_model import lwm as lwm_model
85
-
86
-
87
- COMM_CANONICAL = {"lte": "LTE", "wifi": "WiFi", "5g": "5G"}
88
-
89
-
90
- def canonical_comm_name(name: str) -> str:
91
- lower = name.strip().lower()
92
- if lower in COMM_CANONICAL:
93
- return COMM_CANONICAL[lower]
94
- for canonical in COMM_CANONICAL.values():
95
- if canonical.lower() == lower:
96
- return canonical
97
- raise ValueError(f"Unknown communication type: {name}")
98
-
99
-
100
- @dataclass(slots=True)
101
- class SampleMetadata:
102
- comm: str
103
- modulation: str
104
- snr: str
105
- mobility: str
106
- rate: str
107
- source: str
108
-
109
-
110
- @dataclass(slots=True)
111
- class ExpertSpec:
112
- comm: str
113
- checkpoint: Path
114
- stats_path: Optional[Path]
115
-
116
-
117
- class RoutedSpectrogramDataset(Dataset):
118
- """Spectrogram dataset that tracks both router and downstream labels."""
119
-
120
- def __init__(
121
- self,
122
- specs: np.ndarray,
123
- comm_labels: np.ndarray,
124
- task_labels: np.ndarray,
125
- metadata: List[SampleMetadata],
126
- ) -> None:
127
- if not (len(specs) == len(comm_labels) == len(task_labels) == len(metadata)):
128
- raise ValueError("All dataset inputs must have the same length")
129
- self.specs = torch.from_numpy(specs.astype(np.float32, copy=False))
130
- self.comm_labels = torch.from_numpy(comm_labels.astype(np.int64, copy=False))
131
- self.task_labels = torch.from_numpy(task_labels.astype(np.int64, copy=False))
132
- self.metadata = metadata
133
-
134
- def __len__(self) -> int:
135
- return self.specs.shape[0]
136
-
137
- def __getitem__(self, idx: int) -> Tuple[torch.Tensor, int, int]:
138
- return self.specs[idx], int(self.comm_labels[idx]), int(self.task_labels[idx])
139
-
140
-
141
- class RouterNet(nn.Module):
142
- """Lightweight CNN router for 128×128 spectrogram inputs."""
143
-
144
- def __init__(self, num_comm: int, dropout: float = 0.1) -> None:
145
- super().__init__()
146
- self.features = nn.Sequential(
147
- nn.Conv2d(1, 32, kernel_size=5, stride=2, padding=2),
148
- nn.BatchNorm2d(32),
149
- nn.SiLU(inplace=True),
150
- nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1),
151
- nn.BatchNorm2d(64),
152
- nn.SiLU(inplace=True),
153
- nn.Conv2d(64, 96, kernel_size=3, stride=2, padding=1),
154
- nn.BatchNorm2d(96),
155
- nn.SiLU(inplace=True),
156
- nn.Conv2d(96, 128, kernel_size=3, stride=2, padding=1),
157
- nn.BatchNorm2d(128),
158
- nn.SiLU(inplace=True),
159
- nn.AdaptiveAvgPool2d((1, 1)),
160
- )
161
- head_layers: List[nn.Module] = [nn.Flatten()]
162
- if dropout > 0:
163
- head_layers.append(nn.Dropout(dropout))
164
- head_layers.append(nn.Linear(128, num_comm))
165
- self.classifier = nn.Sequential(*head_layers)
166
-
167
- def forward(self, specs: torch.Tensor) -> torch.Tensor:
168
- x = specs
169
- if x.dim() == 3:
170
- x = x.unsqueeze(1)
171
- elif x.dim() != 4:
172
- raise ValueError(f"Expected specs rank 3 or 4, got shape {tuple(specs.shape)}")
173
- features = self.features(x)
174
- logits = self.classifier(features)
175
- return logits
176
-
177
-
178
- def set_seed(seed: int) -> None:
179
- random.seed(seed)
180
- np.random.seed(seed)
181
- torch.manual_seed(seed)
182
- if torch.cuda.is_available():
183
- torch.cuda.manual_seed(seed)
184
- torch.cuda.manual_seed_all(seed)
185
-
186
-
187
- def parse_expert_definitions(entries: Sequence[str]) -> Dict[str, ExpertSpec]:
188
- experts: Dict[str, ExpertSpec] = {}
189
- for entry in entries:
190
- if "=" not in entry:
191
- raise ValueError(f"Expert definition must use COMM=path syntax (got: {entry})")
192
- comm_part, _, path_part = entry.partition("=")
193
- comm = canonical_comm_name(comm_part)
194
- if not path_part:
195
- raise ValueError(f"Missing checkpoint path for expert '{comm}'")
196
- if ":" in path_part:
197
- ckpt_str, stats_str = path_part.split(":", 1)
198
- stats_path = Path(stats_str).expanduser().resolve()
199
- else:
200
- ckpt_str = path_part
201
- stats_path = None
202
- checkpoint = Path(ckpt_str).expanduser().resolve()
203
- if not checkpoint.exists():
204
- raise FileNotFoundError(f"Expert checkpoint not found: {checkpoint}")
205
- if stats_path is not None and not stats_path.exists():
206
- raise FileNotFoundError(f"Dataset stats file not found: {stats_path}")
207
- experts[comm] = ExpertSpec(comm=comm, checkpoint=checkpoint, stats_path=stats_path)
208
- return experts
209
-
210
-
211
- def discover_stats_path(comm: str, defaults_root: Path) -> Optional[Path]:
212
- candidates = [
213
- defaults_root / f"{comm}_models" / "dataset_stats.json",
214
- defaults_root / f"{comm.lower()}_models" / "dataset_stats.json",
215
- defaults_root / comm / "dataset_stats.json",
216
- defaults_root / comm.lower() / "dataset_stats.json",
217
- ]
218
- for candidate in candidates:
219
- if candidate.exists():
220
- return candidate
221
- return None
222
-
223
-
224
- def load_dataset_stats(stats_path: Optional[Path]) -> Mapping[str, float | str]:
225
- if stats_path is None:
226
- return {"mean": 0.0, "std": 1.0, "normalization": "per_sample"}
227
- with open(stats_path, "r", encoding="utf-8") as fh:
228
- return json.load(fh)
229
-
230
-
231
- def _collect_candidate_files(
232
- *,
233
- data_root: Path,
234
- cities: Sequence[str],
235
- comm: str,
236
- snr_filters: Optional[Sequence[str]],
237
- mobility_filters: Optional[Sequence[str]],
238
- modulation_filters: Optional[Sequence[str]],
239
- fft_filters: Optional[Sequence[str]],
240
- ) -> List[Tuple[Path, SampleMetadata]]:
241
- mobility_set = set(mobility_filters) if mobility_filters else None
242
- snr_set = set(snr_filters) if snr_filters else None
243
- modulation_set = {m.upper() for m in modulation_filters} if modulation_filters else None
244
- fft_set = set(fft_filters) if fft_filters else None
245
-
246
- candidates: List[Tuple[Path, SampleMetadata]] = []
247
- for city in cities:
248
- base = data_root / city / comm
249
- if not base.exists():
250
- continue
251
- pattern = str(base / "**" / "spectrograms" / "*.pkl")
252
- for path_str in glob.iglob(pattern, recursive=True):
253
- path = Path(path_str)
254
- _, modulation = identify_modulation(str(path))
255
- if modulation is None:
256
- continue
257
- if modulation_set is not None and modulation.upper() not in modulation_set:
258
- continue
259
- rate, snr, mobility = _extract_metadata(path.parts)
260
- if mobility_set is not None and mobility not in mobility_set:
261
- continue
262
- if snr_set is not None and snr not in snr_set:
263
- continue
264
- fft_folder = next((part for part in path.parts if part.startswith("win")), None)
265
- if fft_set is not None and fft_folder not in fft_set:
266
- continue
267
- meta = SampleMetadata(
268
- comm=comm,
269
- modulation=modulation,
270
- snr=snr,
271
- mobility=mobility,
272
- rate=rate,
273
- source=str(path),
274
- )
275
- candidates.append((path, meta))
276
- return candidates
277
-
278
-
279
- def _sample_from_file(
280
- array: np.ndarray,
281
- take: int,
282
- rng: np.random.Generator,
283
- ) -> np.ndarray:
284
- if take <= 0 or array.shape[0] == 0:
285
- return np.empty((0, 128, 128), dtype=np.float32)
286
- if take >= array.shape[0]:
287
- return array.astype(np.float32, copy=False)
288
- indices = rng.choice(array.shape[0], size=take, replace=False)
289
- return array[indices].astype(np.float32, copy=False)
290
-
291
-
292
- def collect_spectrograms_for_comm(
293
- *,
294
- data_root: Path,
295
- cities: Sequence[str],
296
- comm: str,
297
- snrs: Optional[Sequence[str]],
298
- mobilities: Optional[Sequence[str]],
299
- modulations: Optional[Sequence[str]],
300
- fft_folders: Optional[Sequence[str]],
301
- max_samples: int,
302
- max_per_combo: Optional[int],
303
- rng: np.random.Generator,
304
- ) -> Tuple[np.ndarray, List[SampleMetadata]]:
305
- candidates = _collect_candidate_files(
306
- data_root=data_root,
307
- cities=cities,
308
- comm=comm,
309
- snr_filters=snrs,
310
- mobility_filters=mobilities,
311
- modulation_filters=modulations,
312
- fft_filters=fft_folders,
313
- )
314
- if not candidates:
315
- raise RuntimeError(f"No spectrogram files matched filters for {comm}")
316
-
317
- rng.shuffle(candidates)
318
- combo_counts: MutableMapping[Tuple[str, str, str], int] = defaultdict(int)
319
- collected: List[np.ndarray] = []
320
- metadata: List[SampleMetadata] = []
321
- remaining: Optional[int] = max_samples if max_samples > 0 else None
322
- per_combo_limit: Optional[int] = max_per_combo if (max_per_combo is not None and max_per_combo > 0) else None
323
-
324
- for path, meta in candidates:
325
- if remaining is not None and remaining <= 0:
326
- break
327
- combo_key = (meta.modulation, meta.snr, meta.mobility)
328
- already = combo_counts[combo_key]
329
- if per_combo_limit is not None and already >= per_combo_limit:
330
- continue
331
-
332
- try:
333
- specs = load_all_samples(str(path))
334
- except Exception as exc: # pragma: no cover - guard against corrupted files
335
- print(f"[WARN] Failed to load {path}: {exc}")
336
- continue
337
-
338
- if specs.size == 0:
339
- continue
340
-
341
- remaining_for_combo = per_combo_limit - already if per_combo_limit is not None else specs.shape[0]
342
- allowed = min(remaining_for_combo, specs.shape[0])
343
- if remaining is not None:
344
- allowed = min(allowed, remaining)
345
- if allowed <= 0:
346
- continue
347
- chosen = _sample_from_file(specs, allowed, rng)
348
- if chosen.size == 0:
349
- continue
350
-
351
- collected.append(chosen)
352
- metadata.extend([meta] * chosen.shape[0])
353
- combo_counts[combo_key] += chosen.shape[0]
354
- if remaining is not None:
355
- remaining -= chosen.shape[0]
356
-
357
- if not collected:
358
- raise RuntimeError(f"Unable to collect samples for {comm} after applying limits")
359
-
360
- stacked = np.concatenate(collected, axis=0)
361
- return stacked.astype(np.float32, copy=False), metadata
362
-
363
-
364
- def stratified_split(
365
- labels: np.ndarray,
366
- *,
367
- train_ratio: float,
368
- val_ratio: float,
369
- seed: int,
370
- ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
371
- if not (0 < train_ratio < 1) or not (0 < val_ratio < 1):
372
- raise ValueError("train_ratio and val_ratio must be in (0, 1)")
373
- if train_ratio + val_ratio >= 1.0:
374
- raise ValueError("train_ratio + val_ratio must be < 1.0")
375
-
376
- rng = np.random.default_rng(seed)
377
- train_indices: List[int] = []
378
- val_indices: List[int] = []
379
- test_indices: List[int] = []
380
-
381
- for label in np.unique(labels):
382
- idx = np.flatnonzero(labels == label)
383
- if idx.size < 3:
384
- raise ValueError(f"Not enough samples for label {label} to form splits (need >=3, have {idx.size})")
385
- rng.shuffle(idx)
386
- train_end = int(round(train_ratio * idx.size))
387
- val_end = train_end + int(round(val_ratio * idx.size))
388
- train_indices.extend(idx[:train_end])
389
- val_indices.extend(idx[train_end:val_end])
390
- test_indices.extend(idx[val_end:])
391
-
392
- return (
393
- np.array(train_indices, dtype=np.int64),
394
- np.array(val_indices, dtype=np.int64),
395
- np.array(test_indices, dtype=np.int64),
396
- )
397
-
398
-
399
- def build_dataloaders(
400
- dataset: RoutedSpectrogramDataset,
401
- train_idx: np.ndarray,
402
- val_idx: np.ndarray,
403
- test_idx: np.ndarray,
404
- *,
405
- batch_size: int,
406
- num_workers: int,
407
- ) -> Tuple[DataLoader, DataLoader, DataLoader]:
408
- def subset(indices: np.ndarray) -> RoutedSpectrogramDataset:
409
- specs = dataset.specs[indices].numpy()
410
- comm = dataset.comm_labels[indices].numpy()
411
- task = dataset.task_labels[indices].numpy()
412
- meta = [dataset.metadata[int(i)] for i in indices]
413
- return RoutedSpectrogramDataset(specs, comm, task, meta)
414
-
415
- train_ds = subset(train_idx)
416
- val_ds = subset(val_idx)
417
- test_ds = subset(test_idx)
418
-
419
- train_loader = DataLoader(
420
- train_ds,
421
- batch_size=batch_size,
422
- shuffle=True,
423
- drop_last=len(train_ds) > batch_size,
424
- num_workers=num_workers,
425
- pin_memory=torch.cuda.is_available(),
426
- )
427
- val_loader = DataLoader(
428
- val_ds,
429
- batch_size=batch_size,
430
- shuffle=False,
431
- num_workers=num_workers,
432
- pin_memory=torch.cuda.is_available(),
433
- )
434
- test_loader = DataLoader(
435
- test_ds,
436
- batch_size=batch_size,
437
- shuffle=False,
438
- num_workers=num_workers,
439
- pin_memory=torch.cuda.is_available(),
440
- )
441
- return train_loader, val_loader, test_loader
442
-
443
-
444
- def infer_expert_signature(state_dict: Mapping[str, torch.Tensor]) -> Dict[str, object]:
445
- keys = set(state_dict.keys())
446
- # Determine input dimension (128 vs 130 if stats appended).
447
- layer_norm_weight = state_dict.get("classifier.0.weight")
448
- if layer_norm_weight is None:
449
- raise ValueError("Unable to infer classifier input dimension from checkpoint")
450
- input_dim = layer_norm_weight.numel()
451
- append_input_stats = input_dim > 128
452
-
453
- # Determine classifier type.
454
- if any(k.startswith("classifier.1.conv1") for k in keys):
455
- head_type = "res1dcnn"
456
- elif "classifier.1.weight" in keys:
457
- head_type = "mlp"
458
- elif "classifier.weight" in keys:
459
- head_type = "linear"
460
- else:
461
- raise ValueError("Unrecognized classifier architecture in checkpoint")
462
-
463
- # Hidden width for MLP head.
464
- classifier_dim = None
465
- if head_type == "mlp":
466
- classifier_dim = int(state_dict["classifier.1.weight"].shape[0])
467
-
468
- # Projection head dimensionality.
469
- if "projection_head.0.weight" in keys:
470
- projection_dim = int(state_dict["projection_head.0.weight"].shape[0])
471
- else:
472
- projection_dim = 0
473
-
474
- # Number of output classes from final linear weight.
475
- if head_type == "linear":
476
- num_classes = int(state_dict["classifier.weight"].shape[0])
477
- elif head_type == "mlp":
478
- num_classes = int(state_dict["classifier.2.weight"].shape[0])
479
- else: # res1dcnn
480
- num_classes = int(state_dict["classifier.1.fc.weight"].shape[0])
481
-
482
- return {
483
- "append_input_stats": append_input_stats,
484
- "input_dim": input_dim,
485
- "head_type": head_type,
486
- "classifier_dim": classifier_dim if classifier_dim is not None else 128,
487
- "projection_dim": projection_dim,
488
- "num_classes": num_classes,
489
- }
490
-
491
-
492
- def load_expert_model(
493
- spec: ExpertSpec,
494
- stats_root: Path,
495
- device: torch.device,
496
- ) -> Tuple[str, nn.Module, int]:
497
- raw_state = torch.load(spec.checkpoint, map_location="cpu")
498
- if any(k.startswith("module.") for k in raw_state):
499
- raw_state = {k.replace("module.", "", 1): v for k, v in raw_state.items()}
500
-
501
- signature = infer_expert_signature(raw_state)
502
-
503
- stats_path = spec.stats_path
504
- if stats_path is None:
505
- stats_path = discover_stats_path(spec.comm, stats_root)
506
- stats = load_dataset_stats(stats_path)
507
-
508
- model = lwm_model(element_length=16, d_model=128, n_layers=12, max_len=1025, n_heads=8, dropout=0.1)
509
- backbone_state = {
510
- k.split("backbone.", 1)[1]: v
511
- for k, v in raw_state.items()
512
- if k.startswith("backbone.")
513
- }
514
- model.load_state_dict(backbone_state, strict=False)
515
-
516
- classifier = LWMClassifierMinimalAdapter(
517
- backbone=model,
518
- num_classes=int(signature["num_classes"]),
519
- classifier_dim=int(signature["classifier_dim"]),
520
- head_type=str(signature["head_type"]),
521
- append_input_stats=bool(signature["append_input_stats"]),
522
- projection_dim=int(signature["projection_dim"]),
523
- normalization_stats=stats,
524
- )
525
- classifier.load_state_dict(raw_state, strict=True)
526
- classifier.eval()
527
- classifier.to(device)
528
- for param in classifier.parameters():
529
- param.requires_grad_(False)
530
- return spec.comm, classifier, int(signature["num_classes"])
531
-
532
-
533
- class LWMClassifierMinimalAdapter(nn.Module):
534
- """Thin wrapper matching task2.mobility_utils.LWMClassifierMinimal."""
535
-
536
- def __init__(
537
- self,
538
- *,
539
- backbone: nn.Module,
540
- num_classes: int,
541
- classifier_dim: int,
542
- head_type: str,
543
- append_input_stats: bool,
544
- projection_dim: int,
545
- normalization_stats: Mapping[str, float | str],
546
- ) -> None:
547
- super().__init__()
548
- from task2.mobility_utils import LWMClassifierMinimal # local import to avoid cycle
549
-
550
- self.inner = LWMClassifierMinimal(
551
- backbone=backbone,
552
- num_classes=num_classes,
553
- classifier_dim=classifier_dim,
554
- dropout=0.0,
555
- trainable_layers=0,
556
- projection_dim=projection_dim,
557
- append_input_stats=append_input_stats,
558
- normalization_stats=normalization_stats,
559
- head_type=head_type,
560
- )
561
-
562
- def forward(self, specs: torch.Tensor) -> torch.Tensor:
563
- return self.inner(specs)
564
-
565
-
566
- @torch.no_grad()
567
- def evaluate_router(
568
- model: nn.Module,
569
- loader: DataLoader,
570
- criterion: nn.Module,
571
- device: torch.device,
572
- ) -> Tuple[float, float, np.ndarray, np.ndarray]:
573
- model.eval()
574
- total_loss = 0.0
575
- correct = 0
576
- seen = 0
577
- y_true: List[int] = []
578
- y_pred: List[int] = []
579
-
580
- for specs, comm_labels, _ in loader:
581
- specs = specs.to(device, non_blocking=True)
582
- comm_labels = torch.as_tensor(comm_labels, device=device)
583
-
584
- logits = model(specs)
585
- loss = criterion(logits, comm_labels)
586
- preds = logits.argmax(dim=1)
587
- total_loss += loss.item() * specs.size(0)
588
- correct += (preds == comm_labels).sum().item()
589
- seen += specs.size(0)
590
- y_true.extend(comm_labels.detach().cpu().tolist())
591
- y_pred.extend(preds.detach().cpu().tolist())
592
-
593
- avg_loss = total_loss / max(seen, 1)
594
- acc = correct / max(seen, 1)
595
- return avg_loss, acc, np.array(y_true, dtype=np.int64), np.array(y_pred, dtype=np.int64)
596
-
597
-
598
- def compute_confusion(y_true: np.ndarray, y_pred: np.ndarray, num_classes: int) -> np.ndarray:
599
- matrix = np.zeros((num_classes, num_classes), dtype=np.int64)
600
- for true, pred in zip(y_true, y_pred):
601
- if 0 <= true < num_classes and 0 <= pred < num_classes:
602
- matrix[true, pred] += 1
603
- return matrix
604
-
605
-
606
- @torch.no_grad()
607
- def evaluate_routing(
608
- router: nn.Module,
609
- experts: Mapping[int, Tuple[str, nn.Module]],
610
- loader: DataLoader,
611
- *,
612
- num_comm: int,
613
- num_task_classes: int,
614
- device: torch.device,
615
- routing_mode: str,
616
- routing_topk: int,
617
- ) -> Dict[str, object]:
618
- router.eval()
619
- for _, model in experts.values():
620
- model.eval()
621
-
622
- criterion = nn.CrossEntropyLoss()
623
- total_loss = 0.0
624
- total = 0
625
- correct_router = 0
626
- correct_task = 0
627
-
628
- confusion_router = np.zeros((num_comm, num_comm), dtype=np.int64)
629
- confusion_task = np.zeros((num_task_classes, num_task_classes), dtype=np.int64)
630
- coverage = Counter() # type: ignore[type-arg]
631
-
632
- for specs, comm_labels, task_labels in loader:
633
- specs = specs.to(device, non_blocking=True)
634
- comm_labels = torch.as_tensor(comm_labels, device=device)
635
- task_labels = torch.as_tensor(task_labels, device=device)
636
-
637
- logits = router(specs)
638
- loss = criterion(logits, comm_labels)
639
- probs = torch.softmax(logits, dim=1)
640
- router_pred = probs.argmax(dim=1)
641
-
642
- batch = specs.size(0)
643
- total_loss += loss.item() * batch
644
- total += batch
645
- correct_router += (router_pred == comm_labels).sum().item()
646
-
647
- confusion_router += compute_confusion(
648
- comm_labels.detach().cpu().numpy(),
649
- router_pred.detach().cpu().numpy(),
650
- num_comm,
651
- )
652
-
653
- if not experts:
654
- continue
655
-
656
- weights = torch.zeros_like(probs)
657
- if routing_mode == "hard":
658
- weights.scatter_(1, router_pred.unsqueeze(1), 1.0)
659
- elif routing_mode == "soft":
660
- weights = probs
661
- elif routing_mode == "topk":
662
- topk = max(1, min(routing_topk, num_comm))
663
- topk_vals, topk_indices = probs.topk(topk, dim=1)
664
- weights.zero_()
665
- weights.scatter_(1, topk_indices, topk_vals)
666
- else:
667
- raise ValueError(f"Unsupported routing mode: {routing_mode}")
668
-
669
- final_logits = torch.zeros(batch, num_task_classes, device=device)
670
- for comm_idx, (name, expert) in experts.items():
671
- weight_column = weights[:, comm_idx]
672
- if not torch.any(weight_column > 0):
673
- continue
674
- outputs = expert(specs)
675
- if outputs.size(1) != num_task_classes:
676
- raise ValueError(
677
- f"Expert '{name}' returned {outputs.size(1)} classes, expected {num_task_classes}"
678
- )
679
- final_logits += weight_column.unsqueeze(1) * outputs
680
- coverage[name] += float(weight_column.sum().item())
681
-
682
- task_pred = final_logits.argmax(dim=1)
683
- correct_task += (task_pred == task_labels).sum().item()
684
- confusion_task += compute_confusion(
685
- task_labels.detach().cpu().numpy(),
686
- task_pred.detach().cpu().numpy(),
687
- num_task_classes,
688
- )
689
-
690
- metrics: Dict[str, object] = {
691
- "router_loss": total_loss / max(total, 1),
692
- "router_acc": correct_router / max(total, 1),
693
- "router_confusion": confusion_router.tolist(),
694
- "coverage": dict(coverage),
695
- }
696
- if experts:
697
- metrics["task_acc"] = correct_task / max(total, 1)
698
- metrics["task_confusion"] = confusion_task.tolist()
699
- return metrics
700
-
701
-
702
- def modulation_labels_from_metadata(metadata: Sequence[SampleMetadata]) -> np.ndarray:
703
- labels: List[int] = []
704
- for meta in metadata:
705
- label = MODULATION_LABELS.get(meta.modulation.upper())
706
- if label is None:
707
- raise ValueError(f"Unknown modulation label in metadata: {meta.modulation}")
708
- labels.append(label)
709
- return np.array(labels, dtype=np.int64)
710
-
711
-
712
- def snr_mobility_labels_from_metadata(
713
- metadata: Sequence[SampleMetadata],
714
- *,
715
- snr_order: Sequence[str],
716
- mobility_order: Sequence[str],
717
- ) -> Tuple[np.ndarray, Dict[int, Tuple[str, str]]]:
718
- combos: List[Tuple[str, str]] = []
719
- for snr in snr_order:
720
- for mobility in mobility_order:
721
- combos.append((snr, mobility))
722
- combo_to_idx = {combo: idx for idx, combo in enumerate(combos)}
723
-
724
- labels: List[int] = []
725
- for meta in metadata:
726
- combo = (meta.snr, meta.mobility)
727
- if combo not in combo_to_idx:
728
- raise ValueError(f"Sample combo {combo} not present in configured (snr, mobility) grid")
729
- labels.append(combo_to_idx[combo])
730
- mapping = {idx: combo for combo, idx in combo_to_idx.items()}
731
- return np.array(labels, dtype=np.int64), mapping
732
-
733
-
734
- def prepare_dataset(
735
- *,
736
- data_root: Path,
737
- cities: Sequence[str],
738
- comm_types: Sequence[str],
739
- snrs: Optional[Sequence[str]],
740
- mobilities: Optional[Sequence[str]],
741
- modulations: Optional[Sequence[str]],
742
- fft_folders: Optional[Sequence[str]],
743
- max_samples_per_comm: int,
744
- max_per_combo: Optional[int],
745
- task: str,
746
- seed: int,
747
- ) -> Tuple[RoutedSpectrogramDataset, Dict[str, int], Optional[Dict[int, Tuple[str, str]]]]:
748
- rng = np.random.default_rng(seed)
749
- specs_list: List[np.ndarray] = []
750
- comm_labels_list: List[int] = []
751
- metadata_list: List[SampleMetadata] = []
752
- comm_to_idx = {comm: idx for idx, comm in enumerate(comm_types)}
753
-
754
- for comm in comm_types:
755
- samples, metadata = collect_spectrograms_for_comm(
756
- data_root=data_root,
757
- cities=cities,
758
- comm=comm,
759
- snrs=snrs,
760
- mobilities=mobilities,
761
- modulations=modulations,
762
- fft_folders=fft_folders,
763
- max_samples=max_samples_per_comm,
764
- max_per_combo=max_per_combo,
765
- rng=rng,
766
- )
767
- specs_list.append(samples)
768
- metadata_list.extend(metadata)
769
- comm_labels_list.extend([comm_to_idx[comm]] * samples.shape[0])
770
-
771
- specs = np.concatenate(specs_list, axis=0)
772
- metadata = metadata_list
773
- comm_labels = np.array(comm_labels_list, dtype=np.int64)
774
-
775
- order = rng.permutation(specs.shape[0])
776
- specs = specs[order]
777
- comm_labels = comm_labels[order]
778
- metadata = [metadata[idx] for idx in order]
779
-
780
- normalized = normalize_per_sample(specs)
781
-
782
- if task == "modulation":
783
- task_labels = modulation_labels_from_metadata(metadata)
784
- mapping = None
785
- else:
786
- if snrs is None:
787
- snr_order = sorted({meta.snr for meta in metadata}, key=snr_sort_key)
788
- else:
789
- snr_order = [snr for snr in snrs if any(meta.snr == snr for meta in metadata)]
790
- if mobilities is None:
791
- mobility_order = sorted({meta.mobility for meta in metadata})
792
- else:
793
- mobility_order = [mob for mob in mobilities if any(meta.mobility == mob for meta in metadata)]
794
- task_labels, mapping = snr_mobility_labels_from_metadata(
795
- metadata,
796
- snr_order=snr_order,
797
- mobility_order=mobility_order,
798
- )
799
-
800
- dataset = RoutedSpectrogramDataset(normalized, comm_labels, task_labels, metadata)
801
- return dataset, comm_to_idx, mapping
802
-
803
-
804
- def parse_args() -> argparse.Namespace:
805
- parser = argparse.ArgumentParser(description=__doc__)
806
- parser.add_argument("--data-root", type=Path, default=Path("spectrograms"), help="Root directory with spectrogram data")
807
- parser.add_argument("--cities", nargs="*", default=["city_1_losangeles"], help="City folders to include")
808
- parser.add_argument("--comm-types", nargs="*", default=["LTE", "WiFi", "5G"], help="Communication standards to model")
809
- parser.add_argument("--snrs", nargs="*", default=None, help="SNR folders to include")
810
- parser.add_argument("--mobilities", nargs="*", default=None, help="Mobility folders to include")
811
- parser.add_argument("--modulations", nargs="*", default=None, help="Modulation classes to include (default: all)")
812
- parser.add_argument("--fft-folders", nargs="*", default=None, help="Specific FFT/window folders to include")
813
- parser.add_argument("--task", choices=("modulation", "snr_mobility"), default="snr_mobility", help="Downstream task label")
814
- parser.add_argument("--max-samples-per-comm", type=int, default=6000, help="Maximum samples per communication profile")
815
- parser.add_argument("--max-per-combo", type=int, default=512, help="Cap per (modulation,SNR,mobility) combo (0=unbounded)")
816
- parser.add_argument("--seed", type=int, default=42, help="Random seed")
817
- parser.add_argument(
818
- "--routing-mode",
819
- choices=("hard", "soft", "topk"),
820
- default="hard",
821
- help="Routing strategy: hard (top-1), soft (probability-weighted), or topk (restricted soft) (default: %(default)s)",
822
- )
823
- parser.add_argument(
824
- "--routing-topk",
825
- type=int,
826
- default=2,
827
- help="When routing-mode=topk, number of experts to evaluate per sample (default: %(default)s)",
828
- )
829
-
830
- parser.add_argument("--train-ratio", type=float, default=0.7, help="Fraction of data for training")
831
- parser.add_argument("--val-ratio", type=float, default=0.15, help="Fraction of data for validation")
832
- parser.add_argument("--batch-size", type=int, default=128, help="Mini-batch size")
833
- parser.add_argument("--epochs", type=int, default=20, help="Training epochs")
834
- parser.add_argument("--lr", type=float, default=3e-4, help="Learning rate")
835
- parser.add_argument("--weight-decay", type=float, default=1e-4, help="Weight decay for AdamW")
836
- parser.add_argument("--dropout", type=float, default=0.1, help="Router dropout probability")
837
- parser.add_argument("--num-workers", type=int, default=4, help="DataLoader workers")
838
- parser.add_argument("--use-amp", action="store_true", help="Enable mixed precision for router training")
839
- parser.add_argument("--spec-augment", action="store_true", help="Apply SpecAugment to router inputs")
840
- parser.add_argument("--spec-augment-freq", type=int, default=12, help="Frequency mask width for SpecAugment")
841
- parser.add_argument("--spec-augment-time", type=int, default=16, help="Time mask width for SpecAugment")
842
- parser.add_argument("--spec-augment-prob", type=float, default=0.5, help="Probability to apply SpecAugment to a sample")
843
-
844
- parser.add_argument("--expert", action="append", default=[], help="Expert definition COMM=checkpoint[:stats_path]")
845
- parser.add_argument("--expert-stats-root", type=Path, default=Path("models"), help="Root to auto-discover dataset_stats.json")
846
-
847
- parser.add_argument("--output-dir", type=Path, default=Path("mixture/runs/top1_router"), help="Directory for logs and checkpoints")
848
- parser.add_argument("--save-router", action="store_true", help="Save best router state_dict to output directory")
849
-
850
- args = parser.parse_args()
851
-
852
- if args.max_per_combo is not None and args.max_per_combo < 0:
853
- parser.error("--max-per-combo must be >= 0")
854
- if args.spec_augment and not (0.0 <= args.spec_augment_prob <= 1.0):
855
- parser.error("--spec-augment-prob must be between 0 and 1")
856
- if args.max_samples_per_comm <= 0:
857
- parser.error("--max-samples-per-comm must be positive")
858
- if args.train_ratio <= 0 or args.val_ratio <= 0:
859
- parser.error("--train-ratio and --val-ratio must be positive")
860
- if args.train_ratio + args.val_ratio >= 1.0:
861
- parser.error("--train-ratio + --val-ratio must be < 1.0")
862
-
863
- return args
864
-
865
-
866
- def maybe_apply_spec_augment(
867
- specs: torch.Tensor,
868
- *,
869
- enabled: bool,
870
- freq_width: int,
871
- time_width: int,
872
- prob: float,
873
- ) -> torch.Tensor:
874
- if not enabled:
875
- return specs
876
- from task1.train_mcs_models import apply_spec_augment
877
-
878
- return apply_spec_augment(
879
- specs,
880
- freq_mask_width=freq_width,
881
- time_mask_width=time_width,
882
- mask_prob=prob,
883
- )
884
-
885
-
886
- def main() -> None:
887
- args = parse_args()
888
- set_seed(args.seed)
889
-
890
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
891
-
892
- comm_types = [canonical_comm_name(comm) for comm in args.comm_types]
893
- dataset, comm_to_idx, combo_mapping = prepare_dataset(
894
- data_root=args.data_root.expanduser().resolve(),
895
- cities=args.cities,
896
- comm_types=comm_types,
897
- snrs=args.snrs,
898
- mobilities=args.mobilities,
899
- modulations=args.modulations,
900
- fft_folders=args.fft_folders,
901
- max_samples_per_comm=args.max_samples_per_comm,
902
- max_per_combo=args.max_per_combo,
903
- task=args.task,
904
- seed=args.seed,
905
- )
906
- num_comm = len(comm_types)
907
- num_task_classes = int(dataset.task_labels.max()) + 1
908
-
909
- train_idx, val_idx, test_idx = stratified_split(
910
- dataset.comm_labels.numpy(),
911
- train_ratio=args.train_ratio,
912
- val_ratio=args.val_ratio,
913
- seed=args.seed,
914
- )
915
- train_loader, val_loader, test_loader = build_dataloaders(
916
- dataset,
917
- train_idx=train_idx,
918
- val_idx=val_idx,
919
- test_idx=test_idx,
920
- batch_size=args.batch_size,
921
- num_workers=args.num_workers,
922
- )
923
-
924
- router = RouterNet(num_comm=num_comm, dropout=args.dropout).to(device)
925
- criterion = nn.CrossEntropyLoss()
926
- optimizer = torch.optim.AdamW(router.parameters(), lr=args.lr, weight_decay=args.weight_decay)
927
- scaler = GradScaler(enabled=args.use_amp and device.type == "cuda")
928
-
929
- best_state: Optional[Dict[str, torch.Tensor]] = None
930
- best_val_acc = 0.0
931
-
932
- for epoch in range(1, args.epochs + 1):
933
- router.train()
934
- running_loss = 0.0
935
- running_correct = 0
936
- total = 0
937
-
938
- for specs, comm_labels, _ in train_loader:
939
- specs = specs.to(device, non_blocking=True)
940
- comm_labels = torch.as_tensor(comm_labels, device=device)
941
- specs_aug = maybe_apply_spec_augment(
942
- specs,
943
- enabled=args.spec_augment,
944
- freq_width=args.spec_augment_freq,
945
- time_width=args.spec_augment_time,
946
- prob=args.spec_augment_prob,
947
- )
948
-
949
- optimizer.zero_grad(set_to_none=True)
950
- context = autocast(device_type=device.type, enabled=scaler.is_enabled())
951
- with context:
952
- logits = router(specs_aug)
953
- loss = criterion(logits, comm_labels)
954
- if scaler.is_enabled():
955
- scaler.scale(loss).backward()
956
- scaler.step(optimizer)
957
- scaler.update()
958
- else:
959
- loss.backward()
960
- optimizer.step()
961
-
962
- preds = logits.argmax(dim=1)
963
- running_loss += loss.item() * specs.size(0)
964
- running_correct += (preds == comm_labels).sum().item()
965
- total += specs.size(0)
966
-
967
- train_loss = running_loss / max(total, 1)
968
- train_acc = running_correct / max(total, 1)
969
-
970
- val_loss, val_acc, y_true_val, y_pred_val = evaluate_router(router, val_loader, criterion, device)
971
- val_confusion = compute_confusion(y_true_val, y_pred_val, num_comm)
972
-
973
- print(
974
- f"[Epoch {epoch:02d}] train_loss={train_loss:.4f} "
975
- f"train_acc={train_acc:.3f} val_loss={val_loss:.4f} val_acc={val_acc:.3f}"
976
- )
977
-
978
- if val_acc >= best_val_acc:
979
- best_val_acc = val_acc
980
- best_state = {k: v.detach().cpu() for k, v in router.state_dict().items()}
981
- print(f"[Epoch {epoch:02d}] Val confusion matrix:\n{val_confusion}")
982
-
983
- if best_state is None:
984
- best_state = {k: v.detach().cpu() for k, v in router.state_dict().items()}
985
- router.load_state_dict(best_state)
986
-
987
- output_dir = args.output_dir.expanduser().resolve()
988
- output_dir.mkdir(parents=True, exist_ok=True)
989
-
990
- experts: Dict[int, Tuple[str, nn.Module]] = {}
991
- expert_specs = parse_expert_definitions(args.expert)
992
- for comm, spec in expert_specs.items():
993
- comm_idx = comm_to_idx.get(comm)
994
- if comm_idx is None:
995
- print(f"[WARN] Expert for {comm} provided but communication not in dataset; skipping")
996
- continue
997
- name, model, out_classes = load_expert_model(
998
- spec,
999
- stats_root=args.expert_stats_root.expanduser().resolve(),
1000
- device=device,
1001
- )
1002
- if out_classes != num_task_classes:
1003
- print(
1004
- f"[WARN] Expert '{name}' outputs {out_classes} classes, "
1005
- f"but dataset task expects {num_task_classes}. Skipping expert."
1006
- )
1007
- continue
1008
- experts[comm_idx] = (name, model)
1009
-
1010
- test_metrics = evaluate_routing(
1011
- router,
1012
- experts,
1013
- test_loader,
1014
- num_comm=num_comm,
1015
- num_task_classes=num_task_classes,
1016
- device=device,
1017
- routing_mode=args.routing_mode,
1018
- routing_topk=args.routing_topk,
1019
- )
1020
- print("[RESULT] Test metrics:")
1021
- print(json.dumps(test_metrics, indent=2))
1022
-
1023
- metrics_path = output_dir / "metrics.json"
1024
- with open(metrics_path, "w", encoding="utf-8") as fh:
1025
- json.dump(test_metrics, fh, indent=2)
1026
-
1027
- if combo_mapping is not None:
1028
- mapping_path = output_dir / "snr_mobility_mapping.json"
1029
- with open(mapping_path, "w", encoding="utf-8") as fh:
1030
- json.dump({int(k): v for k, v in combo_mapping.items()}, fh, indent=2)
1031
-
1032
- if args.save_router:
1033
- ckpt_path = output_dir / "router_top1_state_dict.pth"
1034
- torch.save(best_state, ckpt_path)
1035
- print(f"[INFO] Saved router checkpoint to {ckpt_path}")
1036
-
1037
-
1038
- if __name__ == "__main__":
1039
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
pretraining/__pycache__/__init__.cpython-311.pyc DELETED
Binary file (159 Bytes)
 
pretraining/__pycache__/pretrained_model.cpython-311.pyc DELETED
Binary file (14.6 kB)
 
pretraining/pretrained_model.py CHANGED
@@ -178,10 +178,3 @@ def lwm(*args, **kwargs) -> LWM:
178
  """Factory to preserve backward compatibility with older imports."""
179
 
180
  return LWM(*args, **kwargs)
181
-
182
-
183
- class PretrainedLWM(LWM):
184
- """Alias retained for compatibility with existing inference scripts."""
185
-
186
- def __init__(self, *args, **kwargs) -> None:
187
- super().__init__(*args, **kwargs)
 
178
  """Factory to preserve backward compatibility with older imports."""
179
 
180
  return LWM(*args, **kwargs)
 
 
 
 
 
 
 
task1/plot_tsne.py DELETED
@@ -1,802 +0,0 @@
1
- #!/usr/bin/env python3
2
- """Visualise how strongly metadata drives the learned embedding space.
3
-
4
- This script mirrors the functionality of ``task1/plot_mod_tsne.py`` but groups
5
- spectrograms by their SNR folder name (e.g. ``SNR0dB``) instead of modulation.
6
- It is useful for checking whether the self-supervised LWM backbone mostly
7
- captures channel/SNR differences rather than modulation characteristics.
8
-
9
- Pass ``--label-field modulation`` to reuse the same sampled spectrograms while
10
- colouring and scoring them by their modulation folder instead of SNR. Use
11
- ``--label-field mobility`` to highlight link-level mobility categories when
12
- present in the dataset tree. Saved figures automatically include the detected
13
- communication profile (e.g. LTE/WiFi/5G) and label mode in the filename when
14
- those suffixes are not already present.
15
-
16
- Usage example:
17
-
18
- ```bash
19
- python task1/plot_snr_tsne.py \
20
- --data-root spectrograms/city_1_losangeles/LTE \
21
- --snrs SNR-5dB,SNR0dB,SNR10dB,SNR15dB,SNR20dB,SNR25dB \
22
- --save-path task1/snr_separation_plot_latest.png
23
- ```
24
- Shortcut presets:
25
-
26
- ```bash
27
- python task1/plot_snr_tsne.py --WiFi --report-metrics
28
- ```
29
- """
30
-
31
- from __future__ import annotations
32
-
33
- import argparse
34
- import glob
35
- import pickle
36
- import random
37
- import re
38
- from pathlib import Path
39
- from collections import Counter, defaultdict
40
- from typing import Dict, Iterable, List, Tuple
41
-
42
- import matplotlib.pyplot as plt
43
- import numpy as np
44
- import torch
45
- from sklearn.manifold import TSNE
46
- from sklearn.metrics import silhouette_score
47
- from sklearn.model_selection import StratifiedKFold
48
- from sklearn.neighbors import KNeighborsClassifier
49
- from sklearn.preprocessing import StandardScaler
50
-
51
- from pretraining.pretrained_model import lwm as lwm_model
52
- from utils import load_spectrogram_data # support .mat and .pkl uniformly
53
-
54
-
55
- DEFAULT_DATA_ROOT = "spectrograms/city_1_losangeles/LTE"
56
- DEFAULT_MODELS_ROOT = "models/LTE_models"
57
-
58
- PROFILE_PRESETS: Dict[str, Dict[str, str]] = {
59
- "LTE": {
60
- "data_root": DEFAULT_DATA_ROOT,
61
- "models_root": DEFAULT_MODELS_ROOT,
62
- },
63
- "WiFi": {
64
- "data_root": "spectrograms/city_1_losangeles/WiFi",
65
- "models_root": "models/WiFi_models",
66
- },
67
- "5G": {
68
- "data_root": "spectrograms/city_1_losangeles/5G",
69
- "models_root": "models/5G_models",
70
- },
71
- }
72
-
73
-
74
- def normalize_per_sample(specs: np.ndarray, eps: float = 1e-6) -> np.ndarray:
75
- means = specs.mean(axis=(1, 2), keepdims=True)
76
- stds = specs.std(axis=(1, 2), keepdims=True)
77
- stds = np.maximum(stds, eps)
78
- return ((specs - means) / stds).astype(np.float32, copy=False)
79
-
80
-
81
- def normalize_dataset(specs: np.ndarray, eps: float = 1e-6) -> np.ndarray:
82
- mean = float(specs.mean())
83
- std = float(specs.std())
84
- std = max(std, eps)
85
- return ((specs - mean) / std).astype(np.float32, copy=False)
86
-
87
-
88
- # ---------------------------------------------------------------------------
89
- # Utility helpers
90
- # ---------------------------------------------------------------------------
91
-
92
- def parse_args() -> argparse.Namespace:
93
- parser = argparse.ArgumentParser(description=__doc__)
94
- parser.add_argument(
95
- "--data-root",
96
- default=DEFAULT_DATA_ROOT,
97
- help="Root directory containing modulation folders (default: %(default)s)",
98
- )
99
- parser.add_argument(
100
- "--modulation",
101
- default="all",
102
- help="Modulation folder to load (default: %(default)s)",
103
- )
104
- parser.add_argument(
105
- "--snrs",
106
- default="SNR-5dB,SNR0dB,SNR5dB,SNR10dB,SNR15dB,SNR20dB,SNR25dB",
107
- help=(
108
- "Comma-separated list of SNR folder names to include. Pass 'all' "
109
- "to include every SNR discovered under the modulation (default: %(default)s)"
110
- ),
111
- )
112
- parser.add_argument(
113
- "--mobility",
114
- nargs="+",
115
- default=["all"],
116
- help=(
117
- "Mobility folder(s) to filter on. Pass 'all' to include every mobility "
118
- "(default: %(default)s). Multiple values can be provided either as a "
119
- "space-separated list (e.g. '--mobility vehicular pedestrian') or a "
120
- "comma-separated string."
121
- ),
122
- )
123
- parser.add_argument(
124
- "--fft-folder",
125
- default="all",
126
- help=(
127
- "FFT size folder name to use. Pass 'all' to include every FFT variant "
128
- "(default: %(default)s)"
129
- ),
130
- )
131
- parser.add_argument(
132
- "--samples-per-snr",
133
- type=int,
134
- default=500,
135
- help="Maximum number of samples to draw for each SNR label",
136
- )
137
- parser.add_argument(
138
- "--seed",
139
- type=int,
140
- default=42,
141
- help="Random seed for sampling and t-SNE",
142
- )
143
- parser.add_argument(
144
- "--pooling",
145
- choices=("mean", "cls"),
146
- default="mean",
147
- help="How to collapse token embeddings into a single vector",
148
- )
149
- parser.add_argument(
150
- "--save-path",
151
- default="task1/snr_separation_plot_latest.png",
152
- help="Location to save the generated figure (default: %(default)s)",
153
- )
154
- parser.add_argument(
155
- "--checkpoint",
156
- default=None,
157
- help="Optional explicit checkpoint path; overrides automatic latest selection",
158
- )
159
- parser.add_argument(
160
- "--models-root",
161
- default=DEFAULT_MODELS_ROOT,
162
- help=(
163
- "Directory containing checkpoints. When --checkpoint is not given, "
164
- "the latest/best checkpoint inside this directory will be used "
165
- "(default: %(default)s)"
166
- ),
167
- )
168
- preset_group = parser.add_mutually_exclusive_group()
169
- preset_group.add_argument(
170
- "--profile",
171
- dest="profile",
172
- choices=tuple(PROFILE_PRESETS.keys()),
173
- help=(
174
- "Convenience preset that sets --data-root and --models-root when they "
175
- "are left at their defaults"
176
- ),
177
- )
178
- preset_group.add_argument(
179
- "--LTE",
180
- dest="profile",
181
- action="store_const",
182
- const="LTE",
183
- help="Shortcut for --profile LTE",
184
- )
185
- preset_group.add_argument(
186
- "--WiFi",
187
- dest="profile",
188
- action="store_const",
189
- const="WiFi",
190
- help="Shortcut for --profile WiFi",
191
- )
192
- preset_group.add_argument(
193
- "--5G",
194
- dest="profile",
195
- action="store_const",
196
- const="5G",
197
- help="Shortcut for --profile 5G",
198
- )
199
- parser.add_argument(
200
- "--report-metrics",
201
- action="store_true",
202
- help="Print clustering metrics (silhouette, 5-fold kNN accuracy)",
203
- )
204
- parser.add_argument(
205
- "--metrics-only",
206
- action="store_true",
207
- help="Exit after reporting metrics without running t-SNE or saving figures",
208
- )
209
- parser.add_argument(
210
- "--sampling-mode",
211
- choices=("first", "reservoir"),
212
- default="first",
213
- help="How to down-sample each class (default: first)",
214
- )
215
- parser.add_argument(
216
- "--complex-mode",
217
- choices=("auto", "magnitude", "interleaved"),
218
- default="auto",
219
- help=(
220
- "How to handle complex spectrograms: 'magnitude' (abs), 'interleaved' (real/imag interleaved along width), "
221
- "or 'auto' (prefer interleaved when complex). Real-valued inputs are unaffected."
222
- ),
223
- )
224
- parser.add_argument(
225
- "--label-field",
226
- choices=("snr", "modulation", "mobility"),
227
- default="snr",
228
- help="Choose which label to visualise and score (default: %(default)s)",
229
- )
230
- parser.add_argument(
231
- "--normalization",
232
- choices=("per-sample", "dataset"),
233
- default="per-sample",
234
- help="Normalisation strategy applied before embedding extraction",
235
- )
236
- return parser.parse_args()
237
-
238
-
239
- def find_latest_checkpoint(models_root: Path) -> Path:
240
- """Return a checkpoint path under ``models_root``.
241
-
242
- Works with either a parent directory that contains multiple run folders,
243
- or directly with a single run directory containing ``*.pth`` files.
244
- Chooses the checkpoint with the lowest parsed validation value when
245
- available, else falls back to most-recent modification time.
246
- """
247
-
248
- if not models_root.exists():
249
- raise FileNotFoundError(f"Models root not found: {models_root}")
250
-
251
- if models_root.is_file():
252
- raise FileNotFoundError(f"Expected a directory, got file: {models_root}")
253
-
254
- # If the provided directory itself contains checkpoints, use it directly.
255
- checkpoints = list(models_root.glob("*.pth"))
256
- if not checkpoints:
257
- # Otherwise, look for subdirectories that contain checkpoints and ignore others (e.g., tensorboard)
258
- run_dirs = [p for p in models_root.iterdir() if p.is_dir()]
259
- candidate_runs = [d for d in run_dirs if any(d.glob("*.pth"))]
260
- if not candidate_runs:
261
- raise FileNotFoundError(
262
- f"No checkpoints found under {models_root} (no .pth files in this dir or its run subdirs)"
263
- )
264
- latest_run = max(candidate_runs, key=lambda p: p.stat().st_mtime)
265
- checkpoints = list(latest_run.glob("*.pth"))
266
-
267
- def parse_val_metric(path: Path) -> float | None:
268
- match = re.search(r"_val([0-9]+(?:\.[0-9]+)?)", path.name)
269
- if match:
270
- try:
271
- return float(match.group(1))
272
- except ValueError:
273
- return None
274
- return None
275
-
276
- parsed = [(parse_val_metric(p), p) for p in checkpoints]
277
- valid = [item for item in parsed if item[0] is not None]
278
- if valid:
279
- valid.sort(key=lambda item: item[0])
280
- return valid[0][1]
281
-
282
- # Fallback to most recent modification time
283
- return max(checkpoints, key=lambda p: p.stat().st_mtime)
284
-
285
-
286
- def parse_snr_list(snr_argument: str | None) -> set[str] | None:
287
- if snr_argument is None or snr_argument.lower() == "all":
288
- return None
289
- values = [item.strip() for item in snr_argument.split(",") if item.strip()]
290
- return set(values)
291
-
292
-
293
- def list_snr_samples(
294
- data_root: Path,
295
- modulation: str,
296
- allowed_snrs: set[str] | None,
297
- mobility_filter: set[str] | None,
298
- fft_folder: str,
299
- max_per_class: int,
300
- rng: random.Random,
301
- mode: str,
302
- complex_mode: str,
303
- ) -> Dict[str, List[Tuple[np.ndarray, str, str]]]:
304
- """Collect spectrogram samples grouped by SNR label.
305
-
306
- Supports both legacy PKL layout with a trailing 'spectrograms/' folder and
307
- MATLAB .mat bundles saved directly under the mobility folder.
308
-
309
- Returns: mapping from SNR label to list of tuples: (spec, modulation, mobility)
310
- """
311
-
312
- class_samples: Dict[str, List[Tuple[np.ndarray, str, str]]] = defaultdict(list)
313
- seen_counts: Dict[str, int] = defaultdict(int)
314
-
315
- # Search patterns:
316
- # - PKL under .../spectrograms/*.pkl
317
- # - MAT under .../spectrogram_*.mat
318
- patterns = [
319
- str(data_root / "**" / "spectrograms" / "*.pkl"),
320
- str(data_root / "**" / "spectrogram_*.mat"),
321
- ]
322
-
323
- mobility_set = {"static", "pedestrian", "vehicular"}
324
-
325
- def extract_tokens(rel_parts: Tuple[str, ...]) -> Tuple[str, str, str, str] | None:
326
- # Heuristic extraction to support both layouts
327
- # modulation: first path segment below data_root
328
- if not rel_parts:
329
- return None
330
- modulation_folder = rel_parts[0]
331
-
332
- # snr: first segment like SNR(-?)NdB
333
- snr_folder = next((p for p in rel_parts if re.match(r"^SNR-?\d+dB$", p)), None)
334
- if snr_folder is None:
335
- return None
336
-
337
- # mobility: one of known labels
338
- mobility_folder = next((p for p in rel_parts if p.lower() in mobility_set), None)
339
- if mobility_folder is None:
340
- return None
341
-
342
- # fft/window folder if present (PKL layout), else fallback for MAT
343
- fft_folder_name = next((p for p in rel_parts if p.startswith("win") or p.startswith("fft")), "fft_unknown")
344
-
345
- return modulation_folder, snr_folder, mobility_folder, fft_folder_name
346
-
347
- for pattern in patterns:
348
- for path_str in glob.iglob(pattern, recursive=True):
349
- path = Path(path_str)
350
- try:
351
- rel_parts = path.relative_to(data_root).parts
352
- except ValueError:
353
- continue
354
-
355
- tokens = extract_tokens(rel_parts)
356
- if tokens is None:
357
- continue
358
- modulation_folder, snr_folder, mobility_folder, fft_folder_name = tokens
359
-
360
- # Apply filters
361
- if modulation.lower() != "all" and modulation_folder != modulation:
362
- continue
363
- if allowed_snrs is not None and snr_folder not in allowed_snrs:
364
- continue
365
- if mobility_filter is not None and mobility_folder.lower() not in mobility_filter:
366
- continue
367
- if fft_folder != "all" and fft_folder_name != fft_folder:
368
- continue
369
-
370
- class_label = snr_folder
371
- if mode == "first" and len(class_samples[class_label]) >= max_per_class:
372
- continue
373
-
374
- # Load spectrogram data (supports .pkl and .mat)
375
- try:
376
- arr = load_spectrogram_data(str(path))
377
- except Exception as exc: # pragma: no cover - I/O heavy
378
- print(f"[WARN] Failed to load {path}: {exc}")
379
- continue
380
-
381
- if not isinstance(arr, np.ndarray) or arr.size == 0:
382
- continue
383
-
384
- # If loaded spectrograms are complex, convert according to mode
385
- if np.iscomplexobj(arr):
386
- if complex_mode == "magnitude":
387
- arr = np.abs(arr)
388
- else:
389
- # Interleave real/imag parts along the width dimension
390
- if arr.ndim == 4 and arr.shape[1] == 1:
391
- arr = arr[:, 0]
392
- if arr.ndim == 3:
393
- real = arr.real.astype(np.float32, copy=False)
394
- imag = arr.imag.astype(np.float32, copy=False)
395
- n, h, w = real.shape
396
- inter = np.empty((n, h, w * 2), dtype=np.float32)
397
- inter[:, :, 0::2] = real
398
- inter[:, :, 1::2] = imag
399
- arr = inter
400
- else:
401
- # Fallback to magnitude for unsupported shapes
402
- arr = np.abs(arr)
403
-
404
- # Normalize shapes:
405
- # - (N, H, W)
406
- # - (N, C, H, W) -> collapse channels via mean
407
- if arr.ndim == 4:
408
- # (N, C, H, W) -> (N, H, W)
409
- if arr.shape[1] > 1:
410
- specs = arr.mean(axis=1)
411
- else:
412
- specs = arr[:, 0]
413
- elif arr.ndim == 3:
414
- specs = arr
415
- elif arr.ndim == 2:
416
- specs = arr[None, ...]
417
- else:
418
- print(f"[WARN] Unexpected spectrogram shape in {path}: {arr.shape}")
419
- continue
420
-
421
- for spec in specs:
422
- sample = np.asarray(spec, dtype=np.float32)
423
- bucket = class_samples[class_label]
424
-
425
- if len(bucket) < max_per_class:
426
- bucket.append((sample, modulation_folder, mobility_folder))
427
- seen_counts[class_label] += 1
428
- elif mode == "reservoir":
429
- seen_counts[class_label] += 1
430
- j = rng.randint(0, seen_counts[class_label] - 1)
431
- if j < max_per_class:
432
- bucket[j] = (sample, modulation_folder, mobility_folder)
433
- else: # mode == "first" and already full
434
- break
435
-
436
- return class_samples
437
-
438
-
439
- def sample_balanced_dataset(
440
- class_samples: Dict[str, List[Tuple[np.ndarray, str, str]]],
441
- ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, List[str]]:
442
- """Stack the sampled spectrograms alongside SNR, modulation, and mobility labels."""
443
-
444
- features: List[np.ndarray] = []
445
- snr_labels: List[str] = []
446
- modulation_labels: List[str] = []
447
- mobility_labels: List[str] = []
448
- class_names = sorted(class_samples.keys())
449
-
450
- for class_name in class_names:
451
- samples = class_samples[class_name]
452
- if not samples:
453
- continue
454
- for sample, modulation_label, mobility_label in samples:
455
- features.append(sample)
456
- snr_labels.append(class_name)
457
- modulation_labels.append(modulation_label)
458
- mobility_labels.append(mobility_label)
459
-
460
- if not features:
461
- raise RuntimeError("No spectrogram samples collected for the specified filters")
462
-
463
- stacked = np.stack(features) # [N, 128, 128]
464
- return (
465
- stacked,
466
- np.array(snr_labels),
467
- np.array(modulation_labels),
468
- np.array(mobility_labels),
469
- class_names,
470
- )
471
-
472
-
473
- def unfold_patches_square(x: torch.Tensor, patch_size: int = 4) -> torch.Tensor:
474
- # Input shape: [B, H, W]; extracts (patch_size x patch_size) patches
475
- patches_h = x.unfold(1, patch_size, patch_size)
476
- patches = patches_h.unfold(2, patch_size, patch_size)
477
- return patches.contiguous().view(x.shape[0], -1, patch_size * patch_size)
478
-
479
-
480
- def unfold_patches_rect(x: torch.Tensor, patch_rows: int = 4, patch_cols: int = 8) -> torch.Tensor:
481
- # Input shape: [B, H, W]; extracts (patch_rows x patch_cols) patches (for interleaved complex)
482
- patches_h = x.unfold(1, patch_rows, patch_rows)
483
- patches = patches_h.unfold(2, patch_cols, patch_cols)
484
- return patches.contiguous().view(x.shape[0], -1, patch_rows * patch_cols)
485
-
486
-
487
- def extract_tokens(spec: np.ndarray, device: torch.device, interleaved: bool) -> torch.Tensor:
488
- tensor = torch.from_numpy(spec).unsqueeze(0).to(device)
489
- if interleaved:
490
- # Rectangular patches 4x8 to cover 4x4 complex bins (real+imag)
491
- return unfold_patches_rect(tensor, 4, 8) # [1, 1024, 32]
492
- else:
493
- return unfold_patches_square(tensor, 4) # [1, 1024, 16]
494
-
495
-
496
- def pool_embeddings(
497
- tokens: torch.Tensor,
498
- model: torch.nn.Module,
499
- pooling: str,
500
- ) -> np.ndarray:
501
- # Append CLS token (value 0.2) before passing through the transformer.
502
- cls_token = torch.full((tokens.size(0), 1, tokens.size(-1)), 0.2, device=tokens.device)
503
- inputs = torch.cat([cls_token, tokens], dim=1) # [B, 1025, 16]
504
-
505
- with torch.no_grad():
506
- outputs = model(inputs) # [B, 1025, 128]
507
-
508
- if pooling == "cls":
509
- pooled = outputs[:, 0]
510
- else: # mean pooling across patch tokens (exclude CLS)
511
- pooled = outputs[:, 1:].mean(dim=1)
512
-
513
- return pooled.detach().cpu().numpy()
514
-
515
-
516
def sort_snr_labels(labels: List[str]) -> List[str]:
    """Sort SNR labels numerically (e.g. 'SNR-5dB' < 'SNR0dB' < 'SNR10dB').

    Labels that do not match the ``SNR<value>dB`` pattern sort last
    (they receive an infinite sort key).
    """
    import re

    pattern = re.compile(r'SNR(-?\d+)dB')

    def snr_key(label: str) -> float:
        found = pattern.search(label)
        # Non-SNR labels get +inf so they land at the end of the ordering.
        return float(found.group(1)) if found else float('inf')

    return sorted(labels, key=snr_key)
528
-
529
-
530
def run_tsne(x: np.ndarray, labels: np.ndarray, title: str, ax: plt.Axes) -> None:
    """Project features to 2-D with t-SNE and scatter-plot them by class."""
    features = StandardScaler().fit_transform(x)
    # Guard against NaN/Inf from upstream (normalisation or model outputs).
    features = np.nan_to_num(features, copy=False, nan=0.0, posinf=0.0, neginf=0.0)

    # Keep perplexity valid for sklearn (must be < n_samples) while scaling
    # it with the sample count.
    n_samples = len(features)
    max_perplexity = max(5, min(30, n_samples // 10))
    perplexity = max(min(max_perplexity, n_samples - 1), 5)

    projected = TSNE(n_components=2, perplexity=perplexity, random_state=42).fit_transform(features)

    class_names = sort_snr_labels(list(np.unique(labels)))
    palette = plt.cm.Set3(np.linspace(0, 1, len(class_names)))
    for color, class_name in zip(palette, class_names):
        selected = labels == class_name
        ax.scatter(projected[selected, 0], projected[selected, 1], c=[color], s=18, alpha=0.7, label=class_name)

    # ax.set_title(title, fontsize=14, fontweight="bold") # Title removed for paper
    ax.set_xlabel("t-SNE Component 1", fontsize=16)
    ax.set_ylabel("t-SNE Component 2", fontsize=16)
    ax.tick_params(labelsize=14)
    ax.grid(True, alpha=0.3)
    ax.legend(bbox_to_anchor=(1.02, 1), loc="upper left", fontsize=12)
555
-
556
-
557
def compute_metrics(name: str, features: np.ndarray, labels: np.ndarray) -> None:
    """Print silhouette score and 5-fold 5-NN accuracy for a feature set."""
    if len(np.unique(labels)) < 2:
        # Both metrics are undefined for a single class.
        print(f"[METRIC] {name}: skipped (only one class present)")
        return

    scaled = StandardScaler().fit_transform(features)
    silhouette = silhouette_score(scaled, labels)

    fold_scores: List[float] = []
    splitter = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    for train_idx, test_idx in splitter.split(scaled, labels):
        knn = KNeighborsClassifier(n_neighbors=5)
        knn.fit(scaled[train_idx], labels[train_idx])
        fold_scores.append(knn.score(scaled[test_idx], labels[test_idx]))

    mean_acc = float(np.mean(fold_scores))
    std_acc = float(np.std(fold_scores))
    print(
        f"[METRIC] {name}: silhouette={silhouette:.3f}, "
        f"5-NN accuracy={mean_acc:.3f} ± {std_acc:.3f}"
    )
580
-
581
-
582
- # ---------------------------------------------------------------------------
583
- # Main execution
584
- # ---------------------------------------------------------------------------
585
-
586
-
587
def main() -> None:
    """CLI entry point: build raw vs. LWM-embedding t-SNE comparison plots.

    Loads balanced spectrogram samples, embeds them with a pretrained LWM
    transformer, then renders two side-by-side t-SNE scatter plots (raw
    spectrograms vs. pooled embeddings) and optionally prints clustering
    metrics.
    """
    args = parse_args()

    # Profile presets override the default data/model roots only when the
    # user did not set them explicitly on the command line.
    if args.profile:
        preset = PROFILE_PRESETS.get(args.profile)
        if not preset:
            raise ValueError(f"Unknown profile requested: {args.profile}")
        if args.data_root == DEFAULT_DATA_ROOT:
            args.data_root = preset["data_root"]
        if args.models_root == DEFAULT_MODELS_ROOT:
            args.models_root = preset["models_root"]

    if args.profile:
        print(f"[INFO] Profile preset active: {args.profile}")

    # Seed every RNG involved (sampling, numpy ops, torch init).
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    data_root = Path(args.data_root)
    if not data_root.exists():
        raise FileNotFoundError(f"Data root not found: {data_root}")

    allowed_snrs = parse_snr_list(args.snrs)

    # Mobility filter: comma-separated values; "all" (alone) disables it.
    mobility_filter: set[str] | None = None
    if args.mobility:
        mobility_values: List[str] = []
        for value in args.mobility:
            mobility_values.extend([item.strip() for item in value.split(",") if item.strip()])
        mobility_values = [value for value in mobility_values if value]
        if mobility_values and not (len(mobility_values) == 1 and mobility_values[0].lower() == "all"):
            mobility_filter = {value.lower() for value in mobility_values}
            print(
                "[INFO] Mobility filter active: "
                + ", ".join(sorted(mobility_filter))
            )

    class_samples = list_snr_samples(
        data_root,
        args.modulation,
        allowed_snrs,
        mobility_filter,
        args.fft_folder,
        args.samples_per_snr,
        random,
        args.sampling_mode,
        args.complex_mode,
    )
    samples, snr_labels, modulation_labels, mobility_labels, _ = sample_balanced_dataset(class_samples)

    # Choose which metadata field colors the plots.
    if args.label_field == "snr":
        labels = snr_labels
        label_name = "SNR"
        label_display = "SNR"
    elif args.label_field == "modulation":
        labels = modulation_labels
        label_name = "modulation"
        label_display = "Modulation"
    else:  # mobility
        labels = mobility_labels
        label_name = "mobility"
        label_display = "Mobility"

    unique_labels = np.unique(labels)
    print(
        f"[INFO] Loaded {samples.shape[0]} spectrograms across {len(unique_labels)} {label_name} buckets"
    )
    class_counts = Counter(labels)
    print(f"[INFO] Samples per {label_name}:")
    for name, count in sorted(class_counts.items()):
        print(f" {name}: {count}")

    # Report auxiliary distributions when not labelling by SNR.
    if args.label_field != "snr":
        snr_counts = Counter(snr_labels)
        print("[INFO] SNR distribution (sampling classes):")
        for name, count in sorted(snr_counts.items()):
            print(f" {name}: {count}")
        if args.label_field == "mobility":
            modulation_counts = Counter(modulation_labels)
            print("[INFO] Modulation distribution:")
            for name, count in sorted(modulation_counts.items()):
                print(f" {name}: {count}")

    normalization_mode = args.normalization
    if normalization_mode == "per-sample":
        normalized_samples = normalize_per_sample(samples)
    else:
        normalized_samples = normalize_dataset(samples)
    print(f"[INFO] Normalisation mode: {normalization_mode}")

    # Flatten spectrograms (after optional normalization) for the raw t-SNE view.
    raw_vectors = normalized_samples.reshape(normalized_samples.shape[0], -1)

    # Prepare LWM model and embeddings for the right subplot.
    if args.checkpoint:
        checkpoint_path = Path(args.checkpoint)
        if not checkpoint_path.exists():
            raise FileNotFoundError(f"Checkpoint not found: {checkpoint_path}")
    else:
        checkpoint_path = find_latest_checkpoint(Path(args.models_root))
    print(f"[INFO] Using checkpoint: {checkpoint_path}")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"[INFO] Using device: {device}")
    print(f"[INFO] Pooling strategy: {args.pooling}")
    # Determine complex handling strategy for model/patching.
    use_interleaved = False
    if args.complex_mode == "interleaved":
        use_interleaved = True
    elif args.complex_mode == "auto":
        # Heuristic: if any sample contains width > 128, assume interleaved (e.g., 128x256).
        sample_shape = tuple(normalized_samples.shape[1:])
        if len(sample_shape) == 2 and sample_shape[1] > 128:
            use_interleaved = True

    # Token length: 32 for interleaved real/imag pairs, 16 for magnitude.
    element_length = 32 if use_interleaved else 16

    model = lwm_model(element_length=element_length, d_model=128, n_layers=12, max_len=1025, n_heads=8, dropout=0.1)
    state_dict = torch.load(checkpoint_path, map_location=device)
    # Strip DataParallel's "module." prefix if present.
    if any(k.startswith("module.") for k in state_dict):
        state_dict = {k.replace("module.", ""): v for k, v in state_dict.items()}
    try:
        model.load_state_dict(state_dict, strict=False)
    except RuntimeError as e:
        msg = str(e)
        # Fallback: checkpoint expects element_length=16 (magnitude), but we constructed 32 (interleaved).
        mismatch16 = "[128, 16]" in msg or "[16]" in msg
        mismatch32 = "[128, 32]" in msg or "[32]" in msg
        if mismatch16 and not mismatch32:
            print("[WARN] Checkpoint expects token dimension 16. Falling back to magnitude embedding.")
            use_interleaved = False
            element_length = 16
            # Recreate model and reload with the magnitude token size.
            model = lwm_model(element_length=element_length, d_model=128, n_layers=12, max_len=1025, n_heads=8, dropout=0.1)
            model.load_state_dict(state_dict, strict=False)
        else:
            raise
    model = model.to(device).eval()

    def collapse_interleaved_to_magnitude(spec: np.ndarray) -> np.ndarray:
        # spec: [H, 2W] with interleaved real/imag along width -> [H, W] magnitude.
        h, w2 = spec.shape
        if w2 % 2 != 0:
            return spec  # cannot collapse; return as-is
        real = spec[:, 0::2]
        imag = spec[:, 1::2]
        return np.sqrt(np.maximum(real * real + imag * imag, 0.0, dtype=np.float32))

    # If we fell back to magnitude (use_interleaved False) but inputs are
    # interleaved, collapse for embeddings only (raw view stays untouched).
    embed_inputs = normalized_samples
    if not use_interleaved and normalized_samples.shape[2] > 128:
        collapsed = []
        for spec in normalized_samples:
            collapsed.append(collapse_interleaved_to_magnitude(spec))
        embed_inputs = np.stack(collapsed).astype(np.float32, copy=False)

    # Embed one spectrogram at a time (keeps memory bounded).
    embeddings: List[np.ndarray] = []
    for spec in embed_inputs:
        tokens = extract_tokens(spec, device, interleaved=use_interleaved)
        embedding = pool_embeddings(tokens, model, args.pooling)
        embeddings.append(embedding.squeeze(0))

    embeddings_np = np.vstack(embeddings)
    print(f"[INFO] Generated embeddings with shape {embeddings_np.shape}")

    if args.report_metrics:
        compute_metrics("Raw spectrogram", raw_vectors, labels)
        pool_label = "LWM mean" if args.pooling == "mean" else "LWM CLS"
        compute_metrics(pool_label, embeddings_np, labels)
        if args.metrics_only:
            return

    # Plot results (two subplots matching the original figure format).
    fig, axes = plt.subplots(1, 2, figsize=(18, 7))
    raw_title = f"Raw Spectrogram t-SNE (by {label_display})"
    pooling_label = "Mean Pool" if args.pooling == "mean" else "CLS Token"
    embedding_title = f"LWM Embedding t-SNE ({pooling_label}, by {label_display})"
    run_tsne(raw_vectors, labels, raw_title, axes[0])
    run_tsne(embeddings_np, labels, embedding_title, axes[1])

    fig.tight_layout()
    save_path = Path(args.save_path)

    # Tag the output filename with the profile (or data-root folder name).
    communication_tag: str | None = None
    if args.profile:
        communication_tag = args.profile
    else:
        root_name = Path(args.data_root).name
        if root_name:
            communication_tag = root_name

    def ensure_suffix(stem: str, suffix: str) -> str:
        # Append suffix to the filename stem only once.
        return stem if stem.endswith(suffix) else f"{stem}_{suffix}"

    updated_stem = save_path.stem
    if communication_tag:
        updated_stem = ensure_suffix(updated_stem, communication_tag)
    if args.label_field != "snr":
        label_suffix = f"by_{args.label_field}"
        updated_stem = ensure_suffix(updated_stem, label_suffix)

    if updated_stem != save_path.stem:
        save_path = save_path.with_name(f"{updated_stem}{save_path.suffix}")
    save_path.parent.mkdir(parents=True, exist_ok=True)
    plt.savefig(save_path, dpi=600, bbox_inches="tight")
    print(f"[INFO] Figure saved to {save_path}")

    # Also save PDF version for paper (vector format, no resolution limit).
    pdf_path = save_path.with_suffix('.pdf')
    plt.savefig(pdf_path, format='pdf', bbox_inches="tight")
    print(f"[INFO] PDF version saved to {pdf_path}")
801
- if __name__ == "__main__":
802
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
task1/train_mcs_models.py DELETED
The diff for this file is too large to render. See raw diff
 
task2/mobility_utils.py DELETED
@@ -1,414 +0,0 @@
1
- #!/usr/bin/env python3
2
- """Shared mobility-classification utilities used across Task 2 helpers.
3
-
4
- This module provides the lightweight LWM classifier head plus supporting
5
- sampling and normalization helpers that were previously bundled inside the
6
- stand-alone mobility fine-tuning scripts. They remain available so that
7
- benchmarking, router training, and visualisation pipelines can reuse the same
8
- logic without depending on a separate CLI.
9
- """
10
-
11
- from __future__ import annotations
12
-
13
- import glob
14
- import json
15
- from collections import defaultdict
16
- from pathlib import Path
17
- from typing import Any, Dict, Iterable, List, Sequence, Tuple
18
-
19
- import numpy as np
20
- import torch
21
- import torch.nn as nn
22
- import torch.nn.functional as F
23
-
24
- from pretraining.pretrained_model import lwm as lwm_model
25
- from task1.train_mcs_models import (
26
- _extract_metadata,
27
- identify_modulation,
28
- load_all_samples,
29
- )
30
-
31
# Canonical mobility class ordering; list position defines the integer label.
MOBILITY_LABELS = ["static", "pedestrian", "vehicular"]
# Two-class variant used by the binary mobility setting.
BINARY_MOBILITY_LABELS = ["vehicular", "pedestrian"]
33
-
34
-
35
def load_dataset_stats(models_root: Path) -> Dict[str, float | str]:
    """Load dataset statistics (mean/std/normalization mode) from a models directory.

    Falls back to a neutral per-sample configuration (mean=0, std=1) when
    ``dataset_stats.json`` is absent; a zero std is replaced by 1 so
    downstream normalization never divides by zero.
    """
    stats_path = models_root / "dataset_stats.json"
    if not stats_path.exists():
        print(
            f"[WARN] dataset_stats.json not found under {models_root}; "
            "falling back to per-sample normalization with mean=0/std=1.",
            flush=True,
        )
        return {"mean": 0.0, "std": 1.0, "normalization": "per_sample"}

    with open(stats_path, "r", encoding="utf-8") as f:
        raw = json.load(f)

    mean_value = float(raw.get("mean", 0.0))
    std_value = float(raw.get("std", 1.0))
    if std_value == 0.0:
        std_value = 1.0
    # Older stats files used the key "mode" instead of "normalization".
    mode = str(raw.get("normalization", raw.get("mode", "dataset")))
    return {"mean": mean_value, "std": std_value, "normalization": mode}
57
-
58
-
59
def gather_controlled_groups(
    data_root: Path,
    cities: Sequence[str],
    comm: str,
    mobilities: Sequence[str],
    snrs: Sequence[str] | None,
    fft_whitelist: Sequence[str] | None,
) -> Dict[Tuple[str, str, str, str, str], Dict[str, List[str]]]:
    """Group spectrogram paths by (city, modulation, rate, SNR, FFT) while balancing mobilities."""
    grouped: Dict[Tuple[str, str, str, str, str], Dict[str, List[str]]] = defaultdict(lambda: defaultdict(list))
    wanted_mobilities = set(mobilities)
    wanted_snrs = set(snrs) if snrs else None
    wanted_ffts = set(fft_whitelist) if fft_whitelist else None

    for city in cities:
        city_base = data_root / city / comm
        if not city_base.exists():
            continue
        search_pattern = str(city_base / "**" / "spectrograms" / "*.pkl")
        for path_str in glob.iglob(search_pattern, recursive=True):
            path = Path(path_str)
            rate, snr, mobility = _extract_metadata(path.parts)
            # Apply the mobility / SNR / FFT whitelists before any heavier work.
            if mobility not in wanted_mobilities:
                continue
            if wanted_snrs is not None and snr not in wanted_snrs:
                continue
            # FFT folder is the first path component starting with "win".
            fft = next((part for part in path.parts if part.startswith("win")), "fft_unknown")
            if wanted_ffts is not None and fft not in wanted_ffts:
                continue
            _, modulation = identify_modulation(path_str)
            if modulation is None:
                continue
            grouped[(city, modulation, rate, snr, fft)][mobility].append(str(path))

    # Convert the nested defaultdicts into plain dicts for callers.
    return {key: dict(per_mobility) for key, per_mobility in grouped.items()}
94
-
95
-
96
def _collect_balanced_arrays(
    groups: Dict[Tuple[str, str, str, str, str], Dict[str, List[str]]],
    mobilities: Sequence[str],
    max_per_config: int,
    rng: np.random.Generator,
) -> Tuple[np.ndarray, np.ndarray, Dict[str, Any]]:
    """Load spectrogram arrays with per-configuration balance across mobilities.

    Args:
        groups: Mapping from (city, modulation, rate, SNR, FFT) keys to a
            per-mobility list of spectrogram paths (see
            ``gather_controlled_groups``).
        mobilities: Mobility classes to balance; list order fixes the
            integer label assigned to each class.
        max_per_config: Cap on samples per mobility per configuration
            (values <= 0 mean "no cap").
        rng: Generator used to subsample when a mobility has more samples
            than the balanced limit.

    Returns:
        ``(features, labels, info)`` where ``features`` is a float32 array
        of stacked spectrograms, ``labels`` the matching int64 class
        indices, and ``info`` reports per-mobility totals, the number of
        fully matched configurations, and up to five preview keys.
    """
    features: List[np.ndarray] = []
    labels: List[np.ndarray] = []
    mobility_to_idx = {mob: idx for idx, mob in enumerate(mobilities)}
    per_mobility_totals = {mob: 0 for mob in mobilities}
    matched_configs = 0
    preview_configs: List[Tuple[str, str, str, str, str]] = []

    for key, mobility_map in groups.items():
        # Only use configurations where every requested mobility is present.
        if not all(mob in mobility_map for mob in mobilities):
            continue

        cached_arrays: Dict[str, np.ndarray] = {}
        per_mobility_counts: List[int] = []
        for mobility in mobilities:
            paths = mobility_map[mobility]
            collected: List[np.ndarray] = []
            for path in paths:
                arr = load_all_samples(path)
                if arr.size == 0:
                    continue
                collected.append(arr)
            if not collected:
                # One mobility yielded nothing: drop the whole configuration.
                cached_arrays = {}
                break
            stacked = np.concatenate(collected, axis=0)
            cached_arrays[mobility] = stacked
            per_mobility_counts.append(stacked.shape[0])

        if len(cached_arrays) != len(mobilities):
            continue

        # Balance: every mobility contributes the same number of samples.
        limit = min(per_mobility_counts)
        if max_per_config > 0:
            limit = min(limit, max_per_config)
        if limit == 0:
            continue

        for mobility in mobilities:
            arr = cached_arrays[mobility]
            if arr.shape[0] > limit:
                indices = rng.permutation(arr.shape[0])[:limit]
                arr = arr[indices]
            features.append(arr)
            # BUG FIX: this previously indexed with the undefined name
            # ``mob`` (only bound inside earlier comprehensions, whose
            # scope does not leak), raising NameError at runtime; the loop
            # variable here is ``mobility``.
            labels.append(np.full(arr.shape[0], mobility_to_idx[mobility], dtype=np.int64))
            per_mobility_totals[mobility] += arr.shape[0]

        if matched_configs < 5:
            preview_configs.append(key)
        matched_configs += 1

    if not features:
        # Empty result keeps the documented dtypes/shapes for callers.
        return (
            np.empty((0, 128, 128), dtype=np.float32),
            np.empty((0,), dtype=np.int64),
            {"per_mobility": per_mobility_totals, "matched_configs": matched_configs, "preview_configs": preview_configs},
        )

    stacked_features = np.concatenate(features, axis=0).astype(np.float32, copy=False)
    stacked_labels = np.concatenate(labels, axis=0).astype(np.int64, copy=False)
    return stacked_features, stacked_labels, {
        "per_mobility": per_mobility_totals,
        "matched_configs": matched_configs,
        "preview_configs": preview_configs,
    }
167
-
168
-
169
class ResidualBlock1D(nn.Module):
    """1D residual block: two conv+BN stages plus an identity/projection skip."""

    def __init__(self, in_channels: int, out_channels: int) -> None:
        super().__init__()
        self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm1d(out_channels)
        self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm1d(out_channels)
        # Project the skip path only when the channel count changes;
        # otherwise the skip is a plain identity (empty Sequential).
        if in_channels == out_channels:
            self.shortcut = nn.Sequential()
        else:
            self.shortcut = nn.Sequential(
                nn.Conv1d(in_channels, out_channels, kernel_size=1),
                nn.BatchNorm1d(out_channels),
            )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        skip = self.shortcut(x)
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        # Residual add followed by the final activation.
        return F.relu(out + skip)
191
-
192
-
193
class Res1DCNNHead(nn.Module):
    """Compact ResNet-style 1D head for classifying 128-d embeddings."""

    def __init__(self, input_dim: int, num_classes: int, dropout: float = 0.5) -> None:
        super().__init__()
        channels = 64
        self.conv1 = nn.Conv1d(1, channels, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm1d(channels)
        self.res_block = ResidualBlock1D(channels, channels)
        self.fc = nn.Linear(channels, num_classes)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Treat the embedding as a 1-channel sequence: [B, D] -> [B, 1, D].
        out = F.relu(self.bn1(self.conv1(x.unsqueeze(1))))
        out = self.res_block(out)
        # Global average pool over the sequence axis: [B, C, D] -> [B, C].
        out = F.adaptive_avg_pool1d(out, 1).squeeze(-1)
        return self.fc(self.dropout(out))
212
-
213
-
214
class LWMClassifierMinimal(nn.Module):
    """LWM backbone wrapper with configurable classifier and optional projection head.

    Tokenizes a [B, H, W] spectrogram into 4x4 patches (plus a constant
    CLS token), runs them through a frozen (or partially unfrozen) LWM
    backbone, pools the outputs, and classifies with a linear / MLP /
    Res1DCNN head. Optionally appends per-sample mean/std statistics to
    the classifier input and exposes a contrastive projection head.
    """

    def __init__(
        self,
        backbone: nn.Module,
        num_classes: int,
        classifier_dim: int,
        dropout: float,
        trainable_layers: int,
        projection_dim: int,
        append_input_stats: bool,
        normalization_stats: Dict[str, object] | None,
        head_type: str = "mlp",
    ) -> None:
        super().__init__()
        self.backbone = backbone
        # 4x4 non-overlapping patches -> 16-d tokens.
        self.patch_size = 4
        self.unfold = nn.Unfold(kernel_size=self.patch_size, stride=self.patch_size)
        self.head_type = head_type

        self.append_input_stats = bool(append_input_stats)
        stats_info = normalization_stats or {}
        self.normalization_mode = str(stats_info.get("normalization", "dataset")).lower()
        self.dataset_mean = float(stats_info.get("mean", 0.0))
        self.dataset_std = float(stats_info.get("std", 1.0))
        # Guard against division/multiplication by ~0 in _collect_input_stats.
        if abs(self.dataset_std) < 1e-6:
            self.dataset_std = 1e-6
        base_dim = 128  # LWM d_model
        stats_dim = 2 if self.append_input_stats else 0
        input_dim = base_dim + stats_dim

        # Clamp head hyper-parameters to sane values.
        classifier_dim = max(32, int(classifier_dim))
        dropout = max(0.0, float(dropout))

        if head_type == "linear":
            self.classifier = nn.Sequential(
                nn.LayerNorm(input_dim),
                nn.Linear(input_dim, num_classes),
            )
        elif head_type == "res1dcnn":
            self.classifier = nn.Sequential(
                nn.LayerNorm(input_dim),
                Res1DCNNHead(input_dim, num_classes, dropout=dropout),
            )
        else:
            # Default "mlp" head: LayerNorm -> Linear -> GELU [-> Dropout] -> Linear.
            head_layers: List[nn.Module] = [
                nn.LayerNorm(input_dim),
                nn.Linear(input_dim, classifier_dim),
                nn.GELU(),
            ]
            if dropout > 0:
                head_layers.append(nn.Dropout(dropout))
            head_layers.append(nn.Linear(classifier_dim, num_classes))
            self.classifier = nn.Sequential(*head_layers)

        # Optional projection head for contrastive training (fed from the
        # 128-d pooled features, not the stats-augmented classifier input).
        proj_dim = int(projection_dim)
        if proj_dim > 0:
            self.projection_head = nn.Sequential(
                nn.Linear(128, proj_dim),
                nn.ReLU(inplace=True),
                nn.Linear(proj_dim, proj_dim),
            )
        else:
            self.projection_head = None

        # Freeze the whole backbone by default...
        for param in self.backbone.parameters():
            param.requires_grad = False

        # ...then selectively unfreeze the last `trainable_layers` blocks
        # (assumes the backbone exposes a `layers` attribute — TODO confirm
        # against the lwm_model definition).
        if trainable_layers > 0:
            layers = getattr(self.backbone, "layers", None)
            if layers is not None:
                trainable_layers = min(trainable_layers, len(layers))
                for layer in layers[-trainable_layers:]:
                    for param in layer.parameters():
                        param.requires_grad = True

    def spectrogram_to_tokens(self, x: torch.Tensor) -> torch.Tensor:
        """Unfold [B, H, W] into patch tokens and prepend the constant CLS token."""
        x = x.unsqueeze(1)
        patches = self.unfold(x).transpose(1, 2)
        # CLS token filled with 0.2 — must match the pretraining convention.
        cls_token = torch.full(
            (patches.size(0), 1, patches.size(-1)),
            0.2,
            dtype=patches.dtype,
            device=patches.device,
        )
        return torch.cat([cls_token, patches], dim=1)

    def forward_features(self, x: torch.Tensor) -> torch.Tensor:
        """Return one pooled 128-d feature vector per input spectrogram."""
        tokens = self.spectrogram_to_tokens(x)
        outputs = self.backbone(tokens)
        # Degenerate case (CLS only): return the CLS output directly;
        # otherwise mean-pool over the patch positions, excluding CLS.
        if outputs.size(1) <= 1:
            return outputs[:, 0, :]
        return outputs[:, 1:, :].mean(dim=1)

    def _collect_input_stats(self, x: torch.Tensor) -> torch.Tensor:
        """Per-sample mean/std of the input, de-normalized when dataset stats apply."""
        mean = x.mean(dim=(1, 2))
        std = x.std(dim=(1, 2), unbiased=False)
        if self.normalization_mode == "dataset":
            # Undo dataset-level normalization so the stats describe raw data.
            mean = mean * self.dataset_std + self.dataset_mean
            std = std * self.dataset_std
        return torch.stack([mean, std], dim=1)

    def forward(
        self,
        x: torch.Tensor,
        *,
        input_stats: torch.Tensor | None = None,
        return_projection: bool = False,
    ) -> torch.Tensor | Tuple[torch.Tensor, torch.Tensor]:
        """Classify spectrograms; optionally also return the projection output.

        Args:
            x: Spectrogram batch [B, H, W].
            input_stats: Optional precomputed [B, 2] mean/std pairs; computed
                on the fly when omitted and stats are enabled.
            return_projection: When True, return ``(logits, projection)``
                where projection is None if no projection head exists.
        """
        features = self.forward_features(x)
        classifier_input = features
        if self.append_input_stats:
            stats = input_stats if input_stats is not None else self._collect_input_stats(x)
            if stats.dtype != classifier_input.dtype:
                stats = stats.to(classifier_input.dtype)
            stats = stats.to(classifier_input.device)
            classifier_input = torch.cat([classifier_input, stats], dim=1)
        logits = self.classifier(classifier_input)
        if return_projection:
            projection = self.projection_head(features) if self.projection_head is not None else None
            return logits, projection
        return logits
337
-
338
-
339
def prepare_model(
    checkpoint: Path,
    num_classes: int,
    classifier_dim: int,
    dropout: float,
    trainable_layers: int,
    projection_dim: int,
    *,
    append_input_stats: bool = False,
    normalization_stats: Dict[str, object] | None = None,
    head_type: str = "mlp",
) -> nn.Module:
    """Instantiate an LWM backbone with the minimal classifier head.

    Loads the pretrained weights from ``checkpoint`` (non-strict) and wraps
    the backbone in :class:`LWMClassifierMinimal` with the given head
    configuration.
    """
    backbone = lwm_model(element_length=16, d_model=128, n_layers=12, max_len=1025, n_heads=8, dropout=0.1)
    checkpoint_state = torch.load(checkpoint, map_location="cpu")
    # Strip DataParallel's "module." prefix if the checkpoint was saved that way.
    if any(key.startswith("module.") for key in checkpoint_state):
        checkpoint_state = {key.replace("module.", ""): value for key, value in checkpoint_state.items()}
    backbone.load_state_dict(checkpoint_state, strict=False)
    return LWMClassifierMinimal(
        backbone,
        num_classes=num_classes,
        classifier_dim=classifier_dim,
        dropout=dropout,
        trainable_layers=trainable_layers,
        projection_dim=projection_dim,
        append_input_stats=append_input_stats,
        normalization_stats=normalization_stats,
        head_type=head_type,
    )
368
-
369
-
370
def supervised_contrastive_loss(
    features: torch.Tensor,
    labels: torch.Tensor,
    temperature: float,
) -> torch.Tensor:
    """Supervised contrastive loss over a batch of feature embeddings.

    Returns a zero tensor when the batch has fewer than two samples or
    when no sample has a positive partner (another sample with the same
    label).
    """
    n = features.size(0)
    if n < 2:
        return features.new_tensor(0.0)

    normed = F.normalize(features, dim=1)
    logits = torch.matmul(normed, normed.T) / max(temperature, 1e-6)
    # Subtract the detached row-wise max for numerical stability.
    logits = logits - logits.max(dim=1, keepdim=True)[0].detach()

    device = normed.device
    label_col = labels.contiguous().view(-1, 1)
    # Positive pairs share a label; self-pairs are masked out everywhere.
    positive_mask = torch.eq(label_col, label_col.T).float().to(device)
    self_mask = torch.ones_like(positive_mask) - torch.eye(n, device=device)
    positive_mask = positive_mask * self_mask

    exp_logits = torch.exp(logits) * self_mask
    log_prob = logits - torch.log(exp_logits.sum(dim=1, keepdim=True) + 1e-12)

    positives_per_row = positive_mask.sum(dim=1)
    has_positive = positives_per_row > 0
    if not torch.any(has_positive):
        return normed.new_tensor(0.0)

    mean_log_prob_pos = (positive_mask * log_prob).sum(dim=1) / positives_per_row.clamp_min(1e-12)
    return -mean_log_prob_pos[has_positive].mean()
402
-
403
-
404
# Public API of this module (ASCII-sorted); names not listed here are
# considered internal even if importable.
__all__ = [
    "BINARY_MOBILITY_LABELS",
    "LWMClassifierMinimal",
    "MOBILITY_LABELS",
    "Res1DCNNHead",
    "_collect_balanced_arrays",
    "gather_controlled_groups",
    "load_dataset_stats",
    "prepare_model",
    "supervised_contrastive_loss",
]