Spaces:
Running
Running
“Namhyun-Kim”
commited on
Commit
·
2b1a1e3
1
Parent(s):
aebafe2
Sync app to fetch data from wi-lab/lwm-spectro
Browse files- README.md +24 -0
- app.py +464 -196
- pretraining/README.md +0 -44
- pretraining/__init__.py +0 -0
- pretraining/pretrained_model.py +0 -180
- pretraining/train_lwm_spectro.py +0 -741
- pretraining/train_lwm_spectro_contrastive.py +0 -1450
- pretraining/train_lwm_spectro_no_contrast.py +0 -1136
- requirements.txt +2 -2
README.md
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: LWM-Spectro Lab
|
| 3 |
+
emoji: 🔍
|
| 4 |
+
colorFrom: purple
|
| 5 |
+
colorTo: indigo
|
| 6 |
+
sdk: gradio
|
| 7 |
+
sdk_version: "6.0.1"
|
| 8 |
+
app_file: app.py
|
| 9 |
+
pinned: false
|
| 10 |
+
---
|
| 11 |
+
|
| 12 |
+
# LWM-Spectro Lab
|
| 13 |
+
|
| 14 |
+
One-stop lab for exploring spectrograms, LWM embeddings, and lightweight evaluation baselines.
|
| 15 |
+
|
| 16 |
+
## Features
|
| 17 |
+
- Visualize LWM embeddings or raw spectrograms with customizable filters.
|
| 18 |
+
- Inspect joint SNR/Doppler performance using cached MoE embeddings and an adaptive k-NN classifier.
|
| 19 |
+
- Upload your own datasets to compare raw channels vs. model embeddings.
|
| 20 |
+
|
| 21 |
+
## Usage
|
| 22 |
+
1. Select the **Spectrograms** and **t-SNE Analysis** tabs to explore embeddings.
|
| 23 |
+
2. Switch to **Modulation Classification** or **Joint SNR/Doppler Evaluation** to run the k-NN prototype with adjustable train/test splits.
|
| 24 |
+
3. Provide custom data (optional) to benchmark against bundled samples.
|
app.py
CHANGED
|
@@ -1,52 +1,55 @@
|
|
| 1 |
-
|
| 2 |
-
import
|
| 3 |
-
import sys
|
| 4 |
from pathlib import Path
|
| 5 |
-
from typing import Dict, List,
|
| 6 |
|
| 7 |
import gradio as gr
|
|
|
|
|
|
|
| 8 |
import numpy as np
|
| 9 |
import pandas as pd
|
| 10 |
import plotly.express as px
|
| 11 |
import plotly.graph_objects as go
|
| 12 |
import torch
|
|
|
|
| 13 |
from sklearn.decomposition import PCA
|
| 14 |
from sklearn.manifold import TSNE
|
| 15 |
-
from sklearn.metrics import accuracy_score,
|
|
|
|
|
|
|
| 16 |
|
| 17 |
-
REPO_ROOT = Path(__file__).resolve().parents[1]
|
| 18 |
APP_DIR = Path(__file__).resolve().parent
|
| 19 |
DEMO_DATA_PATH = APP_DIR / "demo_data.pt"
|
| 20 |
MOE_DATA_PATH = APP_DIR / "demo_data_moe.pt"
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
)
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
label_names = [f"{snr} | {mob}" for snr, mob in
|
| 45 |
-
pair_to_name = {pair: name for pair, name in zip(
|
| 46 |
name_to_id = {name: idx for idx, name in enumerate(label_names)}
|
| 47 |
-
pair_to_id = {pair: idx for idx, pair in enumerate(
|
| 48 |
return {
|
| 49 |
-
"pairs":
|
| 50 |
"label_names": label_names,
|
| 51 |
"pair_to_name": pair_to_name,
|
| 52 |
"name_to_id": name_to_id,
|
|
@@ -54,77 +57,43 @@ def load_joint_mapping() -> Optional[Dict[str, object]]:
|
|
| 54 |
}
|
| 55 |
|
| 56 |
|
| 57 |
-
def
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
batch_size: int = 64,
|
| 61 |
-
) -> torch.Tensor:
|
| 62 |
-
router = predictor.router
|
| 63 |
-
experts = predictor.experts
|
| 64 |
-
device = predictor.device
|
| 65 |
-
embeddings: List[torch.Tensor] = []
|
| 66 |
-
|
| 67 |
-
with torch.no_grad():
|
| 68 |
-
for start in range(0, len(samples), batch_size):
|
| 69 |
-
batch = samples[start : start + batch_size]
|
| 70 |
-
specs = torch.cat([sample["data"] for sample in batch], dim=0).to(device)
|
| 71 |
-
specs_norm = normalize_per_sample_tensor(specs)
|
| 72 |
-
|
| 73 |
-
if router is not None:
|
| 74 |
-
router_logits = router(specs_norm)
|
| 75 |
-
probs = torch.softmax(router_logits, dim=1)
|
| 76 |
-
topk_vals, topk_idx = probs.topk(k=predictor.topk, dim=1)
|
| 77 |
-
weights = topk_vals / torch.clamp(topk_vals.sum(dim=1, keepdim=True), min=1e-6)
|
| 78 |
-
selected_embeddings = compute_selected_expert_embeddings(
|
| 79 |
-
experts,
|
| 80 |
-
specs_norm,
|
| 81 |
-
topk_idx,
|
| 82 |
-
allow_grad=False,
|
| 83 |
-
)
|
| 84 |
-
weighted = (weights.unsqueeze(-1) * selected_embeddings).sum(dim=1)
|
| 85 |
-
else:
|
| 86 |
-
stacked = stack_expert_embeddings(experts, specs_norm)
|
| 87 |
-
weighted = stacked.mean(dim=1)
|
| 88 |
|
| 89 |
-
embeddings.append(weighted.cpu())
|
| 90 |
|
| 91 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
|
| 93 |
|
| 94 |
-
def
|
|
|
|
|
|
|
| 95 |
if MOE_DATA_PATH.exists():
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
print(f"[INFO] Loaded cached MoE embeddings from {MOE_DATA_PATH}")
|
| 99 |
-
return cached, True
|
| 100 |
-
print("[WARN] Cached MoE embeddings length mismatch. Recomputing...")
|
| 101 |
-
|
| 102 |
-
if not MOE_CHECKPOINT.exists():
|
| 103 |
-
print(f"[WARN] MoE checkpoint not found at {MOE_CHECKPOINT}. Skipping MoE embeddings.")
|
| 104 |
-
return samples, False
|
| 105 |
-
|
| 106 |
-
print("[INFO] Computing MoE embeddings using router checkpoint...")
|
| 107 |
-
predictor = MoEPredictor.from_checkpoint(MOE_CHECKPOINT)
|
| 108 |
-
moe_embeddings = compute_moe_embeddings(samples, predictor)
|
| 109 |
-
for sample, emb in zip(samples, moe_embeddings):
|
| 110 |
-
sample["moe_embedding"] = emb.detach().cpu()
|
| 111 |
-
|
| 112 |
-
torch.save(samples, MOE_DATA_PATH)
|
| 113 |
-
print(f"[INFO] Saved MoE-augmented dataset to {MOE_DATA_PATH}")
|
| 114 |
-
return samples, True
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
def load_data(mapping: Optional[Dict[str, object]]):
|
| 118 |
if not DEMO_DATA_PATH.exists():
|
| 119 |
raise FileNotFoundError(f"Dataset not found at {DEMO_DATA_PATH}")
|
|
|
|
|
|
|
| 120 |
|
| 121 |
-
print(f"[INFO] Loading base dataset from {DEMO_DATA_PATH}")
|
| 122 |
-
data: List[Dict[str, object]] = torch.load(DEMO_DATA_PATH)
|
| 123 |
-
data, has_moe = ensure_moe_embeddings(data)
|
| 124 |
-
|
| 125 |
-
pair_to_name = mapping["pair_to_name"] if mapping else {}
|
| 126 |
-
pair_to_id = mapping["pair_to_id"] if mapping else {}
|
| 127 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
records = []
|
| 129 |
for i, sample in enumerate(data):
|
| 130 |
embedding = sample["embedding"]
|
|
@@ -149,9 +118,14 @@ def load_data(mapping: Optional[Dict[str, object]]):
|
|
| 149 |
joint_label = pair_to_name.get(pair)
|
| 150 |
joint_label_id = pair_to_id.get(pair)
|
| 151 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 152 |
records.append(
|
| 153 |
{
|
| 154 |
-
|
| 155 |
"tech": sample["tech"],
|
| 156 |
"snr": sample["snr"],
|
| 157 |
"mod": sample["mod"],
|
|
@@ -161,11 +135,15 @@ def load_data(mapping: Optional[Dict[str, object]]):
|
|
| 161 |
"spectrogram": flat_spec,
|
| 162 |
"joint_label": joint_label,
|
| 163 |
"joint_label_id": joint_label_id,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 164 |
}
|
| 165 |
)
|
| 166 |
|
| 167 |
df = pd.DataFrame(records)
|
| 168 |
-
print(f"[INFO] Loaded {len(df)} samples
|
| 169 |
return df, has_moe
|
| 170 |
|
| 171 |
|
|
@@ -188,50 +166,94 @@ def apply_filters(
|
|
| 188 |
return filtered
|
| 189 |
|
| 190 |
|
| 191 |
-
def plot_tsne(tech_filter, snr_filter, mod_filter, mob_filter, representation,
|
| 192 |
filtered_df = apply_filters(df, tech_filter, snr_filter, mod_filter, mob_filter)
|
| 193 |
if len(filtered_df) < 5:
|
| 194 |
-
return None
|
|
|
|
|
|
|
| 195 |
|
| 196 |
-
if representation == "LWM Embedding"
|
| 197 |
-
|
|
|
|
|
|
|
| 198 |
else:
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
init="pca",
|
| 211 |
-
learning_rate="auto",
|
| 212 |
-
)
|
| 213 |
-
projections = tsne.fit_transform(features)
|
| 214 |
-
filtered_df = filtered_df.copy()
|
| 215 |
-
filtered_df["x"] = projections[:, 0]
|
| 216 |
-
filtered_df["y"] = projections[:, 1]
|
| 217 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 218 |
fig = px.scatter(
|
| 219 |
filtered_df,
|
| 220 |
x="x",
|
| 221 |
y="y",
|
| 222 |
-
color=
|
| 223 |
hover_data=["tech", "snr", "mod", "mob"],
|
| 224 |
title=f"t-SNE of {representation} ({len(filtered_df)} samples)",
|
| 225 |
template="plotly_white",
|
| 226 |
)
|
| 227 |
-
|
| 228 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 229 |
|
| 230 |
|
| 231 |
def stratified_split(filtered_df: pd.DataFrame, train_ratio: float, seed: int) -> Tuple[np.ndarray, np.ndarray]:
|
| 232 |
rng = np.random.default_rng(int(seed))
|
| 233 |
-
train_indices
|
| 234 |
-
test_indices
|
| 235 |
|
| 236 |
for label_id, group in filtered_df.groupby("joint_label_id"):
|
| 237 |
indices = group.index.to_numpy()
|
|
@@ -247,47 +269,22 @@ def stratified_split(filtered_df: pd.DataFrame, train_ratio: float, seed: int) -
|
|
| 247 |
return np.array(train_indices), np.array(test_indices)
|
| 248 |
|
| 249 |
|
| 250 |
-
def
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
centroid_ids: List[int] = []
|
| 262 |
-
for label_id in unique_labels:
|
| 263 |
-
mask = train_labels == label_id
|
| 264 |
-
centroids.append(train_embeddings[mask].mean(axis=0))
|
| 265 |
-
centroid_ids.append(int(label_id))
|
| 266 |
-
|
| 267 |
-
centroids = np.stack(centroids)
|
| 268 |
-
centroid_ids = np.array(centroid_ids, dtype=int)
|
| 269 |
-
|
| 270 |
-
dists = ((test_embeddings[:, None, :] - centroids[None, :, :]) ** 2).sum(axis=-1)
|
| 271 |
-
preds = centroid_ids[np.argmin(dists, axis=1)]
|
| 272 |
-
|
| 273 |
-
accuracy = accuracy_score(test_labels, preds)
|
| 274 |
-
macro_f1 = f1_score(test_labels, preds, average="macro", labels=centroid_ids, zero_division=0)
|
| 275 |
-
|
| 276 |
-
active_ids = sorted(np.unique(np.concatenate([test_labels, preds])))
|
| 277 |
-
label_names = [CLASS_LABELS[i] for i in active_ids]
|
| 278 |
-
cm = confusion_matrix(test_labels, preds, labels=active_ids)
|
| 279 |
|
| 280 |
-
return {
|
| 281 |
-
"accuracy": accuracy,
|
| 282 |
-
"macro_f1": macro_f1,
|
| 283 |
-
"confusion": cm,
|
| 284 |
-
"label_names": label_names,
|
| 285 |
-
"train_size": len(train_idx),
|
| 286 |
-
"test_size": len(test_idx),
|
| 287 |
-
}
|
| 288 |
|
| 289 |
-
|
| 290 |
-
|
|
|
|
| 291 |
fig = go.Figure(
|
| 292 |
data=go.Heatmap(
|
| 293 |
z=confusion,
|
|
@@ -298,7 +295,7 @@ def plot_confusion_heatmap(confusion: np.ndarray, label_names: List[str]) -> go.
|
|
| 298 |
)
|
| 299 |
)
|
| 300 |
fig.update_layout(
|
| 301 |
-
title=
|
| 302 |
xaxis_title="Predicted",
|
| 303 |
yaxis_title="True",
|
| 304 |
xaxis=dict(tickangle=45),
|
|
@@ -307,70 +304,312 @@ def plot_confusion_heatmap(confusion: np.ndarray, label_names: List[str]) -> go.
|
|
| 307 |
|
| 308 |
|
| 309 |
def run_joint_evaluation(train_pct, seed, tech_filter, snr_filter, mod_filter, mob_filter):
|
| 310 |
-
if
|
| 311 |
fig = go.Figure()
|
| 312 |
fig.update_layout(title="MoE embeddings unavailable", xaxis=dict(visible=False), yaxis=dict(visible=False))
|
| 313 |
-
return fig, "MoE embeddings are not available
|
| 314 |
|
| 315 |
filtered = apply_filters(joint_eval_df, tech_filter, snr_filter, mod_filter, mob_filter)
|
| 316 |
if filtered.empty:
|
| 317 |
fig = go.Figure()
|
| 318 |
fig.update_layout(title="No samples after filtering", xaxis=dict(visible=False), yaxis=dict(visible=False))
|
| 319 |
-
return fig, "No samples match the selected filters."
|
| 320 |
|
| 321 |
if filtered["joint_label_id"].nunique() < 2:
|
| 322 |
fig = go.Figure()
|
| 323 |
fig.update_layout(title="Need at least two classes", xaxis=dict(visible=False), yaxis=dict(visible=False))
|
| 324 |
-
return fig, "Need at least two joint SNR/Doppler classes to evaluate."
|
|
|
|
|
|
|
| 325 |
|
| 326 |
try:
|
| 327 |
train_idx, test_idx = stratified_split(filtered, train_pct / 100.0, seed)
|
| 328 |
except ValueError as exc:
|
| 329 |
fig = go.Figure()
|
| 330 |
fig.update_layout(title="Unable to split dataset", xaxis=dict(visible=False), yaxis=dict(visible=False))
|
| 331 |
-
return fig, str(exc)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 332 |
|
| 333 |
-
metrics = compute_centroid_metrics(filtered, train_idx, test_idx)
|
| 334 |
-
fig = plot_confusion_heatmap(metrics["confusion"], metrics["label_names"])
|
| 335 |
status = (
|
| 336 |
-
f"
|
| 337 |
-
f"Test
|
| 338 |
-
|
| 339 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 340 |
)
|
| 341 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 342 |
|
| 343 |
|
| 344 |
mapping_info = load_joint_mapping()
|
| 345 |
df, has_moe_embeddings = load_data(mapping_info)
|
| 346 |
-
CLASS_LABELS
|
| 347 |
|
| 348 |
-
|
| 349 |
-
joint_eval_df =
|
| 350 |
-
joint_eval_df = joint_eval_df[joint_eval_df["moe_embedding"].notna()]
|
| 351 |
|
| 352 |
tech_choices = sorted(df["tech"].unique())
|
| 353 |
snr_choices = sorted(df["snr"].unique())
|
| 354 |
mod_choices = sorted(df["mod"].unique())
|
| 355 |
mob_choices = sorted(df["mob"].unique())
|
| 356 |
|
| 357 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 358 |
|
| 359 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 360 |
gr.Markdown("# 🔬 LWM-Spectro Interactive Demo")
|
| 361 |
gr.Markdown(
|
| 362 |
"""
|
| 363 |
-
Compare **LWM embeddings** vs **Raw Spectrograms** for visualization, then evaluate **MoE embeddings**
|
| 364 |
-
with a lightweight prototype classifier for joint SNR/Doppler recognition.
|
| 365 |
"""
|
| 366 |
)
|
| 367 |
|
| 368 |
with gr.Tabs():
|
| 369 |
-
with gr.Tab("
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 370 |
with gr.Row():
|
| 371 |
with gr.Column(scale=1, min_width=300):
|
| 372 |
gr.Markdown("### Filters")
|
| 373 |
-
tech_filter = gr.CheckboxGroup(choices=tech_choices, value=
|
| 374 |
snr_filter = gr.Dropdown(
|
| 375 |
choices=snr_choices, value=None, multiselect=True, label="SNR (Empty = All)"
|
| 376 |
)
|
|
@@ -387,14 +626,17 @@ with gr.Blocks(title="LWM-Spectro Demo") as demo:
|
|
| 387 |
value="LWM Embedding",
|
| 388 |
label="Representation",
|
| 389 |
)
|
| 390 |
-
color_by = gr.Dropdown(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 391 |
|
| 392 |
with gr.Accordion("Advanced t-SNE Settings", open=False):
|
| 393 |
perplexity = gr.Slider(minimum=5, maximum=50, value=30, step=1, label="Perplexity")
|
| 394 |
n_iter = gr.Slider(minimum=250, maximum=2000, value=1000, step=50, label="Iterations")
|
| 395 |
|
| 396 |
btn = gr.Button("Update Plot", variant="primary")
|
| 397 |
-
status = gr.Textbox(label="Status", interactive=False)
|
| 398 |
|
| 399 |
with gr.Column(scale=3):
|
| 400 |
plot = gr.Plot(label="t-SNE Visualization")
|
|
@@ -402,19 +644,43 @@ with gr.Blocks(title="LWM-Spectro Demo") as demo:
|
|
| 402 |
btn.click(
|
| 403 |
plot_tsne,
|
| 404 |
inputs=[tech_filter, snr_filter, mod_filter, mob_filter, representation, color_by, perplexity, n_iter],
|
| 405 |
-
outputs=[plot
|
| 406 |
)
|
| 407 |
|
| 408 |
demo.load(
|
| 409 |
plot_tsne,
|
| 410 |
inputs=[tech_filter, snr_filter, mod_filter, mob_filter, representation, color_by, perplexity, n_iter],
|
| 411 |
-
outputs=[plot
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 412 |
)
|
| 413 |
|
| 414 |
-
with gr.Tab("
|
| 415 |
if evaluation_disabled:
|
| 416 |
gr.Markdown(
|
| 417 |
-
"⚠️ MoE embeddings are
|
| 418 |
)
|
| 419 |
|
| 420 |
with gr.Row():
|
|
@@ -422,7 +688,7 @@ with gr.Blocks(title="LWM-Spectro Demo") as demo:
|
|
| 422 |
gr.Markdown("### Evaluation Filters")
|
| 423 |
eval_tech_filter = gr.CheckboxGroup(
|
| 424 |
choices=tech_choices,
|
| 425 |
-
value=
|
| 426 |
label="Technology",
|
| 427 |
interactive=not evaluation_disabled,
|
| 428 |
)
|
|
@@ -468,13 +734,15 @@ with gr.Blocks(title="LWM-Spectro Demo") as demo:
|
|
| 468 |
eval_btn = gr.Button("Run evaluation", variant="primary", interactive=not evaluation_disabled)
|
| 469 |
|
| 470 |
with gr.Column(scale=3):
|
| 471 |
-
|
| 472 |
-
|
|
|
|
|
|
|
| 473 |
|
| 474 |
eval_btn.click(
|
| 475 |
run_joint_evaluation,
|
| 476 |
inputs=[train_pct, seed, eval_tech_filter, eval_snr_filter, eval_mod_filter, eval_mob_filter],
|
| 477 |
-
outputs=[eval_plot, eval_status],
|
| 478 |
)
|
| 479 |
|
| 480 |
if __name__ == "__main__":
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import shutil
|
|
|
|
| 3 |
from pathlib import Path
|
| 4 |
+
from typing import Dict, List, Tuple, Optional
|
| 5 |
|
| 6 |
import gradio as gr
|
| 7 |
+
import matplotlib.pyplot as plt
|
| 8 |
+
from matplotlib.backends.backend_agg import FigureCanvasAgg
|
| 9 |
import numpy as np
|
| 10 |
import pandas as pd
|
| 11 |
import plotly.express as px
|
| 12 |
import plotly.graph_objects as go
|
| 13 |
import torch
|
| 14 |
+
from huggingface_hub import hf_hub_download
|
| 15 |
from sklearn.decomposition import PCA
|
| 16 |
from sklearn.manifold import TSNE
|
| 17 |
+
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score
|
| 18 |
+
from sklearn.neighbors import KNeighborsClassifier
|
| 19 |
+
from sklearn.preprocessing import StandardScaler
|
| 20 |
|
|
|
|
| 21 |
APP_DIR = Path(__file__).resolve().parent
|
| 22 |
DEMO_DATA_PATH = APP_DIR / "demo_data.pt"
|
| 23 |
MOE_DATA_PATH = APP_DIR / "demo_data_moe.pt"
|
| 24 |
+
HUB_REPO_ID = "wi-lab/lwm-spectro"
|
| 25 |
+
HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HF_HUB_TOKEN")
|
| 26 |
+
|
| 27 |
+
# Fixed ordering for the 14 joint SNR/Doppler labels
|
| 28 |
+
JOINT_LABELS = [
|
| 29 |
+
("SNR-5dB", "pedestrian"),
|
| 30 |
+
("SNR-5dB", "vehicular"),
|
| 31 |
+
("SNR0dB", "pedestrian"),
|
| 32 |
+
("SNR0dB", "vehicular"),
|
| 33 |
+
("SNR5dB", "pedestrian"),
|
| 34 |
+
("SNR5dB", "vehicular"),
|
| 35 |
+
("SNR10dB", "pedestrian"),
|
| 36 |
+
("SNR10dB", "vehicular"),
|
| 37 |
+
("SNR15dB", "pedestrian"),
|
| 38 |
+
("SNR15dB", "vehicular"),
|
| 39 |
+
("SNR20dB", "pedestrian"),
|
| 40 |
+
("SNR20dB", "vehicular"),
|
| 41 |
+
("SNR25dB", "pedestrian"),
|
| 42 |
+
("SNR25dB", "vehicular"),
|
| 43 |
+
]
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def load_joint_mapping() -> Dict[str, object]:
|
| 47 |
+
label_names = [f"{snr} | {mob}" for snr, mob in JOINT_LABELS]
|
| 48 |
+
pair_to_name = {pair: name for pair, name in zip(JOINT_LABELS, label_names)}
|
| 49 |
name_to_id = {name: idx for idx, name in enumerate(label_names)}
|
| 50 |
+
pair_to_id = {pair: idx for idx, pair in enumerate(JOINT_LABELS)}
|
| 51 |
return {
|
| 52 |
+
"pairs": JOINT_LABELS,
|
| 53 |
"label_names": label_names,
|
| 54 |
"pair_to_name": pair_to_name,
|
| 55 |
"name_to_id": name_to_id,
|
|
|
|
| 57 |
}
|
| 58 |
|
| 59 |
|
| 60 |
+
def _safe_load_tensor(path: Path):
|
| 61 |
+
# Torch 2.6 defaults to weights_only=True, which breaks our saved dicts.
|
| 62 |
+
return torch.load(path, weights_only=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
|
|
|
|
| 64 |
|
| 65 |
+
def _ensure_local_file(local_path: Path, hub_filename: str) -> Optional[Path]:
|
| 66 |
+
"""Ensure a file exists locally; try Hub download if missing."""
|
| 67 |
+
if local_path.exists():
|
| 68 |
+
return local_path
|
| 69 |
+
try:
|
| 70 |
+
cached = hf_hub_download(repo_id=HUB_REPO_ID, filename=hub_filename, token=HF_TOKEN)
|
| 71 |
+
cached_path = Path(cached)
|
| 72 |
+
shutil.copyfile(cached_path, local_path)
|
| 73 |
+
print(f"[INFO] Downloaded {hub_filename} from Hub to {local_path}")
|
| 74 |
+
return local_path
|
| 75 |
+
except Exception as exc:
|
| 76 |
+
print(f"[WARN] Could not download {hub_filename} from Hub ({exc}); continuing without it.")
|
| 77 |
+
return None
|
| 78 |
|
| 79 |
|
| 80 |
+
def load_augmented_samples() -> Tuple[List[Dict[str, object]], bool]:
|
| 81 |
+
_ensure_local_file(MOE_DATA_PATH, "demo_data_moe.pt")
|
| 82 |
+
_ensure_local_file(DEMO_DATA_PATH, "demo_data.pt")
|
| 83 |
if MOE_DATA_PATH.exists():
|
| 84 |
+
print(f"[INFO] Loading MoE-augmented dataset from {MOE_DATA_PATH}")
|
| 85 |
+
return _safe_load_tensor(MOE_DATA_PATH), True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
if not DEMO_DATA_PATH.exists():
|
| 87 |
raise FileNotFoundError(f"Dataset not found at {DEMO_DATA_PATH}")
|
| 88 |
+
print(f"[WARN] {MOE_DATA_PATH} missing; falling back to base data only")
|
| 89 |
+
return _safe_load_tensor(DEMO_DATA_PATH), False
|
| 90 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
|
| 92 |
+
def load_data(mapping: Dict[str, object]):
|
| 93 |
+
data, has_moe = load_augmented_samples()
|
| 94 |
+
pair_to_name = mapping["pair_to_name"]
|
| 95 |
+
pair_to_id = mapping["pair_to_id"]
|
| 96 |
+
|
| 97 |
records = []
|
| 98 |
for i, sample in enumerate(data):
|
| 99 |
embedding = sample["embedding"]
|
|
|
|
| 118 |
joint_label = pair_to_name.get(pair)
|
| 119 |
joint_label_id = pair_to_id.get(pair)
|
| 120 |
|
| 121 |
+
tsne_x = sample.get("tsne_x")
|
| 122 |
+
tsne_y = sample.get("tsne_y")
|
| 123 |
+
tsne_raw_x = sample.get("tsne_raw_x")
|
| 124 |
+
tsne_raw_y = sample.get("tsne_raw_y")
|
| 125 |
+
|
| 126 |
records.append(
|
| 127 |
{
|
| 128 |
+
"index": i,
|
| 129 |
"tech": sample["tech"],
|
| 130 |
"snr": sample["snr"],
|
| 131 |
"mod": sample["mod"],
|
|
|
|
| 135 |
"spectrogram": flat_spec,
|
| 136 |
"joint_label": joint_label,
|
| 137 |
"joint_label_id": joint_label_id,
|
| 138 |
+
"tsne_x": tsne_x,
|
| 139 |
+
"tsne_y": tsne_y,
|
| 140 |
+
"tsne_raw_x": tsne_raw_x,
|
| 141 |
+
"tsne_raw_y": tsne_raw_y,
|
| 142 |
}
|
| 143 |
)
|
| 144 |
|
| 145 |
df = pd.DataFrame(records)
|
| 146 |
+
print(f"[INFO] Loaded {len(df)} samples (MoE embeddings: {has_moe})")
|
| 147 |
return df, has_moe
|
| 148 |
|
| 149 |
|
|
|
|
| 166 |
return filtered
|
| 167 |
|
| 168 |
|
| 169 |
+
def plot_tsne(tech_filter, snr_filter, mod_filter, mob_filter, representation, color_label, perplexity, n_iter):
|
| 170 |
filtered_df = apply_filters(df, tech_filter, snr_filter, mod_filter, mob_filter)
|
| 171 |
if len(filtered_df) < 5:
|
| 172 |
+
return None
|
| 173 |
+
|
| 174 |
+
color_column = COLOR_OPTIONS.get(color_label, "snr")
|
| 175 |
|
| 176 |
+
tsne_cols = ("tsne_x", "tsne_y") if representation == "LWM Embedding" else ("tsne_raw_x", "tsne_raw_y")
|
| 177 |
+
has_cached = all(col in filtered_df.columns for col in tsne_cols)
|
| 178 |
+
if has_cached:
|
| 179 |
+
valid = filtered_df[tsne_cols[0]].notna().all() and filtered_df[tsne_cols[1]].notna().all()
|
| 180 |
else:
|
| 181 |
+
valid = False
|
| 182 |
+
|
| 183 |
+
if valid:
|
| 184 |
+
filtered_df = filtered_df.copy()
|
| 185 |
+
filtered_df["x"] = filtered_df[tsne_cols[0]]
|
| 186 |
+
filtered_df["y"] = filtered_df[tsne_cols[1]]
|
| 187 |
+
else:
|
| 188 |
+
sampled_df = filtered_df
|
| 189 |
+
if len(sampled_df) > 1200:
|
| 190 |
+
sampled_df = sampled_df.sample(n=1200, random_state=42)
|
| 191 |
+
sampled_df = sampled_df.copy()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 192 |
|
| 193 |
+
if representation == "LWM Embedding":
|
| 194 |
+
features = np.stack(sampled_df["embedding"].values)
|
| 195 |
+
else:
|
| 196 |
+
features = np.stack(sampled_df["spectrogram"].values)
|
| 197 |
+
if features.shape[1] > 50:
|
| 198 |
+
pca = PCA(n_components=50, random_state=42)
|
| 199 |
+
features = pca.fit_transform(features)
|
| 200 |
+
|
| 201 |
+
eff_perplexity = min(perplexity, len(sampled_df) - 1)
|
| 202 |
+
eff_perplexity = max(5, eff_perplexity)
|
| 203 |
+
tsne = TSNE(
|
| 204 |
+
n_components=2,
|
| 205 |
+
perplexity=eff_perplexity,
|
| 206 |
+
n_iter=n_iter,
|
| 207 |
+
random_state=42,
|
| 208 |
+
init="pca",
|
| 209 |
+
learning_rate="auto",
|
| 210 |
+
)
|
| 211 |
+
try:
|
| 212 |
+
projections = tsne.fit_transform(features)
|
| 213 |
+
except Exception as exc:
|
| 214 |
+
pca = PCA(n_components=2, random_state=42)
|
| 215 |
+
projections = pca.fit_transform(features)
|
| 216 |
+
sampled_df["x"] = projections[:, 0]
|
| 217 |
+
sampled_df["y"] = projections[:, 1]
|
| 218 |
+
filtered_df = sampled_df
|
| 219 |
fig = px.scatter(
|
| 220 |
filtered_df,
|
| 221 |
x="x",
|
| 222 |
y="y",
|
| 223 |
+
color=color_column,
|
| 224 |
hover_data=["tech", "snr", "mod", "mob"],
|
| 225 |
title=f"t-SNE of {representation} ({len(filtered_df)} samples)",
|
| 226 |
template="plotly_white",
|
| 227 |
)
|
| 228 |
+
height = 680 if color_label == "SNR" else 640
|
| 229 |
+
fig.update_layout(
|
| 230 |
+
legend_title_text=color_label,
|
| 231 |
+
width=640,
|
| 232 |
+
height=height,
|
| 233 |
+
)
|
| 234 |
+
fig.update_yaxes(scaleanchor="x", scaleratio=1)
|
| 235 |
+
return fig
|
| 236 |
+
|
| 237 |
+
|
| 238 |
+
def build_raw_feature_matrix(samples: pd.Series, max_components: int = 256) -> np.ndarray:
|
| 239 |
+
raw_flat = []
|
| 240 |
+
for spec in samples:
|
| 241 |
+
arr = np.asarray(spec, dtype=np.float32)
|
| 242 |
+
raw_flat.append(arr.reshape(-1))
|
| 243 |
+
matrix = np.stack(raw_flat)
|
| 244 |
+
matrix = np.nan_to_num(matrix, copy=False)
|
| 245 |
+
scaler = StandardScaler()
|
| 246 |
+
matrix = scaler.fit_transform(matrix)
|
| 247 |
+
if max_components and matrix.shape[1] > max_components:
|
| 248 |
+
projector = PCA(n_components=max_components, random_state=42)
|
| 249 |
+
matrix = projector.fit_transform(matrix)
|
| 250 |
+
return matrix
|
| 251 |
|
| 252 |
|
| 253 |
def stratified_split(filtered_df: pd.DataFrame, train_ratio: float, seed: int) -> Tuple[np.ndarray, np.ndarray]:
|
| 254 |
rng = np.random.default_rng(int(seed))
|
| 255 |
+
train_indices = []
|
| 256 |
+
test_indices = []
|
| 257 |
|
| 258 |
for label_id, group in filtered_df.groupby("joint_label_id"):
|
| 259 |
indices = group.index.to_numpy()
|
|
|
|
| 269 |
return np.array(train_indices), np.array(test_indices)
|
| 270 |
|
| 271 |
|
| 272 |
+
def select_knn_k(train_labels: np.ndarray, max_k: int = 9) -> int:
|
| 273 |
+
if train_labels.size == 0:
|
| 274 |
+
return 1
|
| 275 |
+
class_counts = pd.Series(train_labels).value_counts()
|
| 276 |
+
min_class = int(class_counts.min())
|
| 277 |
+
heuristic = int(np.sqrt(train_labels.size))
|
| 278 |
+
candidate = max(1, min(max_k, heuristic))
|
| 279 |
+
k = max(1, min(candidate, min_class))
|
| 280 |
+
if k % 2 == 0 and k > 1:
|
| 281 |
+
k -= 1
|
| 282 |
+
return k
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 283 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 284 |
|
| 285 |
+
def plot_confusion_heatmap(
|
| 286 |
+
confusion: np.ndarray, label_names: List[str], title: str = "Prototype Classifier Confusion Matrix"
|
| 287 |
+
) -> go.Figure:
|
| 288 |
fig = go.Figure(
|
| 289 |
data=go.Heatmap(
|
| 290 |
z=confusion,
|
|
|
|
| 295 |
)
|
| 296 |
)
|
| 297 |
fig.update_layout(
|
| 298 |
+
title=title,
|
| 299 |
xaxis_title="Predicted",
|
| 300 |
yaxis_title="True",
|
| 301 |
xaxis=dict(tickangle=45),
|
|
|
|
| 304 |
|
| 305 |
|
| 306 |
def run_joint_evaluation(train_pct, seed, tech_filter, snr_filter, mod_filter, mob_filter):
|
| 307 |
+
if evaluation_disabled:
|
| 308 |
fig = go.Figure()
|
| 309 |
fig.update_layout(title="MoE embeddings unavailable", xaxis=dict(visible=False), yaxis=dict(visible=False))
|
| 310 |
+
return fig, fig, "MoE embeddings are not available in this Space build."
|
| 311 |
|
| 312 |
filtered = apply_filters(joint_eval_df, tech_filter, snr_filter, mod_filter, mob_filter)
|
| 313 |
if filtered.empty:
|
| 314 |
fig = go.Figure()
|
| 315 |
fig.update_layout(title="No samples after filtering", xaxis=dict(visible=False), yaxis=dict(visible=False))
|
| 316 |
+
return fig, fig, "No samples match the selected filters."
|
| 317 |
|
| 318 |
if filtered["joint_label_id"].nunique() < 2:
|
| 319 |
fig = go.Figure()
|
| 320 |
fig.update_layout(title="Need at least two classes", xaxis=dict(visible=False), yaxis=dict(visible=False))
|
| 321 |
+
return fig, fig, "Need at least two joint SNR/Doppler classes to evaluate."
|
| 322 |
+
|
| 323 |
+
filtered = filtered.reset_index(drop=True)
|
| 324 |
|
| 325 |
try:
|
| 326 |
train_idx, test_idx = stratified_split(filtered, train_pct / 100.0, seed)
|
| 327 |
except ValueError as exc:
|
| 328 |
fig = go.Figure()
|
| 329 |
fig.update_layout(title="Unable to split dataset", xaxis=dict(visible=False), yaxis=dict(visible=False))
|
| 330 |
+
return fig, fig, str(exc)
|
| 331 |
+
|
| 332 |
+
labels = filtered["joint_label_id"].to_numpy(dtype=int)
|
| 333 |
+
moe_features = np.stack(filtered["moe_embedding"].values)
|
| 334 |
+
raw_features = build_raw_feature_matrix(filtered["spectrogram"], max_components=256)
|
| 335 |
+
|
| 336 |
+
train_labels = labels[train_idx]
|
| 337 |
+
knn_k = select_knn_k(train_labels)
|
| 338 |
+
|
| 339 |
+
moe_metrics = compute_knn_metrics(moe_features, labels, train_idx, test_idx, knn_k, label_lookup=CLASS_LABELS)
|
| 340 |
+
raw_metrics = compute_knn_metrics(raw_features, labels, train_idx, test_idx, knn_k, label_lookup=CLASS_LABELS)
|
| 341 |
+
|
| 342 |
+
moe_fig = plot_confusion_heatmap(
|
| 343 |
+
moe_metrics["confusion"], moe_metrics["label_names"], title=f"MoE Embedding Confusion (k={moe_metrics['k']})"
|
| 344 |
+
)
|
| 345 |
+
raw_fig = plot_confusion_heatmap(
|
| 346 |
+
raw_metrics["confusion"], raw_metrics["label_names"], title=f"Raw Spectrogram Confusion (k={raw_metrics['k']})"
|
| 347 |
+
)
|
| 348 |
|
|
|
|
|
|
|
| 349 |
status = (
|
| 350 |
+
f"### Joint SNR/Doppler Metrics\n"
|
| 351 |
+
f"**Train/Test Samples:** {len(train_idx)} / {len(test_idx)} | **Train %:** {train_pct}% | **Seed:** {seed} | **k-NN k:** {knn_k}\n\n"
|
| 352 |
+
"| Representation | Accuracy | Macro F1 |\n"
|
| 353 |
+
"| --- | --- | --- |\n"
|
| 354 |
+
f"| **MoE Embedding** | {moe_metrics['accuracy'] * 100:.2f}% | {moe_metrics['macro_f1']:.3f} |\n"
|
| 355 |
+
f"| **Raw Spectrogram** | {raw_metrics['accuracy'] * 100:.2f}% | {raw_metrics['macro_f1']:.3f} |"
|
| 356 |
+
)
|
| 357 |
+
return moe_fig, raw_fig, status
|
| 358 |
+
|
| 359 |
+
|
| 360 |
+
def stratified_split_mod(df_subset: pd.DataFrame, train_ratio: float, seed: int) -> Tuple[np.ndarray, np.ndarray]:
|
| 361 |
+
rng = np.random.default_rng(int(seed))
|
| 362 |
+
train_idx = []
|
| 363 |
+
test_idx = []
|
| 364 |
+
for _, group in df_subset.groupby("mod"):
|
| 365 |
+
indices = group.index.to_numpy()
|
| 366 |
+
if indices.size < 2:
|
| 367 |
+
raise ValueError("Each modulation needs at least 2 samples.")
|
| 368 |
+
rng.shuffle(indices)
|
| 369 |
+
split = int(round(len(indices) * train_ratio))
|
| 370 |
+
split = max(1, min(len(indices) - 1, split))
|
| 371 |
+
train_idx.extend(indices[:split])
|
| 372 |
+
test_idx.extend(indices[split:])
|
| 373 |
+
return np.array(train_idx), np.array(test_idx)
|
| 374 |
+
|
| 375 |
+
|
| 376 |
+
def compute_knn_metrics(
    features: np.ndarray,
    labels: np.ndarray,
    train_idx: np.ndarray,
    test_idx: np.ndarray,
    knn_k: int,
    label_lookup: List[str] | None = None,
) -> Dict[str, object]:
    """Fit a k-NN classifier on the train split and score the test split.

    The requested ``knn_k`` is clamped to [1, n_train] and, when > 1,
    forced odd to reduce vote ties.  ``label_lookup`` optionally maps
    integer labels to display names for the confusion-matrix axes.

    Returns:
        dict with keys "accuracy", "macro_f1", "confusion", "label_names",
        and "k" (the k actually used).
    """
    X_train, y_train = features[train_idx], labels[train_idx]
    X_test, y_test = features[test_idx], labels[test_idx]

    k = max(1, min(int(knn_k), len(y_train)))
    if k > 1 and k % 2 == 0:
        k -= 1

    clf = KNeighborsClassifier(n_neighbors=k, metric="euclidean")
    clf.fit(X_train, y_train)
    predictions = clf.predict(X_test)

    accuracy = accuracy_score(y_test, predictions)
    # Restrict F1/confusion to labels that actually occur in this split.
    active = np.unique(np.concatenate([y_train, y_test, predictions]))
    macro_f1 = f1_score(y_test, predictions, labels=active, average="macro", zero_division=0)

    if label_lookup is None:
        names = [str(lbl) for lbl in active]
    else:
        names = [label_lookup[int(lbl)] for lbl in active]

    return {
        "accuracy": accuracy,
        "macro_f1": macro_f1,
        "confusion": confusion_matrix(y_test, predictions, labels=active),
        "label_names": names,
        "k": k,
    }
|
| 413 |
+
|
| 414 |
+
|
| 415 |
+
def evaluate_modulation(tech: str, train_pct: int, seed: int):
    """Compare LWM embeddings vs raw spectrograms for one technology's modulation task.

    Returns:
        (embedding confusion figure, raw confusion figure, markdown summary).
    """

    def _placeholder(title: str) -> go.Figure:
        # Blank figure reused by every early-exit path.
        fig = go.Figure()
        fig.update_layout(title=title, xaxis=dict(visible=False), yaxis=dict(visible=False))
        return fig

    if not tech:
        fig = _placeholder("Select a technology to evaluate.")
        return fig, fig, "No technology selected."

    subset = df[df["tech"] == tech].copy().reset_index(drop=True)
    if subset.empty or subset["mod"].nunique() < 2:
        fig = _placeholder("Need at least two modulation classes for this technology.")
        return fig, fig, "Not enough modulation classes."

    try:
        train_idx, test_idx = stratified_split_mod(subset, train_pct / 100.0, seed)
    except ValueError as exc:
        fig = _placeholder(str(exc))
        return fig, fig, str(exc)

    labels = subset["mod"].astype(str).to_numpy()
    emb_features = np.stack(subset["embedding"].values)
    raw_features = build_raw_feature_matrix(subset["spectrogram"], max_components=256)

    train_labels = labels[train_idx]
    if pd.Series(train_labels).value_counts().empty:
        fig = _placeholder("No modulation classes found.")
        return fig, fig, "No modulation classes found."

    knn_k = select_knn_k(train_labels)

    emb_metrics = compute_knn_metrics(emb_features, labels, train_idx, test_idx, knn_k)
    raw_metrics = compute_knn_metrics(raw_features, labels, train_idx, test_idx, knn_k)

    emb_fig = plot_confusion_heatmap(emb_metrics["confusion"], emb_metrics["label_names"], title="Embedding Confusion")
    raw_fig = plot_confusion_heatmap(raw_metrics["confusion"], raw_metrics["label_names"], title="Raw Confusion")

    summary = (
        f"### {tech} Modulation Metrics\n"
        f"**Train/Test Samples:** {len(train_idx)} / {len(test_idx)} | **Classifier:** k-NN (k = {emb_metrics['k']})\n\n"
        "| Representation | Accuracy | Macro F1 |\n"
        "| --- | --- | --- |\n"
        f"| **LWM Embedding** | {emb_metrics['accuracy'] * 100:.2f}% | {emb_metrics['macro_f1']:.3f} |\n"
        f"| **Raw Spectrogram** | {raw_metrics['accuracy'] * 100:.2f}% | {raw_metrics['macro_f1']:.3f} |"
    )
    return emb_fig, raw_fig, summary
|
| 467 |
+
|
| 468 |
+
|
| 469 |
+
def _reshape_spectrogram(spec: np.ndarray) -> np.ndarray:
|
| 470 |
+
arr = np.asarray(spec)
|
| 471 |
+
if arr.ndim == 1:
|
| 472 |
+
side = int(round(arr.size ** 0.5))
|
| 473 |
+
if side * side == arr.size:
|
| 474 |
+
arr = arr.reshape(side, side)
|
| 475 |
+
else:
|
| 476 |
+
arr = arr.reshape(-1, side)
|
| 477 |
+
elif arr.ndim == 3:
|
| 478 |
+
arr = arr.squeeze()
|
| 479 |
+
return arr
|
| 480 |
+
|
| 481 |
+
|
| 482 |
+
def _spectrogram_to_image(spec: np.ndarray, title: str) -> np.ndarray:
    """Render a 2-D spectrogram to an RGB image via matplotlib.

    The matrix is NaN-cleaned and min-max normalized (unless constant),
    drawn with the "turbo" colormap plus a colorbar, and returned as an
    (H, W, 3) uint8 array.
    """
    data = spec.astype(np.float32)
    if np.isnan(data).any():
        data = np.nan_to_num(data)
    lo, hi = data.min(), data.max()
    if hi - lo > 0:  # skip normalization for constant images (avoids /0)
        data = (data - lo) / (hi - lo)

    fig, ax = plt.subplots(figsize=(3, 3))
    mesh = ax.imshow(data, cmap="turbo", aspect="auto", origin="lower")
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title(title, fontsize=8)
    colorbar = fig.colorbar(mesh, ax=ax, fraction=0.046, pad=0.04)
    colorbar.ax.tick_params(labelsize=6)
    fig.tight_layout(pad=0.5)

    # Rasterize the figure and pull the RGB pixels off the Agg canvas.
    canvas = FigureCanvasAgg(fig)
    canvas.draw()
    width, height = canvas.get_width_height()
    rgba = np.frombuffer(canvas.buffer_rgba(), dtype=np.uint8).reshape(height, width, 4)
    rgb = rgba[..., :3].copy()
    plt.close(fig)
    return rgb
|
| 504 |
+
|
| 505 |
+
|
| 506 |
+
def render_spectrogram_gallery(tech, snr, mod, mob, sample_count, seed):
    """Sample spectrograms matching the filters and render gallery items.

    Returns:
        (list of (image, caption) tuples, status string).
    """
    # Each filter is either a single-element list or None (meaning "all").
    selections = [[value] if value else None for value in (tech, snr, mod, mob)]
    filtered = apply_filters(df, *selections)
    if filtered.empty:
        return [], "No spectrograms match the selected filters."

    wanted = max(1, int(sample_count))
    rng = np.random.default_rng(int(seed))
    if len(filtered) > wanted:
        chosen = rng.choice(filtered.index.to_numpy(), size=wanted, replace=False)
        subset = filtered.loc[chosen]
    else:
        subset = filtered

    items = []
    for _, row in subset.iterrows():
        caption = f"{row['tech']} | {row['mod']} | {row['snr']} | {row['mob']}"
        image = _spectrogram_to_image(_reshape_spectrogram(row["spectrogram"]), caption)
        items.append((image, caption))

    return items, f"Showing {len(subset)} spectrograms (seed={seed})."
|
| 533 |
|
| 534 |
|
| 535 |
mapping_info = load_joint_mapping()
|
| 536 |
df, has_moe_embeddings = load_data(mapping_info)
|
| 537 |
+
CLASS_LABELS = mapping_info["label_names"]
|
| 538 |
|
| 539 |
+
has_moe_column = df["moe_embedding"].apply(lambda x: x is not None)
|
| 540 |
+
joint_eval_df = df[has_moe_column & df["joint_label_id"].notna()]
|
|
|
|
| 541 |
|
| 542 |
tech_choices = sorted(df["tech"].unique())
|
| 543 |
snr_choices = sorted(df["snr"].unique())
|
| 544 |
mod_choices = sorted(df["mod"].unique())
|
| 545 |
mob_choices = sorted(df["mob"].unique())
|
| 546 |
|
| 547 |
+
TECH_TO_MODS: Dict[str, List[str]] = {
|
| 548 |
+
tech: sorted(df.loc[df["tech"] == tech, "mod"].unique().tolist()) for tech in tech_choices
|
| 549 |
+
}
|
| 550 |
+
|
| 551 |
+
COLOR_OPTIONS: Dict[str, str] = {
|
| 552 |
+
"SNR": "snr",
|
| 553 |
+
"Modulation": "mod",
|
| 554 |
+
"Mobility": "mob",
|
| 555 |
+
}
|
| 556 |
+
|
| 557 |
+
default_tech = tech_choices[:1] if tech_choices else []
|
| 558 |
+
initial_spec_mod_choices = TECH_TO_MODS.get(default_tech[0], mod_choices) if default_tech else mod_choices
|
| 559 |
|
| 560 |
+
evaluation_disabled = (not has_moe_embeddings) or joint_eval_df.empty
|
| 561 |
+
|
| 562 |
+
|
| 563 |
+
def update_modulation_choices(selected_tech: Optional[str]):
    """Refresh the modulation dropdown to the mods available for the selected tech.

    Falls back to the full modulation list when no tech is selected or the
    tech is unknown.  Uses ``gr.update``: ``gr.Dropdown.update`` was removed
    in Gradio 4+, so the old call crashes under the declared SDK (6.0.1).
    """
    choices = mod_choices
    if selected_tech:
        choices = TECH_TO_MODS.get(selected_tech, mod_choices)
    return gr.update(choices=choices, value=None)
|
| 568 |
+
|
| 569 |
+
with gr.Blocks(title="LWM-Spectro Lab") as demo:
|
| 570 |
gr.Markdown("# 🔬 LWM-Spectro Interactive Demo")
|
| 571 |
gr.Markdown(
|
| 572 |
"""
|
| 573 |
+
Compare **LWM embeddings** vs **Raw Spectrograms** for visualization, then evaluate **precomputed MoE embeddings**
|
| 574 |
+
with a lightweight k-NN prototype classifier for joint SNR/Doppler recognition.
|
| 575 |
"""
|
| 576 |
)
|
| 577 |
|
| 578 |
with gr.Tabs():
|
| 579 |
+
with gr.Tab("Spectrograms"):
|
| 580 |
+
gr.Markdown("Visualize raw 128×128 spectrograms with optional filters.")
|
| 581 |
+
with gr.Row():
|
| 582 |
+
with gr.Column(scale=1, min_width=320):
|
| 583 |
+
spec_tech = gr.Dropdown(
|
| 584 |
+
choices=tech_choices,
|
| 585 |
+
value=default_tech[0] if default_tech else None,
|
| 586 |
+
label="Technology",
|
| 587 |
+
)
|
| 588 |
+
spec_snr = gr.Dropdown(choices=snr_choices, value=None, label="SNR (optional)")
|
| 589 |
+
spec_mod = gr.Dropdown(choices=initial_spec_mod_choices, value=None, label="Modulation (optional)")
|
| 590 |
+
spec_mob = gr.Dropdown(choices=mob_choices, value=None, label="Mobility (optional)")
|
| 591 |
+
spec_count = gr.Slider(minimum=1, maximum=12, step=1, value=6, label="Samples to show")
|
| 592 |
+
spec_seed = gr.Slider(minimum=0, maximum=9999, step=1, value=0, label="Random seed")
|
| 593 |
+
spec_btn = gr.Button("Show spectrograms", variant="primary")
|
| 594 |
+
with gr.Column(scale=3):
|
| 595 |
+
gallery = gr.Gallery(
|
| 596 |
+
label="Spectrogram Samples",
|
| 597 |
+
columns=[3],
|
| 598 |
+
rows=[3],
|
| 599 |
+
height=560,
|
| 600 |
+
preview=True,
|
| 601 |
+
)
|
| 602 |
+
gallery_status = gr.Textbox(label="Status", interactive=False)
|
| 603 |
+
spec_inputs = [spec_tech, spec_snr, spec_mod, spec_mob, spec_count, spec_seed]
|
| 604 |
+
spec_btn.click(render_spectrogram_gallery, inputs=spec_inputs, outputs=[gallery, gallery_status])
|
| 605 |
+
demo.load(render_spectrogram_gallery, inputs=spec_inputs, outputs=[gallery, gallery_status])
|
| 606 |
+
spec_tech.change(update_modulation_choices, inputs=spec_tech, outputs=spec_mod)
|
| 607 |
+
|
| 608 |
+
with gr.Tab("t-SNE Analysis"):
|
| 609 |
with gr.Row():
|
| 610 |
with gr.Column(scale=1, min_width=300):
|
| 611 |
gr.Markdown("### Filters")
|
| 612 |
+
tech_filter = gr.CheckboxGroup(choices=tech_choices, value=default_tech, label="Technology")
|
| 613 |
snr_filter = gr.Dropdown(
|
| 614 |
choices=snr_choices, value=None, multiselect=True, label="SNR (Empty = All)"
|
| 615 |
)
|
|
|
|
| 626 |
value="LWM Embedding",
|
| 627 |
label="Representation",
|
| 628 |
)
|
| 629 |
+
color_by = gr.Dropdown(
|
| 630 |
+
choices=list(COLOR_OPTIONS.keys()),
|
| 631 |
+
value="SNR",
|
| 632 |
+
label="Color By",
|
| 633 |
+
)
|
| 634 |
|
| 635 |
with gr.Accordion("Advanced t-SNE Settings", open=False):
|
| 636 |
perplexity = gr.Slider(minimum=5, maximum=50, value=30, step=1, label="Perplexity")
|
| 637 |
n_iter = gr.Slider(minimum=250, maximum=2000, value=1000, step=50, label="Iterations")
|
| 638 |
|
| 639 |
btn = gr.Button("Update Plot", variant="primary")
|
|
|
|
| 640 |
|
| 641 |
with gr.Column(scale=3):
|
| 642 |
plot = gr.Plot(label="t-SNE Visualization")
|
|
|
|
| 644 |
btn.click(
|
| 645 |
plot_tsne,
|
| 646 |
inputs=[tech_filter, snr_filter, mod_filter, mob_filter, representation, color_by, perplexity, n_iter],
|
| 647 |
+
outputs=[plot],
|
| 648 |
)
|
| 649 |
|
| 650 |
demo.load(
|
| 651 |
plot_tsne,
|
| 652 |
inputs=[tech_filter, snr_filter, mod_filter, mob_filter, representation, color_by, perplexity, n_iter],
|
| 653 |
+
outputs=[plot],
|
| 654 |
+
)
|
| 655 |
+
|
| 656 |
+
with gr.Tab("Modulation Classification"):
|
| 657 |
+
gr.Markdown("Compare LWM embeddings vs raw spectrograms for per-technology modulation classification.")
|
| 658 |
+
with gr.Row():
|
| 659 |
+
with gr.Column(scale=1, min_width=320):
|
| 660 |
+
mod_tech = gr.Dropdown(
|
| 661 |
+
choices=tech_choices,
|
| 662 |
+
value=default_tech[0] if default_tech else None,
|
| 663 |
+
label="Technology",
|
| 664 |
+
)
|
| 665 |
+
mod_train = gr.Slider(minimum=50, maximum=90, step=5, value=70, label="Training Percentage (%)")
|
| 666 |
+
mod_seed = gr.Slider(minimum=0, maximum=9999, step=1, value=42, label="Random Seed")
|
| 667 |
+
gr.Markdown("k-NN uses an adaptive k based on the number of modulation classes and available training samples.")
|
| 668 |
+
mod_btn = gr.Button("Run modulation evaluation", variant="primary")
|
| 669 |
+
with gr.Column(scale=3):
|
| 670 |
+
with gr.Row():
|
| 671 |
+
emb_plot = gr.Plot(label="Embedding Confusion Matrix")
|
| 672 |
+
raw_plot = gr.Plot(label="Raw Confusion Matrix")
|
| 673 |
+
mod_summary = gr.Markdown(value="Select a technology and run the evaluation to view metrics.")
|
| 674 |
+
mod_btn.click(
|
| 675 |
+
evaluate_modulation,
|
| 676 |
+
inputs=[mod_tech, mod_train, mod_seed],
|
| 677 |
+
outputs=[emb_plot, raw_plot, mod_summary],
|
| 678 |
)
|
| 679 |
|
| 680 |
+
with gr.Tab("Joint SNR/Doppler Evaluation"):
|
| 681 |
if evaluation_disabled:
|
| 682 |
gr.Markdown(
|
| 683 |
+
"⚠️ Precomputed MoE embeddings are not bundled in this Space build. Upload a dataset locally to run evaluations."
|
| 684 |
)
|
| 685 |
|
| 686 |
with gr.Row():
|
|
|
|
| 688 |
gr.Markdown("### Evaluation Filters")
|
| 689 |
eval_tech_filter = gr.CheckboxGroup(
|
| 690 |
choices=tech_choices,
|
| 691 |
+
value=default_tech,
|
| 692 |
label="Technology",
|
| 693 |
interactive=not evaluation_disabled,
|
| 694 |
)
|
|
|
|
| 734 |
eval_btn = gr.Button("Run evaluation", variant="primary", interactive=not evaluation_disabled)
|
| 735 |
|
| 736 |
with gr.Column(scale=3):
|
| 737 |
+
with gr.Row():
|
| 738 |
+
eval_plot = gr.Plot(label="MoE Prototype Confusion")
|
| 739 |
+
eval_plot_raw = gr.Plot(label="Raw Prototype Confusion")
|
| 740 |
+
eval_status = gr.Markdown(value="Run an evaluation to compare MoE vs raw baselines.")
|
| 741 |
|
| 742 |
eval_btn.click(
|
| 743 |
run_joint_evaluation,
|
| 744 |
inputs=[train_pct, seed, eval_tech_filter, eval_snr_filter, eval_mod_filter, eval_mob_filter],
|
| 745 |
+
outputs=[eval_plot, eval_plot_raw, eval_status],
|
| 746 |
)
|
| 747 |
|
| 748 |
if __name__ == "__main__":
|
pretraining/README.md
DELETED
|
@@ -1,44 +0,0 @@
|
|
| 1 |
-
# 🔬 Pretraining Scripts
|
| 2 |
-
|
| 3 |
-
This folder contains scripts for **Large Wireless Model (LWM)** pre-training.
|
| 4 |
-
|
| 5 |
-
## 📁 File Descriptions
|
| 6 |
-
|
| 7 |
-
### `train_lwm_spectro.py`
|
| 8 |
-
- **Purpose**: Pre-train LWM model with spectrogram data
|
| 9 |
-
- **Features**:
|
| 10 |
-
- Self-supervised learning through masked patch prediction
|
| 11 |
-
- Multi-size spectrogram support (32x32, 128x128)
|
| 12 |
-
- MSE loss-based reconstruction
|
| 13 |
-
- Real-time training monitoring and result storage
|
| 14 |
-
|
| 15 |
-
### `pretrained_model.py`
|
| 16 |
-
- **Purpose**: Define structure of pre-trained LWM model
|
| 17 |
-
- **Features**: LWM model architecture implementation
|
| 18 |
-
|
| 19 |
-
## 🚀 Usage
|
| 20 |
-
|
| 21 |
-
### Basic Training Execution
|
| 22 |
-
```bash
|
| 23 |
-
cd pretraining
|
| 24 |
-
python train_lwm_spectro.py
|
| 25 |
-
```
|
| 26 |
-
|
| 27 |
-
### GPU Memory Optimization
|
| 28 |
-
```bash
|
| 29 |
-
cd pretraining
|
| 30 |
-
python train_lwm_spectro.py # GPU 메모리에 맞춰 batch_size 조정
|
| 31 |
-
```
|
| 32 |
-
|
| 33 |
-
### Check Results
|
| 34 |
-
Training results are automatically saved in `models/` folder:
|
| 35 |
-
- `*_checkpoint.pth`: Model checkpoint
|
| 36 |
-
- `*_training_history.json`: Training history
|
| 37 |
-
- `*_training_curves.png`: Training curve graphs
|
| 38 |
-
|
| 39 |
-
## 📊 Research Perspective
|
| 40 |
-
|
| 41 |
-
These scripts are used to study **LWM's representation learning capabilities**:
|
| 42 |
-
- Extract meaningful features from spectrograms
|
| 43 |
-
- Generalized representation learning through unsupervised learning
|
| 44 |
-
- Validate transfer learning effectiveness in downstream tasks
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
pretraining/__init__.py
DELETED
|
File without changes
|
pretraining/pretrained_model.py
DELETED
|
@@ -1,180 +0,0 @@
|
|
| 1 |
-
import torch
|
| 2 |
-
import torch.nn as nn
|
| 3 |
-
import torch.nn.functional as F
|
| 4 |
-
import numpy as np
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
class LayerNormalization(nn.Module):
    """Layer norm over the last dimension with learnable scale and bias."""

    def __init__(self, d_model: int, eps: float = 1e-6) -> None:
        super().__init__()
        self.eps = eps
        self.alpha = nn.Parameter(torch.ones(d_model))
        self.bias = nn.Parameter(torch.zeros(d_model))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Normalize ``x`` along its last axis, then apply scale/shift."""
        mu = x.mean(dim=-1, keepdim=True)
        sigma = x.std(dim=-1, keepdim=True)
        normalized = (x - mu) / (sigma + self.eps)
        return self.alpha * normalized + self.bias
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
class Embedding(nn.Module):
    """Linear token projection plus learned positional embedding and layer norm."""

    def __init__(self, element_length: int, d_model: int, max_len: int | None = None) -> None:
        super().__init__()
        self.element_length = element_length
        self.d_model = d_model
        # Default positional table size matches the historical checkpoint layout.
        self.max_len = 1025 if max_len is None else max_len

        self.proj = nn.Linear(element_length, d_model)
        self.pos_embed = nn.Embedding(self.max_len, d_model)
        self.norm = LayerNormalization(d_model)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Project ``x`` (batch, seq, element_length), add positions, normalize.

        Raises:
            ValueError: if the sequence is longer than ``max_len``.
        """
        seq_len = x.size(1)
        if seq_len > self.max_len:
            raise ValueError(f"Sequence length {seq_len} exceeds max_len {self.max_len}.")

        positions = torch.arange(seq_len, dtype=torch.long, device=x.device)
        tok_emb = self.proj(x.float())
        return self.norm(tok_emb + self.pos_embed(positions))
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
class ScaledDotProductAttention(nn.Module):
    """Scaled dot-product attention: softmax(QK^T / sqrt(d_k)) V."""

    def __init__(self, d_k: int) -> None:
        super().__init__()
        self.d_k = d_k

    def forward(self, Q: torch.Tensor, K: torch.Tensor, V: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
        """Return (context, attention weights)."""
        logits = torch.matmul(Q, K.transpose(-1, -2)) / np.sqrt(self.d_k)
        weights = F.softmax(logits, dim=-1)
        return torch.matmul(weights, V), weights
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
class MultiHeadAttention(nn.Module):
    """Multi-head self-attention with a residual connection and output dropout."""

    def __init__(self, d_model: int, n_heads: int, dropout: float) -> None:
        super().__init__()
        if d_model % n_heads != 0:
            raise ValueError(f"d_model ({d_model}) must be divisible by n_heads ({n_heads}).")

        self.d_k = d_model // n_heads
        self.d_v = d_model // n_heads
        self.n_heads = n_heads

        self.W_Q = nn.Linear(d_model, self.d_k * n_heads)
        self.W_K = nn.Linear(d_model, self.d_k * n_heads)
        self.W_V = nn.Linear(d_model, self.d_v * n_heads)
        self.linear = nn.Linear(n_heads * self.d_v, d_model)
        self.dropout = nn.Dropout(dropout)
        self.scaled_dot_attn = ScaledDotProductAttention(self.d_k)

    def forward(self, Q: torch.Tensor, K: torch.Tensor, V: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
        """Return (residual + attended output, attention weights)."""
        residual = Q
        bsz = Q.size(0)

        def split_heads(proj: torch.Tensor, head_dim: int) -> torch.Tensor:
            # (batch, seq, heads*dim) -> (batch, heads, seq, dim)
            return proj.view(bsz, -1, self.n_heads, head_dim).transpose(1, 2)

        q_s = split_heads(self.W_Q(Q), self.d_k)
        k_s = split_heads(self.W_K(K), self.d_k)
        v_s = split_heads(self.W_V(V), self.d_v)

        context, attn = self.scaled_dot_attn(q_s, k_s, v_s)
        merged = context.transpose(1, 2).contiguous().view(bsz, -1, self.n_heads * self.d_v)
        return residual + self.dropout(self.linear(merged)), attn
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
class PoswiseFeedForwardNet(nn.Module):
|
| 94 |
-
"""Position-wise feed-forward network."""
|
| 95 |
-
|
| 96 |
-
def __init__(self, d_model: int, d_ff: int, dropout: float) -> None:
|
| 97 |
-
super().__init__()
|
| 98 |
-
self.fc1 = nn.Linear(d_model, d_ff)
|
| 99 |
-
self.fc2 = nn.Linear(d_ff, d_model)
|
| 100 |
-
self.dropout = nn.Dropout(dropout)
|
| 101 |
-
|
| 102 |
-
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
| 103 |
-
return self.fc2(self.dropout(F.relu(self.fc1(x))))
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
class EncoderLayer(nn.Module):
    """Transformer encoder block: self-attention then FFN, each followed by LayerNorm."""

    def __init__(self, d_model: int, n_heads: int, d_ff: int, dropout: float) -> None:
        super().__init__()
        self.enc_self_attn = MultiHeadAttention(d_model, n_heads, dropout)
        self.pos_ffn = PoswiseFeedForwardNet(d_model, d_ff, dropout)
        self.norm1 = LayerNormalization(d_model)
        self.norm2 = LayerNormalization(d_model)

    def forward(self, enc_inputs: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
        """Return (block output, self-attention weights)."""
        attended, attn = self.enc_self_attn(enc_inputs, enc_inputs, enc_inputs)
        attended = self.norm1(attended)
        # Residual around the feed-forward sublayer.
        return self.norm2(attended + self.pos_ffn(attended)), attn
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
class LWM(nn.Module):
    """Large Wireless Model: a Transformer encoder with a masked-patch decoder head."""

    def __init__(
        self,
        element_length: int = 32,
        d_model: int = 128,
        n_layers: int = 12,
        max_len: int | None = None,
        n_heads: int = 8,
        dropout: float = 0.1,
    ) -> None:
        super().__init__()

        self.element_length = element_length
        self.d_model = d_model
        self.n_layers = n_layers
        self.max_len = 1025 if max_len is None else max_len
        self.n_heads = n_heads
        self.dropout = dropout

        self.embedding = Embedding(element_length, d_model, self.max_len)
        self.layers = nn.ModuleList(
            EncoderLayer(d_model, n_heads, d_model * 4, dropout) for _ in range(n_layers)
        )
        self.linear = nn.Linear(d_model, d_model)
        self.norm = LayerNormalization(d_model)

        # Decoder projects hidden states back to the embedding's input width.
        _, n_dim = self.embedding.proj.weight.size()
        self.decoder = nn.Linear(d_model, n_dim, bias=False)
        self.decoder_bias = nn.Parameter(torch.zeros(n_dim))

    def forward(
        self,
        input_ids: torch.Tensor,
        masked_pos: torch.Tensor | None = None,
    ) -> tuple[torch.Tensor, torch.Tensor] | torch.Tensor:
        """Encode ``input_ids``.

        When ``masked_pos`` is given, gather the hidden states at those
        positions and also return reconstruction logits for masked-patch
        prediction: (logits, encoder output).  Otherwise return only the
        encoder output.
        """
        hidden = self.embedding(input_ids)
        for layer in self.layers:
            hidden, attn = layer(hidden)

        if masked_pos is None:
            return hidden

        gather_idx = masked_pos.long()[:, :, None].expand(-1, -1, hidden.size(-1))
        h_masked = torch.gather(hidden, 1, gather_idx)
        h_masked = self.norm(F.relu(self.linear(h_masked)))
        logits_lm = self.decoder(h_masked) + self.decoder_bias
        return logits_lm, hidden
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
def lwm(*args, **kwargs) -> LWM:
    """Factory kept for backward compatibility with older imports."""
    model = LWM(*args, **kwargs)
    return model
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
pretraining/train_lwm_spectro.py
DELETED
|
@@ -1,741 +0,0 @@
|
|
| 1 |
-
#!/usr/bin/env python3
|
| 2 |
-
# =============================================================================
|
| 3 |
-
# train_lwm_spectro.py - LWM Pretraining with Complex-Valued Spectrogram Support
|
| 4 |
-
# Modified from train_lwm_spectro_no_contrast.py to handle complex spectrograms
|
| 5 |
-
# by separating real and imaginary parts and flattening them (similar to train_lwm.py)
|
| 6 |
-
# =============================================================================
|
| 7 |
-
|
| 8 |
-
# =============================================================================
|
| 9 |
-
# 1. IMPORTS AND WARNINGS SETUP
|
| 10 |
-
# - Load necessary PyTorch modules, utilities, and suppress UserWarnings
|
| 11 |
-
# =============================================================================
|
| 12 |
-
import sys
|
| 13 |
-
import os
|
| 14 |
-
import argparse
|
| 15 |
-
# Add project root to path (Windows compatible)
|
| 16 |
-
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
| 17 |
-
sys.path.insert(0, project_root)
|
| 18 |
-
import torch
|
| 19 |
-
import torch.nn as nn
|
| 20 |
-
import torch.nn.functional as F
|
| 21 |
-
from torch.utils.data import DataLoader, random_split, TensorDataset
|
| 22 |
-
import torch.optim as optim
|
| 23 |
-
from utils import (generate_spectrograms_and_labels, tokenizer_train,
|
| 24 |
-
create_train_dataloader, count_parameters, train_lwm)
|
| 25 |
-
import numpy as np
|
| 26 |
-
import pretrained_model # Assuming this contains the LWM model definition
|
| 27 |
-
from torch.optim.lr_scheduler import LambdaLR
|
| 28 |
-
from torch.optim import AdamW
|
| 29 |
-
import warnings
|
| 30 |
-
import platform
|
| 31 |
-
import re
|
| 32 |
-
from tqdm import tqdm
|
| 33 |
-
from datetime import datetime
|
| 34 |
-
import concurrent.futures
|
| 35 |
-
import multiprocessing
|
| 36 |
-
from collections import Counter
|
| 37 |
-
from functools import lru_cache
|
| 38 |
-
import json
|
| 39 |
-
|
| 40 |
-
SNR_PATTERN = re.compile(r"SNR(-?\d+)dB")
|
| 41 |
-
DOPPLER_MAP = {"static": 0, "pedestrian": 1, "vehicular": 2}
|
| 42 |
-
DOPPLER_INV = {v: k for k, v in DOPPLER_MAP.items()}
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
def _parse_snr_and_doppler(path: str) -> tuple[float, int]:
    """Extract (SNR in dB, doppler class id) from a dataset path.

    The last "SNR<value>dB" token in the path wins; the doppler id comes
    from the first path component found in ``DOPPLER_MAP``.  Defaults are
    0.0 dB and id 0 when nothing matches.
    """
    snr_db = 0.0
    matches = SNR_PATTERN.findall(path)
    if matches:
        try:
            snr_db = float(matches[-1])
        except ValueError:
            snr_db = 0.0

    doppler_id = 0
    for part in os.path.normpath(path).split(os.sep):
        if part in DOPPLER_MAP:
            doppler_id = DOPPLER_MAP[part]
            break

    return snr_db, doppler_id
|
| 64 |
-
|
| 65 |
-
warnings.filterwarnings("ignore", category=UserWarning)
|
| 66 |
-
|
| 67 |
-
# Use simple progress display instead of tqdm on Windows
|
| 68 |
-
USE_TQDM = platform.system() != 'Windows'
|
| 69 |
-
|
| 70 |
-
# CPU 코어 수 계산 (메모리 사용량 고려하여 보수적으로 설정)
|
| 71 |
-
total_cores = multiprocessing.cpu_count()
|
| 72 |
-
if total_cores >= 16:
|
| 73 |
-
MAX_WORKERS = min(8, total_cores // 2) # 고성능 서버의 경우 8코어로 제한
|
| 74 |
-
else:
|
| 75 |
-
MAX_WORKERS = max(2, total_cores // 2) # 일반 시스템의 경우 절반 사용
|
| 76 |
-
print(f"🚀 Using {MAX_WORKERS}/{total_cores} CPU cores for parallel processing")
|
| 77 |
-
|
| 78 |
-
PRINT_CONVERSION_STATS = os.environ.get("LWM_PRINT_CONVERSION_STATS", "").strip().lower() in {"1", "true", "yes"}
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
def convert_complex_to_interleaved(spectrograms):
    """
    Convert complex-valued spectrograms to real-imaginary interleaved format.

    Real and imaginary parts are interleaved along the last axis (real at
    even columns, imaginary at odd columns), doubling its length.  A leading
    singleton channel axis is dropped first; real-valued 3-D input is
    returned unchanged.

    Args:
        spectrograms (np.ndarray): shape (n_samples, n_rows, n_cols) or
            (n_samples, 1, n_rows, n_cols); complex or real dtype.

    Returns:
        np.ndarray: float32 array of shape (n_samples, n_rows, n_cols * 2)
        for complex input, or the real-valued input as-is.

    Raises:
        ValueError: if real-valued input is not 3-D after channel removal.
    """
    if spectrograms.ndim == 4:
        # Drop the singleton channel axis: (N, 1, R, C) -> (N, R, C).
        spectrograms = spectrograms[:, 0, :, :]

    if np.iscomplexobj(spectrograms):
        n_samples, n_rows, n_cols = spectrograms.shape
        flat_real = spectrograms.real
        flat_imag = spectrograms.imag

        interleaved = np.empty((n_samples, n_rows, n_cols * 2), dtype=np.float32)
        interleaved[:, :, 0::2] = flat_real  # even columns: real parts
        interleaved[:, :, 1::2] = flat_imag  # odd columns: imaginary parts

        if PRINT_CONVERSION_STATS:
            print(f" ℹ️ Converted complex spectrograms: {spectrograms.shape} -> {interleaved.shape}")
            print(f" Real part range: [{flat_real.min():.2e}, {flat_real.max():.2e}]")
            print(f" Imag part range: [{flat_imag.min():.2e}, {flat_imag.max():.2e}]")

        return interleaved

    if spectrograms.ndim == 3:
        if PRINT_CONVERSION_STATS:
            print(f" ℹ️ Data is already real-valued: {spectrograms.shape}")
        return spectrograms
    raise ValueError(f"Unexpected spectrogram shape: {spectrograms.shape}")
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
def process_single_scenario(scenario_info):
|
| 133 |
-
"""단일 시나리오를 처리하는 함수 (멀티프로세싱용)"""
|
| 134 |
-
scenario_name, spectrogram_path = scenario_info
|
| 135 |
-
|
| 136 |
-
try:
|
| 137 |
-
# 메모리 효율성을 위해 필요한 데이터만 로드
|
| 138 |
-
scenario_spectrograms, scenario_labels = generate_spectrograms_and_labels(
|
| 139 |
-
scenario_name=scenario_name,
|
| 140 |
-
spectrogram_path=spectrogram_path,
|
| 141 |
-
cache_path=None, # 메모리 문제로 캐시 비활성화
|
| 142 |
-
)
|
| 143 |
-
|
| 144 |
-
# Validate load
|
| 145 |
-
if scenario_spectrograms is None or (hasattr(scenario_spectrograms, 'size') and scenario_spectrograms.size == 0):
|
| 146 |
-
print(f" ⚠️ No data loaded from: {spectrogram_path}")
|
| 147 |
-
return None
|
| 148 |
-
|
| 149 |
-
# Convert complex spectrograms to interleaved real-imaginary format
|
| 150 |
-
scenario_spectrograms = convert_complex_to_interleaved(scenario_spectrograms)
|
| 151 |
-
|
| 152 |
-
snr_db, doppler_id = _parse_snr_and_doppler(spectrogram_path)
|
| 153 |
-
|
| 154 |
-
# 데이터 분할 (인덱스만 계산)
|
| 155 |
-
total_samples = len(scenario_spectrograms)
|
| 156 |
-
train_size = int(0.8 * total_samples)
|
| 157 |
-
val_size = total_samples - train_size
|
| 158 |
-
|
| 159 |
-
# 메모리 절약을 위해 numpy array로 유지 (필요할 때만 tensor로 변환)
|
| 160 |
-
train_data = np.array(scenario_spectrograms[:train_size], dtype=np.float32)
|
| 161 |
-
val_data = np.array(scenario_spectrograms[train_size:], dtype=np.float32)
|
| 162 |
-
|
| 163 |
-
snr_array = np.full(total_samples, snr_db, dtype=np.float32)
|
| 164 |
-
doppler_array = np.full(total_samples, doppler_id, dtype=np.int64)
|
| 165 |
-
train_meta = {
|
| 166 |
-
'snr_db': snr_array[:train_size],
|
| 167 |
-
'doppler_id': doppler_array[:train_size],
|
| 168 |
-
}
|
| 169 |
-
val_meta = {
|
| 170 |
-
'snr_db': snr_array[train_size:],
|
| 171 |
-
'doppler_id': doppler_array[train_size:],
|
| 172 |
-
}
|
| 173 |
-
|
| 174 |
-
# 불필요한 데이터 즉시 삭제
|
| 175 |
-
del scenario_spectrograms
|
| 176 |
-
|
| 177 |
-
return {
|
| 178 |
-
'scenario': scenario_name,
|
| 179 |
-
'train_data': train_data,
|
| 180 |
-
'val_data': val_data,
|
| 181 |
-
'train_meta': train_meta,
|
| 182 |
-
'val_meta': val_meta,
|
| 183 |
-
'train_size': len(train_data),
|
| 184 |
-
'val_size': len(val_data)
|
| 185 |
-
}
|
| 186 |
-
except Exception as e:
|
| 187 |
-
print(f"❌ Error processing scenario {scenario_name}: {e}")
|
| 188 |
-
import traceback
|
| 189 |
-
traceback.print_exc()
|
| 190 |
-
return None
|
| 191 |
-
|
| 192 |
-
# GPU Memory Monitor import (for Lambda) - Removed
|
| 193 |
-
|
| 194 |
-
# =============================================================================
|
| 195 |
-
# 2. SCENARIO LIST DEFINITION
|
| 196 |
-
# - Define the list of scenario names to iterate over for data generation
|
| 197 |
-
# =============================================================================
|
| 198 |
-
|
| 199 |
-
# Supported communications; can be limited via CLI
|
| 200 |
-
SUPPORTED_COMM_TYPES = {"LTE", "WiFi", "5G"}
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
def _parse_standard_args():
|
| 204 |
-
parser = argparse.ArgumentParser(add_help=False)
|
| 205 |
-
parser.add_argument('--standards', nargs='+', choices=SUPPORTED_COMM_TYPES,
|
| 206 |
-
help='Specify one or more communication types to include (default: all).')
|
| 207 |
-
for comm in SUPPORTED_COMM_TYPES:
|
| 208 |
-
parser.add_argument(f'--{comm}', dest=f'flag_{comm}', action='store_true',
|
| 209 |
-
help=f'Include only {comm} data (can be combined).')
|
| 210 |
-
parser.add_argument('--city', '--cities', dest='cities', nargs='+',
|
| 211 |
-
help='Limit scenarios to one or more city prefixes (e.g., "0" or "city_0").')
|
| 212 |
-
parser.add_argument(
|
| 213 |
-
'--normalization',
|
| 214 |
-
choices=('per_sample', 'dataset'),
|
| 215 |
-
default='per_sample',
|
| 216 |
-
help='Normalization mode applied during tokenization (default: %(default)s).'
|
| 217 |
-
)
|
| 218 |
-
parser.add_argument('--help', action='help')
|
| 219 |
-
|
| 220 |
-
args, remaining = parser.parse_known_args()
|
| 221 |
-
|
| 222 |
-
enabled = set(SUPPORTED_COMM_TYPES)
|
| 223 |
-
if args.standards:
|
| 224 |
-
enabled = set(args.standards)
|
| 225 |
-
else:
|
| 226 |
-
flagged = {comm for comm in SUPPORTED_COMM_TYPES if getattr(args, f'flag_{comm}', False)}
|
| 227 |
-
if flagged:
|
| 228 |
-
enabled = flagged
|
| 229 |
-
|
| 230 |
-
selected_cities: list[str] | None = None
|
| 231 |
-
if args.cities:
|
| 232 |
-
selected_cities = []
|
| 233 |
-
for city_token in args.cities:
|
| 234 |
-
token = str(city_token).strip()
|
| 235 |
-
if not token:
|
| 236 |
-
continue
|
| 237 |
-
if token.startswith('city_'):
|
| 238 |
-
selected_cities.append(token)
|
| 239 |
-
else:
|
| 240 |
-
selected_cities.append(f'city_{token}')
|
| 241 |
-
if not selected_cities:
|
| 242 |
-
selected_cities = None
|
| 243 |
-
|
| 244 |
-
# Return remaining args to allow downstream parsing if needed
|
| 245 |
-
sys.argv = [sys.argv[0]] + remaining
|
| 246 |
-
return enabled, selected_cities, args.normalization
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
ENABLED_COMM_TYPES, ENABLED_CITY_PREFIXES, NORMALIZATION_MODE = _parse_standard_args()
|
| 250 |
-
MAX_SCENARIOS = int(os.environ.get("LWM_MAX_SCENARIOS", "0")) or None
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
def _extract_scenario_token(file_path):
|
| 254 |
-
"""Derive the base scenario token (without city) from the file path."""
|
| 255 |
-
normalized_path = os.path.normpath(file_path)
|
| 256 |
-
parts = normalized_path.split(os.sep)
|
| 257 |
-
|
| 258 |
-
scenario_parts = []
|
| 259 |
-
for i, part in enumerate(parts):
|
| 260 |
-
if part in SUPPORTED_COMM_TYPES:
|
| 261 |
-
trailing = parts[i:i + 5]
|
| 262 |
-
if trailing:
|
| 263 |
-
scenario_parts = trailing[:5]
|
| 264 |
-
break
|
| 265 |
-
|
| 266 |
-
if not scenario_parts:
|
| 267 |
-
# Fallback for datasets where the communication type is only captured in the filename
|
| 268 |
-
base_name = os.path.splitext(os.path.basename(file_path))[0]
|
| 269 |
-
if base_name.startswith('spectrogram_'):
|
| 270 |
-
tokens = base_name.split('_')[1:] # drop 'spectrogram'
|
| 271 |
-
if tokens and tokens[0] in SUPPORTED_COMM_TYPES:
|
| 272 |
-
scenario_parts = tokens[:5] if len(tokens) >= 5 else tokens
|
| 273 |
-
|
| 274 |
-
return '_'.join(scenario_parts) if scenario_parts else None
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
@lru_cache(maxsize=1)
|
| 278 |
-
def _collect_scenario_file_info():
|
| 279 |
-
import glob
|
| 280 |
-
|
| 281 |
-
scenario_entries = []
|
| 282 |
-
|
| 283 |
-
# New MATLAB receiver pipeline output
|
| 284 |
-
new_base = os.path.join('ls_data', 'MATLAB', 'receiver_pipeline')
|
| 285 |
-
if os.path.isdir(new_base):
|
| 286 |
-
patterns = [os.path.join(new_base, '*', '**', 'spectrogram_*.mat')]
|
| 287 |
-
for pattern in patterns:
|
| 288 |
-
for file_path in sorted(glob.glob(pattern, recursive=True)):
|
| 289 |
-
norm = os.path.normpath(file_path)
|
| 290 |
-
parts = norm.split(os.sep)
|
| 291 |
-
# Determine a grouping token similar to city_name; use the standard folder name
|
| 292 |
-
try:
|
| 293 |
-
idx = parts.index('receiver_pipeline')
|
| 294 |
-
city_name = parts[idx + 1] if idx + 1 < len(parts) else 'receiver_pipeline'
|
| 295 |
-
except ValueError:
|
| 296 |
-
city_name = 'receiver_pipeline'
|
| 297 |
-
|
| 298 |
-
base_token = _extract_scenario_token(file_path)
|
| 299 |
-
if not base_token:
|
| 300 |
-
continue
|
| 301 |
-
comm_type = base_token.split('_', 1)[0]
|
| 302 |
-
if comm_type not in ENABLED_COMM_TYPES:
|
| 303 |
-
continue
|
| 304 |
-
scenario_id = f"{city_name}::{base_token}"
|
| 305 |
-
scenario_entries.append((scenario_id, file_path, city_name, base_token))
|
| 306 |
-
|
| 307 |
-
# Legacy repo layouts under spectrograms/city_*
|
| 308 |
-
import glob as _glob
|
| 309 |
-
for city_dir in sorted(_glob.glob(os.path.join('spectrograms', 'city_*'))):
|
| 310 |
-
if not os.path.isdir(city_dir):
|
| 311 |
-
continue
|
| 312 |
-
city_name = os.path.basename(city_dir)
|
| 313 |
-
if ENABLED_CITY_PREFIXES:
|
| 314 |
-
if not any(city_name.startswith(prefix) for prefix in ENABLED_CITY_PREFIXES):
|
| 315 |
-
continue
|
| 316 |
-
# Look for complex spectrogram outputs; support both nested and flat layouts
|
| 317 |
-
candidate_patterns = [
|
| 318 |
-
os.path.join(city_dir, '**', 'complex_raw', '**', 'spectrogram_*.mat'),
|
| 319 |
-
os.path.join(city_dir, '**', 'spectrogram_*.mat'),
|
| 320 |
-
]
|
| 321 |
-
city_files = []
|
| 322 |
-
seen_paths = set()
|
| 323 |
-
for pattern in candidate_patterns:
|
| 324 |
-
for file_path in sorted(_glob.glob(pattern, recursive=True)):
|
| 325 |
-
if not file_path.lower().endswith('.mat'):
|
| 326 |
-
continue
|
| 327 |
-
if file_path in seen_paths:
|
| 328 |
-
continue
|
| 329 |
-
seen_paths.add(file_path)
|
| 330 |
-
city_files.append(file_path)
|
| 331 |
-
|
| 332 |
-
# Fallback: 512FFT pattern (기존 호환성)
|
| 333 |
-
if not city_files:
|
| 334 |
-
pattern = os.path.join(city_dir, '**', '512FFT', '**', 'spectrograms', '*.pkl')
|
| 335 |
-
city_files = sorted(_glob.glob(pattern, recursive=True))
|
| 336 |
-
|
| 337 |
-
for file_path in city_files:
|
| 338 |
-
base_token = _extract_scenario_token(file_path)
|
| 339 |
-
if not base_token:
|
| 340 |
-
continue
|
| 341 |
-
comm_type = base_token.split('_', 1)[0]
|
| 342 |
-
if comm_type not in ENABLED_COMM_TYPES:
|
| 343 |
-
continue
|
| 344 |
-
scenario_id = f"{city_name}::{base_token}"
|
| 345 |
-
scenario_entries.append((scenario_id, file_path, city_name, base_token))
|
| 346 |
-
|
| 347 |
-
if MAX_SCENARIOS:
|
| 348 |
-
scenario_entries = scenario_entries[:MAX_SCENARIOS]
|
| 349 |
-
|
| 350 |
-
return scenario_entries
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
def scenarios_list():
|
| 354 |
-
scenario_entries = _collect_scenario_file_info()
|
| 355 |
-
|
| 356 |
-
if not scenario_entries:
|
| 357 |
-
print("⚠️ No spectrogram files found for pretraining.")
|
| 358 |
-
return np.array([])
|
| 359 |
-
|
| 360 |
-
print(f"Enabled communication types: {sorted(ENABLED_COMM_TYPES)}")
|
| 361 |
-
if ENABLED_CITY_PREFIXES:
|
| 362 |
-
print(f"Selected city prefixes: {sorted(ENABLED_CITY_PREFIXES)}")
|
| 363 |
-
city_counts = Counter(entry[2] for entry in scenario_entries)
|
| 364 |
-
print("Using scenarios from the following city datasets:")
|
| 365 |
-
for city_name, count in city_counts.items():
|
| 366 |
-
print(f" - {city_name}: {count} files")
|
| 367 |
-
|
| 368 |
-
print(f"Total scenarios selected: {len(scenario_entries)}")
|
| 369 |
-
return np.array([entry[0] for entry in scenario_entries])
|
| 370 |
-
|
| 371 |
-
|
| 372 |
-
# =============================================================================
|
| 373 |
-
# 3. SCENARIO PROPERTIES MAPPING
|
| 374 |
-
# - Map each scenario name to its corresponding properties
|
| 375 |
-
# =============================================================================
|
| 376 |
-
|
| 377 |
-
def scenario_prop():
|
| 378 |
-
scenario_entries = _collect_scenario_file_info()
|
| 379 |
-
|
| 380 |
-
row_column_users = {}
|
| 381 |
-
for scenario_id, file_path, city_name, _ in scenario_entries:
|
| 382 |
-
row_column_users[scenario_id] = {
|
| 383 |
-
'spectrogram_path': file_path,
|
| 384 |
-
'cache_path': os.path.join('spectrograms', city_name, 'spectrogram_cache_128x128.pkl')
|
| 385 |
-
}
|
| 386 |
-
|
| 387 |
-
return row_column_users
|
| 388 |
-
|
| 389 |
-
# =============================================================================
|
| 390 |
-
# 4. TRAINING PARAMETERS AND HYPERPARAMETERS
|
| 391 |
-
# - Set training epochs, batch sizes, learning rates, model dimensions, etc.
|
| 392 |
-
# =============================================================================
|
| 393 |
-
|
| 394 |
-
EPOCHS = 20 # Increased for better convergence
|
| 395 |
-
# Optimized batch size for A100 GPU (40GB)
|
| 396 |
-
BATCH_SIZE = 16
|
| 397 |
-
VAL_BATCH_SIZE = 16
|
| 398 |
-
WARMUP_EPOCHS = 5
|
| 399 |
-
BASE_LR = 5e-4
|
| 400 |
-
MIN_LR = 1e-8
|
| 401 |
-
# Updated for 128x128 complex spectrograms with real-imaginary interleaving
|
| 402 |
-
N_ROWS = 4
|
| 403 |
-
N_COLUMNS = 4
|
| 404 |
-
ELEMENT_LENGTH = N_ROWS * N_COLUMNS * 2 # Complex spectrograms: 2x for real+imaginary interleaving
|
| 405 |
-
D_MODEL = 128
|
| 406 |
-
MAX_LEN = 1025 # (128/4) * (128/4) + 1 = 32 * 32 + 1 = 1024 + 1 for [CLS] token
|
| 407 |
-
# Interleaving keeps the same number of spatial patches (32x32) while doubling patch width
|
| 408 |
-
# so each token covers 4x4 complex bins (real+imag) and sequence length stays at 1025.
|
| 409 |
-
N_LAYERS = 12
|
| 410 |
-
device_idx = 0
|
| 411 |
-
WEIGHT_DECAY = 0.05
|
| 412 |
-
BETA1 = 0.9
|
| 413 |
-
BETA2 = 0.999
|
| 414 |
-
MASK_PERCENT = 0.6
|
| 415 |
-
N_HEADS = 8
|
| 416 |
-
DROPOUT = 0.1
|
| 417 |
-
|
| 418 |
-
print(f"📊 Model configuration for complex spectrograms:")
|
| 419 |
-
print(f" Patch size: {N_ROWS}x{N_COLUMNS}")
|
| 420 |
-
print(f" Element length: {ELEMENT_LENGTH} (includes real+imag interleaving)")
|
| 421 |
-
print(f" Max sequence length: {MAX_LEN}")
|
| 422 |
-
|
| 423 |
-
# =============================================================================
|
| 424 |
-
# 5. DATA GENERATION LOOP
|
| 425 |
-
# - Iterate over scenarios to generate spectrogram samples and labels
|
| 426 |
-
# =============================================================================
|
| 427 |
-
|
| 428 |
-
scenarios = scenarios_list()
|
| 429 |
-
scenario_properties = scenario_prop()
|
| 430 |
-
|
| 431 |
-
# Collect all training and validation data separately
|
| 432 |
-
train_spectrogram_chunks = []
|
| 433 |
-
val_spectrogram_chunks = []
|
| 434 |
-
train_label_chunks = []
|
| 435 |
-
val_label_chunks = []
|
| 436 |
-
train_meta_chunks = []
|
| 437 |
-
val_meta_chunks = []
|
| 438 |
-
|
| 439 |
-
print(f"📂 Loading {len(scenarios)} scenarios...")
|
| 440 |
-
|
| 441 |
-
# TEMP: Modified to not use cache
|
| 442 |
-
print("⚠️ TEMPORARY FIX: Skipping cache to avoid memory issues")
|
| 443 |
-
cache_path = None # Disable cache usage
|
| 444 |
-
|
| 445 |
-
# 단일 프로세스 시나리오 처리 (멀티프로세싱 비활성화)
|
| 446 |
-
scenario_info_list = []
|
| 447 |
-
missing_props = []
|
| 448 |
-
for scenario in scenarios:
|
| 449 |
-
props = scenario_properties.get(scenario)
|
| 450 |
-
if props is None:
|
| 451 |
-
missing_props.append(scenario)
|
| 452 |
-
continue
|
| 453 |
-
scenario_info_list.append((scenario, props["spectrogram_path"]))
|
| 454 |
-
|
| 455 |
-
if missing_props:
|
| 456 |
-
print("⚠️ Missing metadata for the following scenarios; skipping:")
|
| 457 |
-
for scen in missing_props:
|
| 458 |
-
print(f" - {scen}")
|
| 459 |
-
|
| 460 |
-
print(f"📂 Loading {len(scenario_info_list)} scenarios using single process...")
|
| 461 |
-
|
| 462 |
-
# 단일 프로세스로 처리
|
| 463 |
-
successful_scenarios = 0
|
| 464 |
-
scenario_results = []
|
| 465 |
-
|
| 466 |
-
for scenario_info in tqdm(scenario_info_list, desc="Processing scenarios", unit="scenario"):
|
| 467 |
-
scenario_name = scenario_info[0]
|
| 468 |
-
try:
|
| 469 |
-
result = process_single_scenario(scenario_info)
|
| 470 |
-
if result is not None:
|
| 471 |
-
# 데이터 수집 (시나리오 단위로 누적)
|
| 472 |
-
train_spectrogram_chunks.append(result['train_data'])
|
| 473 |
-
val_spectrogram_chunks.append(result['val_data'])
|
| 474 |
-
train_label_chunks.append(np.zeros(result['train_size'], dtype=np.int64))
|
| 475 |
-
val_label_chunks.append(np.zeros(result['val_size'], dtype=np.int64))
|
| 476 |
-
train_meta_chunks.append(result['train_meta'])
|
| 477 |
-
val_meta_chunks.append(result['val_meta'])
|
| 478 |
-
successful_scenarios += 1
|
| 479 |
-
except Exception as e:
|
| 480 |
-
print(f"❌ Scenario {scenario_name} processing failed: {e}")
|
| 481 |
-
|
| 482 |
-
print(f"✅ Processing completed! Successful scenarios: {successful_scenarios}/{len(scenario_info_list)}")
|
| 483 |
-
|
| 484 |
-
if not train_spectrogram_chunks or not val_spectrogram_chunks:
|
| 485 |
-
raise ValueError("No spectrogram data collected; check scenario configuration.")
|
| 486 |
-
|
| 487 |
-
print("🔄 Collating spectrogram arrays...")
|
| 488 |
-
train_spectrograms = np.concatenate(train_spectrogram_chunks, axis=0).astype(np.float32, copy=False)
|
| 489 |
-
val_spectrograms = np.concatenate(val_spectrogram_chunks, axis=0).astype(np.float32, copy=False)
|
| 490 |
-
train_labels = np.concatenate(train_label_chunks, axis=0)
|
| 491 |
-
val_labels = np.concatenate(val_label_chunks, axis=0)
|
| 492 |
-
|
| 493 |
-
def _concat_metadata_dicts(dict_list):
|
| 494 |
-
if not dict_list:
|
| 495 |
-
return {}
|
| 496 |
-
keys = dict_list[0].keys()
|
| 497 |
-
return {k: np.concatenate([d[k] for d in dict_list], axis=0) for k in keys}
|
| 498 |
-
|
| 499 |
-
train_metadata = _concat_metadata_dicts(train_meta_chunks)
|
| 500 |
-
val_metadata = _concat_metadata_dicts(val_meta_chunks)
|
| 501 |
-
|
| 502 |
-
del train_spectrogram_chunks, val_spectrogram_chunks, train_label_chunks, val_label_chunks
|
| 503 |
-
del train_meta_chunks, val_meta_chunks
|
| 504 |
-
|
| 505 |
-
print(f"Training spectrograms shape: {train_spectrograms.shape}")
|
| 506 |
-
print(f"Validation spectrograms shape: {val_spectrograms.shape}")
|
| 507 |
-
print(f"Memory usage: {train_spectrograms.nbytes + val_spectrograms.nbytes + train_labels.nbytes + val_labels.nbytes:,} bytes")
|
| 508 |
-
|
| 509 |
-
train_mean = float(train_spectrograms.mean())
|
| 510 |
-
train_std = float(train_spectrograms.std())
|
| 511 |
-
if abs(train_std) < 1e-6:
|
| 512 |
-
print("⚠️ Training std near zero, using epsilon for stability")
|
| 513 |
-
train_std = 1e-6
|
| 514 |
-
dataset_normalization = {'mean': train_mean, 'std': train_std, 'normalization': NORMALIZATION_MODE}
|
| 515 |
-
print(f"Dataset normalization stats -> mean: {train_mean:.4f}, std: {train_std:.4f}")
|
| 516 |
-
|
| 517 |
-
# =============================================================================
|
| 518 |
-
# 6. DATA TOKENIZATION
|
| 519 |
-
# - Tokenize spectrogram matrices into input sequences with masking for pretraining
|
| 520 |
-
# =============================================================================
|
| 521 |
-
|
| 522 |
-
# Tokenize training data
|
| 523 |
-
print("🔄 Starting tokenization of training data...")
|
| 524 |
-
preprocessed_train = tokenizer_train(
|
| 525 |
-
train_spectrograms,
|
| 526 |
-
max_len=MAX_LEN,
|
| 527 |
-
masking_percent=MASK_PERCENT,
|
| 528 |
-
mask=True,
|
| 529 |
-
seed=42,
|
| 530 |
-
metadata=train_metadata,
|
| 531 |
-
dataset_stats=dataset_normalization,
|
| 532 |
-
normalization=NORMALIZATION_MODE,
|
| 533 |
-
interleaved=True,
|
| 534 |
-
)
|
| 535 |
-
print("✅ Training data tokenization completed!")
|
| 536 |
-
|
| 537 |
-
# Tokenize validation data (with masking for pretraining evaluation)
|
| 538 |
-
print("🔄 Starting tokenization of validation data...")
|
| 539 |
-
preprocessed_val = tokenizer_train(
|
| 540 |
-
val_spectrograms,
|
| 541 |
-
max_len=MAX_LEN,
|
| 542 |
-
masking_percent=MASK_PERCENT,
|
| 543 |
-
mask=True, # Apply masking for pretraining evaluation
|
| 544 |
-
seed=42,
|
| 545 |
-
metadata=val_metadata,
|
| 546 |
-
dataset_stats=dataset_normalization,
|
| 547 |
-
normalization=NORMALIZATION_MODE,
|
| 548 |
-
interleaved=True,
|
| 549 |
-
)
|
| 550 |
-
print("✅ Validation data tokenization completed!")
|
| 551 |
-
|
| 552 |
-
# =============================================================================
|
| 553 |
-
# 7. TRAIN/VALIDATION DATA SETUP
|
| 554 |
-
# - Use pre-split training and validation data
|
| 555 |
-
# =============================================================================
|
| 556 |
-
|
| 557 |
-
SEED = 42
|
| 558 |
-
torch.manual_seed(SEED)
|
| 559 |
-
np.random.seed(SEED)
|
| 560 |
-
|
| 561 |
-
# Use pre-split data
|
| 562 |
-
train_data = preprocessed_train
|
| 563 |
-
val_data = preprocessed_val
|
| 564 |
-
|
| 565 |
-
# =============================================================================
|
| 566 |
-
# 8. DATALOADER CREATION
|
| 567 |
-
# - Build PyTorch DataLoader objects for batched training and validation
|
| 568 |
-
# =============================================================================
|
| 569 |
-
|
| 570 |
-
# Handle different data formats
|
| 571 |
-
print("🔧 Creating data loaders...")
|
| 572 |
-
|
| 573 |
-
if isinstance(train_data, dict):
|
| 574 |
-
print(f" Training data format: dict with {len(train_data)} sequence lengths")
|
| 575 |
-
# Training data with masking
|
| 576 |
-
train_loaders = create_train_dataloader(train_data, batch_size=BATCH_SIZE, shuffle=True)
|
| 577 |
-
else:
|
| 578 |
-
print(f" Training data format: tensor with shape {train_data.shape}")
|
| 579 |
-
# Training data without masking (fallback)
|
| 580 |
-
train_dataset = TensorDataset(train_data)
|
| 581 |
-
train_loaders = {'seq_0': DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)}
|
| 582 |
-
|
| 583 |
-
if isinstance(val_data, dict):
|
| 584 |
-
print(f" Validation data format: dict with {len(val_data)} sequence lengths")
|
| 585 |
-
# Validation data with masking
|
| 586 |
-
val_loaders = create_train_dataloader(val_data, batch_size=VAL_BATCH_SIZE, shuffle=False)
|
| 587 |
-
else:
|
| 588 |
-
print(f" Validation data format: tensor with shape {val_data.shape}")
|
| 589 |
-
# Validation data without masking
|
| 590 |
-
val_dataset = TensorDataset(val_data)
|
| 591 |
-
val_loaders = {'seq_0': DataLoader(val_dataset, batch_size=VAL_BATCH_SIZE, shuffle=False)}
|
| 592 |
-
|
| 593 |
-
print("✅ Data loaders created successfully!")
|
| 594 |
-
|
| 595 |
-
# =============================================================================
|
| 596 |
-
# 9. MODEL INITIALIZATION
|
| 597 |
-
# - Instantiate the LWM transformer model and optionally load pre-trained weights
|
| 598 |
-
# - Wrap with DataParallel for multi-GPU support
|
| 599 |
-
# =============================================================================
|
| 600 |
-
|
| 601 |
-
# Device selection with MPS support for Mac
|
| 602 |
-
print("🔧 Setting up device and GPU configuration...")
|
| 603 |
-
|
| 604 |
-
if torch.cuda.is_available():
|
| 605 |
-
device_count = torch.cuda.device_count()
|
| 606 |
-
print(f" CUDA available: {device_count} GPU(s) detected")
|
| 607 |
-
|
| 608 |
-
device = torch.device("cuda:0")
|
| 609 |
-
|
| 610 |
-
# On Windows, use only available GPUs
|
| 611 |
-
gpu_ids = list(range(device_count)) # 0, 1, 2... auto-detect
|
| 612 |
-
print(f" Using CUDA GPUs: {gpu_ids}")
|
| 613 |
-
|
| 614 |
-
# GPU memory status
|
| 615 |
-
for i in gpu_ids:
|
| 616 |
-
try:
|
| 617 |
-
mem_total = torch.cuda.get_device_properties(i).total_memory / 1024**3
|
| 618 |
-
mem_allocated = torch.cuda.memory_allocated(i) / 1024**3
|
| 619 |
-
print(f" GPU {i}: Total: {mem_total:.1f}GB, Allocated: {mem_allocated:.1f}GB")
|
| 620 |
-
except Exception as e:
|
| 621 |
-
print(f" GPU {i}: Error getting memory info - {e}")
|
| 622 |
-
|
| 623 |
-
elif torch.backends.mps.is_available():
|
| 624 |
-
device = torch.device("mps")
|
| 625 |
-
gpu_ids = [] # MPS doesn't support DataParallel
|
| 626 |
-
print(" Using MPS (Apple Silicon GPU)")
|
| 627 |
-
else:
|
| 628 |
-
device = torch.device("cpu")
|
| 629 |
-
gpu_ids = []
|
| 630 |
-
print(" Using CPU")
|
| 631 |
-
|
| 632 |
-
print(f" Final device: {device}")
|
| 633 |
-
print(f" GPU IDs for DataParallel: {gpu_ids}")
|
| 634 |
-
|
| 635 |
-
print("🤖 Initializing LWM model...")
|
| 636 |
-
print(f" Model parameters: element_length={ELEMENT_LENGTH}, d_model={D_MODEL}, n_layers={N_LAYERS}, max_len={MAX_LEN}, n_heads={N_HEADS}")
|
| 637 |
-
|
| 638 |
-
try:
|
| 639 |
-
model = pretrained_model.lwm(
|
| 640 |
-
element_length=ELEMENT_LENGTH, # Complex spectrograms with real-imag interleaving
|
| 641 |
-
d_model=D_MODEL,
|
| 642 |
-
n_layers=N_LAYERS,
|
| 643 |
-
max_len=MAX_LEN,
|
| 644 |
-
n_heads=N_HEADS,
|
| 645 |
-
dropout=DROPOUT
|
| 646 |
-
)
|
| 647 |
-
print(" ✅ Model created successfully")
|
| 648 |
-
|
| 649 |
-
print(f" Moving model to device: {device}")
|
| 650 |
-
# MPS only supports float32, so set dtype
|
| 651 |
-
if 'mps' in str(device):
|
| 652 |
-
model = model.to(device).float()
|
| 653 |
-
print(" ✅ Model moved to MPS device (float32)")
|
| 654 |
-
else:
|
| 655 |
-
model = model.to(device)
|
| 656 |
-
print(" ✅ Model moved to device successfully")
|
| 657 |
-
|
| 658 |
-
except Exception as e:
|
| 659 |
-
print(f" ❌ Model initialization failed: {e}")
|
| 660 |
-
import traceback
|
| 661 |
-
traceback.print_exc()
|
| 662 |
-
exit(1)
|
| 663 |
-
|
| 664 |
-
# Optional: Load pre-trained model
|
| 665 |
-
load_model = False
|
| 666 |
-
if load_model:
|
| 667 |
-
model.load_state_dict(torch.load("models/model_checkpoint.pth", map_location=device))
|
| 668 |
-
print("Pre-trained model loaded successfully.")
|
| 669 |
-
|
| 670 |
-
# Use DataParallel for multi-GPU support (skip for MPS)
|
| 671 |
-
if gpu_ids:
|
| 672 |
-
model = nn.DataParallel(model, device_ids=gpu_ids)
|
| 673 |
-
print(f"Model loaded successfully on GPU {device.index}")
|
| 674 |
-
else:
|
| 675 |
-
print(f"Model loaded successfully on {device}")
|
| 676 |
-
n_parameters = count_parameters(model)
|
| 677 |
-
print(f"Number of trainable parameters: {n_parameters:,}")
|
| 678 |
-
|
| 679 |
-
# =============================================================================
|
| 680 |
-
# 10. OPTIMIZER AND LEARNING RATE SCHEDULER
|
| 681 |
-
# - Configure AdamW optimizer and a cosine-with-warmup LR schedule based on total steps
|
| 682 |
-
# =============================================================================
|
| 683 |
-
|
| 684 |
-
TOTAL_STEPS = sum(len(loader) for loader in train_loaders.values()) * EPOCHS
|
| 685 |
-
WARMUP_STEPS = sum(len(loader) for loader in train_loaders.values()) * WARMUP_EPOCHS
|
| 686 |
-
|
| 687 |
-
optimizer = AdamW(
|
| 688 |
-
model.parameters(),
|
| 689 |
-
lr=BASE_LR,
|
| 690 |
-
betas=(BETA1, BETA2),
|
| 691 |
-
weight_decay=WEIGHT_DECAY
|
| 692 |
-
)
|
| 693 |
-
|
| 694 |
-
def lr_lambda(current_step):
|
| 695 |
-
if current_step < WARMUP_STEPS:
|
| 696 |
-
return current_step / WARMUP_STEPS
|
| 697 |
-
else:
|
| 698 |
-
scaled_progress = (current_step - WARMUP_STEPS) / (TOTAL_STEPS - WARMUP_STEPS)
|
| 699 |
-
cosine_decay = 0.5 * (1 + np.cos(np.pi * scaled_progress))
|
| 700 |
-
return cosine_decay * (BASE_LR - MIN_LR) / BASE_LR + MIN_LR / BASE_LR
|
| 701 |
-
|
| 702 |
-
scheduler = LambdaLR(optimizer, lr_lambda=lr_lambda)
|
| 703 |
-
|
| 704 |
-
# =============================================================================
|
| 705 |
-
# 11. PRE-TRAINING LOOP
|
| 706 |
-
# - Call the train_lwm utility to run the pre-training epochs, logging metrics and saving models
|
| 707 |
-
# =============================================================================
|
| 708 |
-
|
| 709 |
-
# Create timestamp-based save directory
|
| 710 |
-
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
| 711 |
-
save_dir = f"models/{timestamp}_complex"
|
| 712 |
-
print(f"📁 Models and logs will be saved to: {save_dir}")
|
| 713 |
-
os.makedirs(save_dir, exist_ok=True)
|
| 714 |
-
|
| 715 |
-
stats_path = os.path.join(save_dir, "dataset_stats.json")
|
| 716 |
-
with open(stats_path, 'w') as f:
|
| 717 |
-
json.dump(dataset_normalization, f, indent=2)
|
| 718 |
-
print(f"📝 Saved dataset stats to {stats_path}")
|
| 719 |
-
|
| 720 |
-
comm_selection = sorted(ENABLED_COMM_TYPES) if ENABLED_COMM_TYPES else []
|
| 721 |
-
if comm_selection:
|
| 722 |
-
comm_suffix = "_" + "-".join(comm_selection)
|
| 723 |
-
else:
|
| 724 |
-
comm_suffix = ""
|
| 725 |
-
if comm_selection:
|
| 726 |
-
print(f"[INFO] Communication standards for this run: {', '.join(comm_selection)}")
|
| 727 |
-
|
| 728 |
-
if __name__ == "__main__":
|
| 729 |
-
pretrained_model_output = train_lwm(
|
| 730 |
-
model,
|
| 731 |
-
train_loaders,
|
| 732 |
-
val_loaders,
|
| 733 |
-
optimizer,
|
| 734 |
-
scheduler,
|
| 735 |
-
EPOCHS,
|
| 736 |
-
device=device,
|
| 737 |
-
save_dir=save_dir,
|
| 738 |
-
log_file="training_log.csv",
|
| 739 |
-
checkpoint_suffix=comm_suffix + "_complex",
|
| 740 |
-
)
|
| 741 |
-
print("🎉 Training completed successfully!")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
pretraining/train_lwm_spectro_contrastive.py
DELETED
|
@@ -1,1450 +0,0 @@
|
|
| 1 |
-
#!/usr/bin/env python3
|
| 2 |
-
# =============================================================================
|
| 3 |
-
# train_lwm_spectro_contrastive.py - LWM Pretraining with Contrastive Learning
|
| 4 |
-
# Extended from train_lwm_spectro.py to add modulation/mobility contrastive learning
|
| 5 |
-
#
|
| 6 |
-
# Key additions:
|
| 7 |
-
# - Contrastive learning module with projection head
|
| 8 |
-
# - Multi-task loss: MLM + Contrastive (modulation + mobility)
|
| 9 |
-
# - Hard negative mining
|
| 10 |
-
# - Supervised contrastive loss (SupCon)
|
| 11 |
-
# =============================================================================
|
| 12 |
-
|
| 13 |
-
# =============================================================================
|
| 14 |
-
# 1. IMPORTS AND WARNINGS SETUP
|
| 15 |
-
# =============================================================================
|
| 16 |
-
import sys
|
| 17 |
-
import os
|
| 18 |
-
import argparse
|
| 19 |
-
import math
|
| 20 |
-
# Add project root to path (Windows compatible)
|
| 21 |
-
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
| 22 |
-
sys.path.insert(0, project_root)
|
| 23 |
-
import torch
|
| 24 |
-
import torch.nn as nn
|
| 25 |
-
import torch.nn.functional as F
|
| 26 |
-
from torch.utils.data import DataLoader, random_split, TensorDataset, Dataset
|
| 27 |
-
import torch.optim as optim
|
| 28 |
-
from utils import (generate_spectrograms_and_labels, tokenizer_train,
|
| 29 |
-
create_train_dataloader, count_parameters)
|
| 30 |
-
import numpy as np
|
| 31 |
-
import pretrained_model # Assuming this contains the LWM model definition
|
| 32 |
-
from torch.optim.lr_scheduler import LambdaLR
|
| 33 |
-
from torch.optim import AdamW
|
| 34 |
-
import warnings
|
| 35 |
-
import platform
|
| 36 |
-
import re
|
| 37 |
-
from tqdm import tqdm
|
| 38 |
-
from datetime import datetime
|
| 39 |
-
import concurrent.futures
|
| 40 |
-
import multiprocessing
|
| 41 |
-
from collections import Counter
|
| 42 |
-
from functools import lru_cache
|
| 43 |
-
import json
|
| 44 |
-
from typing import Dict, Tuple, List, Optional
|
| 45 |
-
|
| 46 |
-
SNR_PATTERN = re.compile(r"SNR(-?\d+)dB")
|
| 47 |
-
DOPPLER_MAP = {"static": 0, "pedestrian": 1, "vehicular": 2}
|
| 48 |
-
DOPPLER_INV = {v: k for k, v in DOPPLER_MAP.items()}
|
| 49 |
-
|
| 50 |
-
# Dynamic modulation mapping - will be built from actual data
|
| 51 |
-
MODULATION_MAP = {} # Will be populated: {"BPSK": 0, "QPSK": 1, ...}
|
| 52 |
-
MODULATION_INV = {} # Will be populated: {0: "BPSK", 1: "QPSK", ...}
|
| 53 |
-
|
| 54 |
-
# Standard-to-modulation mapping (for reference only - not used in code)
|
| 55 |
-
# Note: Actual modulations are dynamically discovered from file paths
|
| 56 |
-
# These match the MCS definitions in MATLAB/receiver_pipeline/getMCSDefinitions.m
|
| 57 |
-
STANDARD_MODULATIONS = {
|
| 58 |
-
"WiFi": [
|
| 59 |
-
"BPSK", "QPSK", "16QAM", "64QAM"
|
| 60 |
-
# From getMCSDefinitions.m WiFi MCS table:
|
| 61 |
-
# - MCS 0: BPSK rate1-2
|
| 62 |
-
# - MCS 1-2: QPSK rate1-2, rate3-4
|
| 63 |
-
# - MCS 3-4: 16QAM rate1-2, rate3-4
|
| 64 |
-
# - MCS 5-7: 64QAM rate2-3, rate3-4, rate5-6
|
| 65 |
-
# Note: Your MATLAB pipeline uses 802.11a/g MCS (no 256QAM/1024QAM)
|
| 66 |
-
],
|
| 67 |
-
"LTE": [
|
| 68 |
-
"QPSK", "16QAM", "64QAM"
|
| 69 |
-
# From getMCSDefinitions.m LTE MCS table:
|
| 70 |
-
# - MCS 0-2: QPSK rate1-3, rate1-2, rate3-4
|
| 71 |
-
# - MCS 3-4: 16QAM rate1-2, rate3-4
|
| 72 |
-
# - MCS 5-6: 64QAM rate2-3, rate3-4
|
| 73 |
-
# Note: Your MATLAB pipeline does NOT include 256QAM
|
| 74 |
-
],
|
| 75 |
-
"5G": [
|
| 76 |
-
"QPSK", "16QAM", "64QAM", "256QAM"
|
| 77 |
-
# From getMCSDefinitions.m 5G MCS table:
|
| 78 |
-
# - MCS 0-1: QPSK rate1-3, rate1-2
|
| 79 |
-
# - MCS 2-3: 16QAM rate1-2, rate3-4
|
| 80 |
-
# - MCS 4-5: 64QAM rate2-3, rate3-4
|
| 81 |
-
# - MCS 6: 256QAM rate3-4
|
| 82 |
-
],
|
| 83 |
-
}
|
| 84 |
-
|
| 85 |
-
# Important: This mapping is for documentation only
|
| 86 |
-
# The actual modulations used in your dataset may differ
|
| 87 |
-
# They will be automatically discovered from file paths
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
def _parse_metadata(path: str) -> Dict[str, any]:
|
| 91 |
-
"""
|
| 92 |
-
Parse SNR, Doppler, and Modulation from file path.
|
| 93 |
-
Modulation is dynamically extracted and added to global MODULATION_MAP.
|
| 94 |
-
|
| 95 |
-
Returns:
|
| 96 |
-
dict with keys: snr_db, doppler_id, modulation_id, modulation_name
|
| 97 |
-
"""
|
| 98 |
-
global MODULATION_MAP, MODULATION_INV
|
| 99 |
-
|
| 100 |
-
snr_db = 0.0
|
| 101 |
-
doppler_id = 0
|
| 102 |
-
modulation_name = "Unknown"
|
| 103 |
-
|
| 104 |
-
# Parse SNR
|
| 105 |
-
matches = SNR_PATTERN.findall(path)
|
| 106 |
-
if matches:
|
| 107 |
-
try:
|
| 108 |
-
snr_db = float(matches[-1])
|
| 109 |
-
except ValueError:
|
| 110 |
-
snr_db = 0.0
|
| 111 |
-
|
| 112 |
-
# Parse Doppler
|
| 113 |
-
normalized_path = os.path.normpath(path)
|
| 114 |
-
parts = normalized_path.split(os.sep)
|
| 115 |
-
for part in parts:
|
| 116 |
-
if part in DOPPLER_MAP:
|
| 117 |
-
doppler_id = DOPPLER_MAP[part]
|
| 118 |
-
break
|
| 119 |
-
|
| 120 |
-
# Parse Modulation (dynamic - look for common modulation patterns)
|
| 121 |
-
# Patterns: BPSK, QPSK, 8PSK, 16QAM, 32QAM, 64QAM, 256QAM, 1024QAM, etc.
|
| 122 |
-
# Note: We ONLY use explicit modulation names in the path, not code rates
|
| 123 |
-
# since the same code rate can be used with different modulations
|
| 124 |
-
modulation_patterns = [
|
| 125 |
-
r"BPSK",
|
| 126 |
-
r"QPSK",
|
| 127 |
-
r"8PSK",
|
| 128 |
-
r"16QAM",
|
| 129 |
-
r"32QAM",
|
| 130 |
-
r"64QAM",
|
| 131 |
-
r"128QAM",
|
| 132 |
-
r"256QAM",
|
| 133 |
-
r"512QAM",
|
| 134 |
-
r"1024QAM",
|
| 135 |
-
]
|
| 136 |
-
|
| 137 |
-
for pattern in modulation_patterns:
|
| 138 |
-
if re.search(pattern, path, re.IGNORECASE):
|
| 139 |
-
modulation_name = pattern
|
| 140 |
-
break
|
| 141 |
-
|
| 142 |
-
# Add to global mapping if new
|
| 143 |
-
if modulation_name != "Unknown" and modulation_name not in MODULATION_MAP:
|
| 144 |
-
modulation_id = len(MODULATION_MAP)
|
| 145 |
-
MODULATION_MAP[modulation_name] = modulation_id
|
| 146 |
-
MODULATION_INV[modulation_id] = modulation_name
|
| 147 |
-
elif modulation_name in MODULATION_MAP:
|
| 148 |
-
modulation_id = MODULATION_MAP[modulation_name]
|
| 149 |
-
else:
|
| 150 |
-
modulation_id = -1 # Unknown
|
| 151 |
-
|
| 152 |
-
return {
|
| 153 |
-
'snr_db': snr_db,
|
| 154 |
-
'doppler_id': doppler_id,
|
| 155 |
-
'modulation_id': modulation_id,
|
| 156 |
-
'modulation_name': modulation_name
|
| 157 |
-
}
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
warnings.filterwarnings("ignore", category=UserWarning)
|
| 161 |
-
|
| 162 |
-
# Use simple progress display instead of tqdm on Windows
|
| 163 |
-
USE_TQDM = platform.system() != 'Windows'
|
| 164 |
-
|
| 165 |
-
# CPU 코어 수 계산 (메모리 사용량 고려하여 보수적으로 설정)
|
| 166 |
-
total_cores = multiprocessing.cpu_count()
|
| 167 |
-
if total_cores >= 16:
|
| 168 |
-
MAX_WORKERS = min(8, total_cores // 2)
|
| 169 |
-
else:
|
| 170 |
-
MAX_WORKERS = max(2, total_cores // 2)
|
| 171 |
-
print(f"🚀 Using {MAX_WORKERS}/{total_cores} CPU cores for parallel processing")
|
| 172 |
-
|
| 173 |
-
PRINT_CONVERSION_STATS = os.environ.get("LWM_PRINT_CONVERSION_STATS", "").strip().lower() in {"1", "true", "yes"}
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
# =============================================================================
|
| 177 |
-
# 2. CONTRASTIVE LEARNING COMPONENTS
|
| 178 |
-
# =============================================================================
|
| 179 |
-
|
| 180 |
-
class ProjectionHead(nn.Module):
|
| 181 |
-
"""
|
| 182 |
-
Projection head for contrastive learning (SimCLR-style).
|
| 183 |
-
Projects encoder output to a lower-dimensional space for contrastive loss.
|
| 184 |
-
"""
|
| 185 |
-
def __init__(self, d_model: int, projection_dim: int = 128):
|
| 186 |
-
super().__init__()
|
| 187 |
-
self.projection = nn.Sequential(
|
| 188 |
-
nn.Linear(d_model, d_model),
|
| 189 |
-
nn.ReLU(),
|
| 190 |
-
nn.Linear(d_model, projection_dim)
|
| 191 |
-
)
|
| 192 |
-
|
| 193 |
-
def forward(self, x):
|
| 194 |
-
"""
|
| 195 |
-
Args:
|
| 196 |
-
x: (batch, seq_len, d_model) - Encoder output
|
| 197 |
-
Returns:
|
| 198 |
-
z: (batch, projection_dim) - Projected embeddings
|
| 199 |
-
"""
|
| 200 |
-
# Global average pooling over sequence dimension
|
| 201 |
-
pooled = x.mean(dim=1) # (batch, d_model)
|
| 202 |
-
z = self.projection(pooled) # (batch, projection_dim)
|
| 203 |
-
z = F.normalize(z, dim=1) # L2 normalize
|
| 204 |
-
return z
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
class ContrastiveLWM(nn.Module):
|
| 208 |
-
"""
|
| 209 |
-
LWM model with contrastive learning projection heads.
|
| 210 |
-
"""
|
| 211 |
-
def __init__(self, lwm_encoder, projection_dim: int = 128, input_dim: int = 32):
|
| 212 |
-
super().__init__()
|
| 213 |
-
self.encoder = lwm_encoder
|
| 214 |
-
|
| 215 |
-
# MLM reconstruction head: project d_model back to input_dim
|
| 216 |
-
self.mlm_head = nn.Linear(lwm_encoder.d_model, input_dim)
|
| 217 |
-
|
| 218 |
-
# Separate projection heads for modulation and mobility
|
| 219 |
-
self.modulation_projection = ProjectionHead(lwm_encoder.d_model, projection_dim)
|
| 220 |
-
self.mobility_projection = ProjectionHead(lwm_encoder.d_model, projection_dim)
|
| 221 |
-
|
| 222 |
-
def forward(self, x, return_projections: bool = False):
|
| 223 |
-
"""
|
| 224 |
-
Args:
|
| 225 |
-
x: Input tokens
|
| 226 |
-
return_projections: If True, return contrastive projections and MLM predictions
|
| 227 |
-
|
| 228 |
-
Returns:
|
| 229 |
-
If return_projections:
|
| 230 |
-
mlm_predictions, z_modulation, z_mobility
|
| 231 |
-
Else:
|
| 232 |
-
mlm_predictions (for MLM task only)
|
| 233 |
-
"""
|
| 234 |
-
# Forward through encoder
|
| 235 |
-
encoder_out = self.encoder(x) # (batch, seq_len, d_model)
|
| 236 |
-
|
| 237 |
-
# MLM prediction head (always compute for reconstruction)
|
| 238 |
-
mlm_predictions = self.mlm_head(encoder_out) # (batch, seq_len, input_dim)
|
| 239 |
-
|
| 240 |
-
if return_projections:
|
| 241 |
-
z_mod = self.modulation_projection(encoder_out)
|
| 242 |
-
z_mob = self.mobility_projection(encoder_out)
|
| 243 |
-
return mlm_predictions, z_mod, z_mob
|
| 244 |
-
else:
|
| 245 |
-
return mlm_predictions
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
def supervised_contrastive_loss(
|
| 249 |
-
embeddings: torch.Tensor,
|
| 250 |
-
labels: torch.Tensor,
|
| 251 |
-
temperature: float = 0.07,
|
| 252 |
-
base_temperature: float = 0.07
|
| 253 |
-
) -> torch.Tensor:
|
| 254 |
-
"""
|
| 255 |
-
Supervised Contrastive Loss (SupCon) from Khosla et al. 2020.
|
| 256 |
-
|
| 257 |
-
Args:
|
| 258 |
-
embeddings: (batch, dim) - Normalized embeddings
|
| 259 |
-
labels: (batch,) - Class labels
|
| 260 |
-
temperature: Temperature scaling
|
| 261 |
-
base_temperature: Base temperature for normalization
|
| 262 |
-
|
| 263 |
-
Returns:
|
| 264 |
-
loss: Scalar SupCon loss
|
| 265 |
-
"""
|
| 266 |
-
batch_size = embeddings.size(0)
|
| 267 |
-
|
| 268 |
-
# Compute similarity matrix
|
| 269 |
-
sim_matrix = torch.matmul(embeddings, embeddings.T) / temperature # (batch, batch)
|
| 270 |
-
|
| 271 |
-
# Mask for positives (same label)
|
| 272 |
-
labels = labels.contiguous().view(-1, 1)
|
| 273 |
-
mask_pos = torch.eq(labels, labels.T).float().to(embeddings.device) # (batch, batch)
|
| 274 |
-
|
| 275 |
-
# Remove diagonal (self-similarity)
|
| 276 |
-
logits_mask = torch.scatter(
|
| 277 |
-
torch.ones_like(mask_pos),
|
| 278 |
-
1,
|
| 279 |
-
torch.arange(batch_size).view(-1, 1).to(embeddings.device),
|
| 280 |
-
0
|
| 281 |
-
)
|
| 282 |
-
mask_pos = mask_pos * logits_mask
|
| 283 |
-
|
| 284 |
-
# Compute log probabilities
|
| 285 |
-
exp_sim = torch.exp(sim_matrix) * logits_mask
|
| 286 |
-
log_prob = sim_matrix - torch.log(exp_sim.sum(dim=1, keepdim=True) + 1e-8)
|
| 287 |
-
|
| 288 |
-
# Mean over positives
|
| 289 |
-
mean_log_prob_pos = (mask_pos * log_prob).sum(dim=1) / (mask_pos.sum(dim=1) + 1e-8)
|
| 290 |
-
|
| 291 |
-
# Loss
|
| 292 |
-
loss = -(temperature / base_temperature) * mean_log_prob_pos
|
| 293 |
-
loss = loss.mean()
|
| 294 |
-
|
| 295 |
-
return loss
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
class ContrastiveDataset(Dataset):
|
| 299 |
-
"""
|
| 300 |
-
Dataset wrapper that provides contrastive learning triplets.
|
| 301 |
-
"""
|
| 302 |
-
def __init__(
|
| 303 |
-
self,
|
| 304 |
-
spectrograms: np.ndarray,
|
| 305 |
-
labels: np.ndarray,
|
| 306 |
-
metadata: Dict[str, np.ndarray],
|
| 307 |
-
indices_by_modulation: Dict[int, List[int]],
|
| 308 |
-
indices_by_mobility: Dict[int, List[int]]
|
| 309 |
-
):
|
| 310 |
-
self.spectrograms = spectrograms
|
| 311 |
-
self.labels = labels
|
| 312 |
-
self.metadata = metadata
|
| 313 |
-
self.indices_by_modulation = indices_by_modulation
|
| 314 |
-
self.indices_by_mobility = indices_by_mobility
|
| 315 |
-
|
| 316 |
-
def __len__(self):
|
| 317 |
-
return len(self.spectrograms)
|
| 318 |
-
|
| 319 |
-
def __getitem__(self, idx):
|
| 320 |
-
"""
|
| 321 |
-
Returns anchor sample with its metadata.
|
| 322 |
-
"""
|
| 323 |
-
spectrogram = self.spectrograms[idx]
|
| 324 |
-
label = self.labels[idx]
|
| 325 |
-
|
| 326 |
-
metadata = {
|
| 327 |
-
'snr_db': self.metadata['snr_db'][idx],
|
| 328 |
-
'doppler_id': self.metadata['doppler_id'][idx],
|
| 329 |
-
'modulation_id': self.metadata['modulation_id'][idx]
|
| 330 |
-
}
|
| 331 |
-
|
| 332 |
-
return spectrogram, label, metadata
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
# =============================================================================
|
| 336 |
-
# 3. DATA CONVERSION AND PREPROCESSING
|
| 337 |
-
# =============================================================================
|
| 338 |
-
|
| 339 |
-
def convert_complex_to_interleaved(spectrograms):
|
| 340 |
-
"""
|
| 341 |
-
Convert complex-valued spectrograms to real-imaginary interleaved format.
|
| 342 |
-
|
| 343 |
-
Args:
|
| 344 |
-
spectrograms (np.ndarray): Complex-valued array of shape (n_samples, n_rows, n_cols)
|
| 345 |
-
or (n_samples, 1, n_rows, n_cols)
|
| 346 |
-
|
| 347 |
-
Returns:
|
| 348 |
-
np.ndarray: Real-valued array with interleaved real/imag parts
|
| 349 |
-
Shape: (n_samples, n_rows, n_cols * 2)
|
| 350 |
-
"""
|
| 351 |
-
# Handle different input shapes
|
| 352 |
-
if spectrograms.ndim == 4:
|
| 353 |
-
spectrograms = spectrograms[:, 0, :, :]
|
| 354 |
-
|
| 355 |
-
# Check if data is complex
|
| 356 |
-
if np.iscomplexobj(spectrograms):
|
| 357 |
-
n_samples, n_rows, n_cols = spectrograms.shape
|
| 358 |
-
|
| 359 |
-
# Extract real and imaginary parts
|
| 360 |
-
flat_real = spectrograms.real
|
| 361 |
-
flat_imag = spectrograms.imag
|
| 362 |
-
|
| 363 |
-
# Interleave real and imaginary parts along the last axis
|
| 364 |
-
interleaved = np.empty((n_samples, n_rows, n_cols * 2), dtype=np.float32)
|
| 365 |
-
interleaved[:, :, 0::2] = flat_real # Even indices: real parts
|
| 366 |
-
interleaved[:, :, 1::2] = flat_imag # Odd indices: imaginary parts
|
| 367 |
-
|
| 368 |
-
if PRINT_CONVERSION_STATS:
|
| 369 |
-
print(f" ℹ️ Converted complex spectrograms: {spectrograms.shape} -> {interleaved.shape}")
|
| 370 |
-
print(f" Real part range: [{flat_real.min():.2e}, {flat_real.max():.2e}]")
|
| 371 |
-
print(f" Imag part range: [{flat_imag.min():.2e}, {flat_imag.max():.2e}]")
|
| 372 |
-
|
| 373 |
-
return interleaved
|
| 374 |
-
else:
|
| 375 |
-
# Already real-valued
|
| 376 |
-
if spectrograms.ndim == 3:
|
| 377 |
-
if PRINT_CONVERSION_STATS:
|
| 378 |
-
print(f" ℹ️ Data is already real-valued: {spectrograms.shape}")
|
| 379 |
-
return spectrograms
|
| 380 |
-
else:
|
| 381 |
-
raise ValueError(f"Unexpected spectrogram shape: {spectrograms.shape}")
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
def process_single_scenario(scenario_info):
|
| 385 |
-
"""단일 시나리오를 처리하는 함수 (멀티프로세싱용)"""
|
| 386 |
-
scenario_name, spectrogram_path = scenario_info
|
| 387 |
-
|
| 388 |
-
try:
|
| 389 |
-
# Parse metadata from path
|
| 390 |
-
path_metadata = _parse_metadata(spectrogram_path)
|
| 391 |
-
|
| 392 |
-
# 메모리 효율성을 위해 필요한 데이터만 로드
|
| 393 |
-
scenario_spectrograms, scenario_labels = generate_spectrograms_and_labels(
|
| 394 |
-
scenario_name=scenario_name,
|
| 395 |
-
spectrogram_path=spectrogram_path,
|
| 396 |
-
cache_path=None, # 메모리 문제로 캐시 비활성화
|
| 397 |
-
)
|
| 398 |
-
|
| 399 |
-
# Validate load
|
| 400 |
-
if scenario_spectrograms is None or (hasattr(scenario_spectrograms, 'size') and scenario_spectrograms.size == 0):
|
| 401 |
-
print(f" ⚠️ No data loaded from: {spectrogram_path}")
|
| 402 |
-
return None
|
| 403 |
-
|
| 404 |
-
# Convert complex spectrograms to interleaved real-imaginary format
|
| 405 |
-
scenario_spectrograms = convert_complex_to_interleaved(scenario_spectrograms)
|
| 406 |
-
|
| 407 |
-
# 데이터 분할 (인덱스만 계산)
|
| 408 |
-
total_samples = len(scenario_spectrograms)
|
| 409 |
-
train_size = int(0.8 * total_samples)
|
| 410 |
-
val_size = total_samples - train_size
|
| 411 |
-
|
| 412 |
-
# 메모리 절약을 위해 numpy array로 유지
|
| 413 |
-
train_data = np.array(scenario_spectrograms[:train_size], dtype=np.float32)
|
| 414 |
-
val_data = np.array(scenario_spectrograms[train_size:], dtype=np.float32)
|
| 415 |
-
|
| 416 |
-
# Metadata arrays
|
| 417 |
-
snr_array = np.full(total_samples, path_metadata['snr_db'], dtype=np.float32)
|
| 418 |
-
doppler_array = np.full(total_samples, path_metadata['doppler_id'], dtype=np.int64)
|
| 419 |
-
modulation_array = np.full(total_samples, path_metadata['modulation_id'], dtype=np.int64)
|
| 420 |
-
|
| 421 |
-
train_meta = {
|
| 422 |
-
'snr_db': snr_array[:train_size],
|
| 423 |
-
'doppler_id': doppler_array[:train_size],
|
| 424 |
-
'modulation_id': modulation_array[:train_size],
|
| 425 |
-
}
|
| 426 |
-
val_meta = {
|
| 427 |
-
'snr_db': snr_array[train_size:],
|
| 428 |
-
'doppler_id': doppler_array[train_size:],
|
| 429 |
-
'modulation_id': modulation_array[train_size:],
|
| 430 |
-
}
|
| 431 |
-
|
| 432 |
-
# 불필요한 데이터 즉시 삭제
|
| 433 |
-
del scenario_spectrograms
|
| 434 |
-
|
| 435 |
-
return {
|
| 436 |
-
'scenario': scenario_name,
|
| 437 |
-
'train_data': train_data,
|
| 438 |
-
'val_data': val_data,
|
| 439 |
-
'train_meta': train_meta,
|
| 440 |
-
'val_meta': val_meta,
|
| 441 |
-
'train_size': len(train_data),
|
| 442 |
-
'val_size': len(val_data)
|
| 443 |
-
}
|
| 444 |
-
except Exception as e:
|
| 445 |
-
print(f"❌ Error processing scenario {scenario_name}: {e}")
|
| 446 |
-
import traceback
|
| 447 |
-
traceback.print_exc()
|
| 448 |
-
return None
|
| 449 |
-
|
| 450 |
-
|
| 451 |
-
# =============================================================================
|
| 452 |
-
# 4. SCENARIO LIST AND PROPERTIES (Same as original)
|
| 453 |
-
# =============================================================================
|
| 454 |
-
|
| 455 |
-
SUPPORTED_COMM_TYPES = {"LTE", "WiFi", "5G"}
|
| 456 |
-
|
| 457 |
-
|
| 458 |
-
def _parse_standard_args():
|
| 459 |
-
parser = argparse.ArgumentParser(add_help=False)
|
| 460 |
-
parser.add_argument('--standards', nargs='+', choices=SUPPORTED_COMM_TYPES,
|
| 461 |
-
help='Specify one or more communication types to include (default: all).')
|
| 462 |
-
for comm in SUPPORTED_COMM_TYPES:
|
| 463 |
-
parser.add_argument(f'--{comm}', dest=f'flag_{comm}', action='store_true',
|
| 464 |
-
help=f'Include only {comm} data (can be combined).')
|
| 465 |
-
parser.add_argument('--city', '--cities', dest='cities', nargs='+',
|
| 466 |
-
help='Limit scenarios to one or more city prefixes (e.g., "0" or "city_0").')
|
| 467 |
-
parser.add_argument(
|
| 468 |
-
'--normalization',
|
| 469 |
-
choices=('per_sample', 'dataset'),
|
| 470 |
-
default='per_sample',
|
| 471 |
-
help='Normalization mode applied during tokenization (default: %(default)s).'
|
| 472 |
-
)
|
| 473 |
-
parser.add_argument('--help', action='help')
|
| 474 |
-
|
| 475 |
-
args, remaining = parser.parse_known_args()
|
| 476 |
-
|
| 477 |
-
enabled = set(SUPPORTED_COMM_TYPES)
|
| 478 |
-
if args.standards:
|
| 479 |
-
enabled = set(args.standards)
|
| 480 |
-
else:
|
| 481 |
-
flagged = {comm for comm in SUPPORTED_COMM_TYPES if getattr(args, f'flag_{comm}', False)}
|
| 482 |
-
if flagged:
|
| 483 |
-
enabled = flagged
|
| 484 |
-
|
| 485 |
-
selected_cities: list[str] | None = None
|
| 486 |
-
if args.cities:
|
| 487 |
-
selected_cities = []
|
| 488 |
-
for city_token in args.cities:
|
| 489 |
-
token = str(city_token).strip()
|
| 490 |
-
if not token:
|
| 491 |
-
continue
|
| 492 |
-
if token.startswith('city_'):
|
| 493 |
-
selected_cities.append(token)
|
| 494 |
-
else:
|
| 495 |
-
selected_cities.append(f'city_{token}')
|
| 496 |
-
if not selected_cities:
|
| 497 |
-
selected_cities = None
|
| 498 |
-
|
| 499 |
-
sys.argv = [sys.argv[0]] + remaining
|
| 500 |
-
return enabled, selected_cities, args.normalization
|
| 501 |
-
|
| 502 |
-
|
| 503 |
-
ENABLED_COMM_TYPES, ENABLED_CITY_PREFIXES, NORMALIZATION_MODE = _parse_standard_args()
|
| 504 |
-
MAX_SCENARIOS = int(os.environ.get("LWM_MAX_SCENARIOS", "0")) or None
|
| 505 |
-
|
| 506 |
-
|
| 507 |
-
def _extract_scenario_token(file_path):
|
| 508 |
-
"""Derive the base scenario token (without city) from the file path."""
|
| 509 |
-
normalized_path = os.path.normpath(file_path)
|
| 510 |
-
parts = normalized_path.split(os.sep)
|
| 511 |
-
|
| 512 |
-
scenario_parts = []
|
| 513 |
-
for i, part in enumerate(parts):
|
| 514 |
-
if part in SUPPORTED_COMM_TYPES:
|
| 515 |
-
trailing = parts[i:i + 5]
|
| 516 |
-
if trailing:
|
| 517 |
-
scenario_parts = trailing[:5]
|
| 518 |
-
break
|
| 519 |
-
|
| 520 |
-
if not scenario_parts:
|
| 521 |
-
base_name = os.path.splitext(os.path.basename(file_path))[0]
|
| 522 |
-
if base_name.startswith('spectrogram_'):
|
| 523 |
-
tokens = base_name.split('_')[1:]
|
| 524 |
-
if tokens and tokens[0] in SUPPORTED_COMM_TYPES:
|
| 525 |
-
scenario_parts = tokens[:5] if len(tokens) >= 5 else tokens
|
| 526 |
-
|
| 527 |
-
return '_'.join(scenario_parts) if scenario_parts else None
|
| 528 |
-
|
| 529 |
-
|
| 530 |
-
@lru_cache(maxsize=1)
|
| 531 |
-
def _collect_scenario_file_info():
|
| 532 |
-
import glob
|
| 533 |
-
|
| 534 |
-
scenario_entries = []
|
| 535 |
-
|
| 536 |
-
# New MATLAB receiver pipeline output
|
| 537 |
-
new_base = os.path.join('ls_data', 'MATLAB', 'receiver_pipeline')
|
| 538 |
-
if os.path.isdir(new_base):
|
| 539 |
-
patterns = [os.path.join(new_base, '*', '**', 'spectrogram_*.mat')]
|
| 540 |
-
for pattern in patterns:
|
| 541 |
-
for file_path in sorted(glob.glob(pattern, recursive=True)):
|
| 542 |
-
norm = os.path.normpath(file_path)
|
| 543 |
-
parts = norm.split(os.sep)
|
| 544 |
-
try:
|
| 545 |
-
idx = parts.index('receiver_pipeline')
|
| 546 |
-
city_name = parts[idx + 1] if idx + 1 < len(parts) else 'receiver_pipeline'
|
| 547 |
-
except ValueError:
|
| 548 |
-
city_name = 'receiver_pipeline'
|
| 549 |
-
|
| 550 |
-
base_token = _extract_scenario_token(file_path)
|
| 551 |
-
if not base_token:
|
| 552 |
-
continue
|
| 553 |
-
comm_type = base_token.split('_', 1)[0]
|
| 554 |
-
if comm_type not in ENABLED_COMM_TYPES:
|
| 555 |
-
continue
|
| 556 |
-
scenario_id = f"{city_name}::{base_token}"
|
| 557 |
-
scenario_entries.append((scenario_id, file_path, city_name, base_token))
|
| 558 |
-
|
| 559 |
-
# Legacy repo layouts under spectrograms/city_*
|
| 560 |
-
import glob as _glob
|
| 561 |
-
for city_dir in sorted(_glob.glob(os.path.join('spectrograms', 'city_*'))):
|
| 562 |
-
if not os.path.isdir(city_dir):
|
| 563 |
-
continue
|
| 564 |
-
city_name = os.path.basename(city_dir)
|
| 565 |
-
if ENABLED_CITY_PREFIXES:
|
| 566 |
-
if not any(city_name.startswith(prefix) for prefix in ENABLED_CITY_PREFIXES):
|
| 567 |
-
continue
|
| 568 |
-
candidate_patterns = [
|
| 569 |
-
os.path.join(city_dir, '**', 'complex_raw', '**', 'spectrogram_*.mat'),
|
| 570 |
-
os.path.join(city_dir, '**', 'spectrogram_*.mat'),
|
| 571 |
-
]
|
| 572 |
-
city_files = []
|
| 573 |
-
seen_paths = set()
|
| 574 |
-
for pattern in candidate_patterns:
|
| 575 |
-
for file_path in sorted(_glob.glob(pattern, recursive=True)):
|
| 576 |
-
if not file_path.lower().endswith('.mat'):
|
| 577 |
-
continue
|
| 578 |
-
if file_path in seen_paths:
|
| 579 |
-
continue
|
| 580 |
-
seen_paths.add(file_path)
|
| 581 |
-
city_files.append(file_path)
|
| 582 |
-
|
| 583 |
-
if not city_files:
|
| 584 |
-
pattern = os.path.join(city_dir, '**', '512FFT', '**', 'spectrograms', '*.pkl')
|
| 585 |
-
city_files = sorted(_glob.glob(pattern, recursive=True))
|
| 586 |
-
|
| 587 |
-
for file_path in city_files:
|
| 588 |
-
base_token = _extract_scenario_token(file_path)
|
| 589 |
-
if not base_token:
|
| 590 |
-
continue
|
| 591 |
-
comm_type = base_token.split('_', 1)[0]
|
| 592 |
-
if comm_type not in ENABLED_COMM_TYPES:
|
| 593 |
-
continue
|
| 594 |
-
scenario_id = f"{city_name}::{base_token}"
|
| 595 |
-
scenario_entries.append((scenario_id, file_path, city_name, base_token))
|
| 596 |
-
|
| 597 |
-
if MAX_SCENARIOS:
|
| 598 |
-
scenario_entries = scenario_entries[:MAX_SCENARIOS]
|
| 599 |
-
|
| 600 |
-
return scenario_entries
|
| 601 |
-
|
| 602 |
-
|
| 603 |
-
def scenarios_list():
|
| 604 |
-
scenario_entries = _collect_scenario_file_info()
|
| 605 |
-
|
| 606 |
-
if not scenario_entries:
|
| 607 |
-
print("⚠️ No spectrogram files found for pretraining.")
|
| 608 |
-
return np.array([])
|
| 609 |
-
|
| 610 |
-
print(f"Enabled communication types: {sorted(ENABLED_COMM_TYPES)}")
|
| 611 |
-
if ENABLED_CITY_PREFIXES:
|
| 612 |
-
print(f"Selected city prefixes: {sorted(ENABLED_CITY_PREFIXES)}")
|
| 613 |
-
city_counts = Counter(entry[2] for entry in scenario_entries)
|
| 614 |
-
print("Using scenarios from the following city datasets:")
|
| 615 |
-
for city_name, count in city_counts.items():
|
| 616 |
-
print(f" - {city_name}: {count} files")
|
| 617 |
-
|
| 618 |
-
print(f"Total scenarios selected: {len(scenario_entries)}")
|
| 619 |
-
return np.array([entry[0] for entry in scenario_entries])
|
| 620 |
-
|
| 621 |
-
|
| 622 |
-
def scenario_prop():
|
| 623 |
-
scenario_entries = _collect_scenario_file_info()
|
| 624 |
-
|
| 625 |
-
row_column_users = {}
|
| 626 |
-
for scenario_id, file_path, city_name, _ in scenario_entries:
|
| 627 |
-
row_column_users[scenario_id] = {
|
| 628 |
-
'spectrogram_path': file_path,
|
| 629 |
-
'cache_path': os.path.join('spectrograms', city_name, 'spectrogram_cache_128x128.pkl')
|
| 630 |
-
}
|
| 631 |
-
|
| 632 |
-
return row_column_users
|
| 633 |
-
|
| 634 |
-
|
| 635 |
-
# =============================================================================
|
| 636 |
-
# 5. TRAINING PARAMETERS AND HYPERPARAMETERS
|
| 637 |
-
# =============================================================================
|
| 638 |
-
|
| 639 |
-
EPOCHS = 20
|
| 640 |
-
BATCH_SIZE = 64
|
| 641 |
-
VAL_BATCH_SIZE = 64
|
| 642 |
-
WARMUP_EPOCHS = 5
|
| 643 |
-
BASE_LR = 5e-4
|
| 644 |
-
MIN_LR = 1e-5 # Base LR의 1/50 (was 1e-8, too small for effective learning)
|
| 645 |
-
|
| 646 |
-
# Gradient accumulation for larger effective batch size
|
| 647 |
-
ACCUMULATION_STEPS = 4 # Effective batch size = 64 × 4 = 256
|
| 648 |
-
|
| 649 |
-
# Model parameters
|
| 650 |
-
N_ROWS = 4
|
| 651 |
-
N_COLUMNS = 4
|
| 652 |
-
ELEMENT_LENGTH = N_ROWS * N_COLUMNS * 2 # Complex spectrograms
|
| 653 |
-
D_MODEL = 128
|
| 654 |
-
MAX_LEN = 1025
|
| 655 |
-
N_LAYERS = 12
|
| 656 |
-
device_idx = 0
|
| 657 |
-
WEIGHT_DECAY = 0.05
|
| 658 |
-
BETA1 = 0.9
|
| 659 |
-
BETA2 = 0.999
|
| 660 |
-
MASK_PERCENT = 0.6
|
| 661 |
-
N_HEADS = 8
|
| 662 |
-
DROPOUT = 0.1
|
| 663 |
-
|
| 664 |
-
# Contrastive learning parameters
|
| 665 |
-
PROJECTION_DIM = 128
|
| 666 |
-
CONTRASTIVE_TEMPERATURE = 0.07
|
| 667 |
-
CONTRASTIVE_WEIGHT_MODULATION = 50.0 # Increased from 0.5 to match MLM loss scale
|
| 668 |
-
CONTRASTIVE_WEIGHT_MOBILITY = 30.0 # Increased from 0.3 to match MLM loss scale
|
| 669 |
-
MLM_WEIGHT = 1.0
|
| 670 |
-
|
| 671 |
-
print(f"📊 Model configuration for complex spectrograms with contrastive learning:")
|
| 672 |
-
print(f" Patch size: {N_ROWS}x{N_COLUMNS}")
|
| 673 |
-
print(f" Element length: {ELEMENT_LENGTH} (includes real+imag interleaving)")
|
| 674 |
-
print(f" Max sequence length: {MAX_LEN}")
|
| 675 |
-
print(f" Batch size: {BATCH_SIZE} (physical), {BATCH_SIZE * ACCUMULATION_STEPS} (effective)")
|
| 676 |
-
print(f" Gradient accumulation steps: {ACCUMULATION_STEPS}")
|
| 677 |
-
print(f" Projection dim: {PROJECTION_DIM}")
|
| 678 |
-
print(f" Contrastive temperature: {CONTRASTIVE_TEMPERATURE}")
|
| 679 |
-
print(f" Loss weights - MLM: {MLM_WEIGHT}, Modulation: {CONTRASTIVE_WEIGHT_MODULATION}, Mobility: {CONTRASTIVE_WEIGHT_MOBILITY}")
|
| 680 |
-
|
| 681 |
-
|
| 682 |
-
# =============================================================================
|
| 683 |
-
# 6. DATA GENERATION AND LOADING
|
| 684 |
-
# =============================================================================
|
| 685 |
-
|
| 686 |
-
scenarios = scenarios_list()
|
| 687 |
-
scenario_properties = scenario_prop()
|
| 688 |
-
|
| 689 |
-
train_spectrogram_chunks = []
|
| 690 |
-
val_spectrogram_chunks = []
|
| 691 |
-
train_label_chunks = []
|
| 692 |
-
val_label_chunks = []
|
| 693 |
-
train_meta_chunks = []
|
| 694 |
-
val_meta_chunks = []
|
| 695 |
-
|
| 696 |
-
print(f"📂 Loading {len(scenarios)} scenarios...")
|
| 697 |
-
|
| 698 |
-
scenario_info_list = []
|
| 699 |
-
missing_props = []
|
| 700 |
-
for scenario in scenarios:
|
| 701 |
-
props = scenario_properties.get(scenario)
|
| 702 |
-
if props is None:
|
| 703 |
-
missing_props.append(scenario)
|
| 704 |
-
continue
|
| 705 |
-
scenario_info_list.append((scenario, props["spectrogram_path"]))
|
| 706 |
-
|
| 707 |
-
if missing_props:
|
| 708 |
-
print("⚠️ Missing metadata for the following scenarios; skipping:")
|
| 709 |
-
for scen in missing_props:
|
| 710 |
-
print(f" - {scen}")
|
| 711 |
-
|
| 712 |
-
print(f"📂 Loading {len(scenario_info_list)} scenarios using {MAX_WORKERS} workers...")
|
| 713 |
-
|
| 714 |
-
successful_scenarios = 0
|
| 715 |
-
|
| 716 |
-
# Parallel processing with progress bar
|
| 717 |
-
from multiprocessing import Pool
|
| 718 |
-
with Pool(processes=MAX_WORKERS) as pool:
|
| 719 |
-
results = list(tqdm(
|
| 720 |
-
pool.imap(process_single_scenario, scenario_info_list),
|
| 721 |
-
total=len(scenario_info_list),
|
| 722 |
-
desc="Processing scenarios",
|
| 723 |
-
unit="scenario"
|
| 724 |
-
))
|
| 725 |
-
|
| 726 |
-
for result in results:
|
| 727 |
-
if result is not None:
|
| 728 |
-
train_spectrogram_chunks.append(result['train_data'])
|
| 729 |
-
val_spectrogram_chunks.append(result['val_data'])
|
| 730 |
-
train_label_chunks.append(np.zeros(result['train_size'], dtype=np.int64))
|
| 731 |
-
val_label_chunks.append(np.zeros(result['val_size'], dtype=np.int64))
|
| 732 |
-
train_meta_chunks.append(result['train_meta'])
|
| 733 |
-
val_meta_chunks.append(result['val_meta'])
|
| 734 |
-
successful_scenarios += 1
|
| 735 |
-
|
| 736 |
-
print(f"✅ Processing completed! Successful scenarios: {successful_scenarios}/{len(scenario_info_list)}")
|
| 737 |
-
|
| 738 |
-
if not train_spectrogram_chunks or not val_spectrogram_chunks:
|
| 739 |
-
raise ValueError("No spectrogram data collected; check scenario configuration.")
|
| 740 |
-
|
| 741 |
-
print("🔄 Collating spectrogram arrays...")
|
| 742 |
-
train_spectrograms = np.concatenate(train_spectrogram_chunks, axis=0).astype(np.float32, copy=False)
|
| 743 |
-
val_spectrograms = np.concatenate(val_spectrogram_chunks, axis=0).astype(np.float32, copy=False)
|
| 744 |
-
train_labels = np.concatenate(train_label_chunks, axis=0)
|
| 745 |
-
val_labels = np.concatenate(val_label_chunks, axis=0)
|
| 746 |
-
|
| 747 |
-
|
| 748 |
-
def _concat_metadata_dicts(dict_list):
|
| 749 |
-
if not dict_list:
|
| 750 |
-
return {}
|
| 751 |
-
keys = dict_list[0].keys()
|
| 752 |
-
return {k: np.concatenate([d[k] for d in dict_list], axis=0) for k in keys}
# Collate per-scenario metadata dicts into single train/val metadata dicts.
train_metadata = _concat_metadata_dicts(train_meta_chunks)
val_metadata = _concat_metadata_dicts(val_meta_chunks)

# Free the per-scenario chunks now that the concatenated arrays exist.
del train_spectrogram_chunks, val_spectrogram_chunks, train_label_chunks, val_label_chunks
del train_meta_chunks, val_meta_chunks

print(f"Training spectrograms shape: {train_spectrograms.shape}")
print(f"Validation spectrograms shape: {val_spectrograms.shape}")
print(f"Memory usage: {train_spectrograms.nbytes + val_spectrograms.nbytes:,} bytes")

# Print metadata statistics
print(f"\n📊 Metadata statistics:")
print(f"   Discovered modulation schemes: {len(MODULATION_MAP)}")
for mod_name, mod_id in sorted(MODULATION_MAP.items(), key=lambda x: x[1]):
    count_train = np.sum(train_metadata['modulation_id'] == mod_id)
    count_val = np.sum(val_metadata['modulation_id'] == mod_id)
    print(f"   {mod_name} (ID={mod_id}): {count_train} train, {count_val} val samples")

print(f"\n   Modulation distribution (train):")
for mod_id in np.unique(train_metadata['modulation_id']):
    count = np.sum(train_metadata['modulation_id'] == mod_id)
    mod_name = MODULATION_INV.get(mod_id, f"Unknown({mod_id})")
    print(f"   {mod_name}: {count} samples ({100*count/len(train_metadata['modulation_id']):.1f}%)")

print(f"   Mobility distribution (train):")
for mob_id in np.unique(train_metadata['doppler_id']):
    count = np.sum(train_metadata['doppler_id'] == mob_id)
    mob_name = DOPPLER_INV.get(mob_id, f"Unknown({mob_id})")
    print(f"   {mob_name}: {count} samples ({100*count/len(train_metadata['doppler_id']):.1f}%)")

# Dataset-level normalization statistics, computed on the training split only
# (the validation split is normalized with the same stats via dataset_stats).
train_mean = float(train_spectrograms.mean())
train_std = float(train_spectrograms.std())
if abs(train_std) < 1e-6:
    # Degenerate (near-constant) data: avoid division by ~0 downstream.
    print("⚠️ Training std near zero, using epsilon for stability")
    train_std = 1e-6
dataset_normalization = {'mean': train_mean, 'std': train_std, 'normalization': NORMALIZATION_MODE}
print(f"Dataset normalization stats -> mean: {train_mean:.4f}, std: {train_std:.4f}")
-
# =============================================================================
|
| 795 |
-
# 7. BUILD INDEX FOR CONTRASTIVE SAMPLING
|
| 796 |
-
# =============================================================================
|
| 797 |
-
|
| 798 |
-
def build_class_indices(metadata: Dict[str, np.ndarray]) -> Tuple[Dict, Dict]:
|
| 799 |
-
"""
|
| 800 |
-
Build index mapping from modulation/mobility ID to sample indices.
|
| 801 |
-
"""
|
| 802 |
-
indices_by_modulation = {}
|
| 803 |
-
indices_by_mobility = {}
|
| 804 |
-
|
| 805 |
-
for idx in range(len(metadata['modulation_id'])):
|
| 806 |
-
mod_id = int(metadata['modulation_id'][idx])
|
| 807 |
-
mob_id = int(metadata['doppler_id'][idx])
|
| 808 |
-
|
| 809 |
-
if mod_id not in indices_by_modulation:
|
| 810 |
-
indices_by_modulation[mod_id] = []
|
| 811 |
-
indices_by_modulation[mod_id].append(idx)
|
| 812 |
-
|
| 813 |
-
if mob_id not in indices_by_mobility:
|
| 814 |
-
indices_by_mobility[mob_id] = []
|
| 815 |
-
indices_by_mobility[mob_id].append(idx)
|
| 816 |
-
|
| 817 |
-
return indices_by_modulation, indices_by_mobility
|
| 818 |
-
|
| 819 |
-
|
| 820 |
-
print("🔍 Building class indices for contrastive learning...")
|
| 821 |
-
train_indices_by_modulation, train_indices_by_mobility = build_class_indices(train_metadata)
|
| 822 |
-
val_indices_by_modulation, val_indices_by_mobility = build_class_indices(val_metadata)
|
| 823 |
-
print("✅ Class indices built successfully!")
|
| 824 |
-
|
| 825 |
-
|
| 826 |
-
# =============================================================================
|
| 827 |
-
# 8. DATA TOKENIZATION
|
| 828 |
-
# =============================================================================
|
| 829 |
-
|
| 830 |
-
print("🔄 Starting tokenization of training data...")
|
| 831 |
-
preprocessed_train = tokenizer_train(
|
| 832 |
-
train_spectrograms,
|
| 833 |
-
max_len=MAX_LEN,
|
| 834 |
-
masking_percent=MASK_PERCENT,
|
| 835 |
-
mask=True,
|
| 836 |
-
seed=42,
|
| 837 |
-
metadata=train_metadata,
|
| 838 |
-
dataset_stats=dataset_normalization,
|
| 839 |
-
normalization=NORMALIZATION_MODE,
|
| 840 |
-
interleaved=True,
|
| 841 |
-
)
|
| 842 |
-
print("✅ Training data tokenization completed!")
|
| 843 |
-
|
| 844 |
-
print("🔄 Starting tokenization of validation data...")
|
| 845 |
-
preprocessed_val = tokenizer_train(
|
| 846 |
-
val_spectrograms,
|
| 847 |
-
max_len=MAX_LEN,
|
| 848 |
-
masking_percent=MASK_PERCENT,
|
| 849 |
-
mask=True,
|
| 850 |
-
seed=42,
|
| 851 |
-
metadata=val_metadata,
|
| 852 |
-
dataset_stats=dataset_normalization,
|
| 853 |
-
normalization=NORMALIZATION_MODE,
|
| 854 |
-
interleaved=True,
|
| 855 |
-
)
|
| 856 |
-
print("✅ Validation data tokenization completed!")
|
| 857 |
-
|
| 858 |
-
|
| 859 |
-
# =============================================================================
# 9. TRAIN/VALIDATION DATA SETUP
# =============================================================================

SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

train_data = preprocessed_train
val_data = preprocessed_val


# =============================================================================
# 10. DATALOADER CREATION
# =============================================================================

print("🔧 Creating data loaders...")

# The tokenizer may return either a dict keyed by sequence length or a single
# tensor; both cases are normalized into a {key: DataLoader} mapping so the
# training loop can iterate loaders uniformly.
if isinstance(train_data, dict):
    print(f"   Training data format: dict with {len(train_data)} sequence lengths")
    train_loaders = create_train_dataloader(train_data, batch_size=BATCH_SIZE, shuffle=True)
else:
    print(f"   Training data format: tensor with shape {train_data.shape}")
    train_dataset = TensorDataset(train_data)
    train_loaders = {'seq_0': DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)}

# Validation loaders are never shuffled so metrics are reproducible.
if isinstance(val_data, dict):
    print(f"   Validation data format: dict with {len(val_data)} sequence lengths")
    val_loaders = create_train_dataloader(val_data, batch_size=VAL_BATCH_SIZE, shuffle=False)
else:
    print(f"   Validation data format: tensor with shape {val_data.shape}")
    val_dataset = TensorDataset(val_data)
    val_loaders = {'seq_0': DataLoader(val_dataset, batch_size=VAL_BATCH_SIZE, shuffle=False)}

print("✅ Data loaders created successfully!")
# =============================================================================
# 11. MODEL INITIALIZATION
# =============================================================================

print("🔧 Setting up device and GPU configuration...")

# Device preference: CUDA (all visible GPUs via DataParallel) > MPS > CPU.
if torch.cuda.is_available():
    device_count = torch.cuda.device_count()
    print(f"   CUDA available: {device_count} GPU(s) detected")
    device = torch.device("cuda:0")
    gpu_ids = list(range(device_count))
    print(f"   Using CUDA GPUs: {gpu_ids}")

    # Best-effort memory report; failures here must not abort training.
    for i in gpu_ids:
        try:
            mem_total = torch.cuda.get_device_properties(i).total_memory / 1024**3
            mem_allocated = torch.cuda.memory_allocated(i) / 1024**3
            print(f"   GPU {i}: Total: {mem_total:.1f}GB, Allocated: {mem_allocated:.1f}GB")
        except Exception as e:
            print(f"   GPU {i}: Error getting memory info - {e}")

elif torch.backends.mps.is_available():
    device = torch.device("mps")
    gpu_ids = []
    print("   Using MPS (Apple Silicon GPU)")
else:
    device = torch.device("cpu")
    gpu_ids = []
    print("   Using CPU")

print(f"   Final device: {device}")
print(f"   GPU IDs for DataParallel: {gpu_ids}")

print("🤖 Initializing LWM model with contrastive learning...")
print(f"   Model parameters: element_length={ELEMENT_LENGTH}, d_model={D_MODEL}, n_layers={N_LAYERS}, max_len={MAX_LEN}, n_heads={N_HEADS}")

try:
    # Create base LWM encoder
    lwm_encoder = pretrained_model.lwm(
        element_length=ELEMENT_LENGTH,
        d_model=D_MODEL,
        n_layers=N_LAYERS,
        max_len=MAX_LEN,
        n_heads=N_HEADS,
        dropout=DROPOUT
    )

    # Wrap with contrastive learning module
    # MLM head must output patch dimension (ELEMENT_LENGTH), not full spectrogram width
    # Each token represents a 4×4×2 patch = 32 elements
    model = ContrastiveLWM(lwm_encoder, projection_dim=PROJECTION_DIM, input_dim=ELEMENT_LENGTH)
    print(f"   ✅ Model created with input_dim={ELEMENT_LENGTH} (patch dimension)")

    print(f"   Moving model to device: {device}")
    if 'mps' in str(device):
        # MPS requires float32 — explicit cast avoids dtype surprises.
        model = model.to(device).float()
        print("   ✅ Model moved to MPS device (float32)")
    else:
        model = model.to(device)
        print("   ✅ Model moved to device successfully")

except Exception as e:
    # A broken model setup is unrecoverable for this script — report and exit.
    print(f"   ❌ Model initialization failed: {e}")
    import traceback
    traceback.print_exc()
    exit(1)

# Use DataParallel for multi-GPU support
if gpu_ids:
    model = nn.DataParallel(model, device_ids=gpu_ids)
    print(f"Model loaded successfully on GPU {device.index}")
else:
    print(f"Model loaded successfully on {device}")

n_parameters = count_parameters(model)
print(f"Number of trainable parameters: {n_parameters:,}")
# =============================================================================
# 12. OPTIMIZER AND LEARNING RATE SCHEDULER
# =============================================================================

# Account for gradient accumulation: scheduler step is called once per ACCUMULATION_STEPS batches
# So actual optimizer steps = total_batches / ACCUMULATION_STEPS
total_batches_per_epoch = sum(len(loader) for loader in train_loaders.values())
actual_steps_per_epoch = math.ceil(total_batches_per_epoch / ACCUMULATION_STEPS)
TOTAL_STEPS = actual_steps_per_epoch * EPOCHS
WARMUP_STEPS = actual_steps_per_epoch * WARMUP_EPOCHS

print(f"📊 Learning rate schedule:")
print(f"   Total batches per epoch: {total_batches_per_epoch}")
print(f"   Accumulation steps: {ACCUMULATION_STEPS}")
print(f"   Actual optimizer steps per epoch: {actual_steps_per_epoch}")
print(f"   Total training steps: {TOTAL_STEPS}")
print(f"   Warmup steps: {WARMUP_STEPS}")

# AdamW with decoupled weight decay; BASE_LR is the peak LR the warmup ramps to.
optimizer = AdamW(
    model.parameters(),
    lr=BASE_LR,
    betas=(BETA1, BETA2),
    weight_decay=WEIGHT_DECAY
)
def lr_lambda(current_step):
    """Warmup-then-cosine learning-rate multiplier for LambdaLR.

    Returns a factor relative to BASE_LR: a linear ramp 0 -> 1 over the first
    WARMUP_STEPS optimizer steps, then a cosine decay from 1 down to
    MIN_LR / BASE_LR at TOTAL_STEPS.

    Args:
        current_step: 0-based optimizer step count supplied by LambdaLR.

    Returns:
        float multiplier applied to BASE_LR.
    """
    if current_step < WARMUP_STEPS:
        # Guard against WARMUP_STEPS == 0 (e.g. WARMUP_EPOCHS = 0), which
        # would otherwise divide by zero on the very first step.
        return current_step / max(WARMUP_STEPS, 1)
    else:
        # Clamp progress to [0, 1] so the multiplier never rises again if
        # stepping continues past TOTAL_STEPS; also guard a zero-length
        # decay window (TOTAL_STEPS == WARMUP_STEPS).
        scaled_progress = min(
            (current_step - WARMUP_STEPS) / max(TOTAL_STEPS - WARMUP_STEPS, 1),
            1.0,
        )
        cosine_decay = 0.5 * (1 + np.cos(np.pi * scaled_progress))
        return cosine_decay * (BASE_LR - MIN_LR) / BASE_LR + MIN_LR / BASE_LR
# Wrap the warmup+cosine multiplier in a PyTorch scheduler; the training loop
# below steps it once per optimizer step (i.e. once per accumulation cycle).
scheduler = LambdaLR(optimizer, lr_lambda=lr_lambda)
# =============================================================================
# 13. TRAINING LOOP WITH CONTRASTIVE LEARNING
# =============================================================================

def train_epoch_contrastive(
    model,
    train_loaders,
    optimizer,
    scheduler,
    device,
    epoch,
    train_metadata
):
    """
    Train one epoch with MLM + Contrastive Learning with Gradient Accumulation.

    Args:
        model: ContrastiveLWM (possibly DataParallel-wrapped); called with
            return_projections=True to get (mlm_predictions, z_mod, z_mob).
        train_loaders: dict of {seq_key: DataLoader}; batches carry either
            8+ tensors (with metadata) or 3 (MLM only) — see unpacking below.
        optimizer, scheduler: stepped once every ACCUMULATION_STEPS batches.
        device: target torch device for all batch tensors.
        epoch: 0-based epoch index (used for the progress bar and one-time
            debug prints on the very first batch of epoch 0).
        train_metadata: accepted for interface symmetry; not read in this
            function — labels come from the batch tensors themselves.

    Returns:
        Dict with per-epoch averages: 'mlm_loss', 'contrastive_mod_loss',
        'contrastive_mob_loss', 'total_loss'.
    """
    model.train()
    total_mlm_loss = 0.0
    total_contrastive_mod_loss = 0.0
    total_contrastive_mob_loss = 0.0
    total_loss = 0.0
    total_batches = 0

    # Sum-reduction: per-sample sums are later averaged over the batch size.
    criterion = nn.MSELoss(reduction='sum')

    # Initialize gradient accumulation
    optimizer.zero_grad()
    accumulation_counter = 0

    for seq_key, loader in train_loaders.items():
        for batch_idx, batch in enumerate(tqdm(loader, desc=f"Epoch {epoch+1}", leave=False)):
            # Unpack batch - expect (input_ids, masked_tokens, masked_pos, snr_db, doppler_id, power_stats, snr_id, modulation_id)
            if len(batch) >= 8:
                input_ids = batch[0].to(device)
                masked_tokens = batch[1].to(device)
                masked_pos = batch[2].to(device)
                snr_db = batch[3].to(device)
                doppler_id = batch[4].to(device)
                power_stats = batch[5].to(device)
                snr_id = batch[6].to(device)
                modulation_id = batch[7].to(device)
                has_metadata = True
            elif len(batch) == 3:
                # MLM-only batch: no labels, so contrastive losses are skipped.
                input_ids = batch[0].to(device)
                masked_tokens = batch[1].to(device)
                masked_pos = batch[2].to(device)
                has_metadata = False
            else:
                input_ids = batch[0].to(device)
                has_metadata = False

            # Forward pass with projections
            mlm_predictions, z_mod, z_mob = model(input_ids, return_projections=True)

            # MLM Loss (reconstruction)
            if len(batch) >= 3 and masked_tokens.numel() > 0:
                batch_size = input_ids.size(0)
                mlm_loss = 0.0

                for i in range(batch_size):
                    # Get masked positions for this sample
                    sample_masked_pos = masked_pos[i]
                    sample_masked_tokens = masked_tokens[i]

                    # Skip if no masked positions
                    if sample_masked_pos.numel() == 0:
                        continue

                    # Get predictions at masked positions
                    predictions = mlm_predictions[i, sample_masked_pos, :]
                    targets = sample_masked_tokens

                    # Ensure shapes match
                    if predictions.size(0) != targets.size(0):
                        # Adjust if needed
                        min_len = min(predictions.size(0), targets.size(0))
                        predictions = predictions[:min_len]
                        targets = targets[:min_len]

                    # MSE loss
                    mlm_loss += criterion(predictions, targets)

                mlm_loss = mlm_loss / batch_size if batch_size > 0 else torch.tensor(0.0, device=device)
            else:
                mlm_loss = torch.zeros(1, device=device)

            # Contrastive losses (only if we have metadata)
            if has_metadata:
                # DEBUG: Print batch statistics
                if batch_idx == 0 and epoch == 0:  # Only first batch of first epoch
                    print(f"\n🔍 DEBUG - Batch analysis:")
                    print(f"   Batch size: {modulation_id.size(0)}")
                    print(f"   Modulation IDs: {modulation_id.cpu().numpy()}")
                    print(f"   Unique modulations: {torch.unique(modulation_id).cpu().numpy()}")
                    print(f"   Doppler IDs: {doppler_id.cpu().numpy()}")
                    print(f"   Unique doppler: {torch.unique(doppler_id).cpu().numpy()}")

                # Modulation contrastive loss
                # Filter out unknown modulations (-1)
                valid_mod_mask = modulation_id >= 0
                if valid_mod_mask.sum() > 1:  # Need at least 2 samples
                    z_mod_valid = z_mod[valid_mod_mask]
                    mod_labels_valid = modulation_id[valid_mod_mask]

                    # Check if we have positive pairs
                    unique_mods, counts = torch.unique(mod_labels_valid, return_counts=True)
                    has_positive_pairs = (counts > 1).any()

                    if has_positive_pairs:
                        contrastive_mod_loss = supervised_contrastive_loss(
                            z_mod_valid,
                            mod_labels_valid,
                            temperature=CONTRASTIVE_TEMPERATURE
                        )
                        if batch_idx == 0 and epoch == 0:
                            print(f"   Modulation contrastive loss: {contrastive_mod_loss.item():.4f}")
                    else:
                        contrastive_mod_loss = torch.zeros(1, device=device)
                        if batch_idx == 0 and epoch == 0:
                            print(f"   No positive pairs for modulation - loss set to 0")
                else:
                    contrastive_mod_loss = torch.zeros(1, device=device)
                    if batch_idx == 0 and epoch == 0:
                        print(f"   Not enough valid modulation samples - loss set to 0")

                # Mobility contrastive loss
                z_mob_valid = z_mob
                mob_labels_valid = doppler_id
                if mob_labels_valid.numel() > 1:
                    unique_mobs, counts = torch.unique(mob_labels_valid, return_counts=True)
                    has_positive_pairs = (counts > 1).any()

                    if has_positive_pairs:
                        contrastive_mob_loss = supervised_contrastive_loss(
                            z_mob_valid,
                            mob_labels_valid,
                            temperature=CONTRASTIVE_TEMPERATURE
                        )
                        if batch_idx == 0 and epoch == 0:
                            print(f"   Mobility contrastive loss: {contrastive_mob_loss.item():.4f}")
                    else:
                        contrastive_mob_loss = torch.zeros(1, device=device)
                        if batch_idx == 0 and epoch == 0:
                            print(f"   No positive pairs for mobility - loss set to 0")
                else:
                    contrastive_mob_loss = torch.zeros(1, device=device)
                    if batch_idx == 0 and epoch == 0:
                        print(f"   Not enough mobility samples - loss set to 0")
            else:
                contrastive_mod_loss = torch.zeros(1, device=device)
                contrastive_mob_loss = torch.zeros(1, device=device)

            # Combined loss
            loss = (
                MLM_WEIGHT * mlm_loss +
                CONTRASTIVE_WEIGHT_MODULATION * contrastive_mod_loss +
                CONTRASTIVE_WEIGHT_MOBILITY * contrastive_mob_loss
            )

            # Normalize loss by accumulation steps
            loss = loss / ACCUMULATION_STEPS

            # Backward pass (accumulate gradients)
            loss.backward()

            # Accumulate losses (denormalized for logging)
            total_mlm_loss += mlm_loss.item()
            total_contrastive_mod_loss += contrastive_mod_loss.item()
            total_contrastive_mob_loss += contrastive_mob_loss.item()
            total_loss += (loss.item() * ACCUMULATION_STEPS)  # Denormalize for logging
            total_batches += 1
            accumulation_counter += 1

            # Perform optimizer step every ACCUMULATION_STEPS
            if accumulation_counter % ACCUMULATION_STEPS == 0:
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                optimizer.step()
                scheduler.step()
                optimizer.zero_grad()
                accumulation_counter = 0

    # Handle remaining gradients if total batches not divisible by ACCUMULATION_STEPS
    if accumulation_counter > 0:
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()

    # Average losses
    avg_mlm_loss = total_mlm_loss / total_batches if total_batches > 0 else 0
    avg_contrastive_mod_loss = total_contrastive_mod_loss / total_batches if total_batches > 0 else 0
    avg_contrastive_mob_loss = total_contrastive_mob_loss / total_batches if total_batches > 0 else 0
    avg_total_loss = total_loss / total_batches if total_batches > 0 else 0

    return {
        'mlm_loss': avg_mlm_loss,
        'contrastive_mod_loss': avg_contrastive_mod_loss,
        'contrastive_mob_loss': avg_contrastive_mob_loss,
        'total_loss': avg_total_loss
    }
def validate_epoch_contrastive(
    model,
    val_loaders,
    device,
    epoch
):
    """
    Validate one epoch with MLM + Contrastive Learning.

    Mirrors train_epoch_contrastive but runs under torch.no_grad() with no
    optimizer/scheduler steps; contrastive terms are computed whenever the
    batch carries metadata (8+ tensors).

    Args:
        model: ContrastiveLWM (possibly DataParallel-wrapped).
        val_loaders: dict of {seq_key: DataLoader} over the validation split.
        device: target torch device for batch tensors.
        epoch: epoch index; accepted for interface symmetry, not read here.

    Returns:
        Dict with per-epoch averages: 'mlm_loss', 'contrastive_mod_loss',
        'contrastive_mob_loss', 'total_loss'.
    """
    model.eval()
    total_mlm_loss = 0.0
    total_contrastive_mod_loss = 0.0
    total_contrastive_mob_loss = 0.0
    total_loss = 0.0
    total_batches = 0

    criterion = nn.MSELoss(reduction='sum')

    with torch.no_grad():
        for seq_key, loader in val_loaders.items():
            for batch_idx, batch in enumerate(loader):
                # Unpack batch
                if len(batch) >= 8:
                    input_ids = batch[0].to(device)
                    masked_tokens = batch[1].to(device)
                    masked_pos = batch[2].to(device)
                    snr_db = batch[3].to(device)
                    doppler_id = batch[4].to(device)
                    power_stats = batch[5].to(device)
                    snr_id = batch[6].to(device)
                    modulation_id = batch[7].to(device)
                    has_metadata = True
                elif len(batch) == 3:
                    input_ids = batch[0].to(device)
                    masked_tokens = batch[1].to(device)
                    masked_pos = batch[2].to(device)
                    has_metadata = False
                else:
                    input_ids = batch[0].to(device)
                    has_metadata = False

                # Forward pass
                mlm_predictions, z_mod, z_mob = model(input_ids, return_projections=True)

                # MLM Loss
                if len(batch) >= 3 and masked_tokens.numel() > 0:
                    batch_size = input_ids.size(0)
                    mlm_loss = 0.0

                    for i in range(batch_size):
                        sample_masked_pos = masked_pos[i]
                        sample_masked_tokens = masked_tokens[i]

                        if sample_masked_pos.numel() == 0:
                            continue

                        predictions = mlm_predictions[i, sample_masked_pos, :]
                        targets = sample_masked_tokens

                        # Truncate to the common length if shapes disagree.
                        if predictions.size(0) != targets.size(0):
                            min_len = min(predictions.size(0), targets.size(0))
                            predictions = predictions[:min_len]
                            targets = targets[:min_len]

                        mlm_loss += criterion(predictions, targets)

                    mlm_loss = mlm_loss / batch_size if batch_size > 0 else torch.tensor(0.0, device=device)
                else:
                    mlm_loss = torch.zeros(1, device=device)

                # Contrastive losses
                if has_metadata:
                    # Exclude unknown modulations (-1); need >= 2 valid samples.
                    valid_mod_mask = modulation_id >= 0
                    if valid_mod_mask.sum() > 1:
                        z_mod_valid = z_mod[valid_mod_mask]
                        mod_labels_valid = modulation_id[valid_mod_mask]
                        contrastive_mod_loss = supervised_contrastive_loss(
                            z_mod_valid,
                            mod_labels_valid,
                            temperature=CONTRASTIVE_TEMPERATURE
                        )
                    else:
                        contrastive_mod_loss = torch.zeros(1, device=device)

                    if doppler_id.numel() > 1:
                        contrastive_mob_loss = supervised_contrastive_loss(
                            z_mob,
                            doppler_id,
                            temperature=CONTRASTIVE_TEMPERATURE
                        )
                    else:
                        contrastive_mob_loss = torch.zeros(1, device=device)
                else:
                    contrastive_mod_loss = torch.zeros(1, device=device)
                    contrastive_mob_loss = torch.zeros(1, device=device)

                # Same weighted combination as training, for comparable totals.
                loss = (
                    MLM_WEIGHT * mlm_loss +
                    CONTRASTIVE_WEIGHT_MODULATION * contrastive_mod_loss +
                    CONTRASTIVE_WEIGHT_MOBILITY * contrastive_mob_loss
                )

                total_mlm_loss += mlm_loss.item()
                total_contrastive_mod_loss += contrastive_mod_loss.item()
                total_contrastive_mob_loss += contrastive_mob_loss.item()
                total_loss += loss.item()
                total_batches += 1

    avg_mlm_loss = total_mlm_loss / total_batches if total_batches > 0 else 0
    avg_contrastive_mod_loss = total_contrastive_mod_loss / total_batches if total_batches > 0 else 0
    avg_contrastive_mob_loss = total_contrastive_mob_loss / total_batches if total_batches > 0 else 0
    avg_total_loss = total_loss / total_batches if total_batches > 0 else 0

    return {
        'mlm_loss': avg_mlm_loss,
        'contrastive_mod_loss': avg_contrastive_mod_loss,
        'contrastive_mob_loss': avg_contrastive_mob_loss,
        'total_loss': avg_total_loss
    }
# =============================================================================
# 14. MAIN TRAINING LOOP
# =============================================================================

# Per-run output directory, timestamped so runs never overwrite each other.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
save_dir = f"models/{timestamp}_contrastive"
print(f"📁 Models and logs will be saved to: {save_dir}")
os.makedirs(save_dir, exist_ok=True)

# Persist normalization stats so inference can reproduce the preprocessing.
stats_path = os.path.join(save_dir, "dataset_stats.json")
with open(stats_path, 'w') as f:
    json.dump(dataset_normalization, f, indent=2)
print(f"📝 Saved dataset stats to {stats_path}")

# Save training configuration
config = {
    'epochs': EPOCHS,
    'batch_size': BATCH_SIZE,
    'effective_batch_size': BATCH_SIZE * ACCUMULATION_STEPS,
    'accumulation_steps': ACCUMULATION_STEPS,
    'learning_rate': BASE_LR,
    'element_length': ELEMENT_LENGTH,
    'd_model': D_MODEL,
    'n_layers': N_LAYERS,
    'n_heads': N_HEADS,
    'projection_dim': PROJECTION_DIM,
    'contrastive_temperature': CONTRASTIVE_TEMPERATURE,
    'mlm_weight': MLM_WEIGHT,
    'contrastive_weight_modulation': CONTRASTIVE_WEIGHT_MODULATION,
    'contrastive_weight_mobility': CONTRASTIVE_WEIGHT_MOBILITY,
    'modulation_map': MODULATION_MAP,
    'doppler_map': DOPPLER_MAP,
    'num_modulations': len(MODULATION_MAP),
}
config_path = os.path.join(save_dir, "config.json")
with open(config_path, 'w') as f:
    json.dump(config, f, indent=2)
print(f"📝 Saved training config to {config_path}")

# Training log: CSV header written once; one row appended per epoch below.
log_path = os.path.join(save_dir, "training_log.csv")
with open(log_path, 'w') as f:
    f.write("epoch,train_mlm_loss,train_contrastive_mod_loss,train_contrastive_mob_loss,train_total_loss,")
    f.write("val_mlm_loss,val_contrastive_mod_loss,val_contrastive_mob_loss,val_total_loss,learning_rate\n")

print("\n" + "="*80)
print("🚀 Starting training with contrastive learning!")
print("="*80 + "\n")

if __name__ == "__main__":
    best_val_loss = float('inf')

    for epoch in range(EPOCHS):
        print(f"\n{'='*80}")
        print(f"Epoch {epoch+1}/{EPOCHS}")
        print(f"{'='*80}")

        # Train
        train_metrics = train_epoch_contrastive(
            model, train_loaders, optimizer, scheduler, device, epoch, train_metadata
        )

        # Validate
        val_metrics = validate_epoch_contrastive(
            model, val_loaders, device, epoch
        )

        # Log metrics
        current_lr = optimizer.param_groups[0]['lr']
        print(f"\nEpoch {epoch+1} Results:")
        print(f"   Train - MLM: {train_metrics['mlm_loss']:.4f}, "
              f"ContrastMod: {train_metrics['contrastive_mod_loss']:.4f}, "
              f"ContrastMob: {train_metrics['contrastive_mob_loss']:.4f}, "
              f"Total: {train_metrics['total_loss']:.4f}")
        print(f"   Val - MLM: {val_metrics['mlm_loss']:.4f}, "
              f"ContrastMod: {val_metrics['contrastive_mod_loss']:.4f}, "
              f"ContrastMob: {val_metrics['contrastive_mob_loss']:.4f}, "
              f"Total: {val_metrics['total_loss']:.4f}")
        print(f"   Learning Rate: {current_lr:.6f}")

        # Save to log
        with open(log_path, 'a') as f:
            f.write(f"{epoch+1},{train_metrics['mlm_loss']:.6f},"
                    f"{train_metrics['contrastive_mod_loss']:.6f},"
                    f"{train_metrics['contrastive_mob_loss']:.6f},"
                    f"{train_metrics['total_loss']:.6f},"
                    f"{val_metrics['mlm_loss']:.6f},"
                    f"{val_metrics['contrastive_mod_loss']:.6f},"
                    f"{val_metrics['contrastive_mob_loss']:.6f},"
                    f"{val_metrics['total_loss']:.6f},"
                    f"{current_lr:.8f}\n")

        # Save best model
        # DataParallel wraps the real model in .module — unwrap before saving
        # so checkpoints load cleanly regardless of GPU count.
        if val_metrics['total_loss'] < best_val_loss:
            best_val_loss = val_metrics['total_loss']
            checkpoint_path = os.path.join(save_dir, "best_model_contrastive.pth")
            if isinstance(model, nn.DataParallel):
                torch.save(model.module.state_dict(), checkpoint_path)
            else:
                torch.save(model.state_dict(), checkpoint_path)
            print(f"   ✅ Saved best model to {checkpoint_path}")

        # Save periodic checkpoint
        if (epoch + 1) % 5 == 0:
            checkpoint_path = os.path.join(save_dir, f"checkpoint_epoch{epoch+1}_contrastive.pth")
            if isinstance(model, nn.DataParallel):
                torch.save(model.module.state_dict(), checkpoint_path)
            else:
                torch.save(model.state_dict(), checkpoint_path)
            print(f"   💾 Saved checkpoint to {checkpoint_path}")

    print("\n" + "="*80)
    print("🎉 Training completed successfully!")
    print(f"📁 Models saved to: {save_dir}")
    print(f"📊 Training log: {log_path}")
    print("="*80 + "\n")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
pretraining/train_lwm_spectro_no_contrast.py
DELETED
|
@@ -1,1136 +0,0 @@
|
|
| 1 |
-
#!/usr/bin/env python3
|
| 2 |
-
# =============================================================================
|
| 3 |
-
# 1. IMPORTS AND WARNINGS SETUP
|
| 4 |
-
# - Load necessary PyTorch modules, utilities, and suppress UserWarnings
|
| 5 |
-
# =============================================================================
|
| 6 |
-
import sys
|
| 7 |
-
import os
|
| 8 |
-
import argparse
|
| 9 |
-
# Add project root to path (Windows compatible)
|
| 10 |
-
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
| 11 |
-
sys.path.insert(0, project_root)
|
| 12 |
-
import torch
|
| 13 |
-
import torch.nn as nn
|
| 14 |
-
import torch.nn.functional as F
|
| 15 |
-
from torch.utils.data import DataLoader, IterableDataset
|
| 16 |
-
import torch.distributed as dist
|
| 17 |
-
import torch.optim as optim
|
| 18 |
-
from utils import (generate_spectrograms_and_labels, tokenizer_train,
|
| 19 |
-
count_parameters, train_lwm)
|
| 20 |
-
import numpy as np
|
| 21 |
-
import pretrained_model # Assuming this contains the LWM model definition
|
| 22 |
-
from torch.optim.lr_scheduler import LambdaLR
|
| 23 |
-
from torch.optim import AdamW
|
| 24 |
-
import warnings
|
| 25 |
-
import platform
|
| 26 |
-
import re
|
| 27 |
-
from tqdm import tqdm
|
| 28 |
-
from datetime import datetime
|
| 29 |
-
import concurrent.futures
|
| 30 |
-
import multiprocessing
|
| 31 |
-
from collections import Counter
|
| 32 |
-
from functools import lru_cache
|
| 33 |
-
import json
|
| 34 |
-
import random
|
| 35 |
-
import math
|
| 36 |
-
from typing import Any, Dict, Optional, List, Tuple
|
| 37 |
-
import time
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
LOG_ALL_RANKS = False
|
| 41 |
-
|
| 42 |
-
SNR_PATTERN = re.compile(r"SNR(-?\d+)dB")
|
| 43 |
-
DOPPLER_MAP = {"static": 0, "pedestrian": 1, "vehicular": 2}
|
| 44 |
-
DOPPLER_INV = {v: k for k, v in DOPPLER_MAP.items()}
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
def _is_hpu_available() -> bool:
|
| 48 |
-
hpu = getattr(torch, "hpu", None)
|
| 49 |
-
if hpu is None:
|
| 50 |
-
return False
|
| 51 |
-
is_available = getattr(hpu, "is_available", None)
|
| 52 |
-
available = False
|
| 53 |
-
if callable(is_available):
|
| 54 |
-
try:
|
| 55 |
-
available = bool(is_available())
|
| 56 |
-
except Exception:
|
| 57 |
-
available = False
|
| 58 |
-
if not available:
|
| 59 |
-
# Try initializing the Habana runtime lazily
|
| 60 |
-
try:
|
| 61 |
-
import habana_frameworks.torch.core as htcore # type: ignore
|
| 62 |
-
|
| 63 |
-
init_fn = getattr(htcore, "hpu_initialize", None)
|
| 64 |
-
if callable(init_fn):
|
| 65 |
-
init_fn()
|
| 66 |
-
else:
|
| 67 |
-
inference_init = getattr(htcore, "hpu_inference_initialize", None)
|
| 68 |
-
if callable(inference_init):
|
| 69 |
-
inference_init()
|
| 70 |
-
available = bool(is_available())
|
| 71 |
-
except Exception:
|
| 72 |
-
available = False
|
| 73 |
-
return available
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
def _get_hpu_device_count() -> int:
|
| 77 |
-
hpu = getattr(torch, "hpu", None)
|
| 78 |
-
if hpu is None:
|
| 79 |
-
return 0
|
| 80 |
-
device_count_fn = getattr(hpu, "device_count", None)
|
| 81 |
-
if callable(device_count_fn):
|
| 82 |
-
try:
|
| 83 |
-
return int(device_count_fn())
|
| 84 |
-
except Exception:
|
| 85 |
-
return 0
|
| 86 |
-
return 1 if _is_hpu_available() else 0
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
def _initialize_distributed(hpu_available: bool, backend_override: Optional[str] = None) -> Dict[str, Any]:
|
| 90 |
-
context: Dict[str, Any] = {
|
| 91 |
-
"is_distributed": False,
|
| 92 |
-
"backend": None,
|
| 93 |
-
"rank": 0,
|
| 94 |
-
"world_size": 1,
|
| 95 |
-
"local_rank": 0,
|
| 96 |
-
"is_primary": True,
|
| 97 |
-
}
|
| 98 |
-
if not dist.is_available():
|
| 99 |
-
return context
|
| 100 |
-
|
| 101 |
-
required_env = ("RANK", "WORLD_SIZE")
|
| 102 |
-
if not all(key in os.environ for key in required_env):
|
| 103 |
-
return context
|
| 104 |
-
|
| 105 |
-
if dist.is_initialized():
|
| 106 |
-
context["is_distributed"] = True
|
| 107 |
-
context["backend"] = dist.get_backend()
|
| 108 |
-
context["rank"] = dist.get_rank()
|
| 109 |
-
context["world_size"] = dist.get_world_size()
|
| 110 |
-
context["local_rank"] = int(os.environ.get("LOCAL_RANK", context["rank"]))
|
| 111 |
-
context["is_primary"] = context["rank"] == 0
|
| 112 |
-
return context
|
| 113 |
-
|
| 114 |
-
backend = backend_override or os.environ.get("LWM_DISTRIBUTED_BACKEND")
|
| 115 |
-
if not backend:
|
| 116 |
-
if hpu_available:
|
| 117 |
-
backend = "hccl"
|
| 118 |
-
elif torch.cuda.is_available():
|
| 119 |
-
backend = "nccl"
|
| 120 |
-
else:
|
| 121 |
-
backend = "gloo"
|
| 122 |
-
|
| 123 |
-
dist.init_process_group(backend=backend, init_method="env://")
|
| 124 |
-
|
| 125 |
-
context["is_distributed"] = True
|
| 126 |
-
context["backend"] = backend
|
| 127 |
-
context["rank"] = dist.get_rank()
|
| 128 |
-
context["world_size"] = dist.get_world_size()
|
| 129 |
-
context["local_rank"] = int(os.environ.get("LOCAL_RANK", context["rank"]))
|
| 130 |
-
context["is_primary"] = context["rank"] == 0
|
| 131 |
-
return context
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
def _broadcast_object(obj: Any, src: int = 0) -> Any:
|
| 135 |
-
if not dist.is_available() or not dist.is_initialized():
|
| 136 |
-
return obj
|
| 137 |
-
object_list = [obj]
|
| 138 |
-
dist.broadcast_object_list(object_list, src=src)
|
| 139 |
-
return object_list[0]
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
def _should_log(context: Dict[str, Any]) -> bool:
|
| 143 |
-
return LOG_ALL_RANKS or (not context.get("is_distributed")) or context.get("is_primary", True)
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
def _barrier(context: Dict[str, Any]) -> None:
|
| 147 |
-
if context.get("is_distributed") and dist.is_available() and dist.is_initialized():
|
| 148 |
-
dist.barrier()
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
def _parse_snr_and_doppler(path: str) -> tuple[float, int]:
|
| 152 |
-
snr_db = 0.0
|
| 153 |
-
doppler_id = 0
|
| 154 |
-
|
| 155 |
-
matches = SNR_PATTERN.findall(path)
|
| 156 |
-
if matches:
|
| 157 |
-
try:
|
| 158 |
-
snr_db = float(matches[-1])
|
| 159 |
-
except ValueError:
|
| 160 |
-
snr_db = 0.0
|
| 161 |
-
|
| 162 |
-
normalized_path = os.path.normpath(path)
|
| 163 |
-
parts = normalized_path.split(os.sep)
|
| 164 |
-
for part in parts:
|
| 165 |
-
if part in DOPPLER_MAP:
|
| 166 |
-
doppler_id = DOPPLER_MAP[part]
|
| 167 |
-
break
|
| 168 |
-
|
| 169 |
-
return snr_db, doppler_id
|
| 170 |
-
|
| 171 |
-
def _parse_runtime_args():
|
| 172 |
-
parser = argparse.ArgumentParser(add_help=False)
|
| 173 |
-
parser.add_argument(
|
| 174 |
-
"--device",
|
| 175 |
-
default=os.environ.get("LWM_DEVICE", "auto"),
|
| 176 |
-
choices=("auto", "cpu", "cuda", "hpu", "mps"),
|
| 177 |
-
help="Select accelerator device (default: auto)."
|
| 178 |
-
)
|
| 179 |
-
parser.add_argument(
|
| 180 |
-
"--dist-backend",
|
| 181 |
-
dest="dist_backend",
|
| 182 |
-
default=os.environ.get("LWM_DIST_BACKEND"),
|
| 183 |
-
help="Override torch.distributed backend."
|
| 184 |
-
)
|
| 185 |
-
parser.add_argument(
|
| 186 |
-
"--log-all-ranks",
|
| 187 |
-
action="store_true",
|
| 188 |
-
help="If set, every rank prints logs instead of rank 0 only."
|
| 189 |
-
)
|
| 190 |
-
args, remaining = parser.parse_known_args()
|
| 191 |
-
sys.argv = [sys.argv[0]] + remaining
|
| 192 |
-
return args
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
warnings.filterwarnings("ignore", category=UserWarning)
|
| 196 |
-
|
| 197 |
-
RUNTIME_ARGS = _parse_runtime_args()
|
| 198 |
-
if getattr(RUNTIME_ARGS, "dist_backend", None) and RUNTIME_ARGS.dist_backend not in {"gloo", "nccl", "hccl"}:
|
| 199 |
-
raise ValueError(f"Unsupported dist backend override: {RUNTIME_ARGS.dist_backend}")
|
| 200 |
-
LOG_ALL_RANKS = bool(getattr(RUNTIME_ARGS, "log_all_ranks", False))
|
| 201 |
-
|
| 202 |
-
TRAIN_SPLIT_FRACTION = 0.8
|
| 203 |
-
VAL_SPLIT_FRACTION = 1.0 - TRAIN_SPLIT_FRACTION
|
| 204 |
-
DEFAULT_SAMPLES_PER_SCENARIO = int(os.environ.get("LWM_SAMPLES_PER_SCENARIO", "1000"))
|
| 205 |
-
|
| 206 |
-
# Use simple progress display instead of tqdm on Windows
|
| 207 |
-
USE_TQDM = platform.system() != 'Windows'
|
| 208 |
-
|
| 209 |
-
HPU_AVAILABLE = _is_hpu_available()
|
| 210 |
-
distributed_context = _initialize_distributed(HPU_AVAILABLE, backend_override=getattr(RUNTIME_ARGS, "dist_backend", None))
|
| 211 |
-
LOG_PRIMARY = _should_log(distributed_context)
|
| 212 |
-
HPU_DEBUG_LOG = os.environ.get("LWM_DEBUG_HPU_INIT", "").lower() in {"1", "true", "yes"}
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
def _debug_hpu(message: str) -> None:
|
| 216 |
-
if not HPU_DEBUG_LOG:
|
| 217 |
-
return
|
| 218 |
-
rank = distributed_context.get("rank", 0)
|
| 219 |
-
print(f"[HPU-DEBUG rank {rank}] {message}", flush=True)
|
| 220 |
-
|
| 221 |
-
if distributed_context["is_distributed"] and LOG_PRIMARY:
|
| 222 |
-
print(
|
| 223 |
-
f"🔗 Distributed initialized -> backend={distributed_context['backend']}, "
|
| 224 |
-
f"world_size={distributed_context['world_size']}, rank={distributed_context['rank']}"
|
| 225 |
-
)
|
| 226 |
-
|
| 227 |
-
# CPU 코어 수 계산 (메모리 사용량 고려하여 보수적으로 설정)
|
| 228 |
-
total_cores = multiprocessing.cpu_count()
|
| 229 |
-
if total_cores >= 16:
|
| 230 |
-
MAX_WORKERS = min(8, total_cores // 2) # 고성능 서버의 경우 8코어로 제한
|
| 231 |
-
else:
|
| 232 |
-
MAX_WORKERS = max(2, total_cores // 2) # 일반 시스템의 경우 절반 사용
|
| 233 |
-
if LOG_PRIMARY:
|
| 234 |
-
print(f"🚀 Using {MAX_WORKERS}/{total_cores} CPU cores for parallel processing")
|
| 235 |
-
|
| 236 |
-
def process_single_scenario(scenario_info):
|
| 237 |
-
"""단일 시나리오를 처리하는 함수 (멀티프로세싱용)"""
|
| 238 |
-
scenario_name, spectrogram_path = scenario_info
|
| 239 |
-
|
| 240 |
-
try:
|
| 241 |
-
# 메모리 효율성을 위해 필요한 데이터만 로드
|
| 242 |
-
scenario_spectrograms, scenario_labels = generate_spectrograms_and_labels(
|
| 243 |
-
scenario_name=scenario_name,
|
| 244 |
-
spectrogram_path=spectrogram_path,
|
| 245 |
-
cache_path=None, # 메모리 문제로 캐시 비활성화
|
| 246 |
-
)
|
| 247 |
-
|
| 248 |
-
snr_db, doppler_id = _parse_snr_and_doppler(spectrogram_path)
|
| 249 |
-
|
| 250 |
-
# 데이터 분할 (인덱스만 계산)
|
| 251 |
-
total_samples = len(scenario_spectrograms)
|
| 252 |
-
train_size = int(TRAIN_SPLIT_FRACTION * total_samples)
|
| 253 |
-
val_size = total_samples - train_size
|
| 254 |
-
|
| 255 |
-
# 메모리 절약을 위해 numpy array로 유지 (필요할 때만 tensor로 변환)
|
| 256 |
-
train_data = np.array(scenario_spectrograms[:train_size], dtype=np.float32)
|
| 257 |
-
val_data = np.array(scenario_spectrograms[train_size:], dtype=np.float32)
|
| 258 |
-
|
| 259 |
-
snr_array = np.full(total_samples, snr_db, dtype=np.float32)
|
| 260 |
-
doppler_array = np.full(total_samples, doppler_id, dtype=np.int64)
|
| 261 |
-
train_meta = {
|
| 262 |
-
'snr_db': snr_array[:train_size],
|
| 263 |
-
'doppler_id': doppler_array[:train_size],
|
| 264 |
-
}
|
| 265 |
-
val_meta = {
|
| 266 |
-
'snr_db': snr_array[train_size:],
|
| 267 |
-
'doppler_id': doppler_array[train_size:],
|
| 268 |
-
}
|
| 269 |
-
|
| 270 |
-
# 불필요한 데이터 즉시 삭제
|
| 271 |
-
del scenario_spectrograms
|
| 272 |
-
|
| 273 |
-
return {
|
| 274 |
-
'scenario': scenario_name,
|
| 275 |
-
'train_data': train_data,
|
| 276 |
-
'val_data': val_data,
|
| 277 |
-
'train_meta': train_meta,
|
| 278 |
-
'val_meta': val_meta,
|
| 279 |
-
'train_size': len(train_data),
|
| 280 |
-
'val_size': len(val_data)
|
| 281 |
-
}
|
| 282 |
-
except Exception as e:
|
| 283 |
-
context = globals().get("distributed_context", {})
|
| 284 |
-
if LOG_PRIMARY or not context.get("is_distributed", False):
|
| 285 |
-
print(f"❌ Error processing scenario {scenario_name}: {e}")
|
| 286 |
-
return None
|
| 287 |
-
|
| 288 |
-
# GPU Memory Monitor import (for Lambda) - Removed
|
| 289 |
-
|
| 290 |
-
class StreamingMaskedSpectrogramDataset(IterableDataset):
|
| 291 |
-
"""Stream spectrogram samples scenario-by-scenario to limit peak memory usage."""
|
| 292 |
-
|
| 293 |
-
def __init__(
|
| 294 |
-
self,
|
| 295 |
-
scenario_info_list,
|
| 296 |
-
split,
|
| 297 |
-
normalization_mode,
|
| 298 |
-
dataset_stats,
|
| 299 |
-
mask_percent,
|
| 300 |
-
max_len,
|
| 301 |
-
seed=42,
|
| 302 |
-
shuffle=True,
|
| 303 |
-
rank: int = 0,
|
| 304 |
-
world_size: int = 1,
|
| 305 |
-
):
|
| 306 |
-
super().__init__()
|
| 307 |
-
if split not in {"train", "val"}:
|
| 308 |
-
raise ValueError(f"Unsupported split '{split}'. Expected 'train' or 'val'.")
|
| 309 |
-
self.scenario_info_list = list(scenario_info_list)
|
| 310 |
-
self.split = split
|
| 311 |
-
self.normalization_mode = normalization_mode
|
| 312 |
-
self.dataset_stats = dataset_stats or {'mean': 0.0, 'std': 1.0, 'normalization': normalization_mode}
|
| 313 |
-
self.mask_percent = mask_percent
|
| 314 |
-
self.max_len = max_len
|
| 315 |
-
self.seed = seed
|
| 316 |
-
self.shuffle = shuffle
|
| 317 |
-
self._epoch = 0
|
| 318 |
-
self.num_samples = 0 # Populated after dataset summary
|
| 319 |
-
self.rank = rank
|
| 320 |
-
self.world_size = max(1, world_size)
|
| 321 |
-
|
| 322 |
-
def _format_sample(self, sample_dict):
|
| 323 |
-
input_ids = torch.from_numpy(sample_dict['input_ids']).float()
|
| 324 |
-
masked_tokens = torch.from_numpy(sample_dict['masked_tokens']).float()
|
| 325 |
-
masked_pos = torch.from_numpy(sample_dict['masked_pos']).long()
|
| 326 |
-
snr_db = torch.tensor(sample_dict.get('snr_db', 0.0), dtype=torch.float32)
|
| 327 |
-
doppler_id = torch.tensor(sample_dict.get('doppler_id', 0), dtype=torch.long)
|
| 328 |
-
power_stats = torch.tensor(sample_dict.get('power_stats', np.zeros(2, dtype=np.float32)), dtype=torch.float32)
|
| 329 |
-
snr_id = torch.tensor(sample_dict.get('snr_id', -1), dtype=torch.long)
|
| 330 |
-
modulation_id = torch.tensor(sample_dict.get('modulation_id', -1), dtype=torch.long)
|
| 331 |
-
return (
|
| 332 |
-
input_ids,
|
| 333 |
-
masked_tokens,
|
| 334 |
-
masked_pos,
|
| 335 |
-
snr_db,
|
| 336 |
-
doppler_id,
|
| 337 |
-
power_stats,
|
| 338 |
-
snr_id,
|
| 339 |
-
modulation_id,
|
| 340 |
-
)
|
| 341 |
-
|
| 342 |
-
def __iter__(self):
|
| 343 |
-
order = list(self.scenario_info_list)
|
| 344 |
-
if self.shuffle and order:
|
| 345 |
-
rng = random.Random(self.seed + self._epoch)
|
| 346 |
-
rng.shuffle(order)
|
| 347 |
-
epoch_seed = self.seed + self._epoch
|
| 348 |
-
self._epoch += 1
|
| 349 |
-
|
| 350 |
-
for idx, (scenario_name, spectrogram_path) in enumerate(order):
|
| 351 |
-
if self.world_size > 1 and (idx % self.world_size) != self.rank:
|
| 352 |
-
continue
|
| 353 |
-
result = process_single_scenario((scenario_name, spectrogram_path))
|
| 354 |
-
if result is None:
|
| 355 |
-
continue
|
| 356 |
-
|
| 357 |
-
data_key = 'train_data' if self.split == 'train' else 'val_data'
|
| 358 |
-
meta_key = 'train_meta' if self.split == 'train' else 'val_meta'
|
| 359 |
-
spectrograms = result.get(data_key)
|
| 360 |
-
metadata = result.get(meta_key)
|
| 361 |
-
|
| 362 |
-
if spectrograms is None or len(spectrograms) == 0:
|
| 363 |
-
continue
|
| 364 |
-
|
| 365 |
-
scenario_seed = (epoch_seed + idx) % (2**32)
|
| 366 |
-
tokenized = tokenizer_train(
|
| 367 |
-
spectrograms,
|
| 368 |
-
max_len=self.max_len,
|
| 369 |
-
masking_percent=self.mask_percent,
|
| 370 |
-
mask=True,
|
| 371 |
-
seed=scenario_seed,
|
| 372 |
-
metadata=metadata,
|
| 373 |
-
dataset_stats=self.dataset_stats,
|
| 374 |
-
normalization=self.normalization_mode,
|
| 375 |
-
show_progress=False,
|
| 376 |
-
)
|
| 377 |
-
|
| 378 |
-
for samples in tokenized.values():
|
| 379 |
-
for sample_dict in samples:
|
| 380 |
-
yield self._format_sample(sample_dict)
|
| 381 |
-
|
| 382 |
-
del tokenized, spectrograms, metadata, result
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
def summarize_scenarios(scenario_info_list, normalization_mode):
|
| 386 |
-
"""Calculate dataset-level normalization stats and sample counts without storing all data in memory."""
|
| 387 |
-
total_sum = 0.0
|
| 388 |
-
total_sq = 0.0
|
| 389 |
-
total_count = 0
|
| 390 |
-
train_samples = 0
|
| 391 |
-
val_samples = 0
|
| 392 |
-
|
| 393 |
-
iterator = scenario_info_list
|
| 394 |
-
if USE_TQDM and LOG_PRIMARY:
|
| 395 |
-
iterator = tqdm(scenario_info_list, desc="Summarizing scenarios", unit="scenario")
|
| 396 |
-
|
| 397 |
-
for scenario_name, spectrogram_path in iterator:
|
| 398 |
-
result = process_single_scenario((scenario_name, spectrogram_path))
|
| 399 |
-
if result is None:
|
| 400 |
-
continue
|
| 401 |
-
|
| 402 |
-
train_data = result.get('train_data')
|
| 403 |
-
val_data = result.get('val_data')
|
| 404 |
-
|
| 405 |
-
if isinstance(train_data, np.ndarray):
|
| 406 |
-
train_samples += train_data.shape[0]
|
| 407 |
-
if normalization_mode == "dataset" and train_data.size > 0:
|
| 408 |
-
arr64 = train_data.astype(np.float64, copy=False)
|
| 409 |
-
total_sum += arr64.sum()
|
| 410 |
-
total_sq += np.square(arr64).sum(dtype=np.float64)
|
| 411 |
-
total_count += arr64.size
|
| 412 |
-
|
| 413 |
-
if isinstance(val_data, np.ndarray):
|
| 414 |
-
val_samples += val_data.shape[0]
|
| 415 |
-
|
| 416 |
-
del result
|
| 417 |
-
|
| 418 |
-
if normalization_mode == "dataset":
|
| 419 |
-
if total_count == 0:
|
| 420 |
-
raise ValueError("Unable to compute dataset statistics: no training samples available.")
|
| 421 |
-
mean = float(total_sum / total_count)
|
| 422 |
-
variance = max(float(total_sq / total_count - mean ** 2), 1e-12)
|
| 423 |
-
std = float(np.sqrt(variance))
|
| 424 |
-
else:
|
| 425 |
-
mean = 0.0
|
| 426 |
-
std = 1.0
|
| 427 |
-
|
| 428 |
-
stats = {'mean': mean, 'std': std, 'normalization': normalization_mode}
|
| 429 |
-
return stats, train_samples, val_samples
|
| 430 |
-
|
| 431 |
-
|
| 432 |
-
# =============================================================================
|
| 433 |
-
# 2. SCENARIO LIST DEFINITION
|
| 434 |
-
# - Define the list of scenario names to iterate over for data generation
|
| 435 |
-
# =============================================================================
|
| 436 |
-
|
| 437 |
-
# Supported communications; can be limited via CLI
|
| 438 |
-
SUPPORTED_COMM_TYPES = {"LTE", "WiFi", "5G"}
|
| 439 |
-
|
| 440 |
-
|
| 441 |
-
def _parse_standard_args():
|
| 442 |
-
parser = argparse.ArgumentParser(add_help=False)
|
| 443 |
-
parser.add_argument('--standards', nargs='+', choices=SUPPORTED_COMM_TYPES,
|
| 444 |
-
help='Specify one or more communication types to include (default: all).')
|
| 445 |
-
for comm in SUPPORTED_COMM_TYPES:
|
| 446 |
-
parser.add_argument(f'--{comm}', dest=f'flag_{comm}', action='store_true',
|
| 447 |
-
help=f'Include only {comm} data (can be combined).')
|
| 448 |
-
parser.add_argument('--city', '--cities', dest='cities', nargs='+',
|
| 449 |
-
help='Limit scenarios to one or more city prefixes (e.g., "0" or "city_0").')
|
| 450 |
-
parser.add_argument(
|
| 451 |
-
'--normalization',
|
| 452 |
-
choices=('per_sample', 'dataset'),
|
| 453 |
-
default='per_sample',
|
| 454 |
-
help='Normalization mode applied during tokenization (default: %(default)s).'
|
| 455 |
-
)
|
| 456 |
-
parser.add_argument('--help', action='help')
|
| 457 |
-
|
| 458 |
-
args, remaining = parser.parse_known_args()
|
| 459 |
-
|
| 460 |
-
enabled = set(SUPPORTED_COMM_TYPES)
|
| 461 |
-
if args.standards:
|
| 462 |
-
enabled = set(args.standards)
|
| 463 |
-
else:
|
| 464 |
-
flagged = {comm for comm in SUPPORTED_COMM_TYPES if getattr(args, f'flag_{comm}', False)}
|
| 465 |
-
if flagged:
|
| 466 |
-
enabled = flagged
|
| 467 |
-
|
| 468 |
-
selected_cities: list[str] | None = None
|
| 469 |
-
if args.cities:
|
| 470 |
-
selected_cities = []
|
| 471 |
-
for city_token in args.cities:
|
| 472 |
-
token = str(city_token).strip()
|
| 473 |
-
if not token:
|
| 474 |
-
continue
|
| 475 |
-
if token.startswith('city_'):
|
| 476 |
-
selected_cities.append(token)
|
| 477 |
-
else:
|
| 478 |
-
selected_cities.append(f'city_{token}')
|
| 479 |
-
if not selected_cities:
|
| 480 |
-
selected_cities = None
|
| 481 |
-
|
| 482 |
-
# Return remaining args to allow downstream parsing if needed
|
| 483 |
-
sys.argv = [sys.argv[0]] + remaining
|
| 484 |
-
return enabled, selected_cities, args.normalization
|
| 485 |
-
|
| 486 |
-
|
| 487 |
-
ENABLED_COMM_TYPES, ENABLED_CITY_PREFIXES, NORMALIZATION_MODE = _parse_standard_args()
|
| 488 |
-
MAX_SCENARIOS = int(os.environ.get("LWM_MAX_SCENARIOS", "0")) or None
|
| 489 |
-
|
| 490 |
-
SCENARIO_ENTRIES: Optional[List[Tuple[str, str, str, str]]] = None
|
| 491 |
-
|
| 492 |
-
|
| 493 |
-
def _scenario_manifest_path() -> str:
|
| 494 |
-
"""Build cache file path based on selected comm types and city filters."""
|
| 495 |
-
comm_token = "-".join(sorted(ENABLED_COMM_TYPES)) if ENABLED_COMM_TYPES else "all"
|
| 496 |
-
city_token = "-".join(sorted(ENABLED_CITY_PREFIXES)) if ENABLED_CITY_PREFIXES else "all"
|
| 497 |
-
limit_token = MAX_SCENARIOS if MAX_SCENARIOS is not None else "all"
|
| 498 |
-
filename = f"_scenario_entries_{comm_token}_{city_token}_max{limit_token}.json"
|
| 499 |
-
return os.path.join("spectrograms", filename)
|
| 500 |
-
|
| 501 |
-
|
| 502 |
-
def _get_scenario_entries() -> List[Tuple[str, str, str, str]]:
|
| 503 |
-
"""Gather scenario metadata once on rank 0 and share via disk cache. Avoids long-lived collectives."""
|
| 504 |
-
global SCENARIO_ENTRIES
|
| 505 |
-
if SCENARIO_ENTRIES is not None:
|
| 506 |
-
return SCENARIO_ENTRIES
|
| 507 |
-
|
| 508 |
-
manifest_path = _scenario_manifest_path()
|
| 509 |
-
refresh_requested = os.environ.get("LWM_REFRESH_SCENARIOS", "").lower() in {"1", "true", "yes"}
|
| 510 |
-
|
| 511 |
-
def _load_manifest() -> Optional[List[Tuple[str, str, str, str]]]:
|
| 512 |
-
try:
|
| 513 |
-
with open(manifest_path, "r", encoding="utf-8") as f:
|
| 514 |
-
raw_entries = json.load(f)
|
| 515 |
-
except FileNotFoundError:
|
| 516 |
-
return None
|
| 517 |
-
except Exception as exc:
|
| 518 |
-
if LOG_PRIMARY:
|
| 519 |
-
print(f"⚠️ Unable to read scenario manifest {manifest_path}: {exc}", flush=True)
|
| 520 |
-
return None
|
| 521 |
-
|
| 522 |
-
entries: List[Tuple[str, str, str, str]] = []
|
| 523 |
-
for item in raw_entries:
|
| 524 |
-
if isinstance(item, dict):
|
| 525 |
-
entries.append(
|
| 526 |
-
(
|
| 527 |
-
item.get("scenario_id", ""),
|
| 528 |
-
item.get("file_path", ""),
|
| 529 |
-
item.get("city_name", ""),
|
| 530 |
-
item.get("base_token", ""),
|
| 531 |
-
)
|
| 532 |
-
)
|
| 533 |
-
elif isinstance(item, (list, tuple)) and len(item) == 4:
|
| 534 |
-
entries.append((str(item[0]), str(item[1]), str(item[2]), str(item[3])))
|
| 535 |
-
return entries if entries else None
|
| 536 |
-
|
| 537 |
-
def _save_manifest(entries_to_save: List[Tuple[str, str, str, str]]) -> None:
|
| 538 |
-
try:
|
| 539 |
-
os.makedirs(os.path.dirname(manifest_path), exist_ok=True)
|
| 540 |
-
tmp_path = f"{manifest_path}.tmp"
|
| 541 |
-
payload = [
|
| 542 |
-
{
|
| 543 |
-
"scenario_id": scenario_id,
|
| 544 |
-
"file_path": file_path,
|
| 545 |
-
"city_name": city_name,
|
| 546 |
-
"base_token": base_token,
|
| 547 |
-
}
|
| 548 |
-
for scenario_id, file_path, city_name, base_token in entries_to_save
|
| 549 |
-
]
|
| 550 |
-
with open(tmp_path, "w", encoding="utf-8") as f:
|
| 551 |
-
json.dump(payload, f)
|
| 552 |
-
os.replace(tmp_path, manifest_path)
|
| 553 |
-
if LOG_PRIMARY:
|
| 554 |
-
print(f"📊 [debug] Scenario manifest saved to {manifest_path}", flush=True)
|
| 555 |
-
except Exception as exc:
|
| 556 |
-
if LOG_PRIMARY:
|
| 557 |
-
print(f"⚠️ Failed to save scenario manifest {manifest_path}: {exc}", flush=True)
|
| 558 |
-
|
| 559 |
-
entries: Optional[List[Tuple[str, str, str, str]]] = None
|
| 560 |
-
if distributed_context["is_distributed"]:
|
| 561 |
-
entries = None if refresh_requested else _load_manifest()
|
| 562 |
-
if entries is None:
|
| 563 |
-
if distributed_context["is_primary"]:
|
| 564 |
-
if LOG_PRIMARY:
|
| 565 |
-
print("📊 [debug] Rank0 starting scenario discovery", flush=True)
|
| 566 |
-
entries = _collect_scenario_file_info()
|
| 567 |
-
if LOG_PRIMARY:
|
| 568 |
-
print(f"📊 [debug] Rank0 collected {len(entries)} scenario entries", flush=True)
|
| 569 |
-
_save_manifest(entries)
|
| 570 |
-
else:
|
| 571 |
-
deadline = time.time() + 300.0
|
| 572 |
-
while time.time() < deadline:
|
| 573 |
-
entries = _load_manifest()
|
| 574 |
-
if entries is not None:
|
| 575 |
-
break
|
| 576 |
-
time.sleep(1.0)
|
| 577 |
-
if entries is None:
|
| 578 |
-
raise RuntimeError(
|
| 579 |
-
f"Scenario manifest {manifest_path} not found after waiting. "
|
| 580 |
-
"Run with LWM_REFRESH_SCENARIOS=1 on a single rank to regenerate."
|
| 581 |
-
)
|
| 582 |
-
elif LOG_PRIMARY and distributed_context["is_primary"]:
|
| 583 |
-
print(f"📊 [debug] Rank0 loaded {len(entries)} scenario entries from manifest", flush=True)
|
| 584 |
-
else:
|
| 585 |
-
entries = None if refresh_requested else _load_manifest()
|
| 586 |
-
if entries is None:
|
| 587 |
-
if LOG_PRIMARY:
|
| 588 |
-
print("📊 [debug] Single-process scenario discovery", flush=True)
|
| 589 |
-
entries = _collect_scenario_file_info()
|
| 590 |
-
if LOG_PRIMARY:
|
| 591 |
-
print(f"📊 [debug] Collected {len(entries)} scenario entries (single process)", flush=True)
|
| 592 |
-
_save_manifest(entries)
|
| 593 |
-
elif LOG_PRIMARY:
|
| 594 |
-
print(f"📊 [debug] Loaded {len(entries)} scenario entries from manifest", flush=True)
|
| 595 |
-
|
| 596 |
-
if entries is None:
|
| 597 |
-
entries = []
|
| 598 |
-
SCENARIO_ENTRIES = entries
|
| 599 |
-
return entries
|
| 600 |
-
|
| 601 |
-
|
| 602 |
-
def _extract_scenario_token(file_path):
|
| 603 |
-
"""Derive the base scenario token (without city) from the file path."""
|
| 604 |
-
normalized_path = os.path.normpath(file_path)
|
| 605 |
-
parts = normalized_path.split(os.sep)
|
| 606 |
-
|
| 607 |
-
scenario_parts = []
|
| 608 |
-
for i, part in enumerate(parts):
|
| 609 |
-
if part in SUPPORTED_COMM_TYPES:
|
| 610 |
-
if i + 4 < len(parts):
|
| 611 |
-
scenario_parts = [part] + parts[i + 1:i + 5]
|
| 612 |
-
break
|
| 613 |
-
return '_'.join(scenario_parts) if scenario_parts else None
|
| 614 |
-
|
| 615 |
-
|
| 616 |
-
@lru_cache(maxsize=1)
|
| 617 |
-
def _collect_scenario_file_info():
|
| 618 |
-
import glob
|
| 619 |
-
|
| 620 |
-
if LOG_PRIMARY:
|
| 621 |
-
print("📊 [debug] _collect_scenario_file_info scanning directories...", flush=True)
|
| 622 |
-
city_dirs = []
|
| 623 |
-
for d in sorted(glob.glob(os.path.join('spectrograms', 'city_*'))):
|
| 624 |
-
if not os.path.isdir(d):
|
| 625 |
-
continue
|
| 626 |
-
city_dirs.append(d)
|
| 627 |
-
|
| 628 |
-
scenario_entries = []
|
| 629 |
-
for city_dir in city_dirs:
|
| 630 |
-
city_name = os.path.basename(city_dir)
|
| 631 |
-
if ENABLED_CITY_PREFIXES:
|
| 632 |
-
if not any(city_name.startswith(prefix) for prefix in ENABLED_CITY_PREFIXES):
|
| 633 |
-
continue
|
| 634 |
-
pattern = os.path.join(city_dir, '**', '512FFT', '**', 'spectrograms', '*.pkl')
|
| 635 |
-
city_files = sorted(glob.glob(pattern, recursive=True))
|
| 636 |
-
for file_path in city_files:
|
| 637 |
-
base_token = _extract_scenario_token(file_path)
|
| 638 |
-
if not base_token:
|
| 639 |
-
continue
|
| 640 |
-
scenario_id = f"{city_name}::{base_token}"
|
| 641 |
-
comm_type = base_token.split('_', 1)[0]
|
| 642 |
-
if comm_type not in ENABLED_COMM_TYPES:
|
| 643 |
-
continue
|
| 644 |
-
scenario_entries.append((scenario_id, file_path, city_name, base_token))
|
| 645 |
-
|
| 646 |
-
if MAX_SCENARIOS:
|
| 647 |
-
scenario_entries = scenario_entries[:MAX_SCENARIOS]
|
| 648 |
-
|
| 649 |
-
if LOG_PRIMARY:
|
| 650 |
-
print(f"📊 [debug] _collect_scenario_file_info found {len(scenario_entries)} entries", flush=True)
|
| 651 |
-
return scenario_entries
|
| 652 |
-
|
| 653 |
-
|
| 654 |
-
def scenarios_list():
|
| 655 |
-
scenario_entries = _get_scenario_entries()
|
| 656 |
-
|
| 657 |
-
if not scenario_entries:
|
| 658 |
-
if LOG_PRIMARY:
|
| 659 |
-
print("⚠️ No spectrogram files found for pretraining.", flush=True)
|
| 660 |
-
return np.array([])
|
| 661 |
-
|
| 662 |
-
if LOG_PRIMARY:
|
| 663 |
-
print(f"📊 [debug] scenarios_list received {len(scenario_entries)} entries", flush=True)
|
| 664 |
-
print(f"Enabled communication types: {sorted(ENABLED_COMM_TYPES)}", flush=True)
|
| 665 |
-
if ENABLED_CITY_PREFIXES:
|
| 666 |
-
print(f"Selected city prefixes: {sorted(ENABLED_CITY_PREFIXES)}", flush=True)
|
| 667 |
-
city_counts = Counter(entry[2] for entry in scenario_entries)
|
| 668 |
-
print("Using scenarios from the following city datasets:", flush=True)
|
| 669 |
-
for city_name, count in city_counts.items():
|
| 670 |
-
print(f" - {city_name}: {count} files", flush=True)
|
| 671 |
-
|
| 672 |
-
print(f"Total scenarios selected: {len(scenario_entries)}", flush=True)
|
| 673 |
-
return np.array([entry[0] for entry in scenario_entries])
|
| 674 |
-
|
| 675 |
-
|
| 676 |
-
# =============================================================================
|
| 677 |
-
# 3. SCENARIO PROPERTIES MAPPING
|
| 678 |
-
# - Map each scenario name to its corresponding properties
|
| 679 |
-
# =============================================================================
|
| 680 |
-
|
| 681 |
-
def scenario_prop():
|
| 682 |
-
scenario_entries = _get_scenario_entries()
|
| 683 |
-
|
| 684 |
-
row_column_users = {}
|
| 685 |
-
for scenario_id, file_path, city_name, _ in scenario_entries:
|
| 686 |
-
row_column_users[scenario_id] = {
|
| 687 |
-
'spectrogram_path': file_path,
|
| 688 |
-
'cache_path': os.path.join('spectrograms', city_name, 'spectrogram_cache_128x128.pkl')
|
| 689 |
-
}
|
| 690 |
-
|
| 691 |
-
return row_column_users
|
| 692 |
-
|
| 693 |
-
# =============================================================================
|
| 694 |
-
# 4. TRAINING PARAMETERS AND HYPERPARAMETERS
|
| 695 |
-
# - Set training epochs, batch sizes, learning rates, model dimensions, etc.
|
| 696 |
-
# =============================================================================
|
| 697 |
-
|
| 698 |
-
EPOCHS = 20 # Increased for better convergence
|
| 699 |
-
# Optimized batch size for A100 GPU (40GB)
|
| 700 |
-
BATCH_SIZE = 16
|
| 701 |
-
VAL_BATCH_SIZE = 16
|
| 702 |
-
WARMUP_EPOCHS = 5
|
| 703 |
-
BASE_LR = 5e-4
|
| 704 |
-
MIN_LR = 1e-8
|
| 705 |
-
# Updated for 128x128 spectrograms
|
| 706 |
-
N_ROWS = 4
|
| 707 |
-
N_COLUMNS = 4
|
| 708 |
-
ELEMENT_LENGTH = N_ROWS * N_COLUMNS # Real-valued spectrograms (no complex interleaving)
|
| 709 |
-
D_MODEL = 128
|
| 710 |
-
MAX_LEN = 1025 # (128/4)^2 + 1 = 1024 + 1 for [CLS] token
|
| 711 |
-
N_LAYERS = 12
|
| 712 |
-
device_idx = 0
|
| 713 |
-
WEIGHT_DECAY = 0.05
|
| 714 |
-
BETA1 = 0.9
|
| 715 |
-
BETA2 = 0.999
|
| 716 |
-
MASK_PERCENT = 0.6
|
| 717 |
-
N_HEADS = 8
|
| 718 |
-
DROPOUT = 0.1
|
| 719 |
-
|
| 720 |
-
# =============================================================================
|
| 721 |
-
# 5. DATA GENERATION LOOP
|
| 722 |
-
# - Iterate over scenarios to generate spectrogram samples and labels
|
| 723 |
-
# =============================================================================
|
| 724 |
-
|
| 725 |
-
scenarios = scenarios_list()
|
| 726 |
-
scenario_properties = scenario_prop()
|
| 727 |
-
|
| 728 |
-
if LOG_PRIMARY:
|
| 729 |
-
print(f"📂 Loading {len(scenarios)} scenarios...")
|
| 730 |
-
|
| 731 |
-
scenario_info_list = []
|
| 732 |
-
missing_props = []
|
| 733 |
-
for scenario in scenarios:
|
| 734 |
-
props = scenario_properties.get(scenario)
|
| 735 |
-
if props is None:
|
| 736 |
-
missing_props.append(scenario)
|
| 737 |
-
continue
|
| 738 |
-
scenario_info_list.append((scenario, props["spectrogram_path"]))
|
| 739 |
-
|
| 740 |
-
if distributed_context["is_distributed"] and len(scenario_info_list) < distributed_context["world_size"]:
|
| 741 |
-
if LOG_PRIMARY:
|
| 742 |
-
print("❌ Distributed configuration requires at least one scenario per process. "
|
| 743 |
-
f"Found {len(scenario_info_list)} scenarios for world size {distributed_context['world_size']}.")
|
| 744 |
-
raise ValueError("Insufficient scenarios for the requested distributed world size.")
|
| 745 |
-
|
| 746 |
-
if missing_props and LOG_PRIMARY:
|
| 747 |
-
print("⚠️ Missing metadata for the following scenarios; skipping:")
|
| 748 |
-
for scen in missing_props:
|
| 749 |
-
print(f" - {scen}")
|
| 750 |
-
|
| 751 |
-
if LOG_PRIMARY:
|
| 752 |
-
print(f"📂 Preparing {len(scenario_info_list)} scenarios with streaming loaders...")
|
| 753 |
-
|
| 754 |
-
if NORMALIZATION_MODE == "dataset":
|
| 755 |
-
if distributed_context["is_distributed"] and not distributed_context["is_primary"]:
|
| 756 |
-
dataset_normalization = None
|
| 757 |
-
train_sample_count = 0
|
| 758 |
-
val_sample_count = 0
|
| 759 |
-
else:
|
| 760 |
-
dataset_normalization, train_sample_count, val_sample_count = summarize_scenarios(
|
| 761 |
-
scenario_info_list,
|
| 762 |
-
NORMALIZATION_MODE,
|
| 763 |
-
)
|
| 764 |
-
if distributed_context["is_distributed"]:
|
| 765 |
-
payload = [dataset_normalization, train_sample_count, val_sample_count]
|
| 766 |
-
dataset_normalization, train_sample_count, val_sample_count = _broadcast_object(payload, src=0)
|
| 767 |
-
else:
|
| 768 |
-
train_samples_per_scenario = int(TRAIN_SPLIT_FRACTION * DEFAULT_SAMPLES_PER_SCENARIO)
|
| 769 |
-
val_samples_per_scenario = max(DEFAULT_SAMPLES_PER_SCENARIO - train_samples_per_scenario, 0)
|
| 770 |
-
dataset_normalization = {'mean': 0.0, 'std': 1.0, 'normalization': NORMALIZATION_MODE}
|
| 771 |
-
train_sample_count = len(scenario_info_list) * train_samples_per_scenario
|
| 772 |
-
val_sample_count = len(scenario_info_list) * val_samples_per_scenario
|
| 773 |
-
if LOG_PRIMARY:
|
| 774 |
-
print(f" Assuming {DEFAULT_SAMPLES_PER_SCENARIO} samples per scenario ({train_samples_per_scenario} train / {val_samples_per_scenario} val)")
|
| 775 |
-
|
| 776 |
-
if LOG_PRIMARY:
|
| 777 |
-
print(f" Training samples: {train_sample_count}")
|
| 778 |
-
print(f" Validation samples: {val_sample_count}")
|
| 779 |
-
if train_sample_count == 0:
|
| 780 |
-
raise ValueError("No training samples available after filtering scenarios.")
|
| 781 |
-
if NORMALIZATION_MODE == "dataset":
|
| 782 |
-
if LOG_PRIMARY:
|
| 783 |
-
print(f"Dataset normalization stats -> mean: {dataset_normalization['mean']:.4f}, std: {dataset_normalization['std']:.4f}")
|
| 784 |
-
else:
|
| 785 |
-
if LOG_PRIMARY:
|
| 786 |
-
print("Dataset normalization stats -> using per-sample normalization")
|
| 787 |
-
|
| 788 |
-
SEED = 42
|
| 789 |
-
torch.manual_seed(SEED)
|
| 790 |
-
np.random.seed(SEED)
|
| 791 |
-
|
| 792 |
-
world_size = max(1, distributed_context["world_size"])
|
| 793 |
-
train_samples_per_rank = math.ceil(train_sample_count / world_size) if distributed_context["is_distributed"] else train_sample_count
|
| 794 |
-
val_samples_per_rank = math.ceil(val_sample_count / world_size) if distributed_context["is_distributed"] else val_sample_count
|
| 795 |
-
|
| 796 |
-
train_dataset = StreamingMaskedSpectrogramDataset(
|
| 797 |
-
scenario_info_list,
|
| 798 |
-
split="train",
|
| 799 |
-
normalization_mode=NORMALIZATION_MODE,
|
| 800 |
-
dataset_stats=dataset_normalization,
|
| 801 |
-
mask_percent=MASK_PERCENT,
|
| 802 |
-
max_len=MAX_LEN,
|
| 803 |
-
seed=SEED,
|
| 804 |
-
shuffle=True,
|
| 805 |
-
rank=distributed_context["rank"],
|
| 806 |
-
world_size=world_size,
|
| 807 |
-
)
|
| 808 |
-
train_dataset.num_samples = train_samples_per_rank
|
| 809 |
-
|
| 810 |
-
val_dataset = StreamingMaskedSpectrogramDataset(
|
| 811 |
-
scenario_info_list,
|
| 812 |
-
split="val",
|
| 813 |
-
normalization_mode=NORMALIZATION_MODE,
|
| 814 |
-
dataset_stats=dataset_normalization,
|
| 815 |
-
mask_percent=MASK_PERCENT,
|
| 816 |
-
max_len=MAX_LEN,
|
| 817 |
-
seed=SEED,
|
| 818 |
-
shuffle=False,
|
| 819 |
-
rank=distributed_context["rank"],
|
| 820 |
-
world_size=world_size,
|
| 821 |
-
)
|
| 822 |
-
val_dataset.num_samples = val_samples_per_rank
|
| 823 |
-
|
| 824 |
-
if LOG_PRIMARY:
|
| 825 |
-
print("🔧 Creating streaming data loaders...")
|
| 826 |
-
train_loaders = {
|
| 827 |
-
'stream': DataLoader(
|
| 828 |
-
train_dataset,
|
| 829 |
-
batch_size=BATCH_SIZE,
|
| 830 |
-
shuffle=False,
|
| 831 |
-
num_workers=0,
|
| 832 |
-
pin_memory=True,
|
| 833 |
-
)
|
| 834 |
-
}
|
| 835 |
-
val_loaders = {
|
| 836 |
-
'stream': DataLoader(
|
| 837 |
-
val_dataset,
|
| 838 |
-
batch_size=VAL_BATCH_SIZE,
|
| 839 |
-
shuffle=False,
|
| 840 |
-
num_workers=0,
|
| 841 |
-
pin_memory=True,
|
| 842 |
-
)
|
| 843 |
-
}
|
| 844 |
-
if LOG_PRIMARY:
|
| 845 |
-
print("✅ Data loaders created successfully!")
|
| 846 |
-
|
| 847 |
-
# =============================================================================
|
| 848 |
-
# 9. MODEL INITIALIZATION
|
| 849 |
-
# - Instantiate the LWM transformer model and optionally load pre-trained weights
|
| 850 |
-
# - Wrap with DataParallel for multi-GPU support
|
| 851 |
-
# =============================================================================
|
| 852 |
-
|
| 853 |
-
# Device selection with HPU, CUDA, and MPS support
|
| 854 |
-
if LOG_PRIMARY:
|
| 855 |
-
print("🔧 Setting up device and accelerator configuration...")
|
| 856 |
-
|
| 857 |
-
requested_device = getattr(RUNTIME_ARGS, "device", "auto") or "auto"
|
| 858 |
-
requested_device = requested_device.lower()
|
| 859 |
-
runtime_device = requested_device
|
| 860 |
-
|
| 861 |
-
if runtime_device == "auto":
|
| 862 |
-
if HPU_AVAILABLE:
|
| 863 |
-
runtime_device = "hpu"
|
| 864 |
-
elif torch.cuda.is_available():
|
| 865 |
-
runtime_device = "cuda"
|
| 866 |
-
elif torch.backends.mps.is_available():
|
| 867 |
-
runtime_device = "mps"
|
| 868 |
-
else:
|
| 869 |
-
runtime_device = "cpu"
|
| 870 |
-
|
| 871 |
-
if runtime_device in {"hpu", "auto"} and not HPU_AVAILABLE:
|
| 872 |
-
if os.environ.get("HABANA_VISIBLE_DEVICES") and LOG_PRIMARY:
|
| 873 |
-
print("⚠️ HABANA_VISIBLE_DEVICES is set but Habana PyTorch extensions are not available.")
|
| 874 |
-
print(" Install the Habana PyTorch distribution or activate the appropriate environment.")
|
| 875 |
-
|
| 876 |
-
device = torch.device("cpu")
|
| 877 |
-
gpu_ids: list[int] = []
|
| 878 |
-
ddp_device_ids: Optional[list[int]] = None
|
| 879 |
-
|
| 880 |
-
if runtime_device == "hpu":
|
| 881 |
-
if not HPU_AVAILABLE:
|
| 882 |
-
raise RuntimeError("HPU device requested but torch.hpu is not available. "
|
| 883 |
-
"Install the Habana PyTorch distribution or select --device cpu.")
|
| 884 |
-
hpu_module = getattr(torch, "hpu", None)
|
| 885 |
-
|
| 886 |
-
# Get local rank first before any HPU operations
|
| 887 |
-
local_rank = distributed_context["local_rank"] if distributed_context["is_distributed"] else 0
|
| 888 |
-
_debug_hpu(f"Entering HPU device setup (local_rank={local_rank}, world_size={distributed_context.get('world_size')})")
|
| 889 |
-
|
| 890 |
-
# Query device count locally (safe after Habana runtime init)
|
| 891 |
-
hpu_count = max(1, _get_hpu_device_count())
|
| 892 |
-
if LOG_PRIMARY or HPU_DEBUG_LOG:
|
| 893 |
-
_debug_hpu(f"Detected {hpu_count} HPU devices via local query")
|
| 894 |
-
|
| 895 |
-
device = torch.device("hpu")
|
| 896 |
-
if hpu_module is not None and hasattr(hpu_module, "set_device"):
|
| 897 |
-
try:
|
| 898 |
-
_debug_hpu(f"Calling torch.hpu.set_device({local_rank})")
|
| 899 |
-
hpu_module.set_device(local_rank)
|
| 900 |
-
_debug_hpu("torch.hpu.set_device completed successfully")
|
| 901 |
-
except Exception as exc:
|
| 902 |
-
_debug_hpu(f"set_device raised exception: {exc}")
|
| 903 |
-
if LOG_PRIMARY:
|
| 904 |
-
print(f" ⚠️ Unable to set HPU device {local_rank}: {exc}")
|
| 905 |
-
ddp_device_ids = [local_rank] if distributed_context["is_distributed"] else None
|
| 906 |
-
if LOG_PRIMARY:
|
| 907 |
-
if hpu_count > 0:
|
| 908 |
-
print(f" HPU available: {hpu_count} device(s) detected")
|
| 909 |
-
if distributed_context["is_distributed"]:
|
| 910 |
-
print(f" Using HPU local rank: {local_rank}")
|
| 911 |
-
elif runtime_device == "cuda":
|
| 912 |
-
if not torch.cuda.is_available():
|
| 913 |
-
raise RuntimeError("CUDA device requested but torch.cuda.is_available() is False.")
|
| 914 |
-
device_count = torch.cuda.device_count()
|
| 915 |
-
if LOG_PRIMARY:
|
| 916 |
-
print(f" CUDA available: {device_count} GPU(s) detected")
|
| 917 |
-
if distributed_context["is_distributed"]:
|
| 918 |
-
local_rank = distributed_context["local_rank"]
|
| 919 |
-
torch.cuda.set_device(local_rank)
|
| 920 |
-
device = torch.device("cuda", local_rank)
|
| 921 |
-
ddp_device_ids = [local_rank]
|
| 922 |
-
if LOG_PRIMARY:
|
| 923 |
-
print(f" Using CUDA local rank: {local_rank}")
|
| 924 |
-
else:
|
| 925 |
-
device = torch.device("cuda:0")
|
| 926 |
-
gpu_ids = list(range(device_count))
|
| 927 |
-
if LOG_PRIMARY:
|
| 928 |
-
print(f" Using CUDA GPUs: {gpu_ids}")
|
| 929 |
-
for i in gpu_ids:
|
| 930 |
-
try:
|
| 931 |
-
mem_total = torch.cuda.get_device_properties(i).total_memory / 1024**3
|
| 932 |
-
mem_allocated = torch.cuda.memory_allocated(i) / 1024**3
|
| 933 |
-
if LOG_PRIMARY:
|
| 934 |
-
print(f" GPU {i}: Total: {mem_total:.1f}GB, Allocated: {mem_allocated:.1f}GB")
|
| 935 |
-
except Exception as exc:
|
| 936 |
-
if LOG_PRIMARY:
|
| 937 |
-
print(f" GPU {i}: Error getting memory info - {exc}")
|
| 938 |
-
elif runtime_device == "mps":
|
| 939 |
-
if not torch.backends.mps.is_available():
|
| 940 |
-
raise RuntimeError("MPS device requested but torch.backends.mps.is_available() is False.")
|
| 941 |
-
device = torch.device("mps")
|
| 942 |
-
if LOG_PRIMARY:
|
| 943 |
-
print(" Using MPS (Apple Silicon GPU)")
|
| 944 |
-
elif runtime_device == "cpu":
|
| 945 |
-
device = torch.device("cpu")
|
| 946 |
-
if LOG_PRIMARY:
|
| 947 |
-
print(" Using CPU")
|
| 948 |
-
else:
|
| 949 |
-
raise ValueError(f"Unsupported device selection: {runtime_device}")
|
| 950 |
-
|
| 951 |
-
distributed_context["device_type"] = device.type
|
| 952 |
-
if LOG_PRIMARY:
|
| 953 |
-
print(f" Final device: {device}")
|
| 954 |
-
if gpu_ids:
|
| 955 |
-
print(f" GPU IDs for DataParallel: {gpu_ids}")
|
| 956 |
-
|
| 957 |
-
if LOG_PRIMARY:
|
| 958 |
-
print("🤖 Initializing LWM model...")
|
| 959 |
-
print(f" Model parameters: element_length={ELEMENT_LENGTH}, d_model={D_MODEL}, n_layers={N_LAYERS}, max_len={MAX_LEN}, n_heads={N_HEADS}")
|
| 960 |
-
|
| 961 |
-
try:
|
| 962 |
-
model = pretrained_model.lwm(
|
| 963 |
-
element_length=ELEMENT_LENGTH, # Real-valued spectrograms
|
| 964 |
-
d_model=D_MODEL,
|
| 965 |
-
n_layers=N_LAYERS,
|
| 966 |
-
max_len=MAX_LEN, # Use pre-calculated value for safety
|
| 967 |
-
n_heads=N_HEADS,
|
| 968 |
-
dropout=DROPOUT
|
| 969 |
-
)
|
| 970 |
-
if LOG_PRIMARY:
|
| 971 |
-
print(" ✅ Model created successfully")
|
| 972 |
-
print(f" Moving model to device: {device}")
|
| 973 |
-
# MPS only supports float32, so set dtype
|
| 974 |
-
if 'mps' in str(device):
|
| 975 |
-
model = model.to(device).float()
|
| 976 |
-
if LOG_PRIMARY:
|
| 977 |
-
print(" ✅ Model moved to MPS device (float32)")
|
| 978 |
-
else:
|
| 979 |
-
model = model.to(device)
|
| 980 |
-
if LOG_PRIMARY:
|
| 981 |
-
print(" ✅ Model moved to device successfully")
|
| 982 |
-
|
| 983 |
-
# Synchronize all processes after moving model to device
|
| 984 |
-
# This prevents memory contention issues in multi-HPU/GPU setups
|
| 985 |
-
if distributed_context["is_distributed"]:
|
| 986 |
-
torch.distributed.barrier()
|
| 987 |
-
if LOG_PRIMARY:
|
| 988 |
-
print(" ✅ All processes synchronized after model transfer")
|
| 989 |
-
|
| 990 |
-
except Exception as e:
|
| 991 |
-
print(f" ❌ Model initialization failed: {e}")
|
| 992 |
-
import traceback
|
| 993 |
-
traceback.print_exc()
|
| 994 |
-
exit(1)
|
| 995 |
-
|
| 996 |
-
# Optional: Load pre-trained model
|
| 997 |
-
load_model = False
|
| 998 |
-
if load_model:
|
| 999 |
-
model.load_state_dict(torch.load("models/model_checkpoint.pth", map_location=device))
|
| 1000 |
-
if LOG_PRIMARY:
|
| 1001 |
-
print("Pre-trained model loaded successfully.")
|
| 1002 |
-
|
| 1003 |
-
# Wrap model for parallel/distributed execution
|
| 1004 |
-
if distributed_context["is_distributed"]:
|
| 1005 |
-
# Additional barrier before DDP wrapping to ensure all processes are ready
|
| 1006 |
-
torch.distributed.barrier()
|
| 1007 |
-
|
| 1008 |
-
ddp_kwargs: Dict[str, Any] = {"broadcast_buffers": False}
|
| 1009 |
-
if ddp_device_ids:
|
| 1010 |
-
ddp_kwargs["device_ids"] = ddp_device_ids
|
| 1011 |
-
ddp_kwargs["output_device"] = ddp_device_ids[0]
|
| 1012 |
-
else:
|
| 1013 |
-
ddp_kwargs["device_ids"] = None
|
| 1014 |
-
model = nn.parallel.DistributedDataParallel(model, **ddp_kwargs)
|
| 1015 |
-
if LOG_PRIMARY:
|
| 1016 |
-
print(f"Model wrapped with DistributedDataParallel on rank {distributed_context['rank']}")
|
| 1017 |
-
elif gpu_ids:
|
| 1018 |
-
model = nn.DataParallel(model, device_ids=gpu_ids)
|
| 1019 |
-
if LOG_PRIMARY:
|
| 1020 |
-
print(f"Model loaded successfully with DataParallel on CUDA devices {gpu_ids}")
|
| 1021 |
-
else:
|
| 1022 |
-
if LOG_PRIMARY:
|
| 1023 |
-
print(f"Model loaded successfully on {device}")
|
| 1024 |
-
n_parameters = count_parameters(model, log=LOG_PRIMARY)
|
| 1025 |
-
if LOG_PRIMARY:
|
| 1026 |
-
print(f"Number of trainable parameters: {n_parameters:,}")
|
| 1027 |
-
|
| 1028 |
-
# =============================================================================
|
| 1029 |
-
# 10. OPTIMIZER AND LEARNING RATE SCHEDULER
|
| 1030 |
-
# - Configure AdamW optimizer and a cosine-with-warmup LR schedule based on total steps
|
| 1031 |
-
# =============================================================================
|
| 1032 |
-
|
| 1033 |
-
steps_per_epoch = max(1, math.ceil(train_samples_per_rank / BATCH_SIZE))
|
| 1034 |
-
TOTAL_STEPS = steps_per_epoch * EPOCHS
|
| 1035 |
-
WARMUP_STEPS = steps_per_epoch * WARMUP_EPOCHS
|
| 1036 |
-
|
| 1037 |
-
optimizer = AdamW(
|
| 1038 |
-
model.parameters(),
|
| 1039 |
-
lr=BASE_LR,
|
| 1040 |
-
betas=(BETA1, BETA2),
|
| 1041 |
-
weight_decay=WEIGHT_DECAY
|
| 1042 |
-
)
|
| 1043 |
-
|
| 1044 |
-
def lr_lambda(current_step):
|
| 1045 |
-
if current_step < WARMUP_STEPS:
|
| 1046 |
-
return current_step / WARMUP_STEPS
|
| 1047 |
-
else:
|
| 1048 |
-
scaled_progress = (current_step - WARMUP_STEPS) / (TOTAL_STEPS - WARMUP_STEPS)
|
| 1049 |
-
cosine_decay = 0.5 * (1 + np.cos(np.pi * scaled_progress))
|
| 1050 |
-
return cosine_decay * (BASE_LR - MIN_LR) / BASE_LR + MIN_LR / BASE_LR
|
| 1051 |
-
|
| 1052 |
-
scheduler = LambdaLR(optimizer, lr_lambda=lr_lambda)
|
| 1053 |
-
|
| 1054 |
-
# =============================================================================
|
| 1055 |
-
# 11. PRE-TRAINING LOOP
|
| 1056 |
-
# - Call the train_lwm utility to run the pre-training epochs, logging metrics and saving models
|
| 1057 |
-
# =============================================================================
|
| 1058 |
-
|
| 1059 |
-
# Create timestamp-based save directory
|
| 1060 |
-
if distributed_context["is_distributed"]:
|
| 1061 |
-
timestamp_source = datetime.now().strftime("%Y%m%d_%H%M%S") if LOG_PRIMARY else None
|
| 1062 |
-
timestamp = _broadcast_object(timestamp_source, src=0)
|
| 1063 |
-
else:
|
| 1064 |
-
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
| 1065 |
-
save_dir = f"models/{timestamp}"
|
| 1066 |
-
if LOG_PRIMARY:
|
| 1067 |
-
print(f"📁 Models and logs will be saved to: {save_dir}")
|
| 1068 |
-
os.makedirs(save_dir, exist_ok=True)
|
| 1069 |
-
|
| 1070 |
-
stats_path = os.path.join(save_dir, "dataset_stats.json")
|
| 1071 |
-
if LOG_PRIMARY:
|
| 1072 |
-
with open(stats_path, 'w') as f:
|
| 1073 |
-
json.dump(dataset_normalization, f, indent=2)
|
| 1074 |
-
print(f"📝 Saved dataset stats to {stats_path}")
|
| 1075 |
-
_barrier(distributed_context)
|
| 1076 |
-
|
| 1077 |
-
comm_selection = sorted(ENABLED_COMM_TYPES) if ENABLED_COMM_TYPES else []
|
| 1078 |
-
if comm_selection:
|
| 1079 |
-
comm_suffix = "_" + "-".join(comm_selection)
|
| 1080 |
-
else:
|
| 1081 |
-
comm_suffix = ""
|
| 1082 |
-
if comm_selection and LOG_PRIMARY:
|
| 1083 |
-
print(f"[INFO] Communication standards for this run: {', '.join(comm_selection)}")
|
| 1084 |
-
|
| 1085 |
-
if __name__ == "__main__":
|
| 1086 |
-
# Patch: Ensure patches is not a dict before converting to tensor
|
| 1087 |
-
def safe_tensor_from_patches(patches, device):
|
| 1088 |
-
if isinstance(patches, dict):
|
| 1089 |
-
key = max(patches.keys())
|
| 1090 |
-
patches = patches[key]
|
| 1091 |
-
return torch.tensor(patches, dtype=torch.float32).to(device)
|
| 1092 |
-
|
| 1093 |
-
# Pass this function to train_lwm if needed, or use inside train_lwm
|
| 1094 |
-
pretrained_model = train_lwm(
|
| 1095 |
-
model,
|
| 1096 |
-
train_loaders,
|
| 1097 |
-
val_loaders,
|
| 1098 |
-
optimizer,
|
| 1099 |
-
scheduler,
|
| 1100 |
-
EPOCHS,
|
| 1101 |
-
device=device,
|
| 1102 |
-
save_dir=save_dir,
|
| 1103 |
-
log_file="training_log.csv",
|
| 1104 |
-
checkpoint_suffix=comm_suffix,
|
| 1105 |
-
distributed_context=distributed_context,
|
| 1106 |
-
# If train_lwm needs to convert patches, use safe_tensor_from_patches
|
| 1107 |
-
)
|
| 1108 |
-
_barrier(distributed_context)
|
| 1109 |
-
if LOG_PRIMARY:
|
| 1110 |
-
print("🏁 Training run complete.")
|
| 1111 |
-
if distributed_context["is_distributed"]:
|
| 1112 |
-
dist.destroy_process_group()
|
| 1113 |
-
SNR_PATTERN = re.compile(r"SNR(-?\d+)dB")
|
| 1114 |
-
DOPPLER_MAP = {"static": 0, "pedestrian": 1, "vehicular": 2}
|
| 1115 |
-
DOPPLER_INV = {v: k for k, v in DOPPLER_MAP.items()}
|
| 1116 |
-
|
| 1117 |
-
|
| 1118 |
-
def _parse_snr_and_doppler(path: str) -> tuple[float, int]:
|
| 1119 |
-
snr_db = 0.0
|
| 1120 |
-
doppler_id = 0
|
| 1121 |
-
|
| 1122 |
-
matches = SNR_PATTERN.findall(path)
|
| 1123 |
-
if matches:
|
| 1124 |
-
try:
|
| 1125 |
-
snr_db = float(matches[-1])
|
| 1126 |
-
except ValueError:
|
| 1127 |
-
snr_db = 0.0
|
| 1128 |
-
|
| 1129 |
-
normalized_path = os.path.normpath(path)
|
| 1130 |
-
parts = normalized_path.split(os.sep)
|
| 1131 |
-
for part in parts:
|
| 1132 |
-
if part in DOPPLER_MAP:
|
| 1133 |
-
doppler_id = DOPPLER_MAP[part]
|
| 1134 |
-
break
|
| 1135 |
-
|
| 1136 |
-
return snr_db, doppler_id
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
# UI/Hub
|
| 2 |
-
gradio==
|
| 3 |
-
huggingface_hub
|
| 4 |
|
| 5 |
# Core
|
| 6 |
torch
|
|
|
|
| 1 |
# UI/Hub
|
| 2 |
+
gradio==6.0.1
|
| 3 |
+
huggingface_hub>=0.33.5,<2.0
|
| 4 |
|
| 5 |
# Core
|
| 6 |
torch
|