Spaces:
Running
Running
Fix demo data download auth + avoid synthetic shadowing
Browse files
app.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
import os
|
| 2 |
import shutil
|
|
|
|
| 3 |
from pathlib import Path
|
| 4 |
from typing import Dict, List, Tuple, Optional
|
| 5 |
|
|
@@ -26,12 +27,25 @@ HUB_REPO_ID = "wi-lab/lwm-spectro"
|
|
| 26 |
|
| 27 |
def _get_hf_token() -> str | None:
|
| 28 |
# Spaces / HF Hub tooling uses a few common names.
|
| 29 |
-
|
| 30 |
os.getenv("HF_TOKEN")
|
| 31 |
or os.getenv("HF_HUB_TOKEN")
|
| 32 |
or os.getenv("HUGGINGFACEHUB_API_TOKEN")
|
| 33 |
or os.getenv("HF_API_TOKEN")
|
|
|
|
|
|
|
| 34 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
|
| 36 |
|
| 37 |
HF_TOKEN = _get_hf_token()
|
|
@@ -141,38 +155,8 @@ def _normalize_sample(sample: Dict[str, object]) -> Dict[str, object]:
|
|
| 141 |
|
| 142 |
|
| 143 |
def _create_dummy_dataset(base_path: Path, moe_path: Path) -> None:
|
| 144 |
-
"""
|
| 145 |
-
|
| 146 |
-
rng = np.random.default_rng(42)
|
| 147 |
-
samples: List[Dict[str, object]] = []
|
| 148 |
-
techs = ["LTE", "WiFi", "5G"]
|
| 149 |
-
snrs = ["SNR0dB", "SNR10dB", "SNR20dB"]
|
| 150 |
-
mods = ["QPSK", "16QAM", "64QAM"]
|
| 151 |
-
mobs = ["pedestrian", "vehicular"]
|
| 152 |
-
|
| 153 |
-
for i in range(30):
|
| 154 |
-
tech = techs[i % len(techs)]
|
| 155 |
-
snr = snrs[i % len(snrs)]
|
| 156 |
-
mob = mobs[i % len(mobs)]
|
| 157 |
-
mod = mods[i % len(mods)]
|
| 158 |
-
spectrogram = rng.normal(size=(128, 128)).astype(np.float32)
|
| 159 |
-
embedding = rng.normal(size=(128,)).astype(np.float32)
|
| 160 |
-
moe_embedding = rng.normal(size=(128,)).astype(np.float32)
|
| 161 |
-
samples.append(
|
| 162 |
-
{
|
| 163 |
-
"tech": tech,
|
| 164 |
-
"snr": snr,
|
| 165 |
-
"mod": mod,
|
| 166 |
-
"mob": mob,
|
| 167 |
-
"data": spectrogram,
|
| 168 |
-
"embedding": embedding,
|
| 169 |
-
"moe_embedding": moe_embedding,
|
| 170 |
-
}
|
| 171 |
-
)
|
| 172 |
-
|
| 173 |
-
torch.save(samples, base_path)
|
| 174 |
-
torch.save(samples, moe_path)
|
| 175 |
-
print(f"[INFO] Synthetic dataset written to {base_path} and {moe_path}")
|
| 176 |
|
| 177 |
|
| 178 |
def _create_dummy_samples() -> List[Dict[str, object]]:
|
|
@@ -210,20 +194,41 @@ def _ensure_local_file(local_path: Path, hub_filename: str) -> Optional[Path]:
|
|
| 210 |
"""Ensure a file exists locally; try Hub download if missing."""
|
| 211 |
if local_path.exists() and not _is_git_lfs_pointer(local_path):
|
| 212 |
return local_path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 213 |
try:
|
| 214 |
cached = hf_hub_download(
|
| 215 |
-
repo_id=
|
| 216 |
filename=hub_filename,
|
| 217 |
-
token=
|
| 218 |
-
repo_type="
|
| 219 |
)
|
| 220 |
cached_path = Path(cached)
|
| 221 |
-
|
| 222 |
-
# failures if the repo directory is not writable.
|
| 223 |
-
print(f"[INFO] Using cached Hub file for {hub_filename}: {cached_path}")
|
| 224 |
return cached_path
|
| 225 |
except Exception as exc:
|
| 226 |
-
print(
|
|
|
|
|
|
|
| 227 |
return None
|
| 228 |
|
| 229 |
|
|
@@ -239,14 +244,13 @@ def load_augmented_samples() -> Tuple[List[Dict[str, object]], bool]:
|
|
| 239 |
print(f"[WARN] MoE data missing; falling back to base data: {base_path}")
|
| 240 |
return _safe_load_tensor(base_path), False
|
| 241 |
|
| 242 |
-
# Last resort:
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
return _create_dummy_samples(), False
|
| 250 |
|
| 251 |
|
| 252 |
def load_data(mapping: Dict[str, object]):
|
|
@@ -890,6 +894,8 @@ mapping_info = load_joint_mapping()
|
|
| 890 |
df, has_moe_embeddings = load_data(mapping_info)
|
| 891 |
CLASS_LABELS = mapping_info["label_names"]
|
| 892 |
|
|
|
|
|
|
|
| 893 |
has_moe_column = df["moe_embedding"].apply(lambda x: x is not None)
|
| 894 |
joint_eval_df = df[has_moe_column & df["joint_label_id"].notna()]
|
| 895 |
|
|
@@ -922,6 +928,7 @@ def update_modulation_choices(selected_tech: Optional[str]):
|
|
| 922 |
|
| 923 |
with gr.Blocks(title="LWM-Spectro Lab") as demo:
|
| 924 |
gr.Markdown("# 🔬 LWM-Spectro Interactive Demo")
|
|
|
|
| 925 |
gr.Markdown(
|
| 926 |
"""
|
| 927 |
**Having trouble seeing plots/images?**
|
|
|
|
| 1 |
import os
|
| 2 |
import shutil
|
| 3 |
+
import netrc
|
| 4 |
from pathlib import Path
|
| 5 |
from typing import Dict, List, Tuple, Optional
|
| 6 |
|
|
|
|
| 27 |
|
| 28 |
def _get_hf_token() -> str | None:
|
| 29 |
# Spaces / HF Hub tooling uses a few common names.
|
| 30 |
+
token = (
|
| 31 |
os.getenv("HF_TOKEN")
|
| 32 |
or os.getenv("HF_HUB_TOKEN")
|
| 33 |
or os.getenv("HUGGINGFACEHUB_API_TOKEN")
|
| 34 |
or os.getenv("HF_API_TOKEN")
|
| 35 |
+
or os.getenv("HUGGINGFACE_TOKEN")
|
| 36 |
+
or os.getenv("HUGGINGFACE_ACCESS_TOKEN")
|
| 37 |
)
|
| 38 |
+
if token:
|
| 39 |
+
return token
|
| 40 |
+
|
| 41 |
+
# If a token exists in ~/.netrc (common in some environments), use it.
|
| 42 |
+
try:
|
| 43 |
+
auth = netrc.netrc().authenticators("huggingface.co")
|
| 44 |
+
if auth and auth[2]:
|
| 45 |
+
return auth[2]
|
| 46 |
+
except Exception:
|
| 47 |
+
return None
|
| 48 |
+
return None
|
| 49 |
|
| 50 |
|
| 51 |
HF_TOKEN = _get_hf_token()
|
|
|
|
| 155 |
|
| 156 |
|
| 157 |
def _create_dummy_dataset(base_path: Path, moe_path: Path) -> None:
|
| 158 |
+
"""Deprecated: kept for backward compatibility, but avoided in production."""
|
| 159 |
+
raise RuntimeError("Synthetic on-disk dataset generation disabled")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
|
| 161 |
|
| 162 |
def _create_dummy_samples() -> List[Dict[str, object]]:
|
|
|
|
| 194 |
"""Ensure a file exists locally; try Hub download if missing."""
|
| 195 |
if local_path.exists() and not _is_git_lfs_pointer(local_path):
|
| 196 |
return local_path
|
| 197 |
+
|
| 198 |
+
# Prefer a stored token if present (Spaces sometimes have credentials available
|
| 199 |
+
# even when HF_TOKEN env var is not explicitly set).
|
| 200 |
+
token = HF_TOKEN or True
|
| 201 |
+
|
| 202 |
+
# Try multiple repo types because the artifact may live under a model or dataset repo.
|
| 203 |
+
for repo_type in ("model", "dataset"):
|
| 204 |
+
try:
|
| 205 |
+
cached = hf_hub_download(
|
| 206 |
+
repo_id=HUB_REPO_ID,
|
| 207 |
+
filename=hub_filename,
|
| 208 |
+
token=token,
|
| 209 |
+
repo_type=repo_type,
|
| 210 |
+
)
|
| 211 |
+
cached_path = Path(cached)
|
| 212 |
+
print(f"[INFO] Using cached Hub file for {hub_filename}: {cached_path} (repo_type={repo_type})")
|
| 213 |
+
return cached_path
|
| 214 |
+
except Exception as exc:
|
| 215 |
+
last_exc = exc
|
| 216 |
+
|
| 217 |
+
# Final fallback: try downloading from the Space repo itself (useful when artifacts are stored in Space).
|
| 218 |
try:
|
| 219 |
cached = hf_hub_download(
|
| 220 |
+
repo_id="wi-lab/LWM-Spectro",
|
| 221 |
filename=hub_filename,
|
| 222 |
+
token=token,
|
| 223 |
+
repo_type="space",
|
| 224 |
)
|
| 225 |
cached_path = Path(cached)
|
| 226 |
+
print(f"[INFO] Using cached Space file for {hub_filename}: {cached_path}")
|
|
|
|
|
|
|
| 227 |
return cached_path
|
| 228 |
except Exception as exc:
|
| 229 |
+
print(
|
| 230 |
+
f"[WARN] Could not download {hub_filename} from Hub ({last_exc}) or Space repo ({exc}); continuing without it."
|
| 231 |
+
)
|
| 232 |
return None
|
| 233 |
|
| 234 |
|
|
|
|
| 244 |
print(f"[WARN] MoE data missing; falling back to base data: {base_path}")
|
| 245 |
return _safe_load_tensor(base_path), False
|
| 246 |
|
| 247 |
+
# Last resort: in-memory synthetic data (keeps app alive, but clearly not the full demo dataset).
|
| 248 |
+
print(
|
| 249 |
+
"[WARN] Falling back to a tiny synthetic dataset (30 samples). "
|
| 250 |
+
"This usually means the real demo_data*.pt could not be downloaded. "
|
| 251 |
+
"If the Hub repo is private, add a Space secret named HF_TOKEN with read access."
|
| 252 |
+
)
|
| 253 |
+
return _create_dummy_samples(), False
|
|
|
|
| 254 |
|
| 255 |
|
| 256 |
def load_data(mapping: Dict[str, object]):
|
|
|
|
| 894 |
df, has_moe_embeddings = load_data(mapping_info)
|
| 895 |
CLASS_LABELS = mapping_info["label_names"]
|
| 896 |
|
| 897 |
+
DATASET_STATUS = f"Dataset loaded: {len(df)} samples | MoE embeddings: {'yes' if has_moe_embeddings else 'no'}"
|
| 898 |
+
|
| 899 |
has_moe_column = df["moe_embedding"].apply(lambda x: x is not None)
|
| 900 |
joint_eval_df = df[has_moe_column & df["joint_label_id"].notna()]
|
| 901 |
|
|
|
|
| 928 |
|
| 929 |
with gr.Blocks(title="LWM-Spectro Lab") as demo:
|
| 930 |
gr.Markdown("# 🔬 LWM-Spectro Interactive Demo")
|
| 931 |
+
gr.Markdown(f"**{DATASET_STATUS}**")
|
| 932 |
gr.Markdown(
|
| 933 |
"""
|
| 934 |
**Having trouble seeing plots/images?**
|