Spaces:
Running
Running
Namhyun Kim
committed on
Commit
·
513757b
1
Parent(s):
97d5381
Show dataset/token status; mark synthetic fallback
Browse files
app.py
CHANGED
|
@@ -232,6 +232,9 @@ def _ensure_local_file(local_path: Path, hub_filename: str) -> Optional[Path]:
|
|
| 232 |
return None
|
| 233 |
|
| 234 |
|
|
|
|
|
|
|
|
|
|
| 235 |
def load_augmented_samples() -> Tuple[List[Dict[str, object]], bool]:
|
| 236 |
moe_path = _ensure_local_file(MOE_DATA_PATH, "demo_data_moe.pt")
|
| 237 |
base_path = _ensure_local_file(DEMO_DATA_PATH, "demo_data.pt")
|
|
@@ -245,6 +248,8 @@ def load_augmented_samples() -> Tuple[List[Dict[str, object]], bool]:
|
|
| 245 |
return _safe_load_tensor(base_path), False
|
| 246 |
|
| 247 |
# Last resort: in-memory synthetic data (keeps app alive, but clearly not the full demo dataset).
|
|
|
|
|
|
|
| 248 |
print(
|
| 249 |
"[WARN] Falling back to a tiny synthetic dataset (30 samples). "
|
| 250 |
"This usually means the real demo_data*.pt could not be downloaded. "
|
|
@@ -894,7 +899,12 @@ mapping_info = load_joint_mapping()
|
|
| 894 |
df, has_moe_embeddings = load_data(mapping_info)
|
| 895 |
CLASS_LABELS = mapping_info["label_names"]
|
| 896 |
|
| 897 |
-
DATASET_STATUS =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 898 |
|
| 899 |
has_moe_column = df["moe_embedding"].apply(lambda x: x is not None)
|
| 900 |
joint_eval_df = df[has_moe_column & df["joint_label_id"].notna()]
|
|
|
|
| 232 |
return None
|
| 233 |
|
| 234 |
|
| 235 |
+
USING_SYNTHETIC_DATA = False
|
| 236 |
+
|
| 237 |
+
|
| 238 |
def load_augmented_samples() -> Tuple[List[Dict[str, object]], bool]:
|
| 239 |
moe_path = _ensure_local_file(MOE_DATA_PATH, "demo_data_moe.pt")
|
| 240 |
base_path = _ensure_local_file(DEMO_DATA_PATH, "demo_data.pt")
|
|
|
|
| 248 |
return _safe_load_tensor(base_path), False
|
| 249 |
|
| 250 |
# Last resort: in-memory synthetic data (keeps app alive, but clearly not the full demo dataset).
|
| 251 |
+
global USING_SYNTHETIC_DATA
|
| 252 |
+
USING_SYNTHETIC_DATA = True
|
| 253 |
print(
|
| 254 |
"[WARN] Falling back to a tiny synthetic dataset (30 samples). "
|
| 255 |
"This usually means the real demo_data*.pt could not be downloaded. "
|
|
|
|
| 899 |
df, has_moe_embeddings = load_data(mapping_info)
|
| 900 |
CLASS_LABELS = mapping_info["label_names"]
|
| 901 |
|
| 902 |
+
DATASET_STATUS = (
|
| 903 |
+
f"Dataset loaded: {len(df)} samples | "
|
| 904 |
+
f"MoE embeddings: {'yes' if has_moe_embeddings else 'no'} | "
|
| 905 |
+
f"HF token detected: {'yes' if HF_TOKEN else 'no'} | "
|
| 906 |
+
f"Synthetic fallback: {'yes' if USING_SYNTHETIC_DATA else 'no'}"
|
| 907 |
+
)
|
| 908 |
|
| 909 |
has_moe_column = df["moe_embedding"].apply(lambda x: x is not None)
|
| 910 |
joint_eval_df = df[has_moe_column & df["joint_label_id"].notna()]
|