Spaces:
Running
Running
Make demo data source configurable (repo/revision)
Browse files
app.py
CHANGED
|
@@ -22,7 +22,13 @@ from sklearn.preprocessing import StandardScaler
|
|
| 22 |
APP_DIR = Path(__file__).resolve().parent
|
| 23 |
DEMO_DATA_PATH = APP_DIR / "demo_data.pt"
|
| 24 |
MOE_DATA_PATH = APP_DIR / "demo_data_moe.pt"
|
| 25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
|
| 27 |
|
| 28 |
def _get_hf_token() -> str | None:
|
|
@@ -207,6 +213,7 @@ def _ensure_local_file(local_path: Path, hub_filename: str) -> Optional[Path]:
|
|
| 207 |
filename=hub_filename,
|
| 208 |
token=token,
|
| 209 |
repo_type=repo_type,
|
|
|
|
| 210 |
)
|
| 211 |
cached_path = Path(cached)
|
| 212 |
print(f"[INFO] Using cached Hub file for {hub_filename}: {cached_path} (repo_type={repo_type})")
|
|
@@ -221,6 +228,7 @@ def _ensure_local_file(local_path: Path, hub_filename: str) -> Optional[Path]:
|
|
| 221 |
filename=hub_filename,
|
| 222 |
token=token,
|
| 223 |
repo_type="space",
|
|
|
|
| 224 |
)
|
| 225 |
cached_path = Path(cached)
|
| 226 |
print(f"[INFO] Using cached Space file for {hub_filename}: {cached_path}")
|
|
@@ -236,8 +244,8 @@ USING_SYNTHETIC_DATA = False
|
|
| 236 |
|
| 237 |
|
| 238 |
def load_augmented_samples() -> Tuple[List[Dict[str, object]], bool]:
|
| 239 |
-
moe_path = _ensure_local_file(MOE_DATA_PATH,
|
| 240 |
-
base_path = _ensure_local_file(DEMO_DATA_PATH,
|
| 241 |
|
| 242 |
if moe_path and moe_path.exists() and not _is_git_lfs_pointer(moe_path):
|
| 243 |
print(f"[INFO] Loading MoE-augmented dataset from {moe_path}")
|
|
@@ -903,7 +911,8 @@ DATASET_STATUS = (
|
|
| 903 |
f"Dataset loaded: {len(df)} samples | "
|
| 904 |
f"MoE embeddings: {'yes' if has_moe_embeddings else 'no'} | "
|
| 905 |
f"HF token detected: {'yes' if HF_TOKEN else 'no'} | "
|
| 906 |
-
f"Synthetic fallback: {'yes' if USING_SYNTHETIC_DATA else 'no'}"
|
|
|
|
| 907 |
)
|
| 908 |
|
| 909 |
has_moe_column = df["moe_embedding"].apply(lambda x: x is not None)
|
|
|
|
| 22 |
APP_DIR = Path(__file__).resolve().parent
|
| 23 |
DEMO_DATA_PATH = APP_DIR / "demo_data.pt"
|
| 24 |
MOE_DATA_PATH = APP_DIR / "demo_data_moe.pt"
|
| 25 |
+
|
| 26 |
+
# Where to download the demo tensors from.
|
| 27 |
+
# Override via the LWM_SPECTRO_* environment variables (e.g. in the Space settings) if the default repo is private or you need to pin an older revision.
|
| 28 |
+
# Hub repository that hosts the demo tensors. An unset, empty, or
# whitespace-only LWM_SPECTRO_DEMO_REPO_ID falls back to the default repo
# rather than producing an empty repo id.
HUB_REPO_ID = os.getenv("LWM_SPECTRO_DEMO_REPO_ID", "").strip() or "wi-lab/lwm-spectro"
|
| 29 |
+
# Optional git sha / tag / branch to pin. An unset, empty, or whitespace-only
# LWM_SPECTRO_DEMO_REVISION is normalised to None so that "no revision pinned"
# has a single representation wherever this value is used.
HUB_REVISION = os.getenv("LWM_SPECTRO_DEMO_REVISION", "").strip() or None
|
| 30 |
+
# Name of the base demo tensor file inside the Hub repo. An unset, empty, or
# whitespace-only LWM_SPECTRO_DEMO_DATA_FILENAME falls back to the default
# rather than producing an empty filename.
HUB_DEMO_DATA_FILENAME = os.getenv("LWM_SPECTRO_DEMO_DATA_FILENAME", "").strip() or "demo_data.pt"
|
| 31 |
+
# Name of the MoE-augmented demo tensor file inside the Hub repo. An unset,
# empty, or whitespace-only LWM_SPECTRO_MOE_DATA_FILENAME falls back to the
# default rather than producing an empty filename.
HUB_MOE_DATA_FILENAME = os.getenv("LWM_SPECTRO_MOE_DATA_FILENAME", "").strip() or "demo_data_moe.pt"
|
| 32 |
|
| 33 |
|
| 34 |
def _get_hf_token() -> str | None:
|
|
|
|
| 213 |
filename=hub_filename,
|
| 214 |
token=token,
|
| 215 |
repo_type=repo_type,
|
| 216 |
+
revision=HUB_REVISION,
|
| 217 |
)
|
| 218 |
cached_path = Path(cached)
|
| 219 |
print(f"[INFO] Using cached Hub file for {hub_filename}: {cached_path} (repo_type={repo_type})")
|
|
|
|
| 228 |
filename=hub_filename,
|
| 229 |
token=token,
|
| 230 |
repo_type="space",
|
| 231 |
+
revision=None,
|
| 232 |
)
|
| 233 |
cached_path = Path(cached)
|
| 234 |
print(f"[INFO] Using cached Space file for {hub_filename}: {cached_path}")
|
|
|
|
| 244 |
|
| 245 |
|
| 246 |
def load_augmented_samples() -> Tuple[List[Dict[str, object]], bool]:
|
| 247 |
+
moe_path = _ensure_local_file(MOE_DATA_PATH, HUB_MOE_DATA_FILENAME)
|
| 248 |
+
base_path = _ensure_local_file(DEMO_DATA_PATH, HUB_DEMO_DATA_FILENAME)
|
| 249 |
|
| 250 |
if moe_path and moe_path.exists() and not _is_git_lfs_pointer(moe_path):
|
| 251 |
print(f"[INFO] Loading MoE-augmented dataset from {moe_path}")
|
|
|
|
| 911 |
f"Dataset loaded: {len(df)} samples | "
|
| 912 |
f"MoE embeddings: {'yes' if has_moe_embeddings else 'no'} | "
|
| 913 |
f"HF token detected: {'yes' if HF_TOKEN else 'no'} | "
|
| 914 |
+
f"Synthetic fallback: {'yes' if USING_SYNTHETIC_DATA else 'no'} | "
|
| 915 |
+
f"Demo repo: {HUB_REPO_ID}@{HUB_REVISION or 'main'}"
|
| 916 |
)
|
| 917 |
|
| 918 |
has_moe_column = df["moe_embedding"].apply(lambda x: x is not None)
|