stevekor committed on
Commit
818195e
·
1 Parent(s): 513757b

Make demo data source configurable (repo/revision)

Browse files
Files changed (1) hide show
  1. app.py +13 -4
app.py CHANGED
@@ -22,7 +22,13 @@ from sklearn.preprocessing import StandardScaler
22
  APP_DIR = Path(__file__).resolve().parent
23
  DEMO_DATA_PATH = APP_DIR / "demo_data.pt"
24
  MOE_DATA_PATH = APP_DIR / "demo_data_moe.pt"
25
- HUB_REPO_ID = "wi-lab/lwm-spectro"
 
 
 
 
 
 
26
 
27
 
28
  def _get_hf_token() -> str | None:
@@ -207,6 +213,7 @@ def _ensure_local_file(local_path: Path, hub_filename: str) -> Optional[Path]:
207
  filename=hub_filename,
208
  token=token,
209
  repo_type=repo_type,
 
210
  )
211
  cached_path = Path(cached)
212
  print(f"[INFO] Using cached Hub file for {hub_filename}: {cached_path} (repo_type={repo_type})")
@@ -221,6 +228,7 @@ def _ensure_local_file(local_path: Path, hub_filename: str) -> Optional[Path]:
221
  filename=hub_filename,
222
  token=token,
223
  repo_type="space",
 
224
  )
225
  cached_path = Path(cached)
226
  print(f"[INFO] Using cached Space file for {hub_filename}: {cached_path}")
@@ -236,8 +244,8 @@ USING_SYNTHETIC_DATA = False
236
 
237
 
238
  def load_augmented_samples() -> Tuple[List[Dict[str, object]], bool]:
239
- moe_path = _ensure_local_file(MOE_DATA_PATH, "demo_data_moe.pt")
240
- base_path = _ensure_local_file(DEMO_DATA_PATH, "demo_data.pt")
241
 
242
  if moe_path and moe_path.exists() and not _is_git_lfs_pointer(moe_path):
243
  print(f"[INFO] Loading MoE-augmented dataset from {moe_path}")
@@ -903,7 +911,8 @@ DATASET_STATUS = (
903
  f"Dataset loaded: {len(df)} samples | "
904
  f"MoE embeddings: {'yes' if has_moe_embeddings else 'no'} | "
905
  f"HF token detected: {'yes' if HF_TOKEN else 'no'} | "
906
- f"Synthetic fallback: {'yes' if USING_SYNTHETIC_DATA else 'no'}"
 
907
  )
908
 
909
  has_moe_column = df["moe_embedding"].apply(lambda x: x is not None)
 
22
  APP_DIR = Path(__file__).resolve().parent
23
  DEMO_DATA_PATH = APP_DIR / "demo_data.pt"
24
  MOE_DATA_PATH = APP_DIR / "demo_data_moe.pt"
25
+
26
+ # Where to download the demo tensors from.
27
+ # Configure in Space settings if the default repo is private or you need to pin an older revision.
28
+ HUB_REPO_ID = os.getenv("LWM_SPECTRO_DEMO_REPO_ID", "wi-lab/lwm-spectro")
29
+ HUB_REVISION = os.getenv("LWM_SPECTRO_DEMO_REVISION") # optional git sha / tag / branch
30
+ HUB_DEMO_DATA_FILENAME = os.getenv("LWM_SPECTRO_DEMO_DATA_FILENAME", "demo_data.pt")
31
+ HUB_MOE_DATA_FILENAME = os.getenv("LWM_SPECTRO_MOE_DATA_FILENAME", "demo_data_moe.pt")
32
 
33
 
34
  def _get_hf_token() -> str | None:
 
213
  filename=hub_filename,
214
  token=token,
215
  repo_type=repo_type,
216
+ revision=HUB_REVISION,
217
  )
218
  cached_path = Path(cached)
219
  print(f"[INFO] Using cached Hub file for {hub_filename}: {cached_path} (repo_type={repo_type})")
 
228
  filename=hub_filename,
229
  token=token,
230
  repo_type="space",
231
+ revision=None,
232
  )
233
  cached_path = Path(cached)
234
  print(f"[INFO] Using cached Space file for {hub_filename}: {cached_path}")
 
244
 
245
 
246
  def load_augmented_samples() -> Tuple[List[Dict[str, object]], bool]:
247
+ moe_path = _ensure_local_file(MOE_DATA_PATH, HUB_MOE_DATA_FILENAME)
248
+ base_path = _ensure_local_file(DEMO_DATA_PATH, HUB_DEMO_DATA_FILENAME)
249
 
250
  if moe_path and moe_path.exists() and not _is_git_lfs_pointer(moe_path):
251
  print(f"[INFO] Loading MoE-augmented dataset from {moe_path}")
 
911
  f"Dataset loaded: {len(df)} samples | "
912
  f"MoE embeddings: {'yes' if has_moe_embeddings else 'no'} | "
913
  f"HF token detected: {'yes' if HF_TOKEN else 'no'} | "
914
+ f"Synthetic fallback: {'yes' if USING_SYNTHETIC_DATA else 'no'} | "
915
+ f"Demo repo: {HUB_REPO_ID}@{HUB_REVISION or 'main'}"
916
  )
917
 
918
  has_moe_column = df["moe_embedding"].apply(lambda x: x is not None)