Spaces:
beetson
/
Runtime error

beetson committed on
Commit
488646d
·
verified ·
1 Parent(s): 27eb64f
Files changed (1) hide show
  1. app.py +61 -9
app.py CHANGED
@@ -15,6 +15,20 @@
15
 
16
  import os
17
  os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "0"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  os.system("rm -rf /data-nvme/zerogpu-offload/")
19
  os.system("pip install --no-build-isolation chumpy")
20
  os.system("pip uninstall -y basicsr")
@@ -390,15 +404,53 @@ def _build_model(cfg):
390
 
391
  def launch_pretrained():
392
  from huggingface_hub import snapshot_download, hf_hub_download
393
- hf_hub_download(repo_id="3DAIGC/LHM", repo_type='model', filename='assets.tar', local_dir="./")
394
- os.system("tar -xf assets.tar && rm assets.tar")
395
- # hf_hub_download(repo_id="3DAIGC/LHM", repo_type='model', filename='LHM-0.5B.tar', local_dir="./")
396
- # os.system("tar -xf LHM-0.5B.tar && rm LHM-0.5B.tar")
397
- hf_hub_download(repo_id="3DAIGC/LHM", repo_type='model', filename='LHM_prior_model.tar', local_dir="./")
398
- os.system("tar -xf LHM_prior_model.tar && rm LHM_prior_model.tar")
399
- # replace the weight of full body
400
- hf_hub_download(repo_id="3DAIGC/LHM-500M-HF", repo_type='model', filename='config.json', local_dir="./exps/releases/video_human_benchmark/human-lrm-500M/step_060000/")
401
- hf_hub_download(repo_id="3DAIGC/LHM-500M-HF", repo_type='model', filename='model.safetensors', local_dir="./exps/releases/video_human_benchmark/human-lrm-500M/step_060000/")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
402
 
403
  def launch_env_not_compile_with_cuda():
404
  os.system("pip install --no-build-isolation chumpy")
 
15
 
16
  import os
17
  os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "0"
18
+
19
+ # Redirect all model caches to persistent storage if /data is mounted.
20
+ # Without this, the 28GB of model downloads happen on every cold start
21
+ # and we hit the 30-minute HF Spaces healthcheck timeout.
22
+ if os.path.isdir("/data") and os.access("/data", os.W_OK):
23
+ os.makedirs("/data/hf_cache", exist_ok=True)
24
+ os.makedirs("/data/torch_cache", exist_ok=True)
25
+ os.environ["HF_HOME"] = "/data/hf_cache"
26
+ os.environ["HUGGINGFACE_HUB_CACHE"] = "/data/hf_cache"
27
+ os.environ["TORCH_HOME"] = "/data/torch_cache"
28
+ print("[persist] Using /data for HF and torch caches")
29
+ else:
30
+ print("[persist] /data not available, using ephemeral storage")
31
+
32
  os.system("rm -rf /data-nvme/zerogpu-offload/")
33
  os.system("pip install --no-build-isolation chumpy")
34
  os.system("pip uninstall -y basicsr")
 
404
 
405
  def launch_pretrained():
406
  from huggingface_hub import snapshot_download, hf_hub_download
407
+
408
+ persist = "/data" if (os.path.isdir("/data") and os.access("/data", os.W_OK)) else None
409
+
410
+ if persist:
411
+ # Cache extracted models on persistent storage and symlink into app dir.
412
+ # First boot still does the full download; subsequent boots are fast.
413
+ extract_root = f"{persist}/extracted"
414
+ os.makedirs(extract_root, exist_ok=True)
415
+
416
+ # assets.tar -> /data/extracted/assets
417
+ if not os.path.isdir(f"{extract_root}/assets"):
418
+ print("[persist] Downloading assets.tar to /data...")
419
+ hf_hub_download(repo_id="3DAIGC/LHM", repo_type='model', filename='assets.tar', local_dir=persist)
420
+ os.system(f"cd {extract_root} && tar -xf {persist}/assets.tar && rm {persist}/assets.tar")
421
+ else:
422
+ print("[persist] assets/ already cached, skipping download")
423
+ os.system(f"rm -rf ./assets && ln -sfn {extract_root}/assets ./assets")
424
+
425
+ # LHM_prior_model.tar -> /data/extracted/pretrained_models (and possibly other dirs)
426
+ if not os.path.isdir(f"{extract_root}/pretrained_models"):
427
+ print("[persist] Downloading LHM_prior_model.tar to /data...")
428
+ hf_hub_download(repo_id="3DAIGC/LHM", repo_type='model', filename='LHM_prior_model.tar', local_dir=persist)
429
+ os.system(f"cd {extract_root} && tar -xf {persist}/LHM_prior_model.tar && rm {persist}/LHM_prior_model.tar")
430
+ else:
431
+ print("[persist] pretrained_models/ already cached, skipping download")
432
+ os.system(f"rm -rf ./pretrained_models && ln -sfn {extract_root}/pretrained_models ./pretrained_models")
433
+
434
+ # 500M-HF weights -> /data/extracted/exps/.../step_060000
435
+ target_rel = "exps/releases/video_human_benchmark/human-lrm-500M/step_060000"
436
+ target_abs = f"{extract_root}/{target_rel}"
437
+ if not os.path.isfile(f"{target_abs}/model.safetensors"):
438
+ print("[persist] Downloading 500M-HF weights to /data...")
439
+ os.makedirs(target_abs, exist_ok=True)
440
+ hf_hub_download(repo_id="3DAIGC/LHM-500M-HF", repo_type='model', filename='config.json', local_dir=target_abs)
441
+ hf_hub_download(repo_id="3DAIGC/LHM-500M-HF", repo_type='model', filename='model.safetensors', local_dir=target_abs)
442
+ else:
443
+ print("[persist] 500M-HF weights already cached, skipping download")
444
+ os.makedirs("./exps/releases/video_human_benchmark/human-lrm-500M", exist_ok=True)
445
+ os.system(f"rm -rf ./{target_rel} && ln -sfn {target_abs} ./{target_rel}")
446
+ else:
447
+ # Original behavior (ephemeral storage)
448
+ hf_hub_download(repo_id="3DAIGC/LHM", repo_type='model', filename='assets.tar', local_dir="./")
449
+ os.system("tar -xf assets.tar && rm assets.tar")
450
+ hf_hub_download(repo_id="3DAIGC/LHM", repo_type='model', filename='LHM_prior_model.tar', local_dir="./")
451
+ os.system("tar -xf LHM_prior_model.tar && rm LHM_prior_model.tar")
452
+ hf_hub_download(repo_id="3DAIGC/LHM-500M-HF", repo_type='model', filename='config.json', local_dir="./exps/releases/video_human_benchmark/human-lrm-500M/step_060000/")
453
+ hf_hub_download(repo_id="3DAIGC/LHM-500M-HF", repo_type='model', filename='model.safetensors', local_dir="./exps/releases/video_human_benchmark/human-lrm-500M/step_060000/")
454
 
455
  def launch_env_not_compile_with_cuda():
456
  os.system("pip install --no-build-isolation chumpy")