Spaces:

Bils
/

ShortiFoley

Running on Zero

App Files Files Community

Bils committed on Aug 30, 2025

Commit

2d16d53

verified ·

1 Parent(s): f8f20d3

Update app.py

Browse files

Files changed (1) hide show

app.py +43 -37

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import os, sys, json, tempfile, subprocess, shutil, uuid, glob
 from pathlib import Path
-from typing import Optional, Tuple, List
 import gradio as gr
 import spaces
@@ -20,13 +20,13 @@ ASSETS.mkdir(exist_ok=True)
 APP_TITLE   = os.environ.get("APP_TITLE", "Foley Studio · ZeroGPU")
 APP_TAGLINE = os.environ.get("APP_TAGLINE", "Generate scene-true foley for short clips (ZeroGPU-ready).")
-PRIMARY_COLOR = os.environ.get("PRIMARY_COLOR", "#6B5BFF")  # UI accent only
 # ZeroGPU-safe defaults (tweak in Space Secrets if needed)
-MAX_SECS = int(os.environ.get("MAX_SECS", "15"))        # keep clips short for ZeroGPU window
-TARGET_H = int(os.environ.get("TARGET_H", "480"))       # downscale target height
-SR       = int(os.environ.get("TARGET_SR", "48000"))    # WAV sample rate
-ZEROGPU_DURATION = int(os.environ.get("ZEROGPU_DURATION", "110"))  # must be <= platform limit
 def sh(cmd: str):
     print(">>", cmd)
@@ -44,13 +44,11 @@ def ffprobe_duration(path: str) -> float:
 def _clone_without_lfs():
     """
-    Clone repo while skipping LFS smudge to avoid demo video downloads.
     Falls back to sparse checkout with only essential paths.
     """
     if REPO_DIR.exists():
         return
-    # Attempt 1: shallow clone with LFS disabled
     try:
         sh(
             "GIT_LFS_SKIP_SMUDGE=1 "
@@ -64,7 +62,6 @@ def _clone_without_lfs():
     except subprocess.CalledProcessError as e:
         print("Shallow clone with LFS skipped failed, trying sparse checkout…", e)
-    # Attempt 2: sparse checkout minimal files
     REPO_DIR.mkdir(parents=True, exist_ok=True)
     sh(f"git -C {REPO_DIR} init")
     sh(
@@ -82,7 +79,6 @@ def _clone_without_lfs():
         "LICENSE",
         "README.md",
     ]) + "\n")
-    # Try main, fallback to master
     try:
         sh(f"git -C {REPO_DIR} fetch --depth 1 origin main")
         sh(f"git -C {REPO_DIR} checkout main")
@@ -94,7 +90,6 @@ def prepare_once():
     """Clone code (skip LFS), download weights, set env, prepare dirs."""
     _clone_without_lfs()
-    # Ensure we can import their package later
     if str(REPO_DIR) not in sys.path:
         sys.path.insert(0, str(REPO_DIR))
@@ -113,21 +108,33 @@ def prepare_once():
 prepare_once()
-# Prefer safetensors globally (avoids bad *.bin)
 os.environ["TRANSFORMERS_PREFER_SAFETENSORS"] = "1"
-def _purge_clap_pt_bins():
     """
-    Remove any cached pytorch_model.bin for laion/larger_clap_general so
-    Transformers loads the safetensors instead.
     """
     cache_root = Path.home() / ".cache" / "huggingface" / "hub"
-    patterns = [
-        cache_root / "models--laion--larger_clap_general" / "snapshots" / "*" / "pytorch_model.bin",
-        cache_root / "models--laion--larger_clap_general" / "snapshots" / "*" / "model.bin",
         cache_root / "models--laion--larger_clap_general" / "snapshots" / "*" / "*.bin",
-    ]
-    for pat in patterns:
         for f in glob.glob(str(pat)):
             try:
                 Path(f).unlink()
@@ -135,14 +142,13 @@ def _purge_clap_pt_bins():
             except Exception:
                 pass
-# ---- Friendly dependency checks (clear errors during boot) -------------------
 try:
-    import audiotools  # provided by the PyPI package 'descript-audiotools'
 except Exception as e:
     raise RuntimeError(
-        "Missing module 'audiotools'. Install it via the PyPI package "
-        "'descript-audiotools' (e.g., add 'descript-audiotools>=0.7.2' "
-        "to requirements.txt) and restart the Space."
     ) from e
 try:
@@ -151,11 +157,11 @@ try:
     import easydict   # noqa: F401
 except Exception as e:
     raise RuntimeError(
-        "Missing config deps. Please add to requirements.txt: "
-        "'omegaconf>=2.3.0', 'pyyaml', and 'easydict'."
     ) from e
-# Now safe to import Tencent internals
 from hunyuanvideo_foley.utils.model_utils import load_model, denoise_process
 from hunyuanvideo_foley.utils.feature_utils import feature_process
 from hunyuanvideo_foley.utils.media_utils import merge_audio_video
@@ -190,7 +196,6 @@ def auto_load_models() -> str:
     if not os.path.exists(MODEL_PATH):
         os.makedirs(MODEL_PATH, exist_ok=True)
     if not os.path.exists(CONFIG_PATH):
         return f"❌ Config file not found: {CONFIG_PATH}"
@@ -199,7 +204,8 @@ def auto_load_models() -> str:
     logger.info(f"MODEL_PATH:  {MODEL_PATH}")
     logger.info(f"CONFIG_PATH: {CONFIG_PATH}")
-    # Make sure any corrupted *.bin in CLAP cache is removed
     _purge_clap_pt_bins()
     _model_dict, _cfg = load_model(MODEL_PATH, CONFIG_PATH, _device)
@@ -214,9 +220,9 @@ logger.info(auto_load_models())
 # ========= Preprocessing =========
 def preprocess_video(in_path: str) -> Tuple[str, float]:
     """
-    - Validate/trim to <= MAX_SECS.
-    - Downscale to TARGET_H (keep AR), strip original audio.
-    - Return processed mp4 path and final duration.
     """
     dur = ffprobe_duration(in_path)
     if dur == 0:
@@ -227,7 +233,7 @@ def preprocess_video(in_path: str) -> Tuple[str, float]:
     processed = temp_dir / "proc.mp4"
     trim_args = ["-t", str(MAX_SECS)] if dur > MAX_SECS else []
-    # Normalize container & remove audio
     sh(" ".join([
         "ffmpeg", "-y", "-i", f"\"{in_path}\"",
         *trim_args,
@@ -237,7 +243,7 @@ def preprocess_video(in_path: str) -> Tuple[str, float]:
         f"\"{trimmed}\""
     ]))
-    # Downscale to TARGET_H; ensure mod2 width, baseline profile
     vf = f"scale=-2:{TARGET_H}:flags=bicubic"
     sh(" ".join([
         "ffmpeg", "-y", "-i", f"\"{trimmed}\"",
@@ -254,7 +260,7 @@ def preprocess_video(in_path: str) -> Tuple[str, float]:
     return str(processed), final_dur
 # ========= Inference (ZeroGPU) =========
-@spaces.GPU(duration=ZEROGPU_DURATION)  # tune via env if needed
 @torch.inference_mode()
 def run_model(video_path: str, prompt_text: str,
               guidance_scale: float = 4.5,

 import os, sys, json, tempfile, subprocess, shutil, uuid, glob
 from pathlib import Path
+from typing import Tuple, List
 import gradio as gr
 import spaces
 APP_TITLE   = os.environ.get("APP_TITLE", "Foley Studio · ZeroGPU")
 APP_TAGLINE = os.environ.get("APP_TAGLINE", "Generate scene-true foley for short clips (ZeroGPU-ready).")
+PRIMARY_COLOR = os.environ.get("PRIMARY_COLOR", "#6B5BFF")  # UI accent color
 # ZeroGPU-safe defaults (tweak in Space Secrets if needed)
+MAX_SECS = int(os.environ.get("MAX_SECS", "15"))  # clips longer than this are trimmed by preprocess_video (ffmpeg -t)
+TARGET_H = int(os.environ.get("TARGET_H", "480"))  # output height for the ffmpeg scale filter in preprocess_video
+SR       = int(os.environ.get("TARGET_SR", "48000"))  # NOTE: read from env var TARGET_SR (not SR); presumably the output audio sample rate - confirm at use site
+ZEROGPU_DURATION = int(os.environ.get("ZEROGPU_DURATION", "110"))  # passed as `duration` to @spaces.GPU on run_model
 def sh(cmd: str):
     print(">>", cmd)
 def _clone_without_lfs():
     """
+    Clone repo while skipping LFS smudge to avoid huge demo assets.
     Falls back to sparse checkout with only essential paths.
     """
     if REPO_DIR.exists():
         return
     try:
         sh(
             "GIT_LFS_SKIP_SMUDGE=1 "
     except subprocess.CalledProcessError as e:
         print("Shallow clone with LFS skipped failed, trying sparse checkout…", e)
     REPO_DIR.mkdir(parents=True, exist_ok=True)
     sh(f"git -C {REPO_DIR} init")
     sh(
         "LICENSE",
         "README.md",
     ]) + "\n")
     try:
         sh(f"git -C {REPO_DIR} fetch --depth 1 origin main")
         sh(f"git -C {REPO_DIR} checkout main")
     """Clone code (skip LFS), download weights, set env, prepare dirs."""
     _clone_without_lfs()
     if str(REPO_DIR) not in sys.path:
         sys.path.insert(0, str(REPO_DIR))
 prepare_once()
+# Prefer safetensors & fast transfer
 os.environ["TRANSFORMERS_PREFER_SAFETENSORS"] = "1"
+os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
+def ensure_clap_safetensors():
     """
+    Pre-populate the Hugging Face hub cache with the safetensors weights
+    (plus JSON/text config and tokenizer files) of laion/larger_clap_general,
+    so Transformers never selects a stale/corrupt *.bin.
+
+    Files go to the default hub cache (no local_dir is set). Returns nothing;
+    any error raised by snapshot_download propagates to the caller.
     """
+    # `resume_download` and `local_dir_use_symlinks` are intentionally not
+    # passed: both are deprecated in recent huggingface_hub releases
+    # (interrupted downloads resume automatically, and the symlink flag only
+    # ever applied when a local_dir was given, which is not the case here).
+    snapshot_download(
+        repo_id="laion/larger_clap_general",
+        allow_patterns=[
+            # "*.json" already covers config.json
+            "*.safetensors", "*.json", "*.txt",
+            "tokenizer*", "*merges*", "*vocab*",
+        ],
+        # Redundant with allow_patterns, kept as an explicit guard against *.bin
+        ignore_patterns=["*.bin"],
+    )
+def _purge_clap_pt_bins():
+    """Remove any cached .bin for laion/larger_clap_general."""
     cache_root = Path.home() / ".cache" / "huggingface" / "hub"
+    for pat in [
         cache_root / "models--laion--larger_clap_general" / "snapshots" / "*" / "*.bin",
+    ]:
         for f in glob.glob(str(pat)):
             try:
                 Path(f).unlink()
             except Exception:
                 pass
+# ---- Dependency guards (clear errors during boot) ---------------------------
 try:
+    import audiotools  # provided by PyPI package 'descript-audiotools'
 except Exception as e:
     raise RuntimeError(
+        "Missing module 'audiotools'. Install via PyPI package "
+        "'descript-audiotools' (add 'descript-audiotools>=0.7.2' to requirements.txt)."
     ) from e
 try:
     import easydict   # noqa: F401
 except Exception as e:
     raise RuntimeError(
+        "Missing config deps. Add to requirements.txt: "
+        "'omegaconf>=2.3.0', 'pyyaml', 'easydict'."
     ) from e
+# Import Tencent internals after guards
 from hunyuanvideo_foley.utils.model_utils import load_model, denoise_process
 from hunyuanvideo_foley.utils.feature_utils import feature_process
 from hunyuanvideo_foley.utils.media_utils import merge_audio_video
     if not os.path.exists(MODEL_PATH):
         os.makedirs(MODEL_PATH, exist_ok=True)
     if not os.path.exists(CONFIG_PATH):
         return f"❌ Config file not found: {CONFIG_PATH}"
     logger.info(f"MODEL_PATH:  {MODEL_PATH}")
     logger.info(f"CONFIG_PATH: {CONFIG_PATH}")
+    # Cache CLAP safetensors first, then purge any leftover .bin so only safetensors can be loaded
+    ensure_clap_safetensors()
     _purge_clap_pt_bins()
     _model_dict, _cfg = load_model(MODEL_PATH, CONFIG_PATH, _device)
 # ========= Preprocessing =========
 def preprocess_video(in_path: str) -> Tuple[str, float]:
     """
+    - Trim to <= MAX_SECS
+    - Downscale to TARGET_H (keep AR), strip audio
+    - Return processed mp4 path and final duration
     """
     dur = ffprobe_duration(in_path)
     if dur == 0:
     processed = temp_dir / "proc.mp4"
     trim_args = ["-t", str(MAX_SECS)] if dur > MAX_SECS else []
+    # Normalize & remove audio
     sh(" ".join([
         "ffmpeg", "-y", "-i", f"\"{in_path}\"",
         *trim_args,
         f"\"{trimmed}\""
     ]))
+    # Downscale to TARGET_H; ensure mod2 width
     vf = f"scale=-2:{TARGET_H}:flags=bicubic"
     sh(" ".join([
         "ffmpeg", "-y", "-i", f"\"{trimmed}\"",
     return str(processed), final_dur
 # ========= Inference (ZeroGPU) =========
+@spaces.GPU(duration=ZEROGPU_DURATION)
 @torch.inference_mode()
 def run_model(video_path: str, prompt_text: str,
               guidance_scale: float = 4.5,