Spaces: Running on Zero

Update app.py

app.py CHANGED
@@ -7,9 +7,6 @@ os.environ.setdefault("HF_PREFER_SAFETENSORS", "1")
 
 import sys
 import json
-import uuid
-import time
-import shutil
 import base64
 import random
 import tempfile
@@ -43,30 +40,15 @@ WATERMARK_NOTE = "Made with ❤️ by bilsimaging.com"
 # ZeroGPU limit (<=120)
 GPU_DURATION = int(os.environ.get("GPU_DURATION_SECS", "110"))
 
-# Globals
+# Globals (NO CUDA INIT HERE)
 _model_dict = None
 _cfg = None
 _device: Optional[torch.device] = None
 
 
 # ------------
-# Small helpers
+# Small helpers (CPU-only; avoid touching CUDA here)
 # ------------
-def _setup_device(pref: str = "auto", gpu_id: int = 0) -> torch.device:
-    """Pick CUDA if available, else MPS, else CPU."""
-    if pref == "auto":
-        if torch.cuda.is_available():
-            d = torch.device(f"cuda:{gpu_id}")
-        elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
-            d = torch.device("mps")
-        else:
-            d = torch.device("cpu")
-    else:
-        d = torch.device(pref)
-    logger.info(f"Using {d}")
-    return d
-
-
 def _ensure_repo() -> None:
     """Shallow-clone Tencent repo with LFS smudge disabled (avoid LFS quota checkout)."""
     if REPO_DIR.exists():
@@ -105,26 +87,30 @@ def prepare_once() -> None:
 # -----------------------
 # Model load & inference
 # -----------------------
-def auto_load_models() -> str:
+def auto_load_models(device: Optional[torch.device] = None) -> str:
     """
-    Load HunyuanVideo-Foley + encoders on the detected device.
-
+    Load HunyuanVideo-Foley + encoders on the given device.
+    MUST be called only inside a @spaces.GPU context with device=cuda:0.
     """
     global _model_dict, _cfg, _device
 
     if _model_dict is not None and _cfg is not None:
         return "✅ Model already loaded."
 
-    #
-
+    # DO NOT probe CUDA here unless device is passed from GPU context
+    if device is None:
+        return "❌ Load the model inside a GPU task first (use the Load button or run Generate)."
+
+    os.environ["HF_PREFER_SAFETENSORS"] = "1"  # enforce again for safety
 
     sys.path.append(str(REPO_DIR))
     from hunyuanvideo_foley.utils.model_utils import load_model
 
-    _device = _setup_device("auto")
+    _device = device
     logger.info("Loading HunyuanVideo-Foley model...")
     logger.info(f"MODEL_PATH: {WEIGHTS_DIR}")
     logger.info(f"CONFIG_PATH: {CONFIG_PATH}")
+    logger.info(f"TARGET_DEVICE: {_device}")
 
     try:
         _model_dict, _cfg = load_model(str(WEIGHTS_DIR), str(CONFIG_PATH), _device)
@@ -222,9 +208,12 @@ def infer_single_video(
     Generate Foley audio for an uploaded video (1–6 variants).
     Returns: (list of output video paths, status message)
     """
-    #
+    # Safe: inside GPU context, we can use CUDA
+    device = torch.device("cuda:0")
+
+    # Lazy-load if needed on GPU
     if _model_dict is None or _cfg is None:
-        msg = auto_load_models()
+        msg = auto_load_models(device)
         if not str(msg).startswith("✅"):
             return [], f"❌ {msg}"
 
@@ -261,8 +250,15 @@ def infer_single_video(
     return outs, f"✅ Generated {len(outs)} result(s). Saved to {OUTPUTS_DIR}/"
 
 
+# Separate GPU task to preload model (used by the Load button & API)
+@spaces.GPU(duration=GPU_DURATION)
+def gpu_load_models() -> str:
+    device = torch.device("cuda:0")
+    return auto_load_models(device)
+
+
 # -------------
-# Gradio UI (with MCP+REST)
+# Gradio UI (with MCP + REST endpoints)
 # -------------
 def _about_html() -> str:
     return f"""
@@ -292,8 +288,7 @@ def _about_html() -> str:
    <p>This Space exposes an <b>MCP server</b> and simple REST endpoints (see “API & MCP” tab).
    Perfect for pipelines and tools like <b>n8n</b>.</p>
 
-
-    <p>Each output writes a JSON sidecar including: <i>{WATERMARK_NOTE}</i>. Ask if you want a visible overlay.</p>
+
    </div>
    """
 
@@ -307,6 +302,7 @@ def create_ui() -> gr.Blocks:
     .generate-btn button{ font-weight:800; border-radius:12px; padding:10px 18px;}
     .minor-btn button{ border-radius:10px;}
     .muted{ color:#64748b; }
+    .footer-text{ margin-top:16px; text-align:center; color:#475569; font-size:.95rem;}
     """
     with gr.Blocks(title="ShortiFoley — HunyuanVideo-Foley", css=css) as demo:
 
@@ -367,12 +363,13 @@ def create_ui() -> gr.Blocks:
             api_description="Generate Foley audio for an uploaded video. Returns up to 6 video+audio files."
         )
 
+        # Load model (GPU-safe)
         load_btn.click(
-            fn=auto_load_models,
+            fn=gpu_load_models,
             inputs=[],
             outputs=[status],
             api_name="/load_model",
-            api_description="Load/initialize the ShortiFoley model and encoders."
+            api_description="Load/initialize the ShortiFoley model and encoders (runs on GPU)."
         )
 
         # Toggle visibility based on variants
@@ -403,8 +400,7 @@ def create_ui() -> gr.Blocks:
 
         # Refresh via button
         refresh.click(_refresh_gallery, outputs=[gallery])
-
-        # Also refresh after generation finishes (chain on the event, NOT the button)
+        # Also refresh after generation finishes
        gen_evt.then(_refresh_gallery, inputs=None, outputs=[gallery])
 
        with gr.Tab("API & MCP"):
@@ -430,6 +426,13 @@ def create_ui() -> gr.Blocks:
        with gr.Tab("ℹ️ About"):
            gr.HTML(_about_html())
 
+        # Footer
+        gr.HTML("""
+        <div class="footer-text">
+          <p>🚀 Created by <b>bilsimaging.com</b> • Powered by HunyuanVideo-Foley • Generate high-quality audio from video and text descriptions</p>
+        </div>
+        """)
+
        # ---- REST + MCP endpoints (inside Blocks) ----
        def _download_to_tmp(url: str) -> str:
            try:
@@ -469,10 +472,9 @@ def create_ui() -> gr.Blocks:
            num_inference_steps: int = 50,
            sample_nums: int = 1,
        ) -> Dict[str, List[str]]:
+            # Ensure model is ready (GPU-safe path)
            if _model_dict is None or _cfg is None:
-                msg = auto_load_models()
-                if not str(msg).startswith("✅"):
-                    raise RuntimeError(msg)
+                _ = gpu_load_models()
            local = _normalize_video_input(video_url_or_b64)
            outs, msg = infer_single_video(local, text_prompt, guidance_scale, num_inference_steps, sample_nums)
            return {"videos": outs, "message": msg}
@@ -480,14 +482,14 @@ def create_ui() -> gr.Blocks:
        @gr.api
        def load_model_tool() -> str:
            """Ensure model is loaded on server (convenient for MCP/REST)."""
-            return auto_load_models()
+            return gpu_load_models()
 
        @gr.mcp.resource("shortifoley://status")
        def shortifoley_status() -> str:
            """Return a simple readiness string for MCP clients."""
            ready = _model_dict is not None and _cfg is not None
            dev = "cuda" if (_device and _device.type == "cuda") else ("mps" if (_device and _device.type == "mps") else "cpu")
-            return f"ShortiFoley status: {'ready' if ready else 'not ready'}"
+            return f"ShortiFoley status: {'ready' if ready else 'idle'} | device={dev} | outputs={OUTPUTS_DIR}"
 
        @gr.mcp.prompt()
        def foley_prompt(name: str = "default") -> str:
@@ -497,9 +499,8 @@ def create_ui() -> gr.Blocks:
            "Example: 'Soft leather footfalls on wet pavement with distant traffic hiss; occasional splashes.'"
        )
 
-        #
-        demo.load(auto_load_models, inputs=None, outputs=None)
-        demo.load(lambda: gr.update(value=_list_gallery()), inputs=None, outputs=[gallery])
+        # IMPORTANT: Do NOT auto-load models here to avoid CUDA init in main process
+        demo.load(lambda: "Ready. Click 'Load model' or 'Generate' to start.", inputs=None, outputs=None)
 
    return demo
 
@@ -511,7 +512,7 @@ def set_seeds(s: int = 1):
 
 
 # -------------
-# App bootstrap
+# App bootstrap (CPU only)
 # -------------
 if __name__ == "__main__":
     logger.remove()
@@ -521,7 +522,7 @@ if __name__ == "__main__":
     logger.info("===== Application Startup =====\n")
     prepare_once()
 
-    # Probe imports (early surfacing)
+    # Probe imports (early surfacing) — CPU-safe
     sys.path.append(str(REPO_DIR))
     try:
         from hunyuanvideo_foley.utils.model_utils import load_model, denoise_process  # noqa: F401
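
The pattern behind this commit is the standard ZeroGPU one: the main (CPU) process must never initialize CUDA, so model loading and inference both live inside functions decorated with @spaces.GPU, and torch.device("cuda:0") is only created there. A minimal sketch of that shape, assuming the spaces package available on HF Spaces; the names _load and gpu_infer and the Linear stand-in model are illustrative, not the app's real code:

import spaces  # HF Spaces package; provides the GPU-task decorator
import torch

_model = None  # stays None until a GPU task populates it

def _load(device: torch.device) -> None:
    # Call only from inside a @spaces.GPU task, where device is cuda:0.
    global _model
    if _model is None:
        _model = torch.nn.Linear(4, 4).to(device)  # stand-in for real weights

@spaces.GPU(duration=110)  # ZeroGPU caps each task at 120 s
def gpu_infer(x: list) -> list:
    device = torch.device("cuda:0")  # safe here: we are inside the GPU task
    _load(device)                    # lazy-load on first use
    with torch.no_grad():
        return _model(torch.tensor(x, device=device)).tolist()

Because the decorated function runs in the GPU worker, the module-level global keeps the loaded weights for later calls in the same process, which is exactly what gpu_load_models and the lazy-load branch in infer_single_video rely on above.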
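On the client side, the /load_model endpoint registered on load_btn can be called ahead of the first generation to warm the model. A sketch using gradio_client; the Space id Bilsimaging/ShortiFoley is an assumption, substitute the real one:

from gradio_client import Client

client = Client("Bilsimaging/ShortiFoley")  # assumed Space id; replace with the real repo

# /load_model takes no inputs and returns the loader's status string,
# e.g. "✅ Model already loaded." once the GPU task has finished.
print(client.predict(api_name="/load_model"))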