Spaces:
Running on Zero
Running on Zero
Commit ·
15539fe
1
Parent(s): d9929fb
fix: move MMAUDIO_WINDOW and HUNYUAN_MAX_DUR before MODEL_CONFIGS
Browse files
Both constants were defined after MODEL_CONFIGS referenced them,
causing NameError on startup. Moved them up alongside the other
per-model constants and removed the now-duplicate definitions.
app.py
CHANGED
|
@@ -299,8 +299,10 @@ TARO_MODEL_DUR = TARO_TRUNCATE / TARO_SR # 8.192 s
|
|
| 299 |
TARO_SECS_PER_STEP = 0.05 # measured 0.043s/step on H200 (8.2s video, 2 segs × 25 steps = 2.2s wall)
|
| 300 |
|
| 301 |
TARO_LOAD_OVERHEAD = 15 # seconds: model load + CAVP feature extraction
|
|
|
|
| 302 |
MMAUDIO_SECS_PER_STEP = 0.25 # measured 0.230s/step on H200 (8.3s video, 2 segs × 25 steps = 11.5s wall)
|
| 303 |
MMAUDIO_LOAD_OVERHEAD = 15
|
|
|
|
| 304 |
HUNYUAN_SECS_PER_STEP = 0.35 # measured 0.328s/step on H200 (8.3s video, 1 seg × 50 steps = 16.4s wall)
|
| 305 |
HUNYUAN_LOAD_OVERHEAD = 55 # ~55s to load the 10GB XXL model weights into GPU
|
| 306 |
GPU_DURATION_CAP = 300 # hard cap per call — never reserve more than this
|
|
@@ -610,7 +612,6 @@ def generate_taro(video_file, seed_val, cfg_scale, num_steps, mode,
|
|
| 610 |
# generate() handles all feature extraction + decoding internally.
|
| 611 |
# ================================================================== #
|
| 612 |
|
| 613 |
-
MMAUDIO_WINDOW = 8.0 # seconds — MMAudio's fixed generation window
|
| 614 |
|
| 615 |
|
| 616 |
def _mmaudio_duration(video_file, prompt, negative_prompt, seed_val,
|
|
@@ -780,7 +781,6 @@ def generate_mmaudio(video_file, prompt, negative_prompt, seed_val,
|
|
| 780 |
# Default guidance_scale=4.5, num_inference_steps=50
|
| 781 |
# ================================================================== #
|
| 782 |
|
| 783 |
-
HUNYUAN_MAX_DUR = 15.0 # seconds
|
| 784 |
|
| 785 |
|
| 786 |
def _hunyuan_duration(video_file, prompt, negative_prompt, seed_val,
|
|
|
|
| 299 |
TARO_SECS_PER_STEP = 0.05 # measured 0.043s/step on H200 (8.2s video, 2 segs × 25 steps = 2.2s wall)
|
| 300 |
|
| 301 |
TARO_LOAD_OVERHEAD = 15 # seconds: model load + CAVP feature extraction
|
| 302 |
+
MMAUDIO_WINDOW = 8.0 # seconds — MMAudio's fixed generation window
|
| 303 |
MMAUDIO_SECS_PER_STEP = 0.25 # measured 0.230s/step on H200 (8.3s video, 2 segs × 25 steps = 11.5s wall)
|
| 304 |
MMAUDIO_LOAD_OVERHEAD = 15
|
| 305 |
+
HUNYUAN_MAX_DUR = 15.0 # seconds — HunyuanFoley max video duration
|
| 306 |
HUNYUAN_SECS_PER_STEP = 0.35 # measured 0.328s/step on H200 (8.3s video, 1 seg × 50 steps = 16.4s wall)
|
| 307 |
HUNYUAN_LOAD_OVERHEAD = 55 # ~55s to load the 10GB XXL model weights into GPU
|
| 308 |
GPU_DURATION_CAP = 300 # hard cap per call — never reserve more than this
|
|
|
|
| 612 |
# generate() handles all feature extraction + decoding internally.
|
| 613 |
# ================================================================== #
|
| 614 |
|
|
|
|
| 615 |
|
| 616 |
|
| 617 |
def _mmaudio_duration(video_file, prompt, negative_prompt, seed_val,
|
|
|
|
| 781 |
# Default guidance_scale=4.5, num_inference_steps=50
|
| 782 |
# ================================================================== #
|
| 783 |
|
|
|
|
| 784 |
|
| 785 |
|
| 786 |
def _hunyuan_duration(video_file, prompt, negative_prompt, seed_val,
|