BoxOfColors committed on
Commit
15539fe
·
1 Parent(s): d9929fb

fix: move MMAUDIO_WINDOW and HUNYUAN_MAX_DUR before MODEL_CONFIGS

Browse files

Both constants were defined after MODEL_CONFIGS referenced them,
causing NameError on startup. Moved them up alongside the other
per-model constants and removed the now-duplicate definitions.

Files changed (1) hide show
  1. app.py +2 -2
app.py CHANGED
@@ -299,8 +299,10 @@ TARO_MODEL_DUR = TARO_TRUNCATE / TARO_SR # 8.192 s
299
  TARO_SECS_PER_STEP = 0.05 # measured 0.043s/step on H200 (8.2s video, 2 segs × 25 steps = 2.2s wall)
300
 
301
  TARO_LOAD_OVERHEAD = 15 # seconds: model load + CAVP feature extraction
 
302
  MMAUDIO_SECS_PER_STEP = 0.25 # measured 0.230s/step on H200 (8.3s video, 2 segs × 25 steps = 11.5s wall)
303
  MMAUDIO_LOAD_OVERHEAD = 15
 
304
  HUNYUAN_SECS_PER_STEP = 0.35 # measured 0.328s/step on H200 (8.3s video, 1 seg × 50 steps = 16.4s wall)
305
  HUNYUAN_LOAD_OVERHEAD = 55 # ~55s to load the 10GB XXL model weights into GPU
306
  GPU_DURATION_CAP = 300 # hard cap per call — never reserve more than this
@@ -610,7 +612,6 @@ def generate_taro(video_file, seed_val, cfg_scale, num_steps, mode,
610
  # generate() handles all feature extraction + decoding internally.
611
  # ================================================================== #
612
 
613
- MMAUDIO_WINDOW = 8.0 # seconds — MMAudio's fixed generation window
614
 
615
 
616
  def _mmaudio_duration(video_file, prompt, negative_prompt, seed_val,
@@ -780,7 +781,6 @@ def generate_mmaudio(video_file, prompt, negative_prompt, seed_val,
780
  # Default guidance_scale=4.5, num_inference_steps=50
781
  # ================================================================== #
782
 
783
- HUNYUAN_MAX_DUR = 15.0 # seconds
784
 
785
 
786
  def _hunyuan_duration(video_file, prompt, negative_prompt, seed_val,
 
299
  TARO_SECS_PER_STEP = 0.05 # measured 0.043s/step on H200 (8.2s video, 2 segs × 25 steps = 2.2s wall)
300
 
301
  TARO_LOAD_OVERHEAD = 15 # seconds: model load + CAVP feature extraction
302
+ MMAUDIO_WINDOW = 8.0 # seconds — MMAudio's fixed generation window
303
  MMAUDIO_SECS_PER_STEP = 0.25 # measured 0.230s/step on H200 (8.3s video, 2 segs × 25 steps = 11.5s wall)
304
  MMAUDIO_LOAD_OVERHEAD = 15
305
+ HUNYUAN_MAX_DUR = 15.0 # seconds — HunyuanFoley max video duration
306
  HUNYUAN_SECS_PER_STEP = 0.35 # measured 0.328s/step on H200 (8.3s video, 1 seg × 50 steps = 16.4s wall)
307
  HUNYUAN_LOAD_OVERHEAD = 55 # ~55s to load the 10GB XXL model weights into GPU
308
  GPU_DURATION_CAP = 300 # hard cap per call — never reserve more than this
 
612
  # generate() handles all feature extraction + decoding internally.
613
  # ================================================================== #
614
 
 
615
 
616
 
617
  def _mmaudio_duration(video_file, prompt, negative_prompt, seed_val,
 
781
  # Default guidance_scale=4.5, num_inference_steps=50
782
  # ================================================================== #
783
 
 
784
 
785
 
786
  def _hunyuan_duration(video_file, prompt, negative_prompt, seed_val,