BoxOfColors committed on
Commit
39d7b17
·
1 Parent(s): 679c604

Lower GPU duration estimates and add 300s hard cap

Browse files

SECS_PER_STEP estimates were calibrated for a slower GPU. H200 is much faster:
TARO: 2.5 → 0.8 s/step
MMAudio: 2.5 → 0.8 s/step
HunyuanFoley: 5.0 → 2.0 s/step
Load overheads were trimmed similarly (TARO 30→20 s, MMAudio 20→15 s, Hunyuan 30→20 s).

Add GPU_DURATION_CAP=300s — a single call can never reserve more than 5 min,
regardless of video length or sample count, preventing quota exhaustion on
long videos. The diagnostic print now shows both the raw calculation and the capped value.

56s video / 1 sample / 25 steps: was 592s reserved, now 200s.

Files changed (1) hide show
  1. app.py +16 -12
app.py CHANGED
@@ -165,13 +165,14 @@ TARO_FPS = 4
165
  TARO_TRUNCATE_FRAME = int(TARO_FPS * TARO_TRUNCATE / TARO_SR) # 32
166
  TARO_TRUNCATE_ONSET = 120
167
  TARO_MODEL_DUR = TARO_TRUNCATE / TARO_SR # 8.192 s
168
- TARO_SECS_PER_STEP = 2.5 # estimated GPU-seconds per diffusion step
169
 
170
- TARO_LOAD_OVERHEAD = 30 # seconds: model load + CAVP feature extraction
171
- MMAUDIO_SECS_PER_STEP = 2.5 # estimated GPU-seconds per flow-matching step
172
- MMAUDIO_LOAD_OVERHEAD = 20
173
- HUNYUAN_SECS_PER_STEP = 5.0 # estimated GPU-seconds per denoising step (heavier model)
174
- HUNYUAN_LOAD_OVERHEAD = 30
 
175
 
176
  _TARO_INFERENCE_CACHE: dict = {}
177
 
@@ -192,8 +193,9 @@ def _taro_duration(video_file, seed_val, cfg_scale, num_steps, mode,
192
  except Exception:
193
  n_segs = 1
194
  secs = int(num_samples) * n_segs * int(num_steps) * TARO_SECS_PER_STEP + TARO_LOAD_OVERHEAD
195
- print(f"[duration] TARO: {int(num_samples)}s Γ— {n_segs}seg Γ— {int(num_steps)}steps = {secs:.0f}s reserved")
196
- return max(60, int(secs))
 
197
 
198
 
199
  def _taro_infer_segment(
@@ -400,8 +402,9 @@ def _mmaudio_duration(video_file, prompt, negative_prompt, seed_val,
400
  except Exception:
401
  n_segs = 1
402
  secs = int(num_samples) * n_segs * int(num_steps) * MMAUDIO_SECS_PER_STEP + MMAUDIO_LOAD_OVERHEAD
403
- print(f"[duration] MMAudio: {int(num_samples)}s Γ— {n_segs}seg Γ— {int(num_steps)}steps = {secs:.0f}s reserved")
404
- return max(60, int(secs))
 
405
 
406
 
407
  @spaces.GPU(duration=_mmaudio_duration)
@@ -550,8 +553,9 @@ def _hunyuan_duration(video_file, prompt, negative_prompt, seed_val,
550
  except Exception:
551
  n_segs = 1
552
  secs = int(num_samples) * n_segs * int(num_steps) * HUNYUAN_SECS_PER_STEP + HUNYUAN_LOAD_OVERHEAD
553
- print(f"[duration] HunyuanFoley: {int(num_samples)}s Γ— {n_segs}seg Γ— {int(num_steps)}steps = {secs:.0f}s reserved")
554
- return max(60, int(secs))
 
555
 
556
 
557
  @spaces.GPU(duration=_hunyuan_duration)
 
165
  TARO_TRUNCATE_FRAME = int(TARO_FPS * TARO_TRUNCATE / TARO_SR) # 32
166
  TARO_TRUNCATE_ONSET = 120
167
  TARO_MODEL_DUR = TARO_TRUNCATE / TARO_SR # 8.192 s
168
+ TARO_SECS_PER_STEP = 0.8 # estimated GPU-seconds per diffusion step on H200
169
 
170
+ TARO_LOAD_OVERHEAD = 20 # seconds: model load + CAVP feature extraction
171
+ MMAUDIO_SECS_PER_STEP = 0.8 # estimated GPU-seconds per flow-matching step on H200
172
+ MMAUDIO_LOAD_OVERHEAD = 15
173
+ HUNYUAN_SECS_PER_STEP = 2.0 # estimated GPU-seconds per denoising step on H200 (heavier model)
174
+ HUNYUAN_LOAD_OVERHEAD = 20
175
+ GPU_DURATION_CAP = 300 # hard cap per call β€” never reserve more than this
176
 
177
  _TARO_INFERENCE_CACHE: dict = {}
178
 
 
193
  except Exception:
194
  n_segs = 1
195
  secs = int(num_samples) * n_segs * int(num_steps) * TARO_SECS_PER_STEP + TARO_LOAD_OVERHEAD
196
+ result = min(GPU_DURATION_CAP, max(60, int(secs)))
197
+ print(f"[duration] TARO: {int(num_samples)}samp Γ— {n_segs}seg Γ— {int(num_steps)}steps β†’ {secs:.0f}s β†’ capped {result}s")
198
+ return result
199
 
200
 
201
  def _taro_infer_segment(
 
402
  except Exception:
403
  n_segs = 1
404
  secs = int(num_samples) * n_segs * int(num_steps) * MMAUDIO_SECS_PER_STEP + MMAUDIO_LOAD_OVERHEAD
405
+ result = min(GPU_DURATION_CAP, max(60, int(secs)))
406
+ print(f"[duration] MMAudio: {int(num_samples)}samp Γ— {n_segs}seg Γ— {int(num_steps)}steps β†’ {secs:.0f}s β†’ capped {result}s")
407
+ return result
408
 
409
 
410
  @spaces.GPU(duration=_mmaudio_duration)
 
553
  except Exception:
554
  n_segs = 1
555
  secs = int(num_samples) * n_segs * int(num_steps) * HUNYUAN_SECS_PER_STEP + HUNYUAN_LOAD_OVERHEAD
556
+ result = min(GPU_DURATION_CAP, max(60, int(secs)))
557
+ print(f"[duration] HunyuanFoley: {int(num_samples)}samp Γ— {n_segs}seg Γ— {int(num_steps)}steps β†’ {secs:.0f}s β†’ capped {result}s")
558
+ return result
559
 
560
 
561
  @spaces.GPU(duration=_hunyuan_duration)