Spaces:
Running on Zero
Running on Zero
Commit ·
39d7b17
1
Parent(s): 679c604
Lower GPU duration estimates and add 300s hard cap
Browse files
SECS_PER_STEP estimates were calibrated for a slower GPU. H200 is much faster:
TARO: 2.5 → 0.8 s/step
MMAudio: 2.5 → 0.8 s/step
HunyuanFoley: 5.0 → 2.0 s/step
Load overheads trimmed similarly (TARO 30→20, MMAudio 20→15, Hunyuan 30→20).
Add GPU_DURATION_CAP=300s — a single call can never reserve more than 5 min
regardless of video length or sample count, preventing quota exhaustion on
long videos. The diagnostic print now shows both the raw calc and capped value.
56s video / 1 sample / 25 steps: was 592s reserved, now 200s.
app.py
CHANGED
|
@@ -165,13 +165,14 @@ TARO_FPS = 4
|
|
| 165 |
TARO_TRUNCATE_FRAME = int(TARO_FPS * TARO_TRUNCATE / TARO_SR) # 32
|
| 166 |
TARO_TRUNCATE_ONSET = 120
|
| 167 |
TARO_MODEL_DUR = TARO_TRUNCATE / TARO_SR # 8.192 s
|
| 168 |
-
TARO_SECS_PER_STEP =
|
| 169 |
|
| 170 |
-
TARO_LOAD_OVERHEAD
|
| 171 |
-
MMAUDIO_SECS_PER_STEP =
|
| 172 |
-
MMAUDIO_LOAD_OVERHEAD =
|
| 173 |
-
HUNYUAN_SECS_PER_STEP =
|
| 174 |
-
HUNYUAN_LOAD_OVERHEAD =
|
|
|
|
| 175 |
|
| 176 |
_TARO_INFERENCE_CACHE: dict = {}
|
| 177 |
|
|
@@ -192,8 +193,9 @@ def _taro_duration(video_file, seed_val, cfg_scale, num_steps, mode,
|
|
| 192 |
except Exception:
|
| 193 |
n_segs = 1
|
| 194 |
secs = int(num_samples) * n_segs * int(num_steps) * TARO_SECS_PER_STEP + TARO_LOAD_OVERHEAD
|
| 195 |
-
|
| 196 |
-
|
|
|
|
| 197 |
|
| 198 |
|
| 199 |
def _taro_infer_segment(
|
|
@@ -400,8 +402,9 @@ def _mmaudio_duration(video_file, prompt, negative_prompt, seed_val,
|
|
| 400 |
except Exception:
|
| 401 |
n_segs = 1
|
| 402 |
secs = int(num_samples) * n_segs * int(num_steps) * MMAUDIO_SECS_PER_STEP + MMAUDIO_LOAD_OVERHEAD
|
| 403 |
-
|
| 404 |
-
|
|
|
|
| 405 |
|
| 406 |
|
| 407 |
@spaces.GPU(duration=_mmaudio_duration)
|
|
@@ -550,8 +553,9 @@ def _hunyuan_duration(video_file, prompt, negative_prompt, seed_val,
|
|
| 550 |
except Exception:
|
| 551 |
n_segs = 1
|
| 552 |
secs = int(num_samples) * n_segs * int(num_steps) * HUNYUAN_SECS_PER_STEP + HUNYUAN_LOAD_OVERHEAD
|
| 553 |
-
|
| 554 |
-
|
|
|
|
| 555 |
|
| 556 |
|
| 557 |
@spaces.GPU(duration=_hunyuan_duration)
|
|
|
|
| 165 |
TARO_TRUNCATE_FRAME = int(TARO_FPS * TARO_TRUNCATE / TARO_SR) # 32
|
| 166 |
TARO_TRUNCATE_ONSET = 120
|
| 167 |
TARO_MODEL_DUR = TARO_TRUNCATE / TARO_SR # 8.192 s
|
| 168 |
+
TARO_SECS_PER_STEP = 0.8 # estimated GPU-seconds per diffusion step on H200
|
| 169 |
|
| 170 |
+
TARO_LOAD_OVERHEAD = 20 # seconds: model load + CAVP feature extraction
|
| 171 |
+
MMAUDIO_SECS_PER_STEP = 0.8 # estimated GPU-seconds per flow-matching step on H200
|
| 172 |
+
MMAUDIO_LOAD_OVERHEAD = 15
|
| 173 |
+
HUNYUAN_SECS_PER_STEP = 2.0 # estimated GPU-seconds per denoising step on H200 (heavier model)
|
| 174 |
+
HUNYUAN_LOAD_OVERHEAD = 20
|
| 175 |
+
GPU_DURATION_CAP = 300  # hard cap per call — never reserve more than this
|
| 176 |
|
| 177 |
_TARO_INFERENCE_CACHE: dict = {}
|
| 178 |
|
|
|
|
| 193 |
except Exception:
|
| 194 |
n_segs = 1
|
| 195 |
secs = int(num_samples) * n_segs * int(num_steps) * TARO_SECS_PER_STEP + TARO_LOAD_OVERHEAD
|
| 196 |
+
result = min(GPU_DURATION_CAP, max(60, int(secs)))
|
| 197 |
+
print(f"[duration] TARO: {int(num_samples)}samp Γ {n_segs}seg Γ {int(num_steps)}steps β {secs:.0f}s β capped {result}s")
|
| 198 |
+
return result
|
| 199 |
|
| 200 |
|
| 201 |
def _taro_infer_segment(
|
|
|
|
| 402 |
except Exception:
|
| 403 |
n_segs = 1
|
| 404 |
secs = int(num_samples) * n_segs * int(num_steps) * MMAUDIO_SECS_PER_STEP + MMAUDIO_LOAD_OVERHEAD
|
| 405 |
+
result = min(GPU_DURATION_CAP, max(60, int(secs)))
|
| 406 |
+
print(f"[duration] MMAudio: {int(num_samples)}samp Γ {n_segs}seg Γ {int(num_steps)}steps β {secs:.0f}s β capped {result}s")
|
| 407 |
+
return result
|
| 408 |
|
| 409 |
|
| 410 |
@spaces.GPU(duration=_mmaudio_duration)
|
|
|
|
| 553 |
except Exception:
|
| 554 |
n_segs = 1
|
| 555 |
secs = int(num_samples) * n_segs * int(num_steps) * HUNYUAN_SECS_PER_STEP + HUNYUAN_LOAD_OVERHEAD
|
| 556 |
+
result = min(GPU_DURATION_CAP, max(60, int(secs)))
|
| 557 |
+
print(f"[duration] HunyuanFoley: {int(num_samples)}samp Γ {n_segs}seg Γ {int(num_steps)}steps β {secs:.0f}s β capped {result}s")
|
| 558 |
+
return result
|
| 559 |
|
| 560 |
|
| 561 |
@spaces.GPU(duration=_hunyuan_duration)
|