Commit ·
a4f4e65
1
Parent(s): 5aeadc9
Calibrate GPU duration constants from measured H200 timings
Browse files
app.py
CHANGED
|
@@ -166,13 +166,13 @@ TARO_FPS = 4
|
|
| 166 |
TARO_TRUNCATE_FRAME = int(TARO_FPS * TARO_TRUNCATE / TARO_SR) # 32
|
| 167 |
TARO_TRUNCATE_ONSET = 120
|
| 168 |
TARO_MODEL_DUR = TARO_TRUNCATE / TARO_SR # 8.192 s
|
| 169 |
-
TARO_SECS_PER_STEP = 0.
|
| 170 |
|
| 171 |
-
TARO_LOAD_OVERHEAD =
|
| 172 |
-
MMAUDIO_SECS_PER_STEP = 0.
|
| 173 |
MMAUDIO_LOAD_OVERHEAD = 15
|
| 174 |
-
HUNYUAN_SECS_PER_STEP =
|
| 175 |
-
HUNYUAN_LOAD_OVERHEAD =
|
| 176 |
GPU_DURATION_CAP = 300 # hard cap per call — never reserve more than this
|
| 177 |
|
| 178 |
_TARO_INFERENCE_CACHE: dict = {}
|
|
@@ -778,7 +778,7 @@ with gr.Blocks(title="Generate Audio for Video") as demo:
|
|
| 778 |
with gr.Column():
|
| 779 |
taro_video = gr.Video(label="Input Video")
|
| 780 |
taro_seed = gr.Number(label="Seed (-1 = random)", value=get_random_seed(), precision=0)
|
| 781 |
-
taro_cfg = gr.Slider(label="CFG Scale", minimum=1, maximum=15, value=
|
| 782 |
taro_steps = gr.Slider(label="Sampling Steps", minimum=10, maximum=50, value=25, step=1)
|
| 783 |
taro_mode = gr.Radio(label="Sampling Mode", choices=["sde", "ode"], value="sde")
|
| 784 |
taro_cf_dur = gr.Slider(label="Crossfade Duration (s)", minimum=0, maximum=8, value=2, step=0.1)
|
|
|
|
| 166 |
TARO_TRUNCATE_FRAME = int(TARO_FPS * TARO_TRUNCATE / TARO_SR) # 32
|
| 167 |
TARO_TRUNCATE_ONSET = 120
|
| 168 |
TARO_MODEL_DUR = TARO_TRUNCATE / TARO_SR # 8.192 s
|
| 169 |
+
TARO_SECS_PER_STEP = 0.05 # measured 0.043s/step on H200 (8.2s video, 2 segs × 25 steps = 2.2s wall)
|
| 170 |
|
| 171 |
+
TARO_LOAD_OVERHEAD = 15 # seconds: model load + CAVP feature extraction
|
| 172 |
+
MMAUDIO_SECS_PER_STEP = 0.25 # measured 0.230s/step on H200 (8.3s video, 2 segs × 25 steps = 11.5s wall)
|
| 173 |
MMAUDIO_LOAD_OVERHEAD = 15
|
| 174 |
+
HUNYUAN_SECS_PER_STEP = 0.35 # measured 0.328s/step on H200 (8.3s video, 1 seg × 50 steps = 16.4s wall)
|
| 175 |
+
HUNYUAN_LOAD_OVERHEAD = 55 # ~55s to load the 10GB XXL model weights into GPU
|
| 176 |
GPU_DURATION_CAP = 300 # hard cap per call — never reserve more than this
|
| 177 |
|
| 178 |
_TARO_INFERENCE_CACHE: dict = {}
|
|
|
|
| 778 |
with gr.Column():
|
| 779 |
taro_video = gr.Video(label="Input Video")
|
| 780 |
taro_seed = gr.Number(label="Seed (-1 = random)", value=get_random_seed(), precision=0)
|
| 781 |
+
taro_cfg = gr.Slider(label="CFG Scale", minimum=1, maximum=15, value=7.5, step=0.5)
|
| 782 |
taro_steps = gr.Slider(label="Sampling Steps", minimum=10, maximum=50, value=25, step=1)
|
| 783 |
taro_mode = gr.Radio(label="Sampling Mode", choices=["sde", "ode"], value="sde")
|
| 784 |
taro_cf_dur = gr.Slider(label="Crossfade Duration (s)", minimum=0, maximum=8, value=2, step=0.1)
|