Spaces:
Running on Zero
Running on Zero
Commit ·
63065b2
1
Parent(s): 3272260
Fix: pre-download MMAudio CLIP model at startup to avoid GPU window download
Browse files
apple/DFN5B-CLIP-ViT-H-14-384 (3.95GB) was being downloaded by open_clip
inside the ZeroGPU GPU window on cold workers, consuming ~5-10s of the
allocated budget before inference started. Pre-download via snapshot_download
at startup so it reads from cache inside the GPU window, same pattern as
the existing CLAP pre-download. Reverts MMAUDIO_LOAD_OVERHEAD back to 30s.
Also adds _regenInFlight per-slot guard to prevent queuing multiple regen
jobs from rapid re-clicks on the same slot.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
app.py
CHANGED
|
@@ -72,6 +72,14 @@ print("Pre-downloading CLAP model (laion/larger_clap_general)…")
|
|
| 72 |
snapshot_download(repo_id="laion/larger_clap_general")
|
| 73 |
print("CLAP model pre-downloaded.")
|
| 74 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
# ================================================================== #
|
| 76 |
# SHARED CONSTANTS / HELPERS #
|
| 77 |
# ================================================================== #
|
|
@@ -355,7 +363,7 @@ TARO_SECS_PER_STEP = 0.025 # measured 0.023s/step on H200; was 0.05, tightened
|
|
| 355 |
TARO_LOAD_OVERHEAD = 15 # seconds: model load + CAVP feature extraction
|
| 356 |
MMAUDIO_WINDOW = 8.0 # seconds — MMAudio's fixed generation window
|
| 357 |
MMAUDIO_SECS_PER_STEP = 0.25 # measured 0.230s/step on H200 (8.3s video, 2 segs × 25 steps = 11.5s wall)
|
| 358 |
-
MMAUDIO_LOAD_OVERHEAD = 30 # 15s warm +
|
| 359 |
HUNYUAN_MAX_DUR = 15.0 # seconds — HunyuanFoley max video duration
|
| 360 |
HUNYUAN_SECS_PER_STEP = 0.35 # measured 0.328s/step on H200 (8.3s video, 1 seg × 50 steps = 16.4s wall)
|
| 361 |
HUNYUAN_LOAD_OVERHEAD = 55 # ~55s to load the 10GB XXL model weights into GPU
|
|
@@ -2210,6 +2218,13 @@ _GLOBAL_JS = """
|
|
| 2210 |
// If targetModel matches the slot's own prefix, uses the per-slot regen_* endpoint.
|
| 2211 |
// Otherwise uses the shared xregen_* cross-model endpoint.
|
| 2212 |
function fireRegen(slot_id, seg_idx, targetModel) {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2213 |
const prefix = slot_id.split('_')[0]; // owning tab: 'taro'|'mma'|'hf'
|
| 2214 |
const slotNum = parseInt(slot_id.split('_')[1], 10);
|
| 2215 |
|
|
@@ -2401,6 +2416,7 @@ _GLOBAL_JS = """
|
|
| 2401 |
}
|
| 2402 |
if (msg.msg === 'process_completed') {
|
| 2403 |
es.close();
|
|
|
|
| 2404 |
var errMsg = msg.output && msg.output.error;
|
| 2405 |
var hadError = !!errMsg;
|
| 2406 |
console.log('[fireRegen] completed for', slot_id, 'error:', hadError, errMsg || '');
|
|
@@ -2416,35 +2432,18 @@ _GLOBAL_JS = """
|
|
| 2416 |
var vidElR = document.getElementById('slot_vid_' + slot_id);
|
| 2417 |
if (vidElR) { var vR = vidElR.querySelector('video'); if (vR) { vR.setAttribute('src', preRegenVideoSrc); vR.src = preRegenVideoSrc; vR.load(); } }
|
| 2418 |
}
|
| 2419 |
-
//
|
| 2420 |
-
var iframeEl = document.getElementById('wf_iframe_' + slot_id);
|
| 2421 |
-
if (!iframeEl) {
|
| 2422 |
-
// waveform may have been restored into preRegenWaveHtml — find via slot_wave wrapper
|
| 2423 |
-
var waveWrap = document.getElementById('slot_wave_' + slot_id);
|
| 2424 |
-
if (waveWrap) iframeEl = waveWrap.querySelector('iframe[id^="wf_iframe_"]');
|
| 2425 |
-
}
|
| 2426 |
-
if (iframeEl) {
|
| 2427 |
-
iframeEl.style.transition = 'box-shadow 0.15s';
|
| 2428 |
-
iframeEl.style.boxShadow = '0 0 0 2px #e05252';
|
| 2429 |
-
setTimeout(function() { iframeEl.style.boxShadow = 'none'; }, 3000);
|
| 2430 |
-
}
|
| 2431 |
-
// Pick a human-readable message based on the error text
|
| 2432 |
var isAbort = toastMsg.toLowerCase().indexOf('aborted') !== -1;
|
| 2433 |
var isTimeout = toastMsg.toLowerCase().indexOf('timeout') !== -1;
|
| 2434 |
-
var
|
| 2435 |
-
? '\u26a0
|
| 2436 |
-
: '\u26a0
|
| 2437 |
var statusBar = document.getElementById('wf_statusbar_' + slot_id);
|
| 2438 |
if (statusBar) {
|
| 2439 |
statusBar.style.color = '#e05252';
|
| 2440 |
-
statusBar.textContent =
|
| 2441 |
setTimeout(function() { statusBar.style.color = '#888'; statusBar.textContent = 'Click a segment to regenerate \u00a0|\u00a0 Playhead syncs to video'; }, 8000);
|
| 2442 |
}
|
| 2443 |
-
if (lbl) {
|
| 2444 |
-
lbl.style.color = '#e05252';
|
| 2445 |
-
lbl.textContent = isAbort || isTimeout ? 'Cold-start abort — segment unchanged, try again' : 'Regen failed — segment unchanged';
|
| 2446 |
-
setTimeout(function() { lbl.style.color = '#aaa'; lbl.textContent = ''; }, 8000);
|
| 2447 |
-
}
|
| 2448 |
} else {
|
| 2449 |
if (lbl) lbl.textContent = 'Done';
|
| 2450 |
var src = _pendingVideoSrc;
|
|
@@ -2465,9 +2464,12 @@ _GLOBAL_JS = """
|
|
| 2465 |
}
|
| 2466 |
if (msg.msg === 'close_stream') { es.close(); }
|
| 2467 |
};
|
| 2468 |
-
es.onerror = function() { es.close(); };
|
| 2469 |
}
|
| 2470 |
|
|
|
|
|
|
|
|
|
|
| 2471 |
// Shared popup element created once and reused across all slots
|
| 2472 |
let _popup = null;
|
| 2473 |
let _pendingSlot = null, _pendingIdx = null;
|
|
|
|
| 72 |
snapshot_download(repo_id="laion/larger_clap_general")
|
| 73 |
print("CLAP model pre-downloaded.")
|
| 74 |
|
| 75 |
+
# Pre-download MMAudio's CLIP model (apple/DFN5B-CLIP-ViT-H-14-384, ~3.95 GB).
|
| 76 |
+
# open_clip.create_model_from_pretrained('hf-hub:apple/DFN5B-CLIP-ViT-H-14-384')
|
| 77 |
+
# fetches this at first use — inside the GPU window on cold workers — which
|
| 78 |
+
# burns ~5-10s of the allocated ZeroGPU budget before inference even starts.
|
| 79 |
+
print("Pre-downloading MMAudio CLIP model (apple/DFN5B-CLIP-ViT-H-14-384)…")
|
| 80 |
+
snapshot_download(repo_id="apple/DFN5B-CLIP-ViT-H-14-384")
|
| 81 |
+
print("MMAudio CLIP model pre-downloaded.")
|
| 82 |
+
|
| 83 |
# ================================================================== #
|
| 84 |
# SHARED CONSTANTS / HELPERS #
|
| 85 |
# ================================================================== #
|
|
|
|
| 363 |
TARO_LOAD_OVERHEAD = 15 # seconds: model load + CAVP feature extraction
|
| 364 |
MMAUDIO_WINDOW = 8.0 # seconds — MMAudio's fixed generation window
|
| 365 |
MMAUDIO_SECS_PER_STEP = 0.25 # measured 0.230s/step on H200 (8.3s video, 2 segs × 25 steps = 11.5s wall)
|
| 366 |
+
MMAUDIO_LOAD_OVERHEAD = 30 # 15s warm + 15s model init; open_clip pre-downloaded at startup
|
| 367 |
HUNYUAN_MAX_DUR = 15.0 # seconds — HunyuanFoley max video duration
|
| 368 |
HUNYUAN_SECS_PER_STEP = 0.35 # measured 0.328s/step on H200 (8.3s video, 1 seg × 50 steps = 16.4s wall)
|
| 369 |
HUNYUAN_LOAD_OVERHEAD = 55 # ~55s to load the 10GB XXL model weights into GPU
|
|
|
|
| 2218 |
// If targetModel matches the slot's own prefix, uses the per-slot regen_* endpoint.
|
| 2219 |
// Otherwise uses the shared xregen_* cross-model endpoint.
|
| 2220 |
function fireRegen(slot_id, seg_idx, targetModel) {
|
| 2221 |
+
// Block if a regen is already in-flight for this slot
|
| 2222 |
+
if (_regenInFlight[slot_id]) {
|
| 2223 |
+
console.log('[fireRegen] blocked — regen already in-flight for', slot_id);
|
| 2224 |
+
return;
|
| 2225 |
+
}
|
| 2226 |
+
_regenInFlight[slot_id] = true;
|
| 2227 |
+
|
| 2228 |
const prefix = slot_id.split('_')[0]; // owning tab: 'taro'|'mma'|'hf'
|
| 2229 |
const slotNum = parseInt(slot_id.split('_')[1], 10);
|
| 2230 |
|
|
|
|
| 2416 |
}
|
| 2417 |
if (msg.msg === 'process_completed') {
|
| 2418 |
es.close();
|
| 2419 |
+
_regenInFlight[slot_id] = false;
|
| 2420 |
var errMsg = msg.output && msg.output.error;
|
| 2421 |
var hadError = !!errMsg;
|
| 2422 |
console.log('[fireRegen] completed for', slot_id, 'error:', hadError, errMsg || '');
|
|
|
|
| 2432 |
var vidElR = document.getElementById('slot_vid_' + slot_id);
|
| 2433 |
if (vidElR) { var vR = vidElR.querySelector('video'); if (vR) { vR.setAttribute('src', preRegenVideoSrc); vR.src = preRegenVideoSrc; vR.load(); } }
|
| 2434 |
}
|
| 2435 |
+
// Update the statusbar (query after restore so we get the freshly-restored element)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2436 |
var isAbort = toastMsg.toLowerCase().indexOf('aborted') !== -1;
|
| 2437 |
var isTimeout = toastMsg.toLowerCase().indexOf('timeout') !== -1;
|
| 2438 |
+
var failMsg = isAbort || isTimeout
|
| 2439 |
+
? '\u26a0 GPU cold-start — segment unchanged, try again'
|
| 2440 |
+
: '\u26a0 Regen failed — segment unchanged';
|
| 2441 |
var statusBar = document.getElementById('wf_statusbar_' + slot_id);
|
| 2442 |
if (statusBar) {
|
| 2443 |
statusBar.style.color = '#e05252';
|
| 2444 |
+
statusBar.textContent = failMsg;
|
| 2445 |
setTimeout(function() { statusBar.style.color = '#888'; statusBar.textContent = 'Click a segment to regenerate \u00a0|\u00a0 Playhead syncs to video'; }, 8000);
|
| 2446 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2447 |
} else {
|
| 2448 |
if (lbl) lbl.textContent = 'Done';
|
| 2449 |
var src = _pendingVideoSrc;
|
|
|
|
| 2464 |
}
|
| 2465 |
if (msg.msg === 'close_stream') { es.close(); }
|
| 2466 |
};
|
| 2467 |
+
es.onerror = function() { es.close(); _regenInFlight[slot_id] = false; };
|
| 2468 |
}
|
| 2469 |
|
| 2470 |
+
// Track in-flight regen per slot — prevents queuing multiple jobs from rapid clicks
|
| 2471 |
+
var _regenInFlight = {};
|
| 2472 |
+
|
| 2473 |
// Shared popup element created once and reused across all slots
|
| 2474 |
let _popup = null;
|
| 2475 |
let _pendingSlot = null, _pendingIdx = null;
|