BoxOfColors Claude Sonnet 4.6 commited on
Commit
63065b2
Β·
1 Parent(s): 3272260

Fix: pre-download MMAudio CLIP model at startup to avoid GPU window download

Browse files

apple/DFN5B-CLIP-ViT-H-14-384 (3.95GB) was being downloaded by open_clip
inside the ZeroGPU GPU window on cold workers, consuming ~5-10s of the
allocated budget before inference started. Pre-download via snapshot_download
at startup so it reads from cache inside the GPU window, same pattern as
the existing CLAP pre-download. Reverts MMAUDIO_LOAD_OVERHEAD back to 30s.

Also adds _regenInFlight per-slot guard to prevent queuing multiple regen
jobs from rapid re-clicks on the same slot.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (1) hide show
  1. app.py +26 -24
app.py CHANGED
@@ -72,6 +72,14 @@ print("Pre-downloading CLAP model (laion/larger_clap_general)…")
72
  snapshot_download(repo_id="laion/larger_clap_general")
73
  print("CLAP model pre-downloaded.")
74
 
 
 
 
 
 
 
 
 
75
  # ================================================================== #
76
  # SHARED CONSTANTS / HELPERS #
77
  # ================================================================== #
@@ -355,7 +363,7 @@ TARO_SECS_PER_STEP = 0.025 # measured 0.023s/step on H200; was 0.05, tightened
355
  TARO_LOAD_OVERHEAD = 15 # seconds: model load + CAVP feature extraction
356
  MMAUDIO_WINDOW = 8.0 # seconds β€” MMAudio's fixed generation window
357
  MMAUDIO_SECS_PER_STEP = 0.25 # measured 0.230s/step on H200 (8.3s video, 2 segs Γ— 25 steps = 11.5s wall)
358
- MMAUDIO_LOAD_OVERHEAD = 30 # 15s warm + up to 30s cold-start model download
359
  HUNYUAN_MAX_DUR = 15.0 # seconds β€” HunyuanFoley max video duration
360
  HUNYUAN_SECS_PER_STEP = 0.35 # measured 0.328s/step on H200 (8.3s video, 1 seg Γ— 50 steps = 16.4s wall)
361
  HUNYUAN_LOAD_OVERHEAD = 55 # ~55s to load the 10GB XXL model weights into GPU
@@ -2210,6 +2218,13 @@ _GLOBAL_JS = """
2210
  // If targetModel matches the slot's own prefix, uses the per-slot regen_* endpoint.
2211
  // Otherwise uses the shared xregen_* cross-model endpoint.
2212
  function fireRegen(slot_id, seg_idx, targetModel) {
 
 
 
 
 
 
 
2213
  const prefix = slot_id.split('_')[0]; // owning tab: 'taro'|'mma'|'hf'
2214
  const slotNum = parseInt(slot_id.split('_')[1], 10);
2215
 
@@ -2401,6 +2416,7 @@ _GLOBAL_JS = """
2401
  }
2402
  if (msg.msg === 'process_completed') {
2403
  es.close();
 
2404
  var errMsg = msg.output && msg.output.error;
2405
  var hadError = !!errMsg;
2406
  console.log('[fireRegen] completed for', slot_id, 'error:', hadError, errMsg || '');
@@ -2416,35 +2432,18 @@ _GLOBAL_JS = """
2416
  var vidElR = document.getElementById('slot_vid_' + slot_id);
2417
  if (vidElR) { var vR = vidElR.querySelector('video'); if (vR) { vR.setAttribute('src', preRegenVideoSrc); vR.src = preRegenVideoSrc; vR.load(); } }
2418
  }
2419
- // Flash the waveform iframe border red so it's obvious the segment didn't change
2420
- var iframeEl = document.getElementById('wf_iframe_' + slot_id);
2421
- if (!iframeEl) {
2422
- // waveform may have been restored into preRegenWaveHtml β€” find via slot_wave wrapper
2423
- var waveWrap = document.getElementById('slot_wave_' + slot_id);
2424
- if (waveWrap) iframeEl = waveWrap.querySelector('iframe[id^="wf_iframe_"]');
2425
- }
2426
- if (iframeEl) {
2427
- iframeEl.style.transition = 'box-shadow 0.15s';
2428
- iframeEl.style.boxShadow = '0 0 0 2px #e05252';
2429
- setTimeout(function() { iframeEl.style.boxShadow = 'none'; }, 3000);
2430
- }
2431
- // Pick a human-readable message based on the error text
2432
  var isAbort = toastMsg.toLowerCase().indexOf('aborted') !== -1;
2433
  var isTimeout = toastMsg.toLowerCase().indexOf('timeout') !== -1;
2434
- var userMsg = isAbort || isTimeout
2435
- ? '\u26a0\ufe0f GPU cold-start β€” segment unchanged, try again'
2436
- : '\u26a0\ufe0f Regen failed β€” segment unchanged';
2437
  var statusBar = document.getElementById('wf_statusbar_' + slot_id);
2438
  if (statusBar) {
2439
  statusBar.style.color = '#e05252';
2440
- statusBar.textContent = userMsg;
2441
  setTimeout(function() { statusBar.style.color = '#888'; statusBar.textContent = 'Click a segment to regenerate \u00a0|\u00a0 Playhead syncs to video'; }, 8000);
2442
  }
2443
- if (lbl) {
2444
- lbl.style.color = '#e05252';
2445
- lbl.textContent = isAbort || isTimeout ? 'Cold-start abort β€” segment unchanged, try again' : 'Regen failed β€” segment unchanged';
2446
- setTimeout(function() { lbl.style.color = '#aaa'; lbl.textContent = ''; }, 8000);
2447
- }
2448
  } else {
2449
  if (lbl) lbl.textContent = 'Done';
2450
  var src = _pendingVideoSrc;
@@ -2465,9 +2464,12 @@ _GLOBAL_JS = """
2465
  }
2466
  if (msg.msg === 'close_stream') { es.close(); }
2467
  };
2468
- es.onerror = function() { es.close(); };
2469
  }
2470
 
 
 
 
2471
  // Shared popup element created once and reused across all slots
2472
  let _popup = null;
2473
  let _pendingSlot = null, _pendingIdx = null;
 
72
  snapshot_download(repo_id="laion/larger_clap_general")
73
  print("CLAP model pre-downloaded.")
74
 
75
+ # Pre-download MMAudio's CLIP model (apple/DFN5B-CLIP-ViT-H-14-384, ~3.95 GB).
76
+ # open_clip.create_model_from_pretrained('hf-hub:apple/DFN5B-CLIP-ViT-H-14-384')
77
+ # fetches this at first use β€” inside the GPU window on cold workers β€” which
78
+ # burns ~5-10s of the allocated ZeroGPU budget before inference even starts.
79
+ print("Pre-downloading MMAudio CLIP model (apple/DFN5B-CLIP-ViT-H-14-384)…")
80
+ snapshot_download(repo_id="apple/DFN5B-CLIP-ViT-H-14-384")
81
+ print("MMAudio CLIP model pre-downloaded.")
82
+
83
  # ================================================================== #
84
  # SHARED CONSTANTS / HELPERS #
85
  # ================================================================== #
 
363
  TARO_LOAD_OVERHEAD = 15 # seconds: model load + CAVP feature extraction
364
  MMAUDIO_WINDOW = 8.0 # seconds β€” MMAudio's fixed generation window
365
  MMAUDIO_SECS_PER_STEP = 0.25 # measured 0.230s/step on H200 (8.3s video, 2 segs Γ— 25 steps = 11.5s wall)
366
+ MMAUDIO_LOAD_OVERHEAD = 30 # 15s warm + 15s model init; open_clip pre-downloaded at startup
367
  HUNYUAN_MAX_DUR = 15.0 # seconds β€” HunyuanFoley max video duration
368
  HUNYUAN_SECS_PER_STEP = 0.35 # measured 0.328s/step on H200 (8.3s video, 1 seg Γ— 50 steps = 16.4s wall)
369
  HUNYUAN_LOAD_OVERHEAD = 55 # ~55s to load the 10GB XXL model weights into GPU
 
2218
  // If targetModel matches the slot's own prefix, uses the per-slot regen_* endpoint.
2219
  // Otherwise uses the shared xregen_* cross-model endpoint.
2220
  function fireRegen(slot_id, seg_idx, targetModel) {
2221
+ // Block if a regen is already in-flight for this slot
2222
+ if (_regenInFlight[slot_id]) {
2223
+ console.log('[fireRegen] blocked β€” regen already in-flight for', slot_id);
2224
+ return;
2225
+ }
2226
+ _regenInFlight[slot_id] = true;
2227
+
2228
  const prefix = slot_id.split('_')[0]; // owning tab: 'taro'|'mma'|'hf'
2229
  const slotNum = parseInt(slot_id.split('_')[1], 10);
2230
 
 
2416
  }
2417
  if (msg.msg === 'process_completed') {
2418
  es.close();
2419
+ _regenInFlight[slot_id] = false;
2420
  var errMsg = msg.output && msg.output.error;
2421
  var hadError = !!errMsg;
2422
  console.log('[fireRegen] completed for', slot_id, 'error:', hadError, errMsg || '');
 
2432
  var vidElR = document.getElementById('slot_vid_' + slot_id);
2433
  if (vidElR) { var vR = vidElR.querySelector('video'); if (vR) { vR.setAttribute('src', preRegenVideoSrc); vR.src = preRegenVideoSrc; vR.load(); } }
2434
  }
2435
+ // Update the statusbar (query after restore so we get the freshly-restored element)
 
 
 
 
 
 
 
 
 
 
 
 
2436
  var isAbort = toastMsg.toLowerCase().indexOf('aborted') !== -1;
2437
  var isTimeout = toastMsg.toLowerCase().indexOf('timeout') !== -1;
2438
+ var failMsg = isAbort || isTimeout
2439
+ ? '\u26a0 GPU cold-start β€” segment unchanged, try again'
2440
+ : '\u26a0 Regen failed β€” segment unchanged';
2441
  var statusBar = document.getElementById('wf_statusbar_' + slot_id);
2442
  if (statusBar) {
2443
  statusBar.style.color = '#e05252';
2444
+ statusBar.textContent = failMsg;
2445
  setTimeout(function() { statusBar.style.color = '#888'; statusBar.textContent = 'Click a segment to regenerate \u00a0|\u00a0 Playhead syncs to video'; }, 8000);
2446
  }
 
 
 
 
 
2447
  } else {
2448
  if (lbl) lbl.textContent = 'Done';
2449
  var src = _pendingVideoSrc;
 
2464
  }
2465
  if (msg.msg === 'close_stream') { es.close(); }
2466
  };
2467
+ es.onerror = function() { es.close(); _regenInFlight[slot_id] = false; };
2468
  }
2469
 
2470
+ // Track in-flight regen per slot β€” prevents queuing multiple jobs from rapid clicks
2471
+ var _regenInFlight = {};
2472
+
2473
  // Shared popup element created once and reused across all slots
2474
  let _popup = null;
2475
  let _pendingSlot = null, _pendingIdx = null;