BoxOfColors Claude Sonnet 4.6 committed on
Commit
d916fd2
·
1 Parent(s): 305c050

fix: switch regen to /gradio_api/call/{api_name} for proper ZeroGPU quota attribution

Browse files

The old /gradio_api/queue/join endpoint bypasses HF's proxy auth injection,
causing ZeroGPU to treat regen calls as unauthenticated (2min quota) even
for Pro users. The newer /gradio_api/call/{api_name} endpoint is what
Gradio's own Svelte client uses and correctly carries the session through
HF's proxy so quota is attributed to the logged-in account.

Also updates _listenAndApply to use the call-based SSE stream
(/gradio_api/call/{api_name}/{event_id}) with named events (generating,
complete, error) instead of the queue/data message format.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (1) hide show
  1. app.py +90 -86
app.py CHANGED
@@ -2070,13 +2070,7 @@ _GLOBAL_JS = """
2070
  }
2071
  }
2072
 
2073
- const fnIndex = getFnIndex(apiName);
2074
- if (fnIndex === undefined) {
2075
- console.warn('[fireRegen] fn_index not found for api_name:', apiName, 'cache:', _fnIndexCache);
2076
- return;
2077
- }
2078
-
2079
- console.log('[fireRegen] calling api', apiName, 'fn_index', fnIndex, 'seg', seg_idx);
2080
 
2081
  // Snapshot current waveform HTML + video src before mutating anything,
2082
  // so we can restore on error (e.g. quota exceeded).
@@ -2091,27 +2085,27 @@ _GLOBAL_JS = """
2091
  const lbl = document.getElementById('wf_seglabel_' + slot_id);
2092
  if (lbl) lbl.textContent = 'Regenerating Seg ' + (seg_idx + 1) + '...';
2093
 
2094
- fetch('/gradio_api/queue/join', {
 
 
 
2095
  method: 'POST',
2096
  credentials: 'include',
2097
  headers: {'Content-Type': 'application/json'},
2098
  body: JSON.stringify({
2099
  data: data,
2100
- fn_index: fnIndex,
2101
  session_hash: window.__gradio_session_hash__,
2102
- event_data: null,
2103
- trigger_id: null
2104
  })
2105
  }).then(function(r) { return r.json(); }).then(function(j) {
2106
  if (!j.event_id) { console.error('[fireRegen] no event_id:', j); return; }
2107
  console.log('[fireRegen] queued, event_id:', j.event_id);
2108
  // Subscribe to SSE stream and apply outputs when ready
2109
- _listenAndApply(j.event_id, slot_id, seg_idx, _preRegenWaveHtml, _preRegenVideoSrc);
2110
  }).catch(function(e) {
2111
  console.error('[fireRegen] fetch error:', e);
2112
  if (lbl) lbl.textContent = 'Error — see console';
2113
  var sb = document.getElementById('wf_statusbar_' + slot_id);
2114
- if (sb) { sb.style.color = '#e05252'; sb.textContent = ' Request failed: ' + e.message; }
2115
  });
2116
  }
2117
 
@@ -2148,15 +2142,18 @@ _GLOBAL_JS = """
2148
  }, isError ? 8000 : 3000);
2149
  }
2150
 
2151
- function _listenAndApply(eventId, slot_id, seg_idx, preRegenWaveHtml, preRegenVideoSrc) {
2152
  var _pendingVideoSrc = null; // track the latest resolved video URL
2153
- const es = new EventSource('/gradio_api/queue/data?session_hash=' + window.__gradio_session_hash__);
2154
- es.onmessage = function(e) {
2155
- var msg;
2156
- try { msg = JSON.parse(e.data); } catch(_) { return; }
2157
- if (msg.event_id !== eventId) return;
2158
- if (msg.msg === 'process_generating' || msg.msg === 'process_completed') {
2159
- var out = msg.output;
 
 
 
2160
  if (out && out.data) {
2161
  // data[0] = video update, data[1] = waveform HTML update
2162
  var vidUpdate = out.data[0];
@@ -2198,75 +2195,82 @@ _GLOBAL_JS = """
2198
  }
2199
  }
2200
  }
 
 
2201
 
2202
- if (msg.msg === 'process_completed') {
2203
- es.close();
2204
- var errMsg = msg.output && msg.output.error;
2205
- var hadError = !!errMsg;
2206
- console.log('[fireRegen] completed for', slot_id, 'error:', hadError, errMsg || '');
2207
-
2208
- var lbl = document.getElementById('wf_seglabel_' + slot_id);
2209
-
2210
- if (hadError) {
2211
- var toastMsg = typeof errMsg === 'string' ? errMsg : JSON.stringify(errMsg);
2212
-
2213
- // Restore waveform HTML FIRST — the pending spinner may have replaced
2214
- // the container (including the statusbar span), so we need the original
2215
- // HTML back before we can write the error message into it.
2216
- if (preRegenWaveHtml !== null) {
2217
- var waveEl = document.getElementById('slot_wave_' + slot_id);
2218
- if (waveEl) waveEl.innerHTML = preRegenWaveHtml;
2219
- }
2220
-
2221
- // Restore video src to pre-regen snapshot
2222
- if (preRegenVideoSrc !== null) {
2223
- var vidElR = document.getElementById('slot_vid_' + slot_id);
2224
- if (vidElR) {
2225
- var vR = vidElR.querySelector('video');
2226
- if (vR) { vR.setAttribute('src', preRegenVideoSrc); vR.src = preRegenVideoSrc; vR.load(); }
2227
- }
2228
- }
2229
-
2230
- // Now write error into status bar (container is restored so element exists)
2231
- var statusBar = document.getElementById('wf_statusbar_' + slot_id);
2232
- if (statusBar) {
2233
- statusBar.style.color = '#e05252';
2234
- statusBar.textContent = '\u26a0 ' + toastMsg;
2235
- setTimeout(function() {
2236
- statusBar.style.color = '#888';
2237
- statusBar.textContent = 'Click a segment to regenerate \u00a0|\u00a0 Playhead syncs to video';
2238
- }, 15000);
2239
- }
2240
-
2241
- // Also update the segment label below the waveform
2242
- var lbl2 = document.getElementById('wf_seglabel_' + slot_id);
2243
- if (lbl2) lbl2.textContent = 'Quota exceeded — try again later';
2244
- } else {
2245
- if (lbl) lbl.textContent = 'Done';
2246
-
2247
- // Apply video src AFTER Gradio/Svelte finishes its own re-render cycle.
2248
- // We try at 50ms, 300ms, and 800ms to catch whenever Svelte settles.
2249
- var src = _pendingVideoSrc;
2250
- if (src) {
2251
- _applyVideoSrc(slot_id, src);
2252
- setTimeout(function() { _applyVideoSrc(slot_id, src); }, 50);
2253
- setTimeout(function() { _applyVideoSrc(slot_id, src); }, 300);
2254
- setTimeout(function() { _applyVideoSrc(slot_id, src); }, 800);
2255
- // Also install a MutationObserver for 2s to re-apply if Svelte resets src
2256
- var vidEl = document.getElementById('slot_vid_' + slot_id);
2257
- if (vidEl) {
2258
- var obs = new MutationObserver(function() {
2259
- _applyVideoSrc(slot_id, src);
2260
- });
2261
- obs.observe(vidEl, {subtree: true, attributes: true, attributeFilter: ['src'], childList: true});
2262
- setTimeout(function() { obs.disconnect(); }, 2000);
2263
- }
2264
- }
2265
  }
2266
  }
2267
  }
2268
- if (msg.msg === 'close_stream') { es.close(); }
2269
- };
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2270
  es.onerror = function() { es.close(); };
2271
  }
2272
 
 
2070
  }
2071
  }
2072
 
2073
+ console.log('[fireRegen] calling api', apiName, 'seg', seg_idx);
 
 
 
 
 
 
2074
 
2075
  // Snapshot current waveform HTML + video src before mutating anything,
2076
  // so we can restore on error (e.g. quota exceeded).
 
2085
  const lbl = document.getElementById('wf_seglabel_' + slot_id);
2086
  if (lbl) lbl.textContent = 'Regenerating Seg ' + (seg_idx + 1) + '...';
2087
 
2088
+ // Use the newer /gradio_api/call/{api_name} endpoint — this is what Gradio's
2089
+ // own Svelte client uses and correctly carries the HF session through the
2090
+ // proxy so ZeroGPU can attribute quota to the logged-in user's Pro account.
2091
+ fetch('/gradio_api/call/' + apiName, {
2092
  method: 'POST',
2093
  credentials: 'include',
2094
  headers: {'Content-Type': 'application/json'},
2095
  body: JSON.stringify({
2096
  data: data,
 
2097
  session_hash: window.__gradio_session_hash__,
 
 
2098
  })
2099
  }).then(function(r) { return r.json(); }).then(function(j) {
2100
  if (!j.event_id) { console.error('[fireRegen] no event_id:', j); return; }
2101
  console.log('[fireRegen] queued, event_id:', j.event_id);
2102
  // Subscribe to SSE stream and apply outputs when ready
2103
+ _listenAndApply(j.event_id, slot_id, seg_idx, _preRegenWaveHtml, _preRegenVideoSrc, apiName);
2104
  }).catch(function(e) {
2105
  console.error('[fireRegen] fetch error:', e);
2106
  if (lbl) lbl.textContent = 'Error — see console';
2107
  var sb = document.getElementById('wf_statusbar_' + slot_id);
2108
+ if (sb) { sb.style.color = '#e05252'; sb.textContent = '\u26a0 Request failed: ' + e.message; }
2109
  });
2110
  }
2111
 
 
2142
  }, isError ? 8000 : 3000);
2143
  }
2144
 
2145
+ function _listenAndApply(eventId, slot_id, seg_idx, preRegenWaveHtml, preRegenVideoSrc, apiName) {
2146
  var _pendingVideoSrc = null; // track the latest resolved video URL
2147
+ // Use the call-based SSE endpoint — same one Gradio's Svelte client uses,
2148
+ // so HF's proxy correctly forwards the session for ZeroGPU quota attribution.
2149
+ // Message format: event: generating|complete|error, data: JSON array
2150
+ const es = new EventSource('/gradio_api/call/' + apiName + '/' + eventId);
2151
+ es.addEventListener('generating', function(e) {
2152
+ var data;
2153
+ try { data = JSON.parse(e.data); } catch(_) { return; }
2154
+ // data is a plain array: [vidUpdate, waveUpdate]
2155
+ var out = { data: data };
2156
+ (function handleData(out) {
2157
  if (out && out.data) {
2158
  // data[0] = video update, data[1] = waveform HTML update
2159
  var vidUpdate = out.data[0];
 
2195
  }
2196
  }
2197
  }
2198
+ })(out);
2199
+ });
2200
 
2201
+ function _onComplete(errMsg) {
2202
+ es.close();
2203
+ var hadError = !!errMsg;
2204
+ console.log('[fireRegen] completed for', slot_id, 'error:', hadError, errMsg || '');
2205
+ var lbl = document.getElementById('wf_seglabel_' + slot_id);
2206
+ if (hadError) {
2207
+ var toastMsg = typeof errMsg === 'string' ? errMsg : JSON.stringify(errMsg);
2208
+ // Restore waveform HTML FIRST so the statusbar span exists
2209
+ if (preRegenWaveHtml !== null) {
2210
+ var waveEl = document.getElementById('slot_wave_' + slot_id);
2211
+ if (waveEl) waveEl.innerHTML = preRegenWaveHtml;
2212
+ }
2213
+ // Restore video src
2214
+ if (preRegenVideoSrc !== null) {
2215
+ var vidElR = document.getElementById('slot_vid_' + slot_id);
2216
+ if (vidElR) {
2217
+ var vR = vidElR.querySelector('video');
2218
+ if (vR) { vR.setAttribute('src', preRegenVideoSrc); vR.src = preRegenVideoSrc; vR.load(); }
2219
+ }
2220
+ }
2221
+ // Write error into status bar
2222
+ var statusBar = document.getElementById('wf_statusbar_' + slot_id);
2223
+ if (statusBar) {
2224
+ statusBar.style.color = '#e05252';
2225
+ statusBar.textContent = '\u26a0 ' + toastMsg;
2226
+ setTimeout(function() {
2227
+ statusBar.style.color = '#888';
2228
+ statusBar.textContent = 'Click a segment to regenerate \u00a0|\u00a0 Playhead syncs to video';
2229
+ }, 15000);
2230
+ }
2231
+ var lbl2 = document.getElementById('wf_seglabel_' + slot_id);
2232
+ if (lbl2) lbl2.textContent = 'Quota exceeded — try again later';
2233
+ } else {
2234
+ if (lbl) lbl.textContent = 'Done';
2235
+ var src = _pendingVideoSrc;
2236
+ if (src) {
2237
+ _applyVideoSrc(slot_id, src);
2238
+ setTimeout(function() { _applyVideoSrc(slot_id, src); }, 50);
2239
+ setTimeout(function() { _applyVideoSrc(slot_id, src); }, 300);
2240
+ setTimeout(function() { _applyVideoSrc(slot_id, src); }, 800);
2241
+ var vidEl = document.getElementById('slot_vid_' + slot_id);
2242
+ if (vidEl) {
2243
+ var obs = new MutationObserver(function() { _applyVideoSrc(slot_id, src); });
2244
+ obs.observe(vidEl, {subtree: true, attributes: true, attributeFilter: ['src'], childList: true});
2245
+ setTimeout(function() { obs.disconnect(); }, 2000);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2246
  }
2247
  }
2248
  }
2249
+ }
2250
+
2251
+ es.addEventListener('complete', function(e) {
2252
+ var data;
2253
+ try { data = JSON.parse(e.data); } catch(_) { data = []; }
2254
+ // Final output array — extract video URL if present
2255
+ if (data && data[0]) {
2256
+ var vidU = data[0];
2257
+ var newSrc = null;
2258
+ if (vidU.value && vidU.value.video && vidU.value.video.url) newSrc = vidU.value.video.url;
2259
+ else if (vidU.video && vidU.video.url) newSrc = vidU.video.url;
2260
+ else if (vidU.value && vidU.value.url) newSrc = vidU.value.url;
2261
+ else if (typeof vidU.value === 'string') newSrc = vidU.value;
2262
+ else if (vidU.url) newSrc = vidU.url;
2263
+ if (newSrc) _pendingVideoSrc = newSrc;
2264
+ }
2265
+ _onComplete(null);
2266
+ });
2267
+
2268
+ es.addEventListener('error', function(e) {
2269
+ var errMsg = 'Unknown error';
2270
+ try { var d = JSON.parse(e.data); errMsg = d.error || d.message || JSON.stringify(d); } catch(_) { errMsg = e.data || 'Unknown error'; }
2271
+ _onComplete(errMsg);
2272
+ });
2273
+
2274
  es.onerror = function() { es.close(); };
2275
  }
2276