BoxOfColors Claude Sonnet 4.6 committed on
Commit
af578ae
·
1 Parent(s): 1afc0fb

Use Gradio's postMessage zerogpu-headers for regen quota attribution

Browse files

Instead of the unreliable x-ip-token relay, now use the same mechanism
Gradio's own JS client uses: postMessage("zerogpu-headers") to the HF
parent frame, which responds with x-zerogpu-token and x-zerogpu-uuid.
These are the actual headers that ZeroGPU uses for Pro quota attribution.
The HF token input in Settings is kept as a fallback.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (1) hide show
  1. app.py +67 -73
app.py CHANGED
@@ -1976,44 +1976,51 @@ _GLOBAL_JS = """
1976
  if (window._wf_global_listener) return; // already registered
1977
  window._wf_global_listener = true;
1978
 
1979
- // Fetch a fresh x-ip-token via a Gradio endpoint where HF's proxy injects it.
1980
- // Returns a Promise<string> — resolves with the token (or '' on failure).
1981
- // Called immediately before each regen fetch so the token is always fresh
1982
- // (the JWT expires in ~170s, so caching across regen calls is unreliable).
1983
- function _fetchIpToken() {
 
 
 
1984
  return new Promise(function(resolve) {
1985
- var ipFnIndex = null;
1986
- var deps = window.gradio_config && window.gradio_config.dependencies;
1987
- if (deps) deps.forEach(function(d, i) { if (d.api_name === 'get_ip_token') ipFnIndex = i; });
1988
- if (ipFnIndex === null) { resolve(''); return; }
1989
- fetch('/gradio_api/queue/join', {
1990
- method: 'POST',
1991
- credentials: 'include',
1992
- headers: {'Content-Type': 'application/json'},
1993
- body: JSON.stringify({
1994
- data: [], fn_index: ipFnIndex,
1995
- session_hash: window.__gradio_session_hash__,
1996
- event_data: null, trigger_id: null
1997
- })
1998
- }).then(function(r) { return r.json(); }).then(function(j) {
1999
- if (!j.event_id) { resolve(''); return; }
2000
- var es = new EventSource('/gradio_api/queue/data?session_hash=' + window.__gradio_session_hash__);
2001
- var done = false;
2002
- es.onmessage = function(e) {
2003
- var msg; try { msg = JSON.parse(e.data); } catch(_) { return; }
2004
- if (msg.event_id !== j.event_id) return;
2005
- if (msg.msg === 'process_completed') {
2006
- es.close(); done = true;
2007
- var token = msg.output && msg.output.data && msg.output.data[0];
2008
- console.log('[zerogpu] fresh x-ip-token acquired, length:', token ? token.length : 0);
2009
- resolve(token || '');
 
2010
  }
2011
- if (msg.msg === 'close_stream') { es.close(); if (!done) { done = true; resolve(''); } }
2012
- };
2013
- es.onerror = function() { es.close(); if (!done) { done = true; resolve(''); } };
2014
- // Timeout safety: if SSE hangs, don't block regen forever
2015
- setTimeout(function() { if (!done) { done = true; es.close(); resolve(''); } }, 5000);
2016
- }).catch(function() { resolve(''); });
 
 
 
2017
  });
2018
  }
2019
 
@@ -2139,24 +2146,27 @@ _GLOBAL_JS = """
2139
  console.warn('[fireRegen] fn_index not found for api_name:', apiName);
2140
  return;
2141
  }
2142
- // Build auth headers for the regen call.
2143
- // Prefer a user-supplied HF token (Authorization: Bearer) which is the
2144
- // documented way for ZeroGPU to attribute quota to a logged-in Pro account.
2145
- // Fall back to the x-ip-token relay approach if no HF token is provided.
2146
- var userHfToken = '';
2147
- var hfTokenEl = document.getElementById('hf_token_input');
2148
- if (hfTokenEl) { var inp = hfTokenEl.querySelector('input,textarea'); if (inp) userHfToken = (inp.value || '').trim(); }
2149
-
2150
- var _doRegen = function(ipToken) {
2151
  var regenHeaders = {'Content-Type': 'application/json'};
2152
- if (userHfToken) {
2153
- regenHeaders['Authorization'] = 'Bearer ' + userHfToken;
2154
- console.log('[fireRegen] using HF token for Pro quota attribution');
2155
- } else if (ipToken) {
2156
- regenHeaders['x-ip-token'] = ipToken;
2157
- console.log('[fireRegen] using fresh x-ip-token, len:', ipToken.length);
2158
  } else {
2159
- console.warn('[fireRegen] no auth available, regen may use anonymous quota');
 
 
 
 
 
 
 
 
 
2160
  }
2161
  fetch('/gradio_api/queue/join', {
2162
  method: 'POST',
@@ -2179,13 +2189,7 @@ _GLOBAL_JS = """
2179
  var sb = document.getElementById('wf_statusbar_' + slot_id);
2180
  if (sb) { sb.style.color = '#e05252'; sb.textContent = '\u26a0 Request failed: ' + e.message; }
2181
  });
2182
- };
2183
- // If user provided HF token, skip x-ip-token relay (token is sufficient)
2184
- if (userHfToken) {
2185
- _doRegen('');
2186
- } else {
2187
- _fetchIpToken().then(_doRegen);
2188
- }
2189
  }
2190
 
2191
  // Subscribe to Gradio SSE stream for an event and apply outputs to DOM.
@@ -2638,20 +2642,10 @@ with gr.Blocks(title="Generate Audio for Video", css=_SLOT_CSS, js=_GLOBAL_JS) a
2638
  api_name="xregen_hunyuan",
2639
  )
2640
 
2641
- # ---- x-ip-token relay ----
2642
- # ZeroGPU uses the x-ip-token header to attribute quota to the logged-in user.
2643
- # HF's proxy injects it on browser requests but NOT on raw JS fetch() calls.
2644
- # Solution: expose a lightweight Gradio endpoint that reads the token from the
2645
- # incoming request (where HF DOES inject it) and returns it. The JS _GLOBAL_JS
2646
- # calls _fetchIpToken() immediately before each regen fetch, getting a fresh
2647
- # token every time (the JWT expires ~170s so caching is unreliable).
2648
- _ip_token_tb = gr.Textbox(render=False)
2649
- def _get_ip_token(request: gr.Request):
2650
- return request.headers.get("x-ip-token", "")
2651
- gr.Button(render=False).click(
2652
- fn=_get_ip_token, inputs=[], outputs=[_ip_token_tb],
2653
- api_name="get_ip_token",
2654
- )
2655
 
2656
  print("[startup] app.py fully loaded β€” regen handlers registered, SSR disabled")
2657
  demo.queue(max_size=10).launch(ssr_mode=False, height=900, allowed_paths=["/tmp"])
 
1976
  if (window._wf_global_listener) return; // already registered
1977
  window._wf_global_listener = true;
1978
 
1979
+ // ── ZeroGPU quota attribution ──
1980
+ // HF Spaces run inside an iframe on huggingface.co. Gradio's own JS client
1981
+ // gets ZeroGPU auth headers (x-zerogpu-token, x-zerogpu-uuid) by sending a
1982
+ // postMessage("zerogpu-headers") to the parent frame. The parent responds
1983
+ // with a Map of headers that must be included on queue/join calls.
1984
+ // We replicate this exact mechanism so our raw regen fetch() calls are
1985
+ // attributed to the logged-in user's Pro quota.
1986
+ function _fetchZerogpuHeaders() {
1987
  return new Promise(function(resolve) {
1988
+ // Check if we're in an HF iframe with zerogpu support
1989
+ if (typeof window === 'undefined' || window.parent === window || !window.supports_zerogpu_headers) {
1990
+ console.log('[zerogpu] not in HF iframe or no zerogpu support');
1991
+ resolve({});
1992
+ return;
1993
+ }
1994
+ // Determine origin — same logic as Gradio's client
1995
+ var hostname = window.location.hostname;
1996
+ var hfhubdev = 'dev.spaces.huggingface.tech';
1997
+ var origin = hostname.includes('.dev.')
1998
+ ? 'https://moon-' + hostname.split('.')[1] + '.' + hfhubdev
1999
+ : 'https://huggingface.co';
2000
+ // Use MessageChannel just like Gradio's post_message helper
2001
+ var channel = new MessageChannel();
2002
+ var done = false;
2003
+ channel.port1.onmessage = function(ev) {
2004
+ channel.port1.close();
2005
+ done = true;
2006
+ var headers = ev.data;
2007
+ if (headers && typeof headers === 'object') {
2008
+ // Convert Map to plain object if needed
2009
+ var obj = {};
2010
+ if (typeof headers.forEach === 'function') {
2011
+ headers.forEach(function(v, k) { obj[k] = v; });
2012
+ } else {
2013
+ obj = headers;
2014
  }
2015
+ console.log('[zerogpu] got headers from parent:', Object.keys(obj).join(', '));
2016
+ resolve(obj);
2017
+ } else {
2018
+ resolve({});
2019
+ }
2020
+ };
2021
+ window.parent.postMessage('zerogpu-headers', origin, [channel.port2]);
2022
+ // Timeout: don't block regen if parent doesn't respond
2023
+ setTimeout(function() { if (!done) { done = true; channel.port1.close(); resolve({}); } }, 3000);
2024
  });
2025
  }
2026
 
 
2146
  console.warn('[fireRegen] fn_index not found for api_name:', apiName);
2147
  return;
2148
  }
2149
+ // Get ZeroGPU auth headers from the HF parent frame (same mechanism
2150
+ // Gradio's own JS client uses), then fire the regen queue/join call.
2151
+ // Falls back to user-supplied HF token if zerogpu headers aren't available.
2152
+ _fetchZerogpuHeaders().then(function(zerogpuHeaders) {
 
 
 
 
 
2153
  var regenHeaders = {'Content-Type': 'application/json'};
2154
+ var hasZerogpu = zerogpuHeaders && Object.keys(zerogpuHeaders).length > 0;
2155
+ if (hasZerogpu) {
2156
+ // Merge zerogpu headers (x-zerogpu-token, x-zerogpu-uuid)
2157
+ for (var k in zerogpuHeaders) { regenHeaders[k] = zerogpuHeaders[k]; }
2158
+ console.log('[fireRegen] using zerogpu headers from parent frame');
 
2159
  } else {
2160
+ // Fallback: try user-supplied HF token from Settings
2161
+ var userHfToken = '';
2162
+ var hfTokenEl = document.getElementById('hf_token_input');
2163
+ if (hfTokenEl) { var inp = hfTokenEl.querySelector('input,textarea'); if (inp) userHfToken = (inp.value || '').trim(); }
2164
+ if (userHfToken) {
2165
+ regenHeaders['Authorization'] = 'Bearer ' + userHfToken;
2166
+ console.log('[fireRegen] using HF token fallback for quota attribution');
2167
+ } else {
2168
+ console.warn('[fireRegen] no zerogpu headers or HF token β€” may use anonymous quota');
2169
+ }
2170
  }
2171
  fetch('/gradio_api/queue/join', {
2172
  method: 'POST',
 
2189
  var sb = document.getElementById('wf_statusbar_' + slot_id);
2190
  if (sb) { sb.style.color = '#e05252'; sb.textContent = '\u26a0 Request failed: ' + e.message; }
2191
  });
2192
+ });
 
 
 
 
 
 
2193
  }
2194
 
2195
  // Subscribe to Gradio SSE stream for an event and apply outputs to DOM.
 
2642
  api_name="xregen_hunyuan",
2643
  )
2644
 
2645
+ # NOTE: ZeroGPU quota attribution is now handled via postMessage("zerogpu-headers")
2646
+ # to the HF parent frame — the same mechanism Gradio's own JS client uses.
2647
+ # This replaced the old x-ip-token relay approach which was unreliable.
2648
+ # The HF token input in Settings is kept as a fallback.
 
 
 
 
 
 
 
 
 
 
2649
 
2650
  print("[startup] app.py fully loaded β€” regen handlers registered, SSR disabled")
2651
  demo.queue(max_size=10).launch(ssr_mode=False, height=900, allowed_paths=["/tmp"])