BoxOfColors Claude Sonnet 4.6 committed on
Commit
8b3b191
·
1 Parent(s): 401978a

Fix ZeroGPU Pro quota: fetch fresh x-ip-token before each regen call

Browse files

The JWT token expires in ~170s. Previously we fetched it once on page load
and cached it, so by the time the user triggered a regen the token had
often already expired. Now _fetchIpToken() returns a Promise, and fireRegen()
awaits a fresh token immediately before every queue/join call, so that
quota is attributed to the logged-in Pro user whenever a token is available.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (1) hide show
  1. app.py +58 -59
app.py CHANGED
@@ -1970,52 +1970,46 @@ _GLOBAL_JS = """
1970
  if (window._wf_global_listener) return; // already registered
1971
  window._wf_global_listener = true;
1972
 
1973
- // Fetch the x-ip-token via a Gradio endpoint where HF's proxy injects it.
1974
- // Store in window.__hf_ip_token so all regen fetch() calls can include it,
1975
- // ensuring ZeroGPU attributes quota to the logged-in user's Pro account.
1976
- window.__hf_ip_token = '';
1977
  function _fetchIpToken() {
1978
- fetch('/gradio_api/queue/join', {
1979
- method: 'POST',
1980
- credentials: 'include',
1981
- headers: {'Content-Type': 'application/json'},
1982
- body: JSON.stringify({
1983
- data: [], fn_index: null, session_hash: window.__gradio_session_hash__,
1984
- event_data: null, trigger_id: null
1985
- })
1986
- }).catch(function() {});
1987
- // Use the dedicated get_ip_token endpoint
1988
- var ipFnIndex = null;
1989
- var deps = window.gradio_config && window.gradio_config.dependencies;
1990
- if (deps) deps.forEach(function(d, i) { if (d.api_name === 'get_ip_token') ipFnIndex = i; });
1991
- if (ipFnIndex === null) return;
1992
- fetch('/gradio_api/queue/join', {
1993
- method: 'POST',
1994
- credentials: 'include',
1995
- headers: {'Content-Type': 'application/json'},
1996
- body: JSON.stringify({
1997
- data: [], fn_index: ipFnIndex,
1998
- session_hash: window.__gradio_session_hash__,
1999
- event_data: null, trigger_id: null
2000
- })
2001
- }).then(function(r) { return r.json(); }).then(function(j) {
2002
- if (!j.event_id) return;
2003
- var es = new EventSource('/gradio_api/queue/data?session_hash=' + window.__gradio_session_hash__);
2004
- es.onmessage = function(e) {
2005
- var msg; try { msg = JSON.parse(e.data); } catch(_) { return; }
2006
- if (msg.event_id !== j.event_id) return;
2007
- if (msg.msg === 'process_completed') {
2008
- es.close();
2009
- var token = msg.output && msg.output.data && msg.output.data[0];
2010
- if (token) { window.__hf_ip_token = token; console.log('[zerogpu] x-ip-token acquired, length:', token.length); }
2011
- }
2012
- if (msg.msg === 'close_stream') es.close();
2013
- };
2014
- es.onerror = function() { es.close(); };
2015
- }).catch(function() {});
2016
  }
2017
- // Run after Gradio config is ready
2018
- setTimeout(_fetchIpToken, 1500);
2019
 
2020
  // Cache: api_name -> fn_index, built once from gradio_config.dependencies
2021
  let _fnIndexCache = null;
@@ -2139,19 +2133,24 @@ _GLOBAL_JS = """
2139
  console.warn('[fireRegen] fn_index not found for api_name:', apiName);
2140
  return;
2141
  }
2142
- var regenHeaders = {'Content-Type': 'application/json'};
2143
- if (window.__hf_ip_token) regenHeaders['x-ip-token'] = window.__hf_ip_token;
2144
- fetch('/gradio_api/queue/join', {
2145
- method: 'POST',
2146
- credentials: 'include',
2147
- headers: regenHeaders,
2148
- body: JSON.stringify({
2149
- data: data,
2150
- fn_index: fnIndex,
2151
- session_hash: window.__gradio_session_hash__,
2152
- event_data: null,
2153
- trigger_id: null
2154
- })
 
 
 
 
 
2155
  }).then(function(r) { return r.json(); }).then(function(j) {
2156
  if (!j.event_id) { console.error('[fireRegen] no event_id:', j); return; }
2157
  console.log('[fireRegen] queued, event_id:', j.event_id);
@@ -2607,8 +2606,8 @@ with gr.Blocks(title="Generate Audio for Video", css=_SLOT_CSS, js=_GLOBAL_JS) a
2607
  # HF's proxy injects it on browser requests but NOT on raw JS fetch() calls.
2608
  # Solution: expose a lightweight Gradio endpoint that reads the token from the
2609
  # incoming request (where HF DOES inject it) and returns it. The JS _GLOBAL_JS
2610
- # calls this on page load, stores the result in window.__hf_ip_token, and
2611
- # includes it as a header on all subsequent queue/join fetch calls.
2612
  _ip_token_tb = gr.Textbox(render=False)
2613
  def _get_ip_token(request: gr.Request):
2614
  return request.headers.get("x-ip-token", "")
 
1970
  if (window._wf_global_listener) return; // already registered
1971
  window._wf_global_listener = true;
1972
 
1973
+ // Fetch a fresh x-ip-token via a Gradio endpoint where HF's proxy injects it.
1974
+ // Returns a Promise<string> that resolves with the token (or '' on failure).
1975
+ // Called immediately before each regen fetch so the token is always fresh
1976
+ // (the JWT expires in ~170s, so caching across regen calls is unreliable).
1977
  function _fetchIpToken() {
1978
+ return new Promise(function(resolve) {
1979
+ var ipFnIndex = null;
1980
+ var deps = window.gradio_config && window.gradio_config.dependencies;
1981
+ if (deps) deps.forEach(function(d, i) { if (d.api_name === 'get_ip_token') ipFnIndex = i; });
1982
+ if (ipFnIndex === null) { resolve(''); return; }
1983
+ fetch('/gradio_api/queue/join', {
1984
+ method: 'POST',
1985
+ credentials: 'include',
1986
+ headers: {'Content-Type': 'application/json'},
1987
+ body: JSON.stringify({
1988
+ data: [], fn_index: ipFnIndex,
1989
+ session_hash: window.__gradio_session_hash__,
1990
+ event_data: null, trigger_id: null
1991
+ })
1992
+ }).then(function(r) { return r.json(); }).then(function(j) {
1993
+ if (!j.event_id) { resolve(''); return; }
1994
+ var es = new EventSource('/gradio_api/queue/data?session_hash=' + window.__gradio_session_hash__);
1995
+ var done = false;
1996
+ es.onmessage = function(e) {
1997
+ var msg; try { msg = JSON.parse(e.data); } catch(_) { return; }
1998
+ if (msg.event_id !== j.event_id) return;
1999
+ if (msg.msg === 'process_completed') {
2000
+ es.close(); done = true;
2001
+ var token = msg.output && msg.output.data && msg.output.data[0];
2002
+ console.log('[zerogpu] fresh x-ip-token acquired, length:', token ? token.length : 0);
2003
+ resolve(token || '');
2004
+ }
2005
+ if (msg.msg === 'close_stream') { es.close(); if (!done) { done = true; resolve(''); } }
2006
+ };
2007
+ es.onerror = function() { es.close(); if (!done) { done = true; resolve(''); } };
2008
+ // Timeout safety: if SSE hangs, don't block regen forever
2009
+ setTimeout(function() { if (!done) { done = true; es.close(); resolve(''); } }, 5000);
2010
+ }).catch(function() { resolve(''); });
2011
+ });
 
 
 
 
2012
  }
 
 
2013
 
2014
  // Cache: api_name -> fn_index, built once from gradio_config.dependencies
2015
  let _fnIndexCache = null;
 
2133
  console.warn('[fireRegen] fn_index not found for api_name:', apiName);
2134
  return;
2135
  }
2136
+ // Fetch a fresh x-ip-token immediately before queuing (the JWT expires in ~170s,
2137
+ // so we always grab a new one to ensure Pro quota attribution).
2138
+ _fetchIpToken().then(function(ipToken) {
2139
+ var regenHeaders = {'Content-Type': 'application/json'};
2140
+ if (ipToken) { regenHeaders['x-ip-token'] = ipToken; console.log('[fireRegen] using fresh x-ip-token, len:', ipToken.length); }
2141
+ else { console.warn('[fireRegen] no x-ip-token available, regen may use anonymous quota'); }
2142
+ return fetch('/gradio_api/queue/join', {
2143
+ method: 'POST',
2144
+ credentials: 'include',
2145
+ headers: regenHeaders,
2146
+ body: JSON.stringify({
2147
+ data: data,
2148
+ fn_index: fnIndex,
2149
+ session_hash: window.__gradio_session_hash__,
2150
+ event_data: null,
2151
+ trigger_id: null
2152
+ })
2153
+ });
2154
  }).then(function(r) { return r.json(); }).then(function(j) {
2155
  if (!j.event_id) { console.error('[fireRegen] no event_id:', j); return; }
2156
  console.log('[fireRegen] queued, event_id:', j.event_id);
 
2606
  # HF's proxy injects it on browser requests but NOT on raw JS fetch() calls.
2607
  # Solution: expose a lightweight Gradio endpoint that reads the token from the
2608
  # incoming request (where HF DOES inject it) and returns it. The JS _GLOBAL_JS
2609
+ # calls _fetchIpToken() immediately before each regen fetch, getting a fresh
2610
+ # token every time (the JWT expires in ~170s, so caching is unreliable).
2611
  _ip_token_tb = gr.Textbox(render=False)
2612
  def _get_ip_token(request: gr.Request):
2613
  return request.headers.get("x-ip-token", "")