BoxOfColors Claude Sonnet 4.6 committed on
Commit
401978a
·
1 Parent(s): 2fdad19

fix: relay x-ip-token from HF proxy to JS for ZeroGPU Pro quota attribution

Browse files

ZeroGPU identifies the user via the x-ip-token header, which HF's proxy
injects on browser requests but NOT on raw JS fetch() calls.

Solution:
- Add get_ip_token Gradio endpoint that reads x-ip-token from gr.Request
- On page load, JS calls this endpoint to retrieve the token and stores
it in window.__hf_ip_token
- All regen fetch() calls now include x-ip-token as a request header,
so ZeroGPU correctly attributes quota to the logged-in Pro account

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (1) hide show
  1. app.py +64 -1
app.py CHANGED
@@ -1970,6 +1970,52 @@ _GLOBAL_JS = """
1970
  if (window._wf_global_listener) return; // already registered
1971
  window._wf_global_listener = true;
1972
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1973
 
1974
  // Cache: api_name -> fn_index, built once from gradio_config.dependencies
1975
  let _fnIndexCache = null;
@@ -2093,10 +2139,12 @@ _GLOBAL_JS = """
2093
  console.warn('[fireRegen] fn_index not found for api_name:', apiName);
2094
  return;
2095
  }
 
 
2096
  fetch('/gradio_api/queue/join', {
2097
  method: 'POST',
2098
  credentials: 'include',
2099
- headers: {'Content-Type': 'application/json'},
2100
  body: JSON.stringify({
2101
  data: data,
2102
  fn_index: fnIndex,
@@ -2554,5 +2602,20 @@ with gr.Blocks(title="Generate Audio for Video", css=_SLOT_CSS, js=_GLOBAL_JS) a
2554
  api_name="xregen_hunyuan",
2555
  )
2556
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2557
  print("[startup] app.py fully loaded — regen handlers registered, SSR disabled")
2558
  demo.queue(max_size=10).launch(ssr_mode=False, height=900, allowed_paths=["/tmp"])
 
1970
  if (window._wf_global_listener) return; // already registered
1971
  window._wf_global_listener = true;
1972
 
1973
+ // Fetch the x-ip-token via a Gradio endpoint where HF's proxy injects it.
1974
+ // Store in window.__hf_ip_token so all regen fetch() calls can include it,
1975
+ // ensuring ZeroGPU attributes quota to the logged-in user's Pro account.
1976
+ window.__hf_ip_token = '';
1977
+ function _fetchIpToken() {
1978
+ fetch('/gradio_api/queue/join', {
1979
+ method: 'POST',
1980
+ credentials: 'include',
1981
+ headers: {'Content-Type': 'application/json'},
1982
+ body: JSON.stringify({
1983
+ data: [], fn_index: null, session_hash: window.__gradio_session_hash__,
1984
+ event_data: null, trigger_id: null
1985
+ })
1986
+ }).catch(function() {});
1987
+ // Use the dedicated get_ip_token endpoint
1988
+ var ipFnIndex = null;
1989
+ var deps = window.gradio_config && window.gradio_config.dependencies;
1990
+ if (deps) deps.forEach(function(d, i) { if (d.api_name === 'get_ip_token') ipFnIndex = i; });
1991
+ if (ipFnIndex === null) return;
1992
+ fetch('/gradio_api/queue/join', {
1993
+ method: 'POST',
1994
+ credentials: 'include',
1995
+ headers: {'Content-Type': 'application/json'},
1996
+ body: JSON.stringify({
1997
+ data: [], fn_index: ipFnIndex,
1998
+ session_hash: window.__gradio_session_hash__,
1999
+ event_data: null, trigger_id: null
2000
+ })
2001
+ }).then(function(r) { return r.json(); }).then(function(j) {
2002
+ if (!j.event_id) return;
2003
+ var es = new EventSource('/gradio_api/queue/data?session_hash=' + window.__gradio_session_hash__);
2004
+ es.onmessage = function(e) {
2005
+ var msg; try { msg = JSON.parse(e.data); } catch(_) { return; }
2006
+ if (msg.event_id !== j.event_id) return;
2007
+ if (msg.msg === 'process_completed') {
2008
+ es.close();
2009
+ var token = msg.output && msg.output.data && msg.output.data[0];
2010
+ if (token) { window.__hf_ip_token = token; console.log('[zerogpu] x-ip-token acquired, length:', token.length); }
2011
+ }
2012
+ if (msg.msg === 'close_stream') es.close();
2013
+ };
2014
+ es.onerror = function() { es.close(); };
2015
+ }).catch(function() {});
2016
+ }
2017
+ // Run after Gradio config is ready
2018
+ setTimeout(_fetchIpToken, 1500);
2019
 
2020
  // Cache: api_name -> fn_index, built once from gradio_config.dependencies
2021
  let _fnIndexCache = null;
 
2139
  console.warn('[fireRegen] fn_index not found for api_name:', apiName);
2140
  return;
2141
  }
2142
+ var regenHeaders = {'Content-Type': 'application/json'};
2143
+ if (window.__hf_ip_token) regenHeaders['x-ip-token'] = window.__hf_ip_token;
2144
  fetch('/gradio_api/queue/join', {
2145
  method: 'POST',
2146
  credentials: 'include',
2147
+ headers: regenHeaders,
2148
  body: JSON.stringify({
2149
  data: data,
2150
  fn_index: fnIndex,
 
2602
  api_name="xregen_hunyuan",
2603
  )
2604
 
2605
+ # ---- x-ip-token relay ----
2606
+ # ZeroGPU uses the x-ip-token header to attribute quota to the logged-in user.
2607
+ # HF's proxy injects it on browser requests but NOT on raw JS fetch() calls.
2608
+ # Solution: expose a lightweight Gradio endpoint that reads the token from the
2609
+ # incoming request (where HF DOES inject it) and returns it. The JS _GLOBAL_JS
2610
+ # calls this on page load, stores the result in window.__hf_ip_token, and
2611
+ # includes it as a header on all subsequent queue/join fetch calls.
2612
+ _ip_token_tb = gr.Textbox(render=False)
2613
+ def _get_ip_token(request: gr.Request):
2614
+ return request.headers.get("x-ip-token", "")
2615
+ gr.Button(render=False).click(
2616
+ fn=_get_ip_token, inputs=[], outputs=[_ip_token_tb],
2617
+ api_name="get_ip_token",
2618
+ )
2619
+
2620
  print("[startup] app.py fully loaded — regen handlers registered, SSR disabled")
2621
  demo.queue(max_size=10).launch(ssr_mode=False, height=900, allowed_paths=["/tmp"])