Spaces:
Running on Zero
Running on Zero
Commit ·
8b3b191
1
Parent(s): 401978a
Fix ZeroGPU Pro quota: fetch fresh x-ip-token before each regen call
Browse files
The JWT token expires in ~170s. Previously we fetched it once on page load
and cached it, so by the time the user triggered a regen the token had
already expired. Now _fetchIpToken() returns a Promise and fireRegen()
awaits a fresh token immediately before every queue/join call, ensuring
quota is always attributed to the logged-in Pro user.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
app.py
CHANGED
|
@@ -1970,52 +1970,46 @@ _GLOBAL_JS = """
|
|
| 1970 |
if (window._wf_global_listener) return; // already registered
|
| 1971 |
window._wf_global_listener = true;
|
| 1972 |
|
| 1973 |
-
// Fetch
|
| 1974 |
-
//
|
| 1975 |
-
//
|
| 1976 |
-
|
| 1977 |
function _fetchIpToken() {
|
| 1978 |
-
|
| 1979 |
-
|
| 1980 |
-
|
| 1981 |
-
|
| 1982 |
-
|
| 1983 |
-
|
| 1984 |
-
|
| 1985 |
-
|
| 1986 |
-
|
| 1987 |
-
|
| 1988 |
-
|
| 1989 |
-
|
| 1990 |
-
|
| 1991 |
-
|
| 1992 |
-
|
| 1993 |
-
|
| 1994 |
-
|
| 1995 |
-
|
| 1996 |
-
|
| 1997 |
-
|
| 1998 |
-
|
| 1999 |
-
|
| 2000 |
-
|
| 2001 |
-
|
| 2002 |
-
|
| 2003 |
-
|
| 2004 |
-
|
| 2005 |
-
|
| 2006 |
-
|
| 2007 |
-
|
| 2008 |
-
|
| 2009 |
-
|
| 2010 |
-
|
| 2011 |
-
|
| 2012 |
-
if (msg.msg === 'close_stream') es.close();
|
| 2013 |
-
};
|
| 2014 |
-
es.onerror = function() { es.close(); };
|
| 2015 |
-
}).catch(function() {});
|
| 2016 |
}
|
| 2017 |
-
// Run after Gradio config is ready
|
| 2018 |
-
setTimeout(_fetchIpToken, 1500);
|
| 2019 |
|
| 2020 |
// Cache: api_name -> fn_index, built once from gradio_config.dependencies
|
| 2021 |
let _fnIndexCache = null;
|
|
@@ -2139,19 +2133,24 @@ _GLOBAL_JS = """
|
|
| 2139 |
console.warn('[fireRegen] fn_index not found for api_name:', apiName);
|
| 2140 |
return;
|
| 2141 |
}
|
| 2142 |
-
|
| 2143 |
-
|
| 2144 |
-
|
| 2145 |
-
|
| 2146 |
-
|
| 2147 |
-
|
| 2148 |
-
|
| 2149 |
-
|
| 2150 |
-
|
| 2151 |
-
|
| 2152 |
-
|
| 2153 |
-
|
| 2154 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2155 |
}).then(function(r) { return r.json(); }).then(function(j) {
|
| 2156 |
if (!j.event_id) { console.error('[fireRegen] no event_id:', j); return; }
|
| 2157 |
console.log('[fireRegen] queued, event_id:', j.event_id);
|
|
@@ -2607,8 +2606,8 @@ with gr.Blocks(title="Generate Audio for Video", css=_SLOT_CSS, js=_GLOBAL_JS) a
|
|
| 2607 |
# HF's proxy injects it on browser requests but NOT on raw JS fetch() calls.
|
| 2608 |
# Solution: expose a lightweight Gradio endpoint that reads the token from the
|
| 2609 |
# incoming request (where HF DOES inject it) and returns it. The JS _GLOBAL_JS
|
| 2610 |
-
# calls
|
| 2611 |
-
#
|
| 2612 |
_ip_token_tb = gr.Textbox(render=False)
|
| 2613 |
def _get_ip_token(request: gr.Request):
|
| 2614 |
return request.headers.get("x-ip-token", "")
|
|
|
|
| 1970 |
if (window._wf_global_listener) return; // already registered
|
| 1971 |
window._wf_global_listener = true;
|
| 1972 |
|
| 1973 |
+
// Fetch a fresh x-ip-token via a Gradio endpoint where HF's proxy injects it.
|
| 1974 |
+
// Returns a Promise<string> — resolves with the token (or '' on failure).
|
| 1975 |
+
// Called immediately before each regen fetch so the token is always fresh
|
| 1976 |
+
// (the JWT expires in ~170s, so caching across regen calls is unreliable).
|
| 1977 |
/**
 * Ask the Gradio `get_ip_token` endpoint for a fresh x-ip-token.
 *
 * Joins the Gradio queue for that endpoint, then listens on the session's
 * SSE stream for the matching `process_completed` message.
 *
 * @returns {Promise<string>} Resolves with the token, or with '' when the
 *   endpoint or session hash is missing, the request fails, the stream
 *   closes/errors first, or nothing arrives within 5 seconds. Never rejects.
 */
function _fetchIpToken() {
    return new Promise(function(resolve) {
        var sessionHash = window.__gradio_session_hash__;
        var deps = window.gradio_config && window.gradio_config.dependencies;

        // fn_index is the position of the dependency whose api_name matches.
        var ipFnIndex = null;
        if (deps) deps.forEach(function(d, i) { if (d && d.api_name === 'get_ip_token') ipFnIndex = i; });

        // Without the endpoint or a session hash there is nothing to join or
        // listen on; bail out instead of waiting for the timeout.
        if (ipFnIndex === null || !sessionHash) { resolve(''); return; }

        var es = null;
        var timer = null;
        var done = false;

        // Single idempotent exit path: settle once, stop the timer, close the stream.
        function finish(token) {
            if (done) return;
            done = true;
            if (timer !== null) clearTimeout(timer);
            if (es) es.close();
            resolve(typeof token === 'string' ? token : '');
        }

        fetch('/gradio_api/queue/join', {
            method: 'POST',
            credentials: 'include',
            headers: {'Content-Type': 'application/json'},
            body: JSON.stringify({
                data: [], fn_index: ipFnIndex,
                session_hash: sessionHash,
                event_data: null, trigger_id: null
            })
        }).then(function(r) { return r.json(); }).then(function(j) {
            if (!j || !j.event_id) { finish(''); return; }

            es = new EventSource('/gradio_api/queue/data?session_hash=' + encodeURIComponent(sessionHash));

            es.onmessage = function(e) {
                var msg; try { msg = JSON.parse(e.data); } catch(_) { return; }
                if (!msg) return;

                // Handle close_stream BEFORE the event_id filter: a stream-level
                // close message may carry no event_id and would otherwise be dropped.
                if (msg.msg === 'close_stream') { finish(''); return; }

                // The stream is per session; ignore messages for other events.
                if (msg.event_id !== j.event_id) return;

                if (msg.msg === 'process_completed') {
                    var raw = msg.output && msg.output.data && msg.output.data[0];
                    var token = typeof raw === 'string' ? raw : '';
                    console.log('[zerogpu] fresh x-ip-token acquired, length:', token.length);
                    finish(token);
                }
            };

            es.onerror = function() { finish(''); };

            // Timeout safety: if SSE hangs, don't block regen forever.
            timer = setTimeout(function() { finish(''); }, 5000);
        }).catch(function() { finish(''); });
    });
}
|
|
|
|
|
|
|
| 2013 |
|
| 2014 |
// Cache: api_name -> fn_index, built once from gradio_config.dependencies
|
| 2015 |
let _fnIndexCache = null;
|
|
|
|
| 2133 |
console.warn('[fireRegen] fn_index not found for api_name:', apiName);
|
| 2134 |
return;
|
| 2135 |
}
|
| 2136 |
+
// Fetch a fresh x-ip-token immediately before queuing (JWT expires ~170s,
|
| 2137 |
+
// so we always grab a new one to ensure Pro quota attribution).
|
| 2138 |
+
_fetchIpToken().then(function(ipToken) {
|
| 2139 |
+
var regenHeaders = {'Content-Type': 'application/json'};
|
| 2140 |
+
if (ipToken) { regenHeaders['x-ip-token'] = ipToken; console.log('[fireRegen] using fresh x-ip-token, len:', ipToken.length); }
|
| 2141 |
+
else { console.warn('[fireRegen] no x-ip-token available, regen may use anonymous quota'); }
|
| 2142 |
+
return fetch('/gradio_api/queue/join', {
|
| 2143 |
+
method: 'POST',
|
| 2144 |
+
credentials: 'include',
|
| 2145 |
+
headers: regenHeaders,
|
| 2146 |
+
body: JSON.stringify({
|
| 2147 |
+
data: data,
|
| 2148 |
+
fn_index: fnIndex,
|
| 2149 |
+
session_hash: window.__gradio_session_hash__,
|
| 2150 |
+
event_data: null,
|
| 2151 |
+
trigger_id: null
|
| 2152 |
+
})
|
| 2153 |
+
});
|
| 2154 |
}).then(function(r) { return r.json(); }).then(function(j) {
|
| 2155 |
if (!j.event_id) { console.error('[fireRegen] no event_id:', j); return; }
|
| 2156 |
console.log('[fireRegen] queued, event_id:', j.event_id);
|
|
|
|
| 2606 |
# HF's proxy injects it on browser requests but NOT on raw JS fetch() calls.
|
| 2607 |
# Solution: expose a lightweight Gradio endpoint that reads the token from the
|
| 2608 |
# incoming request (where HF DOES inject it) and returns it. The JS _GLOBAL_JS
|
| 2609 |
+
# calls _fetchIpToken() immediately before each regen fetch, getting a fresh
|
| 2610 |
+
# token every time (the JWT expires ~170s so caching is unreliable).
|
| 2611 |
_ip_token_tb = gr.Textbox(render=False)
|
| 2612 |
def _get_ip_token(request: gr.Request):
|
| 2613 |
return request.headers.get("x-ip-token", "")
|