Spaces:
Running on Zero
Running on Zero
Commit ·
af578ae
1
Parent(s): 1afc0fb
Use Gradio's postMessage zerogpu-headers for regen quota attribution
Browse files
Instead of the unreliable x-ip-token relay, we now use the same mechanism
Gradio's own JS client uses: postMessage("zerogpu-headers") to the HF
parent frame, which responds with x-zerogpu-token and x-zerogpu-uuid.
These are the actual headers that ZeroGPU uses for Pro quota attribution.
The HF token input in Settings is kept as a fallback.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
app.py
CHANGED
|
@@ -1976,44 +1976,51 @@ _GLOBAL_JS = """
|
|
| 1976 |
if (window._wf_global_listener) return; // already registered
|
| 1977 |
window._wf_global_listener = true;
|
| 1978 |
|
| 1979 |
-
//
|
| 1980 |
-
//
|
| 1981 |
-
//
|
| 1982 |
-
// (
|
| 1983 |
-
|
|
|
|
|
|
|
|
|
|
| 1984 |
return new Promise(function(resolve) {
|
| 1985 |
-
|
| 1986 |
-
|
| 1987 |
-
|
| 1988 |
-
|
| 1989 |
-
|
| 1990 |
-
|
| 1991 |
-
|
| 1992 |
-
|
| 1993 |
-
|
| 1994 |
-
|
| 1995 |
-
|
| 1996 |
-
|
| 1997 |
-
|
| 1998 |
-
|
| 1999 |
-
|
| 2000 |
-
|
| 2001 |
-
|
| 2002 |
-
|
| 2003 |
-
|
| 2004 |
-
|
| 2005 |
-
|
| 2006 |
-
|
| 2007 |
-
|
| 2008 |
-
|
| 2009 |
-
|
|
|
|
| 2010 |
}
|
| 2011 |
-
|
| 2012 |
-
|
| 2013 |
-
|
| 2014 |
-
|
| 2015 |
-
|
| 2016 |
-
}
|
|
|
|
|
|
|
|
|
|
| 2017 |
});
|
| 2018 |
}
|
| 2019 |
|
|
@@ -2139,24 +2146,27 @@ _GLOBAL_JS = """
|
|
| 2139 |
console.warn('[fireRegen] fn_index not found for api_name:', apiName);
|
| 2140 |
return;
|
| 2141 |
}
|
| 2142 |
-
//
|
| 2143 |
-
//
|
| 2144 |
-
//
|
| 2145 |
-
|
| 2146 |
-
var userHfToken = '';
|
| 2147 |
-
var hfTokenEl = document.getElementById('hf_token_input');
|
| 2148 |
-
if (hfTokenEl) { var inp = hfTokenEl.querySelector('input,textarea'); if (inp) userHfToken = (inp.value || '').trim(); }
|
| 2149 |
-
|
| 2150 |
-
var _doRegen = function(ipToken) {
|
| 2151 |
var regenHeaders = {'Content-Type': 'application/json'};
|
| 2152 |
-
|
| 2153 |
-
|
| 2154 |
-
|
| 2155 |
-
|
| 2156 |
-
|
| 2157 |
-
console.log('[fireRegen] using fresh x-ip-token, len:', ipToken.length);
|
| 2158 |
} else {
|
| 2159 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2160 |
}
|
| 2161 |
fetch('/gradio_api/queue/join', {
|
| 2162 |
method: 'POST',
|
|
@@ -2179,13 +2189,7 @@ _GLOBAL_JS = """
|
|
| 2179 |
var sb = document.getElementById('wf_statusbar_' + slot_id);
|
| 2180 |
if (sb) { sb.style.color = '#e05252'; sb.textContent = '\u26a0 Request failed: ' + e.message; }
|
| 2181 |
});
|
| 2182 |
-
};
|
| 2183 |
-
// If user provided HF token, skip x-ip-token relay (token is sufficient)
|
| 2184 |
-
if (userHfToken) {
|
| 2185 |
-
_doRegen('');
|
| 2186 |
-
} else {
|
| 2187 |
-
_fetchIpToken().then(_doRegen);
|
| 2188 |
-
}
|
| 2189 |
}
|
| 2190 |
|
| 2191 |
// Subscribe to Gradio SSE stream for an event and apply outputs to DOM.
|
|
@@ -2638,20 +2642,10 @@ with gr.Blocks(title="Generate Audio for Video", css=_SLOT_CSS, js=_GLOBAL_JS) a
|
|
| 2638 |
api_name="xregen_hunyuan",
|
| 2639 |
)
|
| 2640 |
|
| 2641 |
-
#
|
| 2642 |
-
#
|
| 2643 |
-
#
|
| 2644 |
-
#
|
| 2645 |
-
# incoming request (where HF DOES inject it) and returns it. The JS _GLOBAL_JS
|
| 2646 |
-
# calls _fetchIpToken() immediately before each regen fetch, getting a fresh
|
| 2647 |
-
# token every time (the JWT expires ~170s so caching is unreliable).
|
| 2648 |
-
_ip_token_tb = gr.Textbox(render=False)
|
| 2649 |
-
def _get_ip_token(request: gr.Request):
|
| 2650 |
-
return request.headers.get("x-ip-token", "")
|
| 2651 |
-
gr.Button(render=False).click(
|
| 2652 |
-
fn=_get_ip_token, inputs=[], outputs=[_ip_token_tb],
|
| 2653 |
-
api_name="get_ip_token",
|
| 2654 |
-
)
|
| 2655 |
|
| 2656 |
print("[startup] app.py fully loaded β regen handlers registered, SSR disabled")
|
| 2657 |
demo.queue(max_size=10).launch(ssr_mode=False, height=900, allowed_paths=["/tmp"])
|
|
|
|
| 1976 |
if (window._wf_global_listener) return; // already registered
|
| 1977 |
window._wf_global_listener = true;
|
| 1978 |
|
| 1979 |
+
// ββ ZeroGPU quota attribution ββ
|
| 1980 |
+
// HF Spaces run inside an iframe on huggingface.co. Gradio's own JS client
|
| 1981 |
+
// gets ZeroGPU auth headers (x-zerogpu-token, x-zerogpu-uuid) by sending a
|
| 1982 |
+
// postMessage("zerogpu-headers") to the parent frame. The parent responds
|
| 1983 |
+
// with a Map of headers that must be included on queue/join calls.
|
| 1984 |
+
// We replicate this exact mechanism so our raw regen fetch() calls are
|
| 1985 |
+
// attributed to the logged-in user's Pro quota.
|
| 1986 |
+
function _fetchZerogpuHeaders() {
|
| 1987 |
return new Promise(function(resolve) {
|
| 1988 |
+
// Check if we're in an HF iframe with zerogpu support
|
| 1989 |
+
if (typeof window === 'undefined' || window.parent === window || !window.supports_zerogpu_headers) {
|
| 1990 |
+
console.log('[zerogpu] not in HF iframe or no zerogpu support');
|
| 1991 |
+
resolve({});
|
| 1992 |
+
return;
|
| 1993 |
+
}
|
| 1994 |
+
// Determine origin β same logic as Gradio's client
|
| 1995 |
+
var hostname = window.location.hostname;
|
| 1996 |
+
var hfhubdev = 'dev.spaces.huggingface.tech';
|
| 1997 |
+
var origin = hostname.includes('.dev.')
|
| 1998 |
+
? 'https://moon-' + hostname.split('.')[1] + '.' + hfhubdev
|
| 1999 |
+
: 'https://huggingface.co';
|
| 2000 |
+
// Use MessageChannel just like Gradio's post_message helper
|
| 2001 |
+
var channel = new MessageChannel();
|
| 2002 |
+
var done = false;
|
| 2003 |
+
channel.port1.onmessage = function(ev) {
|
| 2004 |
+
channel.port1.close();
|
| 2005 |
+
done = true;
|
| 2006 |
+
var headers = ev.data;
|
| 2007 |
+
if (headers && typeof headers === 'object') {
|
| 2008 |
+
// Convert Map to plain object if needed
|
| 2009 |
+
var obj = {};
|
| 2010 |
+
if (typeof headers.forEach === 'function') {
|
| 2011 |
+
headers.forEach(function(v, k) { obj[k] = v; });
|
| 2012 |
+
} else {
|
| 2013 |
+
obj = headers;
|
| 2014 |
}
|
| 2015 |
+
console.log('[zerogpu] got headers from parent:', Object.keys(obj).join(', '));
|
| 2016 |
+
resolve(obj);
|
| 2017 |
+
} else {
|
| 2018 |
+
resolve({});
|
| 2019 |
+
}
|
| 2020 |
+
};
|
| 2021 |
+
window.parent.postMessage('zerogpu-headers', origin, [channel.port2]);
|
| 2022 |
+
// Timeout: don't block regen if parent doesn't respond
|
| 2023 |
+
setTimeout(function() { if (!done) { done = true; channel.port1.close(); resolve({}); } }, 3000);
|
| 2024 |
});
|
| 2025 |
}
|
| 2026 |
|
|
|
|
| 2146 |
console.warn('[fireRegen] fn_index not found for api_name:', apiName);
|
| 2147 |
return;
|
| 2148 |
}
|
| 2149 |
+
// Get ZeroGPU auth headers from the HF parent frame (same mechanism
|
| 2150 |
+
// Gradio's own JS client uses), then fire the regen queue/join call.
|
| 2151 |
+
// Falls back to user-supplied HF token if zerogpu headers aren't available.
|
| 2152 |
+
_fetchZerogpuHeaders().then(function(zerogpuHeaders) {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2153 |
var regenHeaders = {'Content-Type': 'application/json'};
|
| 2154 |
+
var hasZerogpu = zerogpuHeaders && Object.keys(zerogpuHeaders).length > 0;
|
| 2155 |
+
if (hasZerogpu) {
|
| 2156 |
+
// Merge zerogpu headers (x-zerogpu-token, x-zerogpu-uuid)
|
| 2157 |
+
for (var k in zerogpuHeaders) { regenHeaders[k] = zerogpuHeaders[k]; }
|
| 2158 |
+
console.log('[fireRegen] using zerogpu headers from parent frame');
|
|
|
|
| 2159 |
} else {
|
| 2160 |
+
// Fallback: try user-supplied HF token from Settings
|
| 2161 |
+
var userHfToken = '';
|
| 2162 |
+
var hfTokenEl = document.getElementById('hf_token_input');
|
| 2163 |
+
if (hfTokenEl) { var inp = hfTokenEl.querySelector('input,textarea'); if (inp) userHfToken = (inp.value || '').trim(); }
|
| 2164 |
+
if (userHfToken) {
|
| 2165 |
+
regenHeaders['Authorization'] = 'Bearer ' + userHfToken;
|
| 2166 |
+
console.log('[fireRegen] using HF token fallback for quota attribution');
|
| 2167 |
+
} else {
|
| 2168 |
+
console.warn('[fireRegen] no zerogpu headers or HF token β may use anonymous quota');
|
| 2169 |
+
}
|
| 2170 |
}
|
| 2171 |
fetch('/gradio_api/queue/join', {
|
| 2172 |
method: 'POST',
|
|
|
|
| 2189 |
var sb = document.getElementById('wf_statusbar_' + slot_id);
|
| 2190 |
if (sb) { sb.style.color = '#e05252'; sb.textContent = '\u26a0 Request failed: ' + e.message; }
|
| 2191 |
});
|
| 2192 |
+
});
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2193 |
}
|
| 2194 |
|
| 2195 |
// Subscribe to Gradio SSE stream for an event and apply outputs to DOM.
|
|
|
|
| 2642 |
api_name="xregen_hunyuan",
|
| 2643 |
)
|
| 2644 |
|
| 2645 |
+
# NOTE: ZeroGPU quota attribution is now handled via postMessage("zerogpu-headers")
|
| 2646 |
+
# to the HF parent frame — the same mechanism Gradio's own JS client uses.
|
| 2647 |
+
# This replaced the old x-ip-token relay approach which was unreliable.
|
| 2648 |
+
# The HF token input in Settings is kept as a fallback.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2649 |
|
| 2650 |
print("[startup] app.py fully loaded β regen handlers registered, SSR disabled")
|
| 2651 |
demo.queue(max_size=10).launch(ssr_mode=False, height=900, allowed_paths=["/tmp"])
|