Spaces:
Running on Zero
fix(hf-space): cap ZeroGPU duration at 120s + actionable HF_TOKEN error
Browse files
User-reported live errors (verbose reporting from prior commit caught
them cleanly):
1. ZeroGPU: 'The requested GPU duration (300s) is larger than the
maximum allowed.' — Pro-tier per-request cap is 120s in practice,
not 300s as I'd assumed. Reverted default; comment now explains
that the cap is quota-tier dependent and can be overridden via env var if
the owner has a higher allocation.
2. HuggingFace API: 'You must provide an api_key to work with auto
API or log in with hf auth login.' — public HF Spaces do NOT
auto-inject HF_TOKEN. The user has to add it as a Space secret
manually. _call_huggingface now:
- Checks HF_TOKEN, HUGGING_FACE_HUB_TOKEN, AND get_token() (which
reads from `hf auth login`'s cached token)
- If all three sources are empty, raises a RuntimeError that
quotes the exact UI path: 'Settings → Repository secrets → New
secret → name: HF_TOKEN' and the URL to generate one. Tells
the user to pick a different model in the meantime.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
|
@@ -189,7 +189,10 @@ ROOT = Path(__file__).parent
|
|
| 189 |
ANTHROPIC_MODEL_ID = os.environ.get("MODEL_ID", "claude-opus-4-7")
|
| 190 |
HF_MODEL_ID = os.environ.get("HF_MODEL_ID", "google/gemma-2-9b-it")
|
| 191 |
ZEROGPU_MODEL_ID = os.environ.get("ZEROGPU_MODEL_ID", "microsoft/Phi-4-mini-instruct")
|
| 192 |
-
|
|
|
|
|
|
|
|
|
|
| 193 |
MAX_DESCRIPTION_WORDS = int(os.environ.get("MAX_DESCRIPTION_WORDS", "5000"))
|
| 194 |
MIN_DESCRIPTION_WORDS = 200
|
| 195 |
|
|
@@ -277,13 +280,28 @@ def _call_huggingface(system_block: str, user_prompt: str) -> str:
|
|
| 277 |
Phi-4-mini-instruct, Llama-3.3, Qwen 2.5, and many others. Lower
|
| 278 |
temperature (0.2) than the SDK default to keep JSON output stable β
|
| 279 |
smaller open models can be looser than Claude on schema adherence.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 280 |
"""
|
| 281 |
-
from huggingface_hub import InferenceClient
|
| 282 |
|
| 283 |
token = (
|
| 284 |
os.environ.get("HF_TOKEN")
|
| 285 |
or os.environ.get("HUGGING_FACE_HUB_TOKEN")
|
|
|
|
| 286 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 287 |
client = InferenceClient(model=HF_MODEL_ID, token=token, timeout=120)
|
| 288 |
resp = client.chat_completion(
|
| 289 |
messages=[
|
|
|
|
| 189 |
ANTHROPIC_MODEL_ID = os.environ.get("MODEL_ID", "claude-opus-4-7")
|
| 190 |
HF_MODEL_ID = os.environ.get("HF_MODEL_ID", "google/gemma-2-9b-it")
|
| 191 |
ZEROGPU_MODEL_ID = os.environ.get("ZEROGPU_MODEL_ID", "microsoft/Phi-4-mini-instruct")
|
| 192 |
+
# ZeroGPU's per-request duration cap depends on the Space owner's quota
|
| 193 |
+
# tier. 120s is the Pro-tier default; we found 300s exceeds the limit.
|
| 194 |
+
# Override via env var if your tier allows longer.
|
| 195 |
+
ZEROGPU_DURATION_SECONDS = int(os.environ.get("ZEROGPU_DURATION_SECONDS", "120"))
|
| 196 |
MAX_DESCRIPTION_WORDS = int(os.environ.get("MAX_DESCRIPTION_WORDS", "5000"))
|
| 197 |
MIN_DESCRIPTION_WORDS = 200
|
| 198 |
|
|
|
|
| 280 |
Phi-4-mini-instruct, Llama-3.3, Qwen 2.5, and many others. Lower
|
| 281 |
temperature (0.2) than the SDK default to keep JSON output stable β
|
| 282 |
smaller open models can be looser than Claude on schema adherence.
|
| 283 |
+
|
| 284 |
+
Requires an HF token: HF_TOKEN env var, HUGGING_FACE_HUB_TOKEN env
|
| 285 |
+
var, or a `hf auth login`-stored token (huggingface_hub.get_token()
|
| 286 |
+
checks all three sources). HF Spaces do NOT auto-inject a token on
|
| 287 |
+
public Spaces β the Space owner has to add it as a Space secret.
|
| 288 |
+
Raise a clear, actionable error if missing.
|
| 289 |
"""
|
| 290 |
+
from huggingface_hub import InferenceClient, get_token
|
| 291 |
|
| 292 |
token = (
|
| 293 |
os.environ.get("HF_TOKEN")
|
| 294 |
or os.environ.get("HUGGING_FACE_HUB_TOKEN")
|
| 295 |
+
or get_token() # checks ~/.cache/huggingface/token from `hf auth login`
|
| 296 |
)
|
| 297 |
+
if not token:
|
| 298 |
+
raise RuntimeError(
|
| 299 |
+
"No HuggingFace token found. The Space owner needs to add HF_TOKEN "
|
| 300 |
+
"as a Space secret (Settings β Repository secrets β New secret β "
|
| 301 |
+
"name: HF_TOKEN, value: a User Access Token from "
|
| 302 |
+
"https://huggingface.co/settings/tokens). Then restart the Space. "
|
| 303 |
+
"Until then, pick a different model from the dropdown."
|
| 304 |
+
)
|
| 305 |
client = InferenceClient(model=HF_MODEL_ID, token=token, timeout=120)
|
| 306 |
resp = client.chat_completion(
|
| 307 |
messages=[
|