Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -79,31 +79,41 @@ def _strip(t):
|
|
| 79 |
return t.strip()
|
| 80 |
|
| 81 |
def call_hf(prompt, sys_msg="", key="", model="Qwen/Qwen3.5-397B-A17B", max_tok=4096, temp=0.6):
|
|
|
|
|
|
|
|
|
|
| 82 |
msgs = []
|
| 83 |
if sys_msg: msgs.append({"role": "system", "content": sys_msg})
|
| 84 |
msgs.append({"role": "user", "content": prompt})
|
| 85 |
-
h = {"Content-Type": "application/json", "Authorization": f"Bearer {key}"}
|
| 86 |
-
body = {"model": model, "messages": msgs, "max_tokens": max_tok, "temperature": temp, "stream": False}
|
| 87 |
|
| 88 |
for attempt in range(3):
|
| 89 |
try:
|
| 90 |
print(f" 📡 HF call: {model} (attempt {attempt+1})")
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
r.raise_for_status()
|
| 100 |
-
content = r.json()["choices"][0]["message"]["content"]
|
| 101 |
print(f" ✅ Got {len(content)} chars")
|
| 102 |
return _strip(content)
|
| 103 |
except Exception as e:
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
|
| 108 |
def call_oai(prompt, sys_msg="", key="", model="gpt-5.2", max_tok=4096, temp=0.6):
|
| 109 |
msgs = []
|
|
|
|
| 79 |
return t.strip()
|
| 80 |
|
| 81 |
def call_hf(prompt, sys_msg="", key="", model="Qwen/Qwen3.5-397B-A17B", max_tok=4096, temp=0.6):
    """Call a chat model through the HuggingFace InferenceClient (auto-routes to the correct provider).

    Args:
        prompt:  User message content.
        sys_msg: Optional system message; prepended to the conversation when non-empty.
        key:     HuggingFace API token passed to InferenceClient.
        max_tok: Max tokens for the completion.
        temp:    Sampling temperature.

    Returns:
        The stripped model response on success, or an "[API_ERROR] ..." string
        on failure — callers always receive a str, never None.
    """
    # Imported lazily so the module loads even where huggingface_hub is absent.
    from huggingface_hub import InferenceClient

    msgs = []
    if sys_msg: msgs.append({"role": "system", "content": sys_msg})
    msgs.append({"role": "user", "content": prompt})

    last_err = ""  # remembered so retry exhaustion can still report a cause
    for attempt in range(3):
        try:
            print(f" 📡 HF call: {model} (attempt {attempt+1})")
            client = InferenceClient(token=key)
            response = client.chat_completion(
                model=model,
                messages=msgs,
                max_tokens=max_tok,
                temperature=temp,
            )
            content = response.choices[0].message.content
            print(f" ✅ Got {len(content)} chars")
            return _strip(content)
        except Exception as e:
            last_err = str(e)
            print(f" ❌ HF error (attempt {attempt+1}): {last_err[:150]}")
            # Back off longer for rate limits and cold model loads; short
            # linear backoff for anything else that still has retries left.
            if "429" in last_err or "rate" in last_err.lower():
                wait = 10 * (attempt + 1)
                print(f" ⏳ Rate limited, waiting {wait}s")
                time.sleep(wait)
            elif "503" in last_err or "loading" in last_err.lower():
                wait = 15 * (attempt + 1)
                print(f" ⏳ Model loading, waiting {wait}s")
                time.sleep(wait)
            elif attempt < 2:
                time.sleep(3 * (attempt + 1))
            else:
                return f"[API_ERROR] {last_err[:200]}"
    # BUGFIX: previously, exhausting all retries via the 429/503 branches fell
    # through the loop and implicitly returned None; always return a string.
    return f"[API_ERROR] {last_err[:200]}"
|
| 117 |
|
| 118 |
def call_oai(prompt, sys_msg="", key="", model="gpt-5.2", max_tok=4096, temp=0.6):
|
| 119 |
msgs = []
|