Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -79,31 +79,41 @@ def _strip(t):
|
|
| 79 |
return t.strip()
|
| 80 |
|
| 81 |
def call_hf(prompt, sys_msg="", key="", model="Qwen/Qwen3.5-397B-A17B", max_tok=4096, temp=0.6):
|
|
|
|
|
|
|
|
|
|
| 82 |
msgs = []
|
| 83 |
if sys_msg: msgs.append({"role": "system", "content": sys_msg})
|
| 84 |
msgs.append({"role": "user", "content": prompt})
|
| 85 |
-
h = {"Content-Type": "application/json", "Authorization": f"Bearer {key}"}
|
| 86 |
-
body = {"model": model, "messages": msgs, "max_tokens": max_tok, "temperature": temp, "stream": False}
|
| 87 |
|
| 88 |
for attempt in range(3):
|
| 89 |
try:
|
| 90 |
print(f" 📡 HF call: {model} (attempt {attempt+1})")
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
r.raise_for_status()
|
| 100 |
-
content = r.json()["choices"][0]["message"]["content"]
|
| 101 |
print(f" ✅ Got {len(content)} chars")
|
| 102 |
return _strip(content)
|
| 103 |
except Exception as e:
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
|
| 108 |
def call_oai(prompt, sys_msg="", key="", model="gpt-5.2", max_tok=4096, temp=0.6):
|
| 109 |
msgs = []
|
|
|
|
| 79 |
return t.strip()
|
| 80 |
|
| 81 |
def call_hf(prompt, sys_msg="", key="", model="Qwen/Qwen3.5-397B-A17B", max_tok=4096, temp=0.6):
    """Call a chat model through the HuggingFace InferenceClient (auto-routes to the correct provider).

    Args:
        prompt:  User message content.
        sys_msg: Optional system message; prepended to the conversation when non-empty.
        key:     HuggingFace API token passed to InferenceClient.
        max_tok: Max tokens for the completion.
        temp:    Sampling temperature.

    Returns:
        The stripped model response on success, or an "[API_ERROR] ..." string
        on failure — callers always receive a str, never None.
    """
    # Imported lazily so the module loads even where huggingface_hub is absent.
    from huggingface_hub import InferenceClient

    msgs = []
    if sys_msg: msgs.append({"role": "system", "content": sys_msg})
    msgs.append({"role": "user", "content": prompt})

    last_err = ""  # remembered so retry exhaustion can still report a cause
    for attempt in range(3):
        try:
            print(f" 📡 HF call: {model} (attempt {attempt+1})")
            client = InferenceClient(token=key)
            response = client.chat_completion(
                model=model,
                messages=msgs,
                max_tokens=max_tok,
                temperature=temp,
            )
            content = response.choices[0].message.content
            print(f" ✅ Got {len(content)} chars")
            return _strip(content)
        except Exception as e:
            last_err = str(e)
            print(f" ❌ HF error (attempt {attempt+1}): {last_err[:150]}")
            # Back off longer for rate limits and cold model loads; short
            # linear backoff for anything else that still has retries left.
            if "429" in last_err or "rate" in last_err.lower():
                wait = 10 * (attempt + 1)
                print(f" ⏳ Rate limited, waiting {wait}s")
                time.sleep(wait)
            elif "503" in last_err or "loading" in last_err.lower():
                wait = 15 * (attempt + 1)
                print(f" ⏳ Model loading, waiting {wait}s")
                time.sleep(wait)
            elif attempt < 2:
                time.sleep(3 * (attempt + 1))
            else:
                return f"[API_ERROR] {last_err[:200]}"
    # BUGFIX: previously, exhausting all retries via the 429/503 branches fell
    # through the loop and implicitly returned None; always return a string.
    return f"[API_ERROR] {last_err[:200]}"
|
| 117 |
|
| 118 |
def call_oai(prompt, sys_msg="", key="", model="gpt-5.2", max_tok=4096, temp=0.6):
|
| 119 |
msgs = []
|