seawolf2357 committed on
Commit
aab2bd0
·
verified ·
1 Parent(s): 90ad6a9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -15
app.py CHANGED
@@ -79,31 +79,41 @@ def _strip(t):
79
  return t.strip()
80
 
81
def call_hf(prompt, sys_msg="", key="", model="Qwen/Qwen3.5-397B-A17B", max_tok=4096, temp=0.6):
    """Call the HF router chat-completions endpoint and return the reply text.

    Retries up to 3 times on HTTP 429/503 with linear backoff.
    Returns the stripped completion text on success, or a string of the
    form "[API_ERROR] ..." on failure — never None.
    """
    msgs = []
    if sys_msg:
        msgs.append({"role": "system", "content": sys_msg})
    msgs.append({"role": "user", "content": prompt})
    h = {"Content-Type": "application/json", "Authorization": f"Bearer {key}"}
    body = {"model": model, "messages": msgs, "max_tokens": max_tok, "temperature": temp, "stream": False}

    last_err = None  # remembered so the post-loop return can report it
    for attempt in range(3):
        try:
            print(f" 📡 HF call: {model} (attempt {attempt+1})")
            r = requests.post(
                f"https://router.huggingface.co/hf-inference/models/{model}/v1/chat/completions",
                headers=h, json=body, timeout=120)
            print(f" 📡 Status: {r.status_code}")
            if r.status_code in (429, 503):
                # Transient: rate limit or model loading — back off and retry.
                last_err = f"HTTP {r.status_code}"
                wait = 10 * (attempt + 1)
                print(f" ⏳ Rate limited, waiting {wait}s")
                time.sleep(wait)
                continue
            r.raise_for_status()
            content = r.json()["choices"][0]["message"]["content"]
            print(f" ✅ Got {len(content)} chars")
            return _strip(content)
        except Exception as e:
            last_err = e
            print(f" ❌ HF error: {e}")
            if attempt < 2:
                time.sleep(3 * (attempt + 1))
            else:
                return f"[API_ERROR] {e}"
    # BUG FIX: previously, three consecutive 429/503 responses fell off the
    # loop and the function implicitly returned None; callers expect a string.
    return f"[API_ERROR] {last_err}"
 
 
 
 
 
 
 
 
 
 
 
107
 
108
  def call_oai(prompt, sys_msg="", key="", model="gpt-5.2", max_tok=4096, temp=0.6):
109
  msgs = []
 
79
  return t.strip()
80
 
81
def call_hf(prompt, sys_msg="", key="", model="Qwen/Qwen3.5-397B-A17B", max_tok=4096, temp=0.6):
    """HuggingFace InferenceClient — auto-routes to correct provider.

    Retries up to 3 times, backing off longer on rate-limit (429) and
    model-loading (503) errors. Returns the stripped completion text on
    success, or a string of the form "[API_ERROR] ..." — never None.
    """
    from huggingface_hub import InferenceClient

    msgs = []
    if sys_msg:
        msgs.append({"role": "system", "content": sys_msg})
    msgs.append({"role": "user", "content": prompt})

    # Hoisted out of the retry loop: the client is loop-invariant, so one
    # instance is reused across all attempts.
    client = InferenceClient(token=key)

    last_err = ""  # remembered so the post-loop return can report it
    for attempt in range(3):
        try:
            print(f" 📡 HF call: {model} (attempt {attempt+1})")
            response = client.chat_completion(
                model=model,
                messages=msgs,
                max_tokens=max_tok,
                temperature=temp,
            )
            content = response.choices[0].message.content
            print(f" ✅ Got {len(content)} chars")
            return _strip(content)
        except Exception as e:
            err_str = str(e)
            last_err = err_str
            print(f" ❌ HF error (attempt {attempt+1}): {err_str[:150]}")
            if "429" in err_str or "rate" in err_str.lower():
                wait = 10 * (attempt + 1)
                print(f" ⏳ Rate limited, waiting {wait}s")
                time.sleep(wait)
            elif "503" in err_str or "loading" in err_str.lower():
                wait = 15 * (attempt + 1)
                print(f" ⏳ Model loading, waiting {wait}s")
                time.sleep(wait)
            elif attempt < 2:
                time.sleep(3 * (attempt + 1))
            else:
                return f"[API_ERROR] {err_str[:200]}"
    # BUG FIX: the 429/"rate" and 503/"loading" branches sleep even on the
    # final attempt, so the loop previously fell through and returned None.
    return f"[API_ERROR] {last_err[:200]}"
117
 
118
  def call_oai(prompt, sys_msg="", key="", model="gpt-5.2", max_tok=4096, temp=0.6):
119
  msgs = []