SeaWolf-AI committed on
Commit
5603963
·
verified ·
1 Parent(s): 09cae0a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +107 -41
app.py CHANGED
@@ -32,13 +32,32 @@ except ImportError as e:
32
  SGLANG_BASE = os.getenv("DARWIN_API", "http://localhost:7947")
33
  SGLANG_URL = f"{SGLANG_BASE}/v1/chat/completions"
34
 
35
- MODEL_NAME = "Darwin-35B-A3B-Opus"
36
- MODEL_CAP = {
37
- "arch": "MoE", "active": "3B / 35B total",
38
- "ctx": "262K", "thinking": True, "vision": True,
39
- "max_tokens": 16384, "temp_max": 1.5,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  }
41
 
 
 
 
 
42
  PRESETS = {
43
  "general": "You are Darwin-35B-A3B-Opus, a highly capable reasoning model created by VIDRAFT via evolutionary merge. Think step by step for complex questions.",
44
  "code": "You are an expert software engineer. Write clean, efficient, well-commented code. Explain your approach before writing. Use modern best practices.",
@@ -155,11 +174,14 @@ def generate_reply(
155
  max_new_tokens: int,
156
  temperature: float,
157
  top_p: float,
 
158
  ) -> Generator[str, None, None]:
159
 
 
 
160
  use_think = "Thinking" in thinking_mode
161
- max_new_tokens = min(int(max_new_tokens), MODEL_CAP["max_tokens"])
162
- temperature = min(float(temperature), MODEL_CAP["temp_max"])
163
 
164
  messages: list[dict] = []
165
  if system_prompt.strip():
@@ -196,7 +218,7 @@ def generate_reply(
196
  user_text = build_user_message(message, use_think)
197
 
198
  # Vision: image input handling
199
- if image_input and MODEL_CAP["vision"]:
200
  import io
201
  from PIL import Image as PILImage
202
 
@@ -218,41 +240,75 @@ def generate_reply(
218
  content = user_text
219
  messages.append({"role":"user","content":content})
220
 
221
- # Stream from SGLang
222
- try:
223
- resp = requests.post(SGLANG_URL, json={
224
- "model": "FINAL-Bench/Darwin-35B-A3B-Opus",
225
- "messages": messages,
226
- "max_tokens": max_new_tokens,
227
- "temperature": temperature,
228
- "top_p": float(top_p),
229
- "stream": True,
230
- }, stream=True, timeout=600, verify=False)
231
-
232
- raw = ""
233
- for line in resp.iter_lines(decode_unicode=True):
234
- if not line or not line.startswith("data: "):
235
- continue
236
- payload = line[6:]
237
- if payload.strip() == "[DONE]":
238
- break
239
- try:
240
- chunk = json.loads(payload)
241
- delta = chunk.get("choices", [{}])[0].get("delta", {})
242
- token = delta.get("content", "")
243
- if token:
244
- raw += token
245
- yield format_response(raw)
246
- except (json.JSONDecodeError, IndexError, KeyError):
247
- continue
248
 
249
- if raw:
250
- yield format_response(raw)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
251
 
252
- except requests.exceptions.ConnectionError:
253
- yield "**❌ SGLang 서버 연결 실패.** `localhost:7947`에 서버가 실행 중인지 확인하세요."
254
- except Exception as exc:
255
- yield f"**Error:** `{exc}`"
 
 
 
 
 
 
 
 
256
 
257
 
258
  # ══════════════════════════════════════════════════════════════════════════════
@@ -270,6 +326,7 @@ with gr.Blocks(title="Darwin-35B-A3B-Opus") as gradio_demo:
270
  max_new_tokens = gr.Slider(minimum=64, maximum=16384, value=4096, visible=False)
271
  temperature = gr.Slider(minimum=0.0, maximum=1.5, value=0.6, visible=False)
272
  top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.9, visible=False)
 
273
 
274
  gr.ChatInterface(
275
  fn=generate_reply,
@@ -277,6 +334,7 @@ with gr.Blocks(title="Darwin-35B-A3B-Opus") as gradio_demo:
277
  additional_inputs=[
278
  thinking_toggle, image_input,
279
  system_prompt, max_new_tokens, temperature, top_p,
 
280
  ],
281
  )
282
 
@@ -374,6 +432,14 @@ async def health():
374
  except:
375
  return {"status":"ok","sglang":"disconnected"}
376
 
 
 
 
 
 
 
 
 
377
  # ── Web Search API (Brave) ──────────────────────────────────────────────
378
  BRAVE_API_KEY = os.getenv("BRAVE_API_KEY", "")
379
 
 
32
  SGLANG_BASE = os.getenv("DARWIN_API", "http://localhost:7947")
33
  SGLANG_URL = f"{SGLANG_BASE}/v1/chat/completions"
34
 
35
+ # Multi-model config
36
+ MODELS = {
37
+ "Darwin-35B-A3B-Opus": {
38
+ "id": "FINAL-Bench/Darwin-35B-A3B-Opus",
39
+ "api": os.getenv("DARWIN_API", "http://localhost:7947"),
40
+ "arch": "MoE", "active": "3B / 35B total",
41
+ "ctx": "262K", "thinking": True, "vision": True,
42
+ "max_tokens": 16384, "temp_max": 1.5,
43
+ "desc": "Original BF16 · SGLang",
44
+ "badge": "BF16",
45
+ },
46
+ "Darwin-35B-A3B-Opus-Q8-GGUF": {
47
+ "id": "FINAL-Bench/Darwin-35B-A3B-Opus-Q8-GGUF",
48
+ "api": os.getenv("DARWIN_GGUF_API", "http://localhost:8080"),
49
+ "arch": "MoE", "active": "3B / 35B total",
50
+ "ctx": "262K", "thinking": True, "vision": False,
51
+ "max_tokens": 16384, "temp_max": 1.5,
52
+ "desc": "Q8_0 GGUF · llama.cpp",
53
+ "badge": "Q8 GGUF",
54
+ },
55
  }
56
 
57
+ DEFAULT_MODEL = "Darwin-35B-A3B-Opus"
58
+ MODEL_NAME = DEFAULT_MODEL
59
+ MODEL_CAP = MODELS[DEFAULT_MODEL]
60
+
61
  PRESETS = {
62
  "general": "You are Darwin-35B-A3B-Opus, a highly capable reasoning model created by VIDRAFT via evolutionary merge. Think step by step for complex questions.",
63
  "code": "You are an expert software engineer. Write clean, efficient, well-commented code. Explain your approach before writing. Use modern best practices.",
 
174
  max_new_tokens: int,
175
  temperature: float,
176
  top_p: float,
177
+ model_name: str = "Darwin-35B-A3B-Opus",
178
  ) -> Generator[str, None, None]:
179
 
180
+ mcfg = MODELS.get(model_name, MODELS[DEFAULT_MODEL])
181
+ api_url = f"{mcfg['api']}/v1/chat/completions"
182
  use_think = "Thinking" in thinking_mode
183
+ max_new_tokens = min(int(max_new_tokens), mcfg["max_tokens"])
184
+ temperature = min(float(temperature), mcfg["temp_max"])
185
 
186
  messages: list[dict] = []
187
  if system_prompt.strip():
 
218
  user_text = build_user_message(message, use_think)
219
 
220
  # Vision: image input handling
221
+ if image_input and mcfg["vision"]:
222
  import io
223
  from PIL import Image as PILImage
224
 
 
240
  content = user_text
241
  messages.append({"role":"user","content":content})
242
 
243
+ # Stream from API (with fallback)
244
+ H100_API = os.getenv("H100_API", "")
245
+ api_urls = [api_url]
246
+ if H100_API:
247
+ api_urls.append(f"{H100_API.rstrip('/')}/v1/chat/completions")
248
+
249
+ request_body = {
250
+ "model": mcfg["id"],
251
+ "messages": messages,
252
+ "max_tokens": max_new_tokens,
253
+ "temperature": temperature,
254
+ "top_p": float(top_p),
255
+ "stream": True,
256
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
257
 
258
+ for i, url in enumerate(api_urls):
259
+ try:
260
+ label = "Primary" if i == 0 else "Fallback(H100)"
261
+ print(f"[API] {label}: {url}", flush=True)
262
+
263
+ resp = requests.post(url, json=request_body,
264
+ stream=True, timeout=15, verify=False)
265
+
266
+ if resp.status_code != 200:
267
+ raise requests.exceptions.ConnectionError(f"HTTP {resp.status_code}")
268
+
269
+ raw = ""
270
+ got_token = False
271
+ for line in resp.iter_lines(decode_unicode=True):
272
+ if not line or not line.startswith("data: "):
273
+ continue
274
+ payload = line[6:]
275
+ if payload.strip() == "[DONE]":
276
+ break
277
+ try:
278
+ chunk = json.loads(payload)
279
+ delta = chunk.get("choices", [{}])[0].get("delta", {})
280
+ token = delta.get("content", "")
281
+ if token:
282
+ raw += token
283
+ got_token = True
284
+ yield format_response(raw)
285
+ except (json.JSONDecodeError, IndexError, KeyError):
286
+ continue
287
+
288
+ if raw:
289
+ yield format_response(raw)
290
+
291
+ if got_token:
292
+ print(f"[API] {label} OK — {len(raw)} chars", flush=True)
293
+ return # 성공 시 종료
294
+
295
+ # 토큰 0개면 다음 API로
296
+ if not got_token and i < len(api_urls) - 1:
297
+ print(f"[API] {label} returned no tokens, trying fallback...", flush=True)
298
+ continue
299
 
300
+ except (requests.exceptions.ConnectionError,
301
+ requests.exceptions.Timeout,
302
+ requests.exceptions.ReadTimeout) as e:
303
+ print(f"[API] {label} failed: {e}", flush=True)
304
+ if i < len(api_urls) - 1:
305
+ print(f"[API] Switching to fallback...", flush=True)
306
+ continue
307
+ else:
308
+ yield f"**❌ 모든 API 연결 실패.**\n\n- Primary: `{api_urls[0]}`\n- Fallback: `{url}`"
309
+ except Exception as exc:
310
+ yield f"**Error:** `{exc}`"
311
+ return
312
 
313
 
314
  # ══════════════════════════════════════════════════════════════════════════════
 
326
  max_new_tokens = gr.Slider(minimum=64, maximum=16384, value=4096, visible=False)
327
  temperature = gr.Slider(minimum=0.0, maximum=1.5, value=0.6, visible=False)
328
  top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.9, visible=False)
329
+ model_selector = gr.Textbox(value=DEFAULT_MODEL, visible=False)
330
 
331
  gr.ChatInterface(
332
  fn=generate_reply,
 
334
  additional_inputs=[
335
  thinking_toggle, image_input,
336
  system_prompt, max_new_tokens, temperature, top_p,
337
+ model_selector,
338
  ],
339
  )
340
 
 
432
  except:
433
  return {"status":"ok","sglang":"disconnected"}
434
 
435
+ @fapp.get("/api/models")
436
+ async def api_models():
437
+ return JSONResponse({
438
+ "models": {k: {"desc": v["desc"], "badge": v["badge"], "vision": v["vision"], "ctx": v["ctx"]}
439
+ for k, v in MODELS.items()},
440
+ "default": DEFAULT_MODEL,
441
+ })
442
+
443
  # ── Web Search API (Brave) ──────────────────────────────────────────────
444
  BRAVE_API_KEY = os.getenv("BRAVE_API_KEY", "")
445