Spaces:
Running on Zero
Running on Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -32,13 +32,32 @@ except ImportError as e:
|
|
| 32 |
SGLANG_BASE = os.getenv("DARWIN_API", "http://localhost:7947")
|
| 33 |
SGLANG_URL = f"{SGLANG_BASE}/v1/chat/completions"
|
| 34 |
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
"
|
| 38 |
-
|
| 39 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
}
|
| 41 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
PRESETS = {
|
| 43 |
"general": "You are Darwin-35B-A3B-Opus, a highly capable reasoning model created by VIDRAFT via evolutionary merge. Think step by step for complex questions.",
|
| 44 |
"code": "You are an expert software engineer. Write clean, efficient, well-commented code. Explain your approach before writing. Use modern best practices.",
|
|
@@ -155,11 +174,14 @@ def generate_reply(
|
|
| 155 |
max_new_tokens: int,
|
| 156 |
temperature: float,
|
| 157 |
top_p: float,
|
|
|
|
| 158 |
) -> Generator[str, None, None]:
|
| 159 |
|
|
|
|
|
|
|
| 160 |
use_think = "Thinking" in thinking_mode
|
| 161 |
-
max_new_tokens = min(int(max_new_tokens),
|
| 162 |
-
temperature = min(float(temperature),
|
| 163 |
|
| 164 |
messages: list[dict] = []
|
| 165 |
if system_prompt.strip():
|
|
@@ -196,7 +218,7 @@ def generate_reply(
|
|
| 196 |
user_text = build_user_message(message, use_think)
|
| 197 |
|
| 198 |
# Vision: image input handling
|
| 199 |
-
if image_input and
|
| 200 |
import io
|
| 201 |
from PIL import Image as PILImage
|
| 202 |
|
|
@@ -218,41 +240,75 @@ def generate_reply(
|
|
| 218 |
content = user_text
|
| 219 |
messages.append({"role":"user","content":content})
|
| 220 |
|
| 221 |
-
# Stream from
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
continue
|
| 236 |
-
payload = line[6:]
|
| 237 |
-
if payload.strip() == "[DONE]":
|
| 238 |
-
break
|
| 239 |
-
try:
|
| 240 |
-
chunk = json.loads(payload)
|
| 241 |
-
delta = chunk.get("choices", [{}])[0].get("delta", {})
|
| 242 |
-
token = delta.get("content", "")
|
| 243 |
-
if token:
|
| 244 |
-
raw += token
|
| 245 |
-
yield format_response(raw)
|
| 246 |
-
except (json.JSONDecodeError, IndexError, KeyError):
|
| 247 |
-
continue
|
| 248 |
|
| 249 |
-
|
| 250 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 251 |
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 256 |
|
| 257 |
|
| 258 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
@@ -270,6 +326,7 @@ with gr.Blocks(title="Darwin-35B-A3B-Opus") as gradio_demo:
|
|
| 270 |
max_new_tokens = gr.Slider(minimum=64, maximum=16384, value=4096, visible=False)
|
| 271 |
temperature = gr.Slider(minimum=0.0, maximum=1.5, value=0.6, visible=False)
|
| 272 |
top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.9, visible=False)
|
|
|
|
| 273 |
|
| 274 |
gr.ChatInterface(
|
| 275 |
fn=generate_reply,
|
|
@@ -277,6 +334,7 @@ with gr.Blocks(title="Darwin-35B-A3B-Opus") as gradio_demo:
|
|
| 277 |
additional_inputs=[
|
| 278 |
thinking_toggle, image_input,
|
| 279 |
system_prompt, max_new_tokens, temperature, top_p,
|
|
|
|
| 280 |
],
|
| 281 |
)
|
| 282 |
|
|
@@ -374,6 +432,14 @@ async def health():
|
|
| 374 |
except:
|
| 375 |
return {"status":"ok","sglang":"disconnected"}
|
| 376 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 377 |
# ββ Web Search API (Brave) ββββββββββββββββββββββββββββββββββββββββββββββ
|
| 378 |
BRAVE_API_KEY = os.getenv("BRAVE_API_KEY", "")
|
| 379 |
|
|
|
|
| 32 |
SGLANG_BASE = os.getenv("DARWIN_API", "http://localhost:7947")
SGLANG_URL = f"{SGLANG_BASE}/v1/chat/completions"

# Multi-model config.
# Capability fields shared by every served checkpoint live in one place so the
# two entries cannot silently drift apart (both are the same 35B-A3B merge,
# differing only in quantization/backend).
_COMMON_CAPS = {
    "arch": "MoE",
    "active": "3B / 35B total",
    "ctx": "262K",
    "thinking": True,
    "max_tokens": 16384,
    "temp_max": 1.5,
}

# name -> routing + capability metadata consumed by generate_reply and /api/models.
MODELS = {
    "Darwin-35B-A3B-Opus": {
        **_COMMON_CAPS,
        "id": "FINAL-Bench/Darwin-35B-A3B-Opus",
        "api": os.getenv("DARWIN_API", "http://localhost:7947"),
        "vision": True,
        # NOTE: desc strings previously contained mojibake ("Β·"); repaired to "·".
        "desc": "Original BF16 · SGLang",
        "badge": "BF16",
    },
    "Darwin-35B-A3B-Opus-Q8-GGUF": {
        **_COMMON_CAPS,
        "id": "FINAL-Bench/Darwin-35B-A3B-Opus-Q8-GGUF",
        "api": os.getenv("DARWIN_GGUF_API", "http://localhost:8080"),
        "vision": False,  # GGUF/llama.cpp path has no image input
        "desc": "Q8_0 GGUF · llama.cpp",
        "badge": "Q8 GGUF",
    },
}

DEFAULT_MODEL = "Darwin-35B-A3B-Opus"
# Kept for backward compatibility with code that referenced a single model.
MODEL_NAME = DEFAULT_MODEL
MODEL_CAP = MODELS[DEFAULT_MODEL]
|
| 60 |
+
|
| 61 |
PRESETS = {
|
| 62 |
"general": "You are Darwin-35B-A3B-Opus, a highly capable reasoning model created by VIDRAFT via evolutionary merge. Think step by step for complex questions.",
|
| 63 |
"code": "You are an expert software engineer. Write clean, efficient, well-commented code. Explain your approach before writing. Use modern best practices.",
|
|
|
|
| 174 |
max_new_tokens: int,
|
| 175 |
temperature: float,
|
| 176 |
top_p: float,
|
| 177 |
+
model_name: str = "Darwin-35B-A3B-Opus",
|
| 178 |
) -> Generator[str, None, None]:
|
| 179 |
|
| 180 |
+
mcfg = MODELS.get(model_name, MODELS[DEFAULT_MODEL])
|
| 181 |
+
api_url = f"{mcfg['api']}/v1/chat/completions"
|
| 182 |
use_think = "Thinking" in thinking_mode
|
| 183 |
+
max_new_tokens = min(int(max_new_tokens), mcfg["max_tokens"])
|
| 184 |
+
temperature = min(float(temperature), mcfg["temp_max"])
|
| 185 |
|
| 186 |
messages: list[dict] = []
|
| 187 |
if system_prompt.strip():
|
|
|
|
| 218 |
user_text = build_user_message(message, use_think)
|
| 219 |
|
| 220 |
# Vision: image input handling
|
| 221 |
+
if image_input and mcfg["vision"]:
|
| 222 |
import io
|
| 223 |
from PIL import Image as PILImage
|
| 224 |
|
|
|
|
| 240 |
content = user_text
|
| 241 |
messages.append({"role":"user","content":content})
|
| 242 |
|
| 243 |
+
# Stream from API (with fallback)
|
| 244 |
+
H100_API = os.getenv("H100_API", "")
|
| 245 |
+
api_urls = [api_url]
|
| 246 |
+
if H100_API:
|
| 247 |
+
api_urls.append(f"{H100_API.rstrip('/')}/v1/chat/completions")
|
| 248 |
+
|
| 249 |
+
request_body = {
|
| 250 |
+
"model": mcfg["id"],
|
| 251 |
+
"messages": messages,
|
| 252 |
+
"max_tokens": max_new_tokens,
|
| 253 |
+
"temperature": temperature,
|
| 254 |
+
"top_p": float(top_p),
|
| 255 |
+
"stream": True,
|
| 256 |
+
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 257 |
|
| 258 |
+
for i, url in enumerate(api_urls):
|
| 259 |
+
try:
|
| 260 |
+
label = "Primary" if i == 0 else "Fallback(H100)"
|
| 261 |
+
print(f"[API] {label}: {url}", flush=True)
|
| 262 |
+
|
| 263 |
+
resp = requests.post(url, json=request_body,
|
| 264 |
+
stream=True, timeout=15, verify=False)
|
| 265 |
+
|
| 266 |
+
if resp.status_code != 200:
|
| 267 |
+
raise requests.exceptions.ConnectionError(f"HTTP {resp.status_code}")
|
| 268 |
+
|
| 269 |
+
raw = ""
|
| 270 |
+
got_token = False
|
| 271 |
+
for line in resp.iter_lines(decode_unicode=True):
|
| 272 |
+
if not line or not line.startswith("data: "):
|
| 273 |
+
continue
|
| 274 |
+
payload = line[6:]
|
| 275 |
+
if payload.strip() == "[DONE]":
|
| 276 |
+
break
|
| 277 |
+
try:
|
| 278 |
+
chunk = json.loads(payload)
|
| 279 |
+
delta = chunk.get("choices", [{}])[0].get("delta", {})
|
| 280 |
+
token = delta.get("content", "")
|
| 281 |
+
if token:
|
| 282 |
+
raw += token
|
| 283 |
+
got_token = True
|
| 284 |
+
yield format_response(raw)
|
| 285 |
+
except (json.JSONDecodeError, IndexError, KeyError):
|
| 286 |
+
continue
|
| 287 |
+
|
| 288 |
+
if raw:
|
| 289 |
+
yield format_response(raw)
|
| 290 |
+
|
| 291 |
+
if got_token:
|
| 292 |
+
print(f"[API] {label} OK β {len(raw)} chars", flush=True)
|
| 293 |
+
return # μ±κ³΅ μ μ’
λ£
|
| 294 |
+
|
| 295 |
+
# ν ν° 0κ°λ©΄ λ€μ APIλ‘
|
| 296 |
+
if not got_token and i < len(api_urls) - 1:
|
| 297 |
+
print(f"[API] {label} returned no tokens, trying fallback...", flush=True)
|
| 298 |
+
continue
|
| 299 |
|
| 300 |
+
except (requests.exceptions.ConnectionError,
|
| 301 |
+
requests.exceptions.Timeout,
|
| 302 |
+
requests.exceptions.ReadTimeout) as e:
|
| 303 |
+
print(f"[API] {label} failed: {e}", flush=True)
|
| 304 |
+
if i < len(api_urls) - 1:
|
| 305 |
+
print(f"[API] Switching to fallback...", flush=True)
|
| 306 |
+
continue
|
| 307 |
+
else:
|
| 308 |
+
yield f"**β λͺ¨λ API μ°κ²° μ€ν¨.**\n\n- Primary: `{api_urls[0]}`\n- Fallback: `{url}`"
|
| 309 |
+
except Exception as exc:
|
| 310 |
+
yield f"**Error:** `{exc}`"
|
| 311 |
+
return
|
| 312 |
|
| 313 |
|
| 314 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
|
|
| 326 |
max_new_tokens = gr.Slider(minimum=64, maximum=16384, value=4096, visible=False)
|
| 327 |
temperature = gr.Slider(minimum=0.0, maximum=1.5, value=0.6, visible=False)
|
| 328 |
top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.9, visible=False)
|
| 329 |
+
model_selector = gr.Textbox(value=DEFAULT_MODEL, visible=False)
|
| 330 |
|
| 331 |
gr.ChatInterface(
|
| 332 |
fn=generate_reply,
|
|
|
|
| 334 |
additional_inputs=[
|
| 335 |
thinking_toggle, image_input,
|
| 336 |
system_prompt, max_new_tokens, temperature, top_p,
|
| 337 |
+
model_selector,
|
| 338 |
],
|
| 339 |
)
|
| 340 |
|
|
|
|
| 432 |
except:
|
| 433 |
return {"status":"ok","sglang":"disconnected"}
|
| 434 |
|
| 435 |
+
@fapp.get("/api/models")
async def api_models():
    """Return the model catalogue (display metadata only) and the default model name.

    Only UI-facing fields are exposed; backend routing details such as the
    per-model API endpoint are deliberately kept server-side.
    """
    catalogue = {}
    for name, cfg in MODELS.items():
        catalogue[name] = {
            "desc": cfg["desc"],
            "badge": cfg["badge"],
            "vision": cfg["vision"],
            "ctx": cfg["ctx"],
        }
    return JSONResponse({"models": catalogue, "default": DEFAULT_MODEL})
|
| 442 |
+
|
| 443 |
# ββ Web Search API (Brave) ββββββββββββββββββββββββββββββββββββββββββββββ
|
| 444 |
BRAVE_API_KEY = os.getenv("BRAVE_API_KEY", "")
|
| 445 |
|