Darwin-4B-david

Running on L4

App Files Files Community

SeaWolf-AI committed 18 days ago

Commit

84612a3

verified ·

1 Parent(s): 164ae71

Create app.py

Browse files

Files changed (1) hide show

app.py +311 -0

app.py ADDED Viewed

	@@ -0,0 +1,311 @@

+"""
+🧬 Darwin-35B-A3B-Opus — Demo Space
+Single model · SGLang backend · Vision support
+"""
+import sys
+print(f"[BOOT] Python {sys.version}", flush=True)
+import base64, os, re, json
+from typing import Generator, Optional
+# Suppress SSL warnings for endpoints that use self-issued certificates (e.g. NIPA)
+import urllib3
+urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+try:
+    import gradio as gr
+    print(f"[BOOT] gradio {gr.__version__}", flush=True)
+except ImportError as e:
+    print(f"[BOOT] FATAL: {e}", flush=True); sys.exit(1)
+try:
+    import httpx, uvicorn, requests
+    from fastapi import FastAPI, Request
+    from fastapi.responses import HTMLResponse, RedirectResponse, JSONResponse
+    print("[BOOT] All imports OK", flush=True)
+except ImportError as e:
+    print(f"[BOOT] FATAL: {e}", flush=True); sys.exit(1)
+# ══════════════════════════════════════════════════════════════════════════════
+# 1.  SGLANG BACKEND CONFIG
+# ══════════════════════════════════════════════════════════════════════════════
+# Base URL of the OpenAI-compatible backend; override with the DARWIN_API
+# environment variable.
+SGLANG_BASE = os.getenv("DARWIN_API", "http://localhost:7947")
+# Chat-completions endpoint that generate_reply() posts to.
+SGLANG_URL  = f"{SGLANG_BASE}/v1/chat/completions"
+# Display name of the model. NOTE(review): the request payload built in
+# generate_reply() uses its own hard-coded model id rather than this constant.
+MODEL_NAME = "Darwin-35B-A3B-Opus"
+# Model capability table. In the code visible here, "max_tokens" and
+# "temp_max" are used as upper clamps for the sampling parameters and
+# "vision" gates image handling; the remaining keys are not read in this file
+# section (presumably informational — confirm against the frontend).
+MODEL_CAP  = {
+    "arch": "MoE", "active": "3B / 35B total",
+    "ctx": "262K", "thinking": True, "vision": True,
+    "max_tokens": 16384, "temp_max": 1.5,
+}
+# System-prompt presets keyed by task. PRESETS["general"] is the default value
+# of the hidden system-prompt textbox in the Gradio block.
+PRESETS = {
+    "general":   "You are Darwin-35B-A3B-Opus, a highly capable reasoning model created by VIDRAFT via evolutionary merge. Think step by step for complex questions.",
+    "code":      "You are an expert software engineer. Write clean, efficient, well-commented code. Explain your approach before writing. Use modern best practices.",
+    "math":      "You are a world-class mathematician. Break problems step-by-step. Show full working. Use LaTeX where helpful.",
+    "creative":  "You are a brilliant creative writer. Be imaginative, vivid, and engaging. Adapt tone and style to the request.",
+    "translate": "You are a professional translator fluent in 201 languages. Provide accurate, natural-sounding translations with cultural context.",
+    "research":  "You are a rigorous research analyst. Provide structured, well-reasoned analysis. Identify assumptions and acknowledge uncertainty.",
+}
+# ══════════════════════════════════════════════════════════════════════════════
+# 2.  THINKING MODE HELPERS
+# ══════════════════════════════════════════════════════════════════════════════
+def build_user_message(text: str, thinking: bool) -> str:
+    """Return ``text`` preceded by a ``/think`` or ``/no_think`` control line.
+
+    Args:
+        text: The user's message.
+        thinking: When true the ``/think`` directive is used, otherwise
+            ``/no_think``.
+    """
+    if thinking:
+        directive = "/think\n"
+    else:
+        directive = "/no_think\n"
+    return directive + text
+def parse_think_blocks(text: str) -> tuple[str, str]:
+    """Split the first ``<think>…</think>`` block off ``text``.
+
+    Returns:
+        ``(reasoning, answer)``. ``reasoning`` is the stripped content of the
+        first complete think block and ``answer`` is the stripped text that
+        follows it (anything before the block is discarded). When no complete
+        block is present the result is ``("", text)`` with ``text`` untouched.
+    """
+    match = re.search(r"<think>(.*?)</think>\s*", text, re.DOTALL)
+    if match is None:
+        return "", text
+    reasoning = match.group(1).strip()
+    remainder = text[match.end():].strip()
+    return reasoning, remainder
+def format_response(raw: str) -> str:
+    """Render model output as Markdown with a collapsible reasoning section.
+
+    When ``raw`` contains a non-empty think block, the reasoning is emitted as
+    a block quote inside a ``<details>`` element followed by the answer.
+    Otherwise ``raw`` is returned unchanged.
+    """
+    reasoning, final = parse_think_blocks(raw)
+    if not reasoning:
+        return raw
+    quoted_lines = ["> " + line for line in reasoning.split("\n")]
+    header = "<details>\n<summary>🧠 Reasoning Chain — click to expand</summary>\n\n"
+    footer = "\n\n</details>\n\n"
+    return header + "\n".join(quoted_lines) + footer + final
+# ══════════════════════════════════════════════════════════════════════════════
+# 3.  STREAMING BACKEND — SGLang OpenAI-compatible API
+# ══════════════════════════════════════════════════════════════════════════════
+# Matches the collapsible reasoning section that format_response() puts in
+# front of an answer, so it can be removed again from chat history.
+_REASONING_DETAILS_RE = re.compile(
+    r"\A\s*<details>\s*<summary>🧠 Reasoning Chain.*?</details>\s*", re.DOTALL
+)
+def _content_text(content) -> Optional[str]:
+    """Flatten a chat ``content`` value (str or list of parts) to plain text.
+
+    Returns ``None`` for ``None``; for a list, the space-joined ``text`` of
+    every ``{"type": "text"}`` part; otherwise ``str(content)``.
+    """
+    if content is None:
+        return None
+    if isinstance(content, list):
+        return " ".join(
+            part.get("text", "") for part in content
+            if isinstance(part, dict) and part.get("type") == "text"
+        )
+    return str(content)
+def _strip_reasoning(text: str) -> str:
+    """Remove reasoning from a previous assistant turn.
+
+    Handles both the raw ``<think>`` form and the rendered ``<details>`` form
+    produced by format_response(), which is what streamed replies look like
+    when they come back as history.
+    """
+    _, answer = parse_think_blocks(text)
+    return _REASONING_DETAILS_RE.sub("", answer, count=1)
+def _history_to_messages(history) -> list[dict]:
+    """Convert Gradio history (message dicts or [user, assistant] pairs) to chat messages."""
+    messages: list[dict] = []
+    for turn in history or []:
+        if isinstance(turn, dict):
+            role = turn.get("role", "")
+            text = _content_text(turn.get("content") or "")
+            if role == "user":
+                messages.append({"role": "user", "content": text})
+            elif role == "assistant":
+                messages.append({"role": "assistant", "content": _strip_reasoning(text)})
+            continue
+        try:
+            user_part = turn[0] or None
+            bot_part = turn[1] if len(turn) > 1 else None
+        except (IndexError, TypeError):
+            continue
+        if user_text := _content_text(user_part):
+            messages.append({"role": "user", "content": user_text})
+        if bot_text := _content_text(bot_part):
+            messages.append({"role": "assistant", "content": _strip_reasoning(bot_text)})
+    return messages
+def _image_to_data_url(image_input) -> str:
+    """Return ``image_input`` as a ``data:`` URL usable in an ``image_url`` part.
+
+    A string that already is a data URL is forwarded unchanged so its declared
+    MIME type keeps matching its payload. Anything else is treated as a PIL
+    image (or an array accepted by ``PIL.Image.fromarray``) and encoded as JPEG.
+    """
+    if isinstance(image_input, str) and image_input.startswith("data:"):
+        return image_input
+    import io
+    from PIL import Image as PILImage
+    if not isinstance(image_input, PILImage.Image):
+        image_input = PILImage.fromarray(image_input)
+    # JPEG cannot store alpha or palette modes (RGBA, LA, P, ...); saving them
+    # raises, so normalise to RGB first.
+    if image_input.mode not in ("RGB", "L"):
+        image_input = image_input.convert("RGB")
+    buf = io.BytesIO()
+    image_input.save(buf, format="JPEG")
+    return "data:image/jpeg;base64," + base64.b64encode(buf.getvalue()).decode()
+def generate_reply(
+    message:        str,
+    history:        list,
+    thinking_mode:  str,
+    image_input,
+    system_prompt:  str,
+    max_new_tokens: int,
+    temperature:    float,
+    top_p:          float,
+) -> Generator[str, None, None]:
+    """Stream a chat completion from the SGLang backend.
+
+    Args:
+        message: The new user message.
+        history: Previous turns, as message dicts or ``[user, assistant]`` pairs.
+        thinking_mode: UI label; thinking is enabled when it contains "Thinking".
+        image_input: Optional image (data URL string, PIL image or array).
+        system_prompt: Optional system prompt; blank means none.
+        max_new_tokens: Requested token budget, capped at MODEL_CAP["max_tokens"].
+        temperature: Sampling temperature, capped at MODEL_CAP["temp_max"].
+        top_p: Nucleus-sampling parameter, forwarded as a float.
+
+    Yields:
+        The formatted response accumulated so far after every received token,
+        then once more at the end of the stream. Failures are reported as a
+        yielded Markdown error message instead of being raised.
+    """
+    use_think = "Thinking" in (thinking_mode or "")
+    max_new_tokens = min(int(max_new_tokens), MODEL_CAP["max_tokens"])
+    temperature    = min(float(temperature),  MODEL_CAP["temp_max"])
+    messages: list[dict] = []
+    system_text = (system_prompt or "").strip()
+    if system_text:
+        messages.append({"role": "system", "content": system_text})
+    messages.extend(_history_to_messages(history))
+    user_text = build_user_message(message, use_think)
+    try:
+        has_image = bool(image_input)
+    except ValueError:
+        # Multi-element numpy arrays refuse truth testing; that is an image.
+        has_image = True
+    try:
+        # Vision: image input handling
+        if has_image and MODEL_CAP["vision"]:
+            content = [
+                {"type": "image_url", "image_url": {"url": _image_to_data_url(image_input)}},
+                {"type": "text", "text": user_text},
+            ]
+        else:
+            content = user_text
+        messages.append({"role": "user", "content": content})
+        payload = {
+            "model": "FINAL-Bench/Darwin-35B-A3B-Opus",
+            "messages": messages,
+            "max_tokens": max_new_tokens,
+            "temperature": temperature,
+            "top_p": float(top_p),
+            "stream": True,
+        }
+        # NOTE(security): verify=False disables TLS certificate verification
+        # (kept for endpoints with self-issued certificates); prefer a CA
+        # bundle when the backend is reached over an untrusted network.
+        # The context manager releases the connection even when the consumer
+        # stops iterating early.
+        with requests.post(SGLANG_URL, json=payload, stream=True,
+                           timeout=600, verify=False) as resp:
+            if resp.status_code != 200:
+                # Error bodies are not SSE, so they would otherwise be
+                # skipped line by line and the user would see nothing.
+                yield f"**Error:** SGLang returned HTTP {resp.status_code}: `{resp.text[:500]}`"
+                return
+            # Server-sent events are UTF-8; without an explicit charset
+            # requests would decode text/* as ISO-8859-1 and garble tokens.
+            resp.encoding = "utf-8"
+            raw = ""
+            for line in resp.iter_lines(decode_unicode=True):
+                if not line or not line.startswith("data:"):
+                    continue
+                data = line[5:].strip()
+                if data == "[DONE]":
+                    break
+                try:
+                    chunk = json.loads(data)
+                    delta = (chunk.get("choices") or [{}])[0].get("delta") or {}
+                    token = delta.get("content") or ""
+                except (json.JSONDecodeError, AttributeError, IndexError, KeyError, TypeError):
+                    continue
+                if token:
+                    raw += token
+                    yield format_response(raw)
+            if raw:
+                yield format_response(raw)
+    except requests.exceptions.ConnectionError:
+        yield f"**❌ SGLang 서버 연결 실패.** `{SGLANG_BASE}`에 서버가 실행 중인지 확인하세요."
+    except Exception as exc:
+        yield f"**Error:** `{exc}`"
+# ══════════════════════════════════════════════════════════════════════════════
+# 4.  GRADIO BLOCKS  (hidden — serves API for frontend)
+# ══════════════════════════════════════════════════════════════════════════════
+# Gradio app whose extra inputs are all created with visible=False; their
+# values are passed to generate_reply() as the additional arguments of the
+# chat endpoint.
+with gr.Blocks(title="Darwin-35B-A3B-Opus") as gradio_demo:
+    # generate_reply() enables thinking when the selected label contains
+    # the word "Thinking".
+    thinking_toggle = gr.Radio(
+        choices=["⚡ Fast Mode  (direct answer)",
+                 "🧠 Thinking Mode  (chain-of-thought reasoning)"],
+        value="⚡ Fast Mode  (direct answer)",
+        visible=False,
+    )
+    # The image is carried as text; generate_reply() recognises strings that
+    # start with "data:" as data URLs.
+    image_input    = gr.Textbox(value="", visible=False)
+    system_prompt  = gr.Textbox(value=PRESETS["general"], visible=False)
+    # Slider maxima mirror MODEL_CAP["max_tokens"] and MODEL_CAP["temp_max"].
+    max_new_tokens = gr.Slider(minimum=64, maximum=16384, value=4096, visible=False)
+    temperature    = gr.Slider(minimum=0.0, maximum=1.5, value=0.6,  visible=False)
+    top_p          = gr.Slider(minimum=0.1, maximum=1.0, value=0.9,  visible=False)
+    # The order of additional_inputs must match the parameter order of
+    # generate_reply() after (message, history).
+    gr.ChatInterface(
+        fn=generate_reply,
+        api_name="chat",
+        additional_inputs=[
+            thinking_toggle, image_input,
+            system_prompt, max_new_tokens, temperature, top_p,
+        ],
+    )
+# ══════════════════════════════════════════════════════════════════════════════
+# 5.  FASTAPI — index.html + HF OAuth + Gradio API
+# ══════════════════════════════════════════════════════════════════════════════
+import pathlib, secrets
+# FastAPI application that serves the HTML frontend and the OAuth routes; the
+# Gradio app is mounted onto it further down.
+fapp    = FastAPI()
+# In-memory session store: session id (value of the "mc_session" cookie) ->
+# user-info dict. Entries are only removed on logout and are lost on restart.
+SESSIONS: dict[str, dict] = {}
+# Frontend page, expected next to this file.
+HTML    = pathlib.Path(__file__).parent / "index.html"
+# OAuth client credentials and host name, read from the environment.
+CLIENT_ID     = os.getenv("OAUTH_CLIENT_ID", "")
+CLIENT_SECRET = os.getenv("OAUTH_CLIENT_SECRET", "")
+SPACE_HOST    = os.getenv("SPACE_HOST", "localhost:7860")
+# NOTE(review): the redirect URI always uses https, including for the
+# localhost default — confirm this is intended for local runs.
+REDIRECT_URI  = f"https://{SPACE_HOST}/login/callback"
+# Only whether a client id is configured is logged, never its value.
+print(f"[OAuth] CLIENT_ID set: {bool(CLIENT_ID)}")
+print(f"[OAuth] SPACE_HOST: {SPACE_HOST}")
+# Hugging Face OAuth endpoints.
+HF_AUTH_URL   = "https://huggingface.co/oauth/authorize"
+HF_TOKEN_URL  = "https://huggingface.co/oauth/token"
+HF_USER_URL   = "https://huggingface.co/oauth/userinfo"
+# Space-separated scopes requested during login.
+SCOPES        = os.getenv("OAUTH_SCOPES", "openid profile")
+from urllib.parse import urlencode
+def _sid(req: Request) -> Optional[str]:
+    """Return the value of the ``mc_session`` cookie, or ``None`` if absent."""
+    cookies = req.cookies
+    return cookies.get("mc_session")
+def _user(req: Request) -> Optional[dict]:
+    """Return the stored session dict for the request, or ``None``."""
+    session_id = _sid(req)
+    if not session_id:
+        return None
+    return SESSIONS.get(session_id)
+@fapp.get("/")
+async def root(request: Request):
+    """Serve ``index.html``, or a short placeholder page when it is missing."""
+    if HTML.exists():
+        page = HTML.read_text(encoding="utf-8")
+    else:
+        page = "<h2>index.html missing</h2>"
+    return HTMLResponse(page)
+@fapp.get("/oauth/user")
+async def oauth_user(request: Request):
+    """Return the current session as JSON, or a 401 ``logged_in: false`` body."""
+    session = _user(request)
+    if not session:
+        return JSONResponse({"logged_in": False}, status_code=401)
+    return JSONResponse(session)
+# Short-lived cookie that binds the OAuth "state" value to the browser that
+# started the login, so the callback can reject forged or replayed requests.
+OAUTH_STATE_COOKIE = "mc_oauth_state"
+def _auth_failed() -> RedirectResponse:
+    """Redirect to the frontend with ``auth_error=1`` and drop the state cookie."""
+    resp = RedirectResponse("/?auth_error=1")
+    resp.delete_cookie(OAUTH_STATE_COOKIE)
+    return resp
+@fapp.get("/oauth/login")
+async def oauth_login(request: Request):
+    """Start the Hugging Face OAuth authorization-code flow.
+
+    Redirects to ``/?oauth_error=not_configured`` when no client id is set.
+    Otherwise redirects to the authorization endpoint and stores the random
+    ``state`` value in a cookie for verification in the callback.
+    """
+    if not CLIENT_ID:
+        return RedirectResponse("/?oauth_error=not_configured")
+    state = secrets.token_urlsafe(16)
+    params = {"response_type":"code","client_id":CLIENT_ID,"redirect_uri":REDIRECT_URI,"scope":SCOPES,"state":state}
+    resp = RedirectResponse(f"{HF_AUTH_URL}?{urlencode(params)}", status_code=302)
+    # samesite="lax" so the cookie is still sent on the top-level redirect
+    # back from the authorization server.
+    resp.set_cookie(OAUTH_STATE_COOKIE, state, httponly=True, samesite="lax", secure=True, max_age=600)
+    return resp
+@fapp.get("/login/callback")
+async def oauth_callback(request: Request, code: str = "", error: str = "", state: str = ""):
+    """Finish the OAuth flow: verify ``state``, fetch the user, open a session.
+
+    Any failure (provider error, missing code, state mismatch, network error,
+    non-200 or malformed provider response) redirects to ``/?auth_error=1``.
+    On success a new entry is added to ``SESSIONS`` and its id is returned in
+    the ``mc_session`` cookie together with a redirect to ``/``.
+    """
+    expected_state = request.cookies.get(OAUTH_STATE_COOKIE, "")
+    # Compare as bytes: compare_digest() rejects non-ASCII str arguments.
+    state_ok = bool(state) and bool(expected_state) and secrets.compare_digest(
+        state.encode("utf-8"), expected_state.encode("utf-8")
+    )
+    if error or not code or not state_ok:
+        return _auth_failed()
+    basic = base64.b64encode(f"{CLIENT_ID}:{CLIENT_SECRET}".encode()).decode()
+    try:
+        async with httpx.AsyncClient() as client:
+            tok = await client.post(HF_TOKEN_URL, data={"grant_type":"authorization_code","code":code,"redirect_uri":REDIRECT_URI},
+                                    headers={"Accept":"application/json","Authorization":f"Basic {basic}"})
+            if tok.status_code != 200:
+                return _auth_failed()
+            token_body = tok.json()
+            access_token = token_body.get("access_token", "") if isinstance(token_body, dict) else ""
+            if not access_token:
+                return _auth_failed()
+            uinfo = await client.get(HF_USER_URL, headers={"Authorization":f"Bearer {access_token}"})
+            if uinfo.status_code != 200:
+                return _auth_failed()
+            user = uinfo.json()
+    except (httpx.HTTPError, ValueError):
+        # Network failure or a non-JSON body: report as a failed login
+        # instead of letting it surface as HTTP 500.
+        return _auth_failed()
+    if not isinstance(user, dict):
+        return _auth_failed()
+    sid = secrets.token_urlsafe(32)
+    # NOTE: SESSIONS is an unbounded in-memory dict; entries outlive the
+    # cookie's max_age until logout or process restart.
+    SESSIONS[sid] = {
+        "logged_in": True,
+        "username": user.get("preferred_username", user.get("name", "User")),
+        "name": user.get("name", ""),
+        "avatar": user.get("picture", ""),
+        "profile": f"https://huggingface.co/{user.get('preferred_username', '')}",
+    }
+    resp = RedirectResponse("/")
+    resp.set_cookie("mc_session", sid, httponly=True, samesite="lax", secure=True, max_age=60*60*24*7)
+    # The state value is single-use.
+    resp.delete_cookie(OAUTH_STATE_COOKIE)
+    return resp
+@fapp.get("/oauth/logout")
+async def oauth_logout(request: Request):
+    """Drop the caller's session, clear the session cookie, redirect to ``/``."""
+    session_id = _sid(request)
+    if session_id:
+        SESSIONS.pop(session_id, None)
+    response = RedirectResponse("/")
+    response.delete_cookie("mc_session")
+    return response
+@fapp.get("/health")
+async def health():
+    """Report liveness plus whether the SGLang backend answers at all.
+
+    Returns ``{"status": "ok", "sglang": "connected"}`` when a request to the
+    backend's ``/v1/models`` gets any HTTP response within 5 seconds, and
+    ``"disconnected"`` otherwise. The probe is best-effort and never raises.
+    """
+    try:
+        # Async client so the probe does not block the event loop.
+        # NOTE(security): verify=False skips TLS certificate verification
+        # (kept for backends with self-issued certificates).
+        async with httpx.AsyncClient(verify=False, timeout=5) as client:
+            await client.get(f"{SGLANG_BASE}/v1/models")
+    except Exception:
+        # Deliberately broad, but unlike a bare ``except:`` it lets task
+        # cancellation and KeyboardInterrupt propagate.
+        return {"status":"ok","sglang":"disconnected"}
+    return {"status":"ok","sglang":"connected"}
+# Mount the Gradio app under /gradio on the FastAPI app; ``app`` is the ASGI
+# application that uvicorn serves.
+app = gr.mount_gradio_app(fapp, gradio_demo, path="/gradio")
+if __name__ == "__main__":
+    print(f"[BOOT] Darwin-35B-A3B-Opus Demo · SGLang: {SGLANG_URL}", flush=True)
+    # Listen on all interfaces on port 7860 (the default port in SPACE_HOST).
+    uvicorn.run(app, host="0.0.0.0", port=7860)