SeaWolf-AI committed on
Commit
39ca988
Β·
verified Β·
1 Parent(s): 1c545b5

Delete app-backup2.py

Browse files
Files changed (1) hide show
  1. app-backup2.py +0 -483
app-backup2.py DELETED
@@ -1,483 +0,0 @@
1
- """
2
- 🧬 Darwin-35B-A3B-Opus β€” Demo Space
3
- Single model Β· SGLang backend Β· Vision support
4
- """
5
- import sys
6
- print(f"[BOOT] Python {sys.version}", flush=True)
7
-
8
- import base64, os, re, json
9
- from typing import Generator, Optional
10
-
11
- # NIPA λ“± 자체 μΈμ¦μ„œ μ—”λ“œν¬μΈνŠΈμš© SSL κ²½κ³  λ¬΄μ‹œ
12
- import urllib3
13
- urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
14
-
15
- try:
16
- import gradio as gr
17
- print(f"[BOOT] gradio {gr.__version__}", flush=True)
18
- except ImportError as e:
19
- print(f"[BOOT] FATAL: {e}", flush=True); sys.exit(1)
20
-
21
- try:
22
- import httpx, uvicorn, requests
23
- from fastapi import FastAPI, Request
24
- from fastapi.responses import HTMLResponse, RedirectResponse, JSONResponse
25
- print("[BOOT] All imports OK", flush=True)
26
- except ImportError as e:
27
- print(f"[BOOT] FATAL: {e}", flush=True); sys.exit(1)
28
-
29
# ══════════════════════════════════════════════════════════════════════════════
# 1. SGLANG BACKEND CONFIG
# ══════════════════════════════════════════════════════════════════════════════
# Primary inference endpoint: an OpenAI-compatible SGLang server.
# DARWIN_API lets deployments point at a remote backend.
SGLANG_BASE = os.getenv("DARWIN_API", "http://localhost:7947")
SGLANG_URL = f"{SGLANG_BASE}/v1/chat/completions"

# Multi-model config
MODEL_NAME = "Darwin-35B-A3B-Opus"
MODEL_ID = "FINAL-Bench/Darwin-35B-A3B-Opus"
# Capability sheet read by generate_reply: "max_tokens" and "temp_max" clamp
# user-supplied sampling params, "vision" gates the image branch.
# "arch"/"active"/"ctx" appear to be display-only strings — confirm.
MODEL_CAP = {
    "arch": "MoE", "active": "3B / 35B total",
    "ctx": "262K", "thinking": True, "vision": True,
    "max_tokens": 16384, "temp_max": 1.5,
}

# System-prompt presets; "general" is the default wired into the hidden
# Gradio system_prompt textbox below.
PRESETS = {
    "general": "You are Darwin-35B-A3B-Opus, a highly capable reasoning model created by VIDRAFT via evolutionary merge. Think step by step for complex questions.",
    "code": "You are an expert software engineer. Write clean, efficient, well-commented code. Explain your approach before writing. Use modern best practices.",
    "math": "You are a world-class mathematician. Break problems step-by-step. Show full working. Use LaTeX where helpful.",
    "creative": "You are a brilliant creative writer. Be imaginative, vivid, and engaging. Adapt tone and style to the request.",
    "translate": "You are a professional translator fluent in 201 languages. Provide accurate, natural-sounding translations with cultural context.",
    "research": "You are a rigorous research analyst. Provide structured, well-reasoned analysis. Identify assumptions and acknowledge uncertainty.",
}
52
-
53
- # ══════════════════════════════════════════════════════════════════════════════
54
- # 2. THINKING MODE HELPERS
55
- # ══════════════════════════════════════════════════════════════════════════════
56
def build_user_message(text: str, thinking: bool) -> str:
    """Return the user turn unchanged.

    The `thinking` flag is deliberately ignored: the NIPA endpoint does not
    support the /think and /no_think control tags, so no tag is appended.
    """
    return text
58
-
59
def parse_think_blocks(text: str) -> tuple[str, str]:
    """Split a response containing a closed <think>...</think> block.

    Returns (chain, answer): the stripped reasoning chain and the text that
    follows the closing tag. With no complete block, returns ("", text).
    Note: any text *before* <think> is discarded, matching the streaming UI's
    expectation that the tag opens the response.
    """
    match = re.search(r"<think>(.*?)</think>\s*", text, re.DOTALL)
    if match is None:
        return "", text
    chain = match.group(1).strip()
    answer = text[match.end():].strip()
    return chain, answer
62
-
63
- def _is_thinking_line(line: str) -> bool:
64
- """ν•œ 쀄이 reasoning/thinking인지 감지"""
65
- l = line.strip()
66
- if not l:
67
- return True # 빈 쀄은 thinking 블둝에 포함
68
- # μ˜μ–΄ reasoning νŒ¨ν„΄
69
- think_starts = [
70
- "The user", "the user", "This is", "this is", "I should", "I need to",
71
- "Let me", "let me", "My task", "my task", "I'll ", "I will",
72
- "Since ", "since ", "Now,", "now,", "So,", "so,", "First,", "first,",
73
- "Okay", "okay", "Alright", "Hmm", "Wait", "Actually",
74
- "The question", "the question", "The input", "the input",
75
- "The request", "the request", "The prompt", "the prompt",
76
- "Thinking Process", "Thinking process", "**Thinking",
77
- "Step ", "step ", "Approach:", "Analysis:", "Reasoning:",
78
- "1. **", "2. **", "3. **", "4. **", "5. **",
79
- ]
80
- for s in think_starts:
81
- if l.startswith(s):
82
- return True
83
- # 글머리 기호 + μ˜μ–΄ reasoning
84
- if l.startswith(("- ", "* ", "β—‹ ")) and any(c.isascii() and c.isalpha() for c in l[:20]):
85
- if not any(ord(c) > 0x1100 for c in l[:30]): # ν•œκΈ€ μ—†μœΌλ©΄ thinking
86
- return True
87
- return False
88
-
89
-
90
def _split_thinking_answer(raw: str) -> tuple:
    """Split a tag-less response into (thinking, answer).

    Scans line by line for the first line that is NOT heuristic thinking and
    either (a) contains non-Latin text (e.g. Korean) near its start, or
    (b) appears after line 2 preceded by two blank lines. Everything before
    that line is treated as thinking. Returns ("", raw) when no boundary is
    found or the boundary is the very first line.
    """
    lines = raw.split("\n")
    boundary = -1

    for idx, line in enumerate(lines):
        if _is_thinking_line(line):
            continue
        # (a) non-ASCII script early in the line → real answer starts here
        if any(ord(ch) > 0x1100 for ch in line.strip()[:10]):
            boundary = idx
            break
        # (b) an English answer separated from the reasoning by two blank lines
        if idx > 2 and all(not lines[j].strip() for j in range(max(0, idx - 2), idx)):
            boundary = idx
            break

    if boundary > 0:
        head = "\n".join(lines[:boundary]).strip()
        tail = "\n".join(lines[boundary:]).strip()
        return head, tail

    return "", raw
114
-
115
-
116
def format_response(raw: str) -> str:
    """Format a (possibly partial) streamed response for the chat UI.

    Three cases, checked in order:
      1. a closed <think>...</think> block → collapsed into <details>;
      2. an opened but unclosed <think> tag → progress placeholder;
      3. tag-less plain-text reasoning (NIPA endpoint) → heuristic split.
    Anything else is returned unchanged.
    """
    # Case 1: completed <think>...</think>
    reasoning, body = parse_think_blocks(raw)
    if reasoning:
        return (
            "<details>\n"
            "<summary>🧠 Reasoning Chain — click to expand</summary>\n\n"
            f"{reasoning}\n\n"
            "</details>\n\n"
            f"{body}"
        )

    # Case 2: <think> opened, not yet closed — still streaming the chain.
    if "<think>" in raw and "</think>" not in raw:
        think_len = len(raw) - raw.index("<think>") - 7
        return f"🧠 Reasoning... ({think_len} chars)"

    # Case 3: plain-text thinking detection (no tags at all).
    stripped = raw.strip()
    first_line = stripped.split("\n")[0] if stripped else ""
    if len(raw) > 20 and _is_thinking_line(first_line):
        reasoning, body = _split_thinking_answer(raw)
        if reasoning:
            if body:
                return (
                    f"<details>\n"
                    f"<summary>🧠 Reasoning Chain ({len(reasoning)} chars)</summary>\n\n"
                    f"{reasoning}\n\n"
                    f"</details>\n\n"
                    f"{body}"
                )
            # Answer has not appeared yet — report progress only.
            return f"🧠 Reasoning... ({len(raw)} chars)"

    return raw
147
-
148
- # ══════════════════════════════════════════════════════════════════════════════
149
- # 3. STREAMING BACKEND β€” SGLang OpenAI-compatible API
150
- # ══════════════════════════════════════════════════════════════════════════════
151
def generate_reply(
    message: str,
    history: list,
    thinking_mode: str,
    image_input,  # data-URI string, PIL.Image, or array — see vision branch; TODO confirm caller types
    system_prompt: str,
    max_new_tokens: int,
    temperature: float,
    top_p: float,
) -> Generator[str, None, None]:
    """Stream a chat completion from the SGLang backend.

    Yields the full formatted response-so-far after each streamed token
    (via format_response), so the UI re-renders progressively. On failure
    yields a single markdown error message instead. Tries the primary
    endpoint first, then an optional H100_API fallback.
    """
    api_url = f"{SGLANG_BASE}/v1/chat/completions"
    # NOTE(review): use_think only feeds build_user_message, which ignores it
    # (endpoint lacks /think support) — effectively unused.
    use_think = "Thinking" in thinking_mode
    # Clamp user-supplied sampling params to the model's capability sheet.
    max_new_tokens = min(int(max_new_tokens), MODEL_CAP["max_tokens"])
    temperature = min(float(temperature), MODEL_CAP["temp_max"])

    messages: list[dict] = []
    if system_prompt.strip():
        messages.append({"role": "system", "content": system_prompt.strip()})

    # Rebuild the OpenAI-style message list from Gradio history. Two layouts
    # are handled: "messages" dicts ({role, content}) and legacy (user, bot)
    # tuples; content may be a list of typed parts, from which only the
    # "text" parts are kept.
    for turn in history:
        if isinstance(turn, dict):
            role = turn.get("role", "")
            raw = turn.get("content") or ""
            text = (" ".join(p.get("text","") for p in raw
                             if isinstance(p,dict) and p.get("type")=="text")
                    if isinstance(raw, list) else str(raw))
            if role == "user":
                messages.append({"role":"user","content":text})
            elif role == "assistant":
                # Strip any <think> chain so it is not re-fed as context.
                _, clean = parse_think_blocks(text)
                messages.append({"role":"assistant","content":clean})
        else:
            # Legacy tuple history: (user, assistant)
            try:
                u, a = (turn[0] or None), (turn[1] if len(turn)>1 else None)
            except (IndexError, TypeError):
                continue
            def _txt(v):
                # Flatten a typed-parts list (or anything else) to plain text.
                if v is None: return None
                if isinstance(v, list):
                    return " ".join(p.get("text","") for p in v
                                    if isinstance(p,dict) and p.get("type")=="text")
                return str(v)
            if u := _txt(u): messages.append({"role":"user","content":u})
            if a := _txt(a):
                _, clean = parse_think_blocks(a)
                messages.append({"role":"assistant","content":clean})

    user_text = build_user_message(message, use_think)

    # Vision: image input handling — wrap the turn as multimodal content.
    if image_input and MODEL_CAP["vision"]:
        import io
        from PIL import Image as PILImage

        if isinstance(image_input, str) and image_input.startswith("data:"):
            # Already a data URI: reuse the base64 payload as-is.
            header, b64_data = image_input.split(",", 1)
            b64 = b64_data
        else:
            # PIL image or array-like: re-encode to JPEG and base64.
            buf = io.BytesIO()
            if not isinstance(image_input, PILImage.Image):
                image_input = PILImage.fromarray(image_input)
            image_input.save(buf, format="JPEG")
            b64 = base64.b64encode(buf.getvalue()).decode()

        content = [
            {"type":"image_url","image_url":{"url":f"data:image/jpeg;base64,{b64}"}},
            {"type":"text","text":user_text},
        ]
    else:
        content = user_text
    messages.append({"role":"user","content":content})

    # Stream from API (with fallback): primary SGLang URL, then H100_API.
    H100_API = os.getenv("H100_API", "")
    api_urls = [api_url]
    if H100_API:
        api_urls.append(f"{H100_API.rstrip('/')}/v1/chat/completions")

    request_body = {
        "model": MODEL_ID,
        "messages": messages,
        "max_tokens": max_new_tokens,
        "temperature": temperature,
        "top_p": float(top_p),
        "stream": True,
    }

    for i, url in enumerate(api_urls):
        try:
            label = "Primary" if i == 0 else "Fallback(H100)"
            # Mask the host in logs; never print the full endpoint.
            masked = url.split("/v1")[0][:20] + "***"
            print(f"[API] {label}: {masked}", flush=True)

            # verify=False: backend may use a self-signed cert (warnings are
            # silenced at import time). (10, 600) = connect / read timeouts.
            resp = requests.post(url, json=request_body,
                                 stream=True, timeout=(10, 600), verify=False)

            if resp.status_code != 200:
                print(f"[API] {label} HTTP {resp.status_code}", flush=True)
                if i < len(api_urls) - 1:
                    continue  # try fallback
                else:
                    yield f"**❌ API 오류 (HTTP {resp.status_code})**"
                    return

            # Parse the SSE stream: "data: {json}" lines until "[DONE]".
            raw = ""
            got_token = False
            for line in resp.iter_lines(decode_unicode=True):
                if not line or not line.startswith("data: "):
                    continue
                payload = line[6:]
                if payload.strip() == "[DONE]":
                    break
                try:
                    chunk = json.loads(payload)
                    delta = chunk.get("choices", [{}])[0].get("delta", {})
                    token = delta.get("content", "")
                    if token:
                        raw += token
                        got_token = True
                        # Yield the WHOLE formatted text so far (UI replaces).
                        yield format_response(raw)
                except (json.JSONDecodeError, IndexError, KeyError):
                    continue  # skip malformed chunks, keep streaming

            if raw:
                # Final re-render after the stream closes.
                yield format_response(raw)

            if got_token:
                print(f"[API] {label} OK — {len(raw)} chars", flush=True)
                return  # success: stop here

            # Zero tokens received: move on to the next API.
            # NOTE(review): on the LAST url this falls through silently with
            # nothing yielded — confirm that is intended.
            if not got_token and i < len(api_urls) - 1:
                print(f"[API] {label} returned no tokens, trying fallback...", flush=True)
                continue

        except (requests.exceptions.ConnectionError,
                requests.exceptions.Timeout,
                requests.exceptions.ReadTimeout) as e:
            print(f"[API] {label} failed: connection error", flush=True)
            if i < len(api_urls) - 1:
                print(f"[API] Switching to fallback...", flush=True)
                continue
            else:
                yield "**❌ 모든 API 연결 실패.** 서버 상태를 확인하세요."
        except Exception as exc:
            # Unexpected error: surface it to the UI and abort all fallbacks.
            yield f"**Error:** `{exc}`"
            return
299
-
300
-
301
- # ══════════════════════════════════════════════════════════════════════════════
302
- # 4. GRADIO BLOCKS (hidden β€” serves API for frontend)
303
- # ══════════════════════════════════════════════════════════════════════════════
304
# Hidden Gradio app: the real frontend is index.html (served by FastAPI);
# this Blocks app only provides the chat API, mounted at /gradio below.
# Every widget is invisible — they exist solely so ChatInterface exposes
# them as per-request additional inputs.
with gr.Blocks(title="Darwin-35B-A3B-Opus") as gradio_demo:
    thinking_toggle = gr.Radio(
        choices=["⚡ Fast Mode (direct answer)",
                 "🧠 Thinking Mode (chain-of-thought reasoning)"],
        value="⚡ Fast Mode (direct answer)",
        visible=False,
    )
    # A Textbox (not gr.Image): the frontend sends images as data-URI strings,
    # decoded inside generate_reply.
    image_input = gr.Textbox(value="", visible=False)
    system_prompt = gr.Textbox(value=PRESETS["general"], visible=False)
    # Ranges mirror MODEL_CAP limits (16384 tokens, temp ≤ 1.5).
    max_new_tokens = gr.Slider(minimum=64, maximum=16384, value=4096, visible=False)
    temperature = gr.Slider(minimum=0.0, maximum=1.5, value=0.6, visible=False)
    top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.9, visible=False)

    gr.ChatInterface(
        fn=generate_reply,
        api_name="chat",  # exposed to the frontend through the mounted /gradio app
        additional_inputs=[
            thinking_toggle, image_input,
            system_prompt, max_new_tokens, temperature, top_p,
        ],
    )
325
-
326
- # ══════════════════════════════════════════════════════════════════════════════
327
- # 5. FASTAPI β€” index.html + HF OAuth + Gradio API
328
- # ══════════════════════════════════════════════════════════════════════════════
329
import pathlib, secrets

fapp = FastAPI()
# In-memory session store: sid → user record. Unbounded and lost on restart;
# fine for a single-process demo Space.
SESSIONS: dict[str, dict] = {}
# The SPA shell served at "/".
HTML = pathlib.Path(__file__).parent / "index.html"

# Hugging Face OAuth app credentials (injected by the Space runtime).
CLIENT_ID = os.getenv("OAUTH_CLIENT_ID", "")
CLIENT_SECRET = os.getenv("OAUTH_CLIENT_SECRET", "")
SPACE_HOST = os.getenv("SPACE_HOST", "localhost:7860")
REDIRECT_URI = f"https://{SPACE_HOST}/login/callback"

print(f"[OAuth] CLIENT_ID set: {bool(CLIENT_ID)}")
print(f"[OAuth] SPACE_HOST: {SPACE_HOST}")
# Standard HF OAuth endpoints (authorize / token exchange / userinfo).
HF_AUTH_URL = "https://huggingface.co/oauth/authorize"
HF_TOKEN_URL = "https://huggingface.co/oauth/token"
HF_USER_URL = "https://huggingface.co/oauth/userinfo"
SCOPES = os.getenv("OAUTH_SCOPES", "openid profile")

from urllib.parse import urlencode
348
-
349
def _sid(req: Request) -> Optional[str]:
    """Return the "mc_session" cookie value for this request, or None."""
    return req.cookies.get("mc_session")
351
-
352
def _user(req: Request) -> Optional[dict]:
    """Resolve the request's session cookie to a session record, or None."""
    sid = _sid(req)
    if not sid:
        return None
    return SESSIONS.get(sid)
355
-
356
@fapp.get("/")
async def root(request: Request):
    """Serve the SPA shell (index.html), or a fallback notice if missing."""
    if HTML.exists():
        html = HTML.read_text(encoding="utf-8")
    else:
        html = "<h2>index.html missing</h2>"
    return HTMLResponse(html)
360
-
361
@fapp.get("/oauth/user")
async def oauth_user(request: Request):
    """Return the logged-in user's session record, or 401 when anonymous."""
    user = _user(request)
    if user:
        return JSONResponse(user)
    return JSONResponse({"logged_in": False}, status_code=401)
365
-
366
@fapp.get("/oauth/login")
async def oauth_login(request: Request):
    """Redirect the browser to the Hugging Face OAuth authorize page."""
    if not CLIENT_ID:
        return RedirectResponse("/?oauth_error=not_configured")
    # NOTE(review): `state` is generated here but never stored or verified in
    # /login/callback, so it does not actually provide CSRF protection — confirm.
    state = secrets.token_urlsafe(16)
    params = {
        "response_type": "code",
        "client_id": CLIENT_ID,
        "redirect_uri": REDIRECT_URI,
        "scope": SCOPES,
        "state": state,
    }
    return RedirectResponse(f"{HF_AUTH_URL}?{urlencode(params)}", status_code=302)
373
-
374
@fapp.get("/login/callback")
async def oauth_callback(code: str = "", error: str = "", state: str = ""):
    """OAuth redirect target: exchange the code for a token, fetch the HF
    user profile, and open a cookie-backed server-side session.

    NOTE(review): `state` is accepted but never compared against the value
    issued in /oauth/login, so CSRF protection is incomplete — confirm.
    """
    if error or not code:
        return RedirectResponse("/?auth_error=1")
    # HF token endpoint authenticates the OAuth client via HTTP Basic.
    basic = base64.b64encode(f"{CLIENT_ID}:{CLIENT_SECRET}".encode()).decode()
    async with httpx.AsyncClient() as client:
        tok = await client.post(HF_TOKEN_URL, data={"grant_type":"authorization_code","code":code,"redirect_uri":REDIRECT_URI},
                                headers={"Accept":"application/json","Authorization":f"Basic {basic}"})
        if tok.status_code != 200:
            return RedirectResponse("/?auth_error=1")
        access_token = tok.json().get("access_token", "")
        if not access_token:
            return RedirectResponse("/?auth_error=1")
        uinfo = await client.get(HF_USER_URL, headers={"Authorization":f"Bearer {access_token}"})
        if uinfo.status_code != 200:
            return RedirectResponse("/?auth_error=1")
        user = uinfo.json()

    # Store only display data server-side; the access token is discarded.
    sid = secrets.token_urlsafe(32)
    SESSIONS[sid] = {
        "logged_in": True,
        "username": user.get("preferred_username", user.get("name", "User")),
        "name": user.get("name", ""),
        "avatar": user.get("picture", ""),
        "profile": f"https://huggingface.co/{user.get('preferred_username', '')}",
    }
    resp = RedirectResponse("/")
    # 7-day cookie; httponly keeps it out of reach of page JavaScript.
    resp.set_cookie("mc_session", sid, httponly=True, samesite="lax", secure=True, max_age=60*60*24*7)
    return resp
403
-
404
@fapp.get("/oauth/logout")
async def oauth_logout(request: Request):
    """Drop the server-side session (if any) and clear the session cookie."""
    sid = _sid(request)
    if sid:
        SESSIONS.pop(sid, None)
    resp = RedirectResponse("/")
    resp.delete_cookie("mc_session")
    return resp
411
-
412
@fapp.get("/health")
async def health():
    """Liveness probe that also reports whether the SGLang backend answers.

    Always returns status "ok"; only the "sglang" field varies, so the Space
    itself is never marked unhealthy by a backend outage.
    """
    try:
        # verify=False: the backend may sit behind a self-signed cert
        # (urllib3 warnings are silenced at import time).
        requests.get(f"{SGLANG_BASE}/v1/models", timeout=5, verify=False)
        return {"status": "ok", "sglang": "connected"}
    except Exception:
        # Was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt; Exception keeps the same best-effort behavior
        # while letting shutdown signals propagate.
        return {"status": "ok", "sglang": "disconnected"}
419
-
420
- # ── Web Search API (Brave) ──────────────────────────────────────────────
421
- BRAVE_API_KEY = os.getenv("BRAVE_API_KEY", "")
422
-
423
@fapp.post("/api/search")
async def api_search(request: Request):
    """Proxy a web search to the Brave Search API.

    Body: {"query": str}. Returns up to 5 results as
    {"results": [{"title", "desc", "url"}, ...]}, or {"error": ...} with
    400 (empty query) / 500 (missing key or upstream failure).

    NOTE(review): requests.get is a blocking call inside an async handler;
    it stalls the event loop for up to 10s — consider a threadpool offload.
    """
    body = await request.json()
    query = body.get("query", "").strip()
    if not query:
        return JSONResponse({"error": "empty query"}, status_code=400)
    key = BRAVE_API_KEY
    if not key:
        return JSONResponse({"error": "BRAVE_API_KEY not set"}, status_code=500)
    try:
        r = requests.get(
            "https://api.search.brave.com/res/v1/web/search",
            headers={"X-Subscription-Token": key, "Accept": "application/json"},
            params={"q": query, "count": 5}, timeout=10,
        )
        r.raise_for_status()
        results = r.json().get("web", {}).get("results", [])
        items = []
        # Defensive [:5] even though count=5 was requested upstream.
        for item in results[:5]:
            items.append({
                "title": item.get("title", ""),
                "desc": item.get("description", ""),
                "url": item.get("url", ""),
            })
        return JSONResponse({"results": items})
    except Exception as e:
        # Boundary handler: report any upstream/parse failure as a 500.
        return JSONResponse({"error": str(e)}, status_code=500)
450
-
451
- # ── PDF Text Extraction ─────────────────────────────────────────────────
452
@fapp.post("/api/extract-pdf")
async def api_extract_pdf(request: Request):
    """Base64 PDF → text extraction.

    Body: {"data": "<base64 or data-URI>"}. Uses PyMuPDF when available;
    otherwise falls back to a crude byte-level scrape that keeps printable
    ASCII plus Hangul/kana ranges. Returns {"text", "chars"} capped at
    8000 characters, or {"error": ...} with status 500.
    """
    try:
        body = await request.json()
        b64 = body.get("data", "")
        # Accept both raw base64 and "data:application/pdf;base64,..." URIs.
        if "," in b64:
            b64 = b64.split(",", 1)[1]
        pdf_bytes = base64.b64decode(b64)
        text = ""
        try:
            import fitz  # PyMuPDF
            doc = fitz.open(stream=pdf_bytes, filetype="pdf")
            try:
                for page in doc:
                    text += page.get_text() + "\n"
            finally:
                doc.close()  # was leaked before; release the document explicitly
        except ImportError:
            # Fallback: simple text extraction from the raw bytes — keep
            # printable ASCII, newlines, Hangul and Japanese kana only.
            content = pdf_bytes.decode("utf-8", errors="ignore")
            text = re.sub(r'[^\x20-\x7E\n\r\uAC00-\uD7A3\u3040-\u309F\u30A0-\u30FF]', '', content)

        text = text.strip()[:8000]  # Max 8000 chars
        return JSONResponse({"text": text, "chars": len(text)})
    except Exception as e:
        # Boundary handler: any decode/parse failure becomes a 500.
        return JSONResponse({"error": str(e)}, status_code=500)
478
-
479
# Mount the hidden Gradio app under /gradio; `fapp` keeps serving index.html,
# OAuth, and the helper APIs at the root. `app` is the combined ASGI app.
app = gr.mount_gradio_app(fapp, gradio_demo, path="/gradio")

if __name__ == "__main__":
    print(f"[BOOT] Darwin-35B-A3B-Opus Demo · SGLang: {SGLANG_URL}", flush=True)
    # 0.0.0.0:7860 is the standard HF Spaces bind address/port.
    uvicorn.run(app, host="0.0.0.0", port=7860)