SeaWolf-AI commited on
Commit
84612a3
Β·
verified Β·
1 Parent(s): 164ae71

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +311 -0
app.py ADDED
@@ -0,0 +1,311 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ 🧬 Darwin-35B-A3B-Opus β€” Demo Space
3
+ Single model Β· SGLang backend Β· Vision support
4
+ """
5
+ import sys
6
+ print(f"[BOOT] Python {sys.version}", flush=True)
7
+
8
+ import base64, os, re, json
9
+ from typing import Generator, Optional
10
+
11
+ # NIPA λ“± 자체 μΈμ¦μ„œ μ—”λ“œν¬μΈνŠΈμš© SSL κ²½κ³  λ¬΄μ‹œ
12
+ import urllib3
13
+ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
14
+
15
+ try:
16
+ import gradio as gr
17
+ print(f"[BOOT] gradio {gr.__version__}", flush=True)
18
+ except ImportError as e:
19
+ print(f"[BOOT] FATAL: {e}", flush=True); sys.exit(1)
20
+
21
+ try:
22
+ import httpx, uvicorn, requests
23
+ from fastapi import FastAPI, Request
24
+ from fastapi.responses import HTMLResponse, RedirectResponse, JSONResponse
25
+ print("[BOOT] All imports OK", flush=True)
26
+ except ImportError as e:
27
+ print(f"[BOOT] FATAL: {e}", flush=True); sys.exit(1)
28
+
29
# ══════════════════════════════════════════════════════════════════════════════
# 1. SGLANG BACKEND CONFIG
# ══════════════════════════════════════════════════════════════════════════════
# Base URL of the SGLang OpenAI-compatible server (override via DARWIN_API).
SGLANG_BASE = os.getenv("DARWIN_API", "http://localhost:7947")
SGLANG_URL = f"{SGLANG_BASE}/v1/chat/completions"

# Display name and capability card for the single served model.
# max_tokens / temp_max are hard caps enforced in generate_reply.
MODEL_NAME = "Darwin-35B-A3B-Opus"
MODEL_CAP = {
    "arch": "MoE", "active": "3B / 35B total",
    "ctx": "262K", "thinking": True, "vision": True,
    "max_tokens": 16384, "temp_max": 1.5,
}

# System-prompt presets selectable by the frontend (keys are preset ids).
PRESETS = {
    "general": "You are Darwin-35B-A3B-Opus, a highly capable reasoning model created by VIDRAFT via evolutionary merge. Think step by step for complex questions.",
    "code": "You are an expert software engineer. Write clean, efficient, well-commented code. Explain your approach before writing. Use modern best practices.",
    "math": "You are a world-class mathematician. Break problems step-by-step. Show full working. Use LaTeX where helpful.",
    "creative": "You are a brilliant creative writer. Be imaginative, vivid, and engaging. Adapt tone and style to the request.",
    "translate": "You are a professional translator fluent in 201 languages. Provide accurate, natural-sounding translations with cultural context.",
    "research": "You are a rigorous research analyst. Provide structured, well-reasoned analysis. Identify assumptions and acknowledge uncertainty.",
}
50
+
51
+ # ══════════════════════════════════════════════════════════════════════════════
52
+ # 2. THINKING MODE HELPERS
53
+ # ══════════════════════════════════════════════════════════════════════════════
54
def build_user_message(text: str, thinking: bool) -> str:
    """Prepend the SGLang reasoning-mode control token to the user text."""
    mode_token = "/think\n" if thinking else "/no_think\n"
    return mode_token + text
56
+
57
def parse_think_blocks(text: str) -> tuple[str, str]:
    """Split *text* into (reasoning chain, visible answer).

    Returns ("", text) unchanged when no <think>...</think> block is present.
    """
    match = re.search(r"<think>(.*?)</think>\s*", text, re.DOTALL)
    if match is None:
        return "", text
    return match.group(1).strip(), text[match.end():].strip()


def format_response(raw: str) -> str:
    """Render a raw completion as markdown, folding any reasoning chain
    into a collapsible <details> block above the answer."""
    chain, answer = parse_think_blocks(raw)
    if not chain:
        return raw
    quoted = "\n".join(f"> {line}" for line in chain.split("\n"))
    header = (
        "<details>\n"
        "<summary>🧠 Reasoning Chain β€” click to expand</summary>\n\n"
        f"{quoted}\n\n"
        "</details>\n\n"
    )
    return header + answer
74
+
75
# ══════════════════════════════════════════════════════════════════════════════
# 3. STREAMING BACKEND β€” SGLang OpenAI-compatible API
# ══════════════════════════════════════════════════════════════════════════════
def _content_text(value) -> Optional[str]:
    """Flatten a Gradio content value (str or multimodal part list) to plain text."""
    if value is None:
        return None
    if isinstance(value, list):
        return " ".join(p.get("text", "") for p in value
                        if isinstance(p, dict) and p.get("type") == "text")
    return str(value)


def _history_to_messages(history: list, system_prompt: str) -> list[dict]:
    """Convert Gradio chat history (messages-format dicts or legacy
    (user, assistant) pairs) into OpenAI-style messages, stripping any
    <think> blocks from past assistant turns so they are not re-fed."""
    messages: list[dict] = []
    if system_prompt.strip():
        messages.append({"role": "system", "content": system_prompt.strip()})

    for turn in history:
        if isinstance(turn, dict):  # messages format
            role = turn.get("role", "")
            text = _content_text(turn.get("content") or "") or ""
            if role == "user":
                messages.append({"role": "user", "content": text})
            elif role == "assistant":
                _, clean = parse_think_blocks(text)
                messages.append({"role": "assistant", "content": clean})
        else:  # legacy tuple/list format
            try:
                user_raw = turn[0] or None
                asst_raw = turn[1] if len(turn) > 1 else None
            except (IndexError, TypeError):
                continue
            if user_text := _content_text(user_raw):
                messages.append({"role": "user", "content": user_text})
            if asst_text := _content_text(asst_raw):
                _, clean = parse_think_blocks(asst_text)
                messages.append({"role": "assistant", "content": clean})
    return messages


def _image_to_b64(image_input) -> str:
    """Encode *image_input* (data-URL string, PIL image, or ndarray) as base64 JPEG."""
    import io
    from PIL import Image as PILImage

    if isinstance(image_input, str) and image_input.startswith("data:"):
        # Already a data URL from the frontend β€” keep the payload as-is.
        _, b64 = image_input.split(",", 1)
        return b64
    if not isinstance(image_input, PILImage.Image):
        image_input = PILImage.fromarray(image_input)
    if image_input.mode != "RGB":
        # JPEG cannot store alpha/palette modes (RGBA, P, ...) β€” saving would raise.
        image_input = image_input.convert("RGB")
    buf = io.BytesIO()
    image_input.save(buf, format="JPEG")
    return base64.b64encode(buf.getvalue()).decode()


def generate_reply(
    message: str,
    history: list,
    thinking_mode: str,
    image_input,
    system_prompt: str,
    max_new_tokens: int,
    temperature: float,
    top_p: float,
) -> Generator[str, None, None]:
    """Stream a chat completion from the SGLang backend.

    Yields the progressively formatted markdown answer (with any reasoning
    chain folded into a <details> block). On connection failure yields a
    single error string instead.
    """
    use_think = "Thinking" in thinking_mode
    # Clamp client-supplied sampling params to the model's capability card.
    max_new_tokens = min(int(max_new_tokens), MODEL_CAP["max_tokens"])
    temperature = min(float(temperature), MODEL_CAP["temp_max"])

    messages = _history_to_messages(history, system_prompt)
    user_text = build_user_message(message, use_think)

    # Vision: attach the image (if any) alongside the text part.
    # NOTE(review): truthiness check assumes image_input is a string/None from
    # the hidden Textbox; a multi-element ndarray here would be ambiguous.
    if image_input and MODEL_CAP["vision"]:
        content = [
            {"type": "image_url",
             "image_url": {"url": f"data:image/jpeg;base64,{_image_to_b64(image_input)}"}},
            {"type": "text", "text": user_text},
        ]
    else:
        content = user_text
    messages.append({"role": "user", "content": content})

    # Stream tokens from the SGLang OpenAI-compatible endpoint (SSE framing).
    try:
        resp = requests.post(SGLANG_URL, json={
            "model": "FINAL-Bench/Darwin-35B-A3B-Opus",
            "messages": messages,
            "max_tokens": max_new_tokens,
            "temperature": temperature,
            "top_p": float(top_p),
            "stream": True,
        }, stream=True, timeout=600, verify=False)  # self-signed backend certs

        raw = ""
        try:
            for line in resp.iter_lines(decode_unicode=True):
                if not line or not line.startswith("data: "):
                    continue
                payload = line[6:]
                if payload.strip() == "[DONE]":
                    break
                try:
                    chunk = json.loads(payload)
                    delta = chunk.get("choices", [{}])[0].get("delta", {})
                    token = delta.get("content", "")
                    if token:
                        raw += token
                        yield format_response(raw)
                except (json.JSONDecodeError, IndexError, KeyError):
                    continue  # skip malformed SSE chunks
        finally:
            resp.close()  # release the streaming connection even if the consumer stops early

        if raw:
            yield format_response(raw)

    except requests.exceptions.ConnectionError:
        yield "**❌ SGLang μ„œλ²„ μ—°κ²° μ‹€νŒ¨.** `localhost:7947`에 μ„œλ²„κ°€ μ‹€ν–‰ 쀑인지 ν™•μΈν•˜μ„Έμš”."
    except Exception as exc:
        yield f"**Error:** `{exc}`"
185
+
186
+
187
# ══════════════════════════════════════════════════════════════════════════════
# 4. GRADIO BLOCKS (hidden β€” serves API for frontend)
# ══════════════════════════════════════════════════════════════════════════════
# All controls are invisible: the custom index.html frontend calls this app
# through the Gradio API (api_name="chat") and supplies every input itself.
with gr.Blocks(title="Darwin-35B-A3B-Opus") as gradio_demo:
    thinking_toggle = gr.Radio(
        choices=["⚑ Fast Mode (direct answer)",
                 "🧠 Thinking Mode (chain-of-thought reasoning)"],
        value="⚑ Fast Mode (direct answer)",
        visible=False,
    )
    # Image arrives as a base64 data-URL string from the frontend (Textbox, not Image).
    image_input = gr.Textbox(value="", visible=False)
    system_prompt = gr.Textbox(value=PRESETS["general"], visible=False)
    max_new_tokens = gr.Slider(minimum=64, maximum=16384, value=4096, visible=False)
    temperature = gr.Slider(minimum=0.0, maximum=1.5, value=0.6, visible=False)
    top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.9, visible=False)

    # Order of additional_inputs must match generate_reply's signature after
    # (message, history).
    gr.ChatInterface(
        fn=generate_reply,
        api_name="chat",
        additional_inputs=[
            thinking_toggle, image_input,
            system_prompt, max_new_tokens, temperature, top_p,
        ],
    )
211
+
212
# ══════════════════════════════════════════════════════════════════════════════
# 5. FASTAPI β€” index.html + HF OAuth + Gradio API
# ══════════════════════════════════════════════════════════════════════════════
import pathlib, secrets

fapp = FastAPI()
# In-memory session store: session id -> user record.
# NOTE(review): unbounded and process-local β€” sessions vanish on restart and
# never expire server-side (the cookie max_age is the only limit).
SESSIONS: dict[str, dict] = {}
HTML = pathlib.Path(__file__).parent / "index.html"

# Hugging Face OAuth app credentials (injected by the Space runtime env).
CLIENT_ID = os.getenv("OAUTH_CLIENT_ID", "")
CLIENT_SECRET = os.getenv("OAUTH_CLIENT_SECRET", "")
SPACE_HOST = os.getenv("SPACE_HOST", "localhost:7860")
REDIRECT_URI = f"https://{SPACE_HOST}/login/callback"

print(f"[OAuth] CLIENT_ID set: {bool(CLIENT_ID)}")
print(f"[OAuth] SPACE_HOST: {SPACE_HOST}")
HF_AUTH_URL = "https://huggingface.co/oauth/authorize"
HF_TOKEN_URL = "https://huggingface.co/oauth/token"
HF_USER_URL = "https://huggingface.co/oauth/userinfo"
SCOPES = os.getenv("OAUTH_SCOPES", "openid profile")

from urllib.parse import urlencode
234
+
235
def _sid(req: Request) -> Optional[str]:
    """Return the session id stored in the browser cookie, if any."""
    return req.cookies.get("mc_session")

def _user(req: Request) -> Optional[dict]:
    """Look up the logged-in user record for this request's session."""
    sid = _sid(req)
    if not sid:
        return None
    return SESSIONS.get(sid)
241
+
242
+ @fapp.get("/")
243
+ async def root(request: Request):
244
+ html = HTML.read_text(encoding="utf-8") if HTML.exists() else "<h2>index.html missing</h2>"
245
+ return HTMLResponse(html)
246
+
247
+ @fapp.get("/oauth/user")
248
+ async def oauth_user(request: Request):
249
+ u = _user(request)
250
+ return JSONResponse(u) if u else JSONResponse({"logged_in": False}, status_code=401)
251
+
252
+ @fapp.get("/oauth/login")
253
+ async def oauth_login(request: Request):
254
+ if not CLIENT_ID:
255
+ return RedirectResponse("/?oauth_error=not_configured")
256
+ state = secrets.token_urlsafe(16)
257
+ params = {"response_type":"code","client_id":CLIENT_ID,"redirect_uri":REDIRECT_URI,"scope":SCOPES,"state":state}
258
+ return RedirectResponse(f"{HF_AUTH_URL}?{urlencode(params)}", status_code=302)
259
+
260
+ @fapp.get("/login/callback")
261
+ async def oauth_callback(code: str = "", error: str = "", state: str = ""):
262
+ if error or not code:
263
+ return RedirectResponse("/?auth_error=1")
264
+ basic = base64.b64encode(f"{CLIENT_ID}:{CLIENT_SECRET}".encode()).decode()
265
+ async with httpx.AsyncClient() as client:
266
+ tok = await client.post(HF_TOKEN_URL, data={"grant_type":"authorization_code","code":code,"redirect_uri":REDIRECT_URI},
267
+ headers={"Accept":"application/json","Authorization":f"Basic {basic}"})
268
+ if tok.status_code != 200:
269
+ return RedirectResponse("/?auth_error=1")
270
+ access_token = tok.json().get("access_token", "")
271
+ if not access_token:
272
+ return RedirectResponse("/?auth_error=1")
273
+ uinfo = await client.get(HF_USER_URL, headers={"Authorization":f"Bearer {access_token}"})
274
+ if uinfo.status_code != 200:
275
+ return RedirectResponse("/?auth_error=1")
276
+ user = uinfo.json()
277
+
278
+ sid = secrets.token_urlsafe(32)
279
+ SESSIONS[sid] = {
280
+ "logged_in": True,
281
+ "username": user.get("preferred_username", user.get("name", "User")),
282
+ "name": user.get("name", ""),
283
+ "avatar": user.get("picture", ""),
284
+ "profile": f"https://huggingface.co/{user.get('preferred_username', '')}",
285
+ }
286
+ resp = RedirectResponse("/")
287
+ resp.set_cookie("mc_session", sid, httponly=True, samesite="lax", secure=True, max_age=60*60*24*7)
288
+ return resp
289
+
290
+ @fapp.get("/oauth/logout")
291
+ async def oauth_logout(request: Request):
292
+ sid = _sid(request)
293
+ if sid and sid in SESSIONS: del SESSIONS[sid]
294
+ resp = RedirectResponse("/")
295
+ resp.delete_cookie("mc_session")
296
+ return resp
297
+
298
+ @fapp.get("/health")
299
+ async def health():
300
+ # Check SGLang
301
+ try:
302
+ r = requests.get(f"{SGLANG_BASE}/v1/models", timeout=5, verify=False)
303
+ return {"status":"ok","sglang":"connected"}
304
+ except:
305
+ return {"status":"ok","sglang":"disconnected"}
306
+
307
# Mount the (hidden) Gradio app under /gradio; FastAPI serves everything else.
app = gr.mount_gradio_app(fapp, gradio_demo, path="/gradio")

if __name__ == "__main__":
    print(f"[BOOT] Darwin-35B-A3B-Opus Demo Β· SGLang: {SGLANG_URL}", flush=True)
    uvicorn.run(app, host="0.0.0.0", port=7860)