SeaWolf-AI commited on
Commit
09cae0a
Β·
verified Β·
1 Parent(s): 9335e0b

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +440 -0
app.py ADDED
@@ -0,0 +1,440 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ 🧬 Darwin-35B-A3B-Opus β€” Demo Space
3
+ Single model Β· SGLang backend Β· Vision support
4
+ """
5
+ import sys
6
+ print(f"[BOOT] Python {sys.version}", flush=True)
7
+
8
+ import base64, os, re, json
9
+ from typing import Generator, Optional
10
+
11
+ # NIPA λ“± 자체 μΈμ¦μ„œ μ—”λ“œν¬μΈνŠΈμš© SSL κ²½κ³  λ¬΄μ‹œ
12
+ import urllib3
13
+ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
14
+
15
+ try:
16
+ import gradio as gr
17
+ print(f"[BOOT] gradio {gr.__version__}", flush=True)
18
+ except ImportError as e:
19
+ print(f"[BOOT] FATAL: {e}", flush=True); sys.exit(1)
20
+
21
+ try:
22
+ import httpx, uvicorn, requests
23
+ from fastapi import FastAPI, Request
24
+ from fastapi.responses import HTMLResponse, RedirectResponse, JSONResponse
25
+ print("[BOOT] All imports OK", flush=True)
26
+ except ImportError as e:
27
+ print(f"[BOOT] FATAL: {e}", flush=True); sys.exit(1)
28
+
29
# ══════════════════════════════════════════════════════════════════════════════
# 1. SGLANG BACKEND CONFIG
# ══════════════════════════════════════════════════════════════════════════════
# Base URL of the SGLang server (OpenAI-compatible API), overridable via env.
SGLANG_BASE = os.getenv("DARWIN_API", "http://localhost:7947")
SGLANG_URL = f"{SGLANG_BASE}/v1/chat/completions"

MODEL_NAME = "Darwin-35B-A3B-Opus"
# Advertised model capabilities; max_tokens/temp_max clamp user requests
# in generate_reply.
MODEL_CAP = {
    "arch": "MoE", "active": "3B / 35B total",
    "ctx": "262K", "thinking": True, "vision": True,
    "max_tokens": 16384, "temp_max": 1.5,
}

# System-prompt presets selectable by the front-end; "general" is the default.
PRESETS = {
    "general": "You are Darwin-35B-A3B-Opus, a highly capable reasoning model created by VIDRAFT via evolutionary merge. Think step by step for complex questions.",
    "code": "You are an expert software engineer. Write clean, efficient, well-commented code. Explain your approach before writing. Use modern best practices.",
    "math": "You are a world-class mathematician. Break problems step-by-step. Show full working. Use LaTeX where helpful.",
    "creative": "You are a brilliant creative writer. Be imaginative, vivid, and engaging. Adapt tone and style to the request.",
    "translate": "You are a professional translator fluent in 201 languages. Provide accurate, natural-sounding translations with cultural context.",
    "research": "You are a rigorous research analyst. Provide structured, well-reasoned analysis. Identify assumptions and acknowledge uncertainty.",
}
50
+
51
+ # ══════════════════════════════════════════════════════════════════════════════
52
+ # 2. THINKING MODE HELPERS
53
+ # ══════════════════════════════════════════════════════════════════════════════
54
def build_user_message(text: str, thinking: bool) -> str:
    """Return the user text unchanged.

    The NIPA endpoint does not support the ``/think`` / ``/no_think``
    directives, so the ``thinking`` flag is deliberately ignored here.
    """
    return text
56
+
57
def parse_think_blocks(text: str) -> tuple[str, str]:
    """Split off a completed ``<think>...</think>`` block.

    Returns ``(reasoning, answer)``. When no closed block is present the
    reasoning part is empty and the original text is returned untouched.
    """
    match = re.search(r"<think>(.*?)</think>\s*", text, re.DOTALL)
    if match is None:
        return "", text
    reasoning = match.group(1).strip()
    answer = text[match.end():].strip()
    return reasoning, answer
60
+
61
# English reasoning-line prefixes. Hoisted to a module-level tuple so each call
# does a single C-level str.startswith(tuple) instead of rebuilding a 45-item
# list and looping over it in Python.
_THINK_STARTS: tuple[str, ...] = (
    "The user", "the user", "This is", "this is", "I should", "I need to",
    "Let me", "let me", "My task", "my task", "I'll ", "I will",
    "Since ", "since ", "Now,", "now,", "So,", "so,", "First,", "first,",
    "Okay", "okay", "Alright", "Hmm", "Wait", "Actually",
    "The question", "the question", "The input", "the input",
    "The request", "the request", "The prompt", "the prompt",
    "Thinking Process", "Thinking process", "**Thinking",
    "Step ", "step ", "Approach:", "Analysis:", "Reasoning:",
    "1. **", "2. **", "3. **", "4. **", "5. **",
)


def _is_thinking_line(line: str) -> bool:
    """Heuristically decide whether one line belongs to a reasoning block.

    True for blank lines (kept inside the thinking block), lines opening
    with a known English reasoning phrase, and bullet items whose visible
    text is English-only; lines containing Hangul/CJK characters are
    treated as part of the actual answer.
    """
    l = line.strip()
    if not l:
        return True  # blank lines are folded into the thinking block
    # English reasoning pattern β€” one startswith call over all prefixes.
    if l.startswith(_THINK_STARTS):
        return True
    # Bullet marker + English text, with no Hangul/CJK nearby => thinking.
    if l.startswith(("- ", "* ", "β—‹ ")) and any(c.isascii() and c.isalpha() for c in l[:20]):
        if not any(ord(c) > 0x1100 for c in l[:30]):  # no Hangul/CJK -> thinking
            return True
    return False
86
+
87
+
88
def _split_thinking_answer(raw: str) -> tuple[str, str]:
    """Split a raw response into a (thinking, answer) pair.

    Scans line by line for the first line that does not look like
    reasoning (per _is_thinking_line). That line starts the answer when
    it either contains Hangul/non-ASCII text early on, or follows two
    blank lines. If no split point is found, everything is the answer.
    """
    lines = raw.split("\n")
    answer_start = -1

    for i, line in enumerate(lines):
        if not _is_thinking_line(line):
            # Answer start: Hangul/non-ASCII appears in the first few chars.
            if any(ord(c) > 0x1100 for c in line.strip()[:10]):
                answer_start = i
                break
            # Or: an English answer after two blank lines.
            if i > 2 and not _is_thinking_line(line):
                # If the previous two lines are blank, the answer starts here.
                if all(not lines[j].strip() for j in range(max(0,i-2), i)):
                    answer_start = i
                    break

    # answer_start == 0 would mean an empty thinking part, so treat it
    # the same as "no split found".
    if answer_start > 0:
        thinking = "\n".join(lines[:answer_start]).strip()
        answer = "\n".join(lines[answer_start:]).strip()
        return thinking, answer

    return "", raw
112
+
113
+
114
def format_response(raw: str) -> str:
    """Render raw model output for display.

    Detected reasoning is folded into a collapsible ``<details>``
    section; a still-open thinking stream shows a progress line.
    """
    # Case 1: a fully closed <think>...</think> block.
    chain, answer = parse_think_blocks(raw)
    if chain:
        return (
            "<details>\n"
            "<summary>🧠 Reasoning Chain β€” click to expand</summary>\n\n"
            f"{chain}\n\n"
            "</details>\n\n"
            f"{answer}"
        )
    # Case 2: <think> opened but not yet closed (still streaming).
    if "<think>" in raw and "</think>" not in raw:
        pending = len(raw) - raw.index("<think>") - 7
        return f"🧠 Reasoning... ({pending} chars)"
    # Case 3: plain-text reasoning with no tags (NIPA endpoint).
    stripped = raw.strip()
    first_line = stripped.split("\n")[0] if stripped else ""
    if len(raw) > 20 and _is_thinking_line(first_line):
        thinking, answer = _split_thinking_answer(raw)
        if thinking:
            if answer:
                return (
                    f"<details>\n"
                    f"<summary>🧠 Reasoning Chain ({len(thinking)} chars)</summary>\n\n"
                    f"{thinking}\n\n"
                    f"</details>\n\n"
                    f"{answer}"
                )
            # Answer has not started yet β€” show only a character count.
            return f"🧠 Reasoning... ({len(raw)} chars)"
    return raw
145
+
146
+ # ══════════════════════════════════════════════════════════════════════════════
147
+ # 3. STREAMING BACKEND β€” SGLang OpenAI-compatible API
148
+ # ══════════════════════════════════════════════════════════════════════════════
149
def generate_reply(
    message: str,
    history: list,
    thinking_mode: str,
    image_input,
    system_prompt: str,
    max_new_tokens: int,
    temperature: float,
    top_p: float,
) -> Generator[str, None, None]:
    """Stream a chat completion from the SGLang backend.

    Builds an OpenAI-style ``messages`` list from the system prompt and
    Gradio chat history (both dict-style and legacy tuple-style turns
    are accepted), optionally attaches an image for vision input, then
    streams the SSE response and yields progressively formatted text.

    Yields:
        Markdown/HTML strings suitable for a Gradio chatbot, one per
        received token plus a final full render.
    """
    use_think = "Thinking" in thinking_mode
    # Clamp generation parameters to the model's advertised limits.
    max_new_tokens = min(int(max_new_tokens), MODEL_CAP["max_tokens"])
    temperature = min(float(temperature), MODEL_CAP["temp_max"])

    messages: list[dict] = []
    if system_prompt.strip():
        messages.append({"role": "system", "content": system_prompt.strip()})

    # Replay prior turns. Multimodal content lists are flattened to their
    # text parts; assistant turns are stripped of <think> blocks so the
    # model's reasoning is not fed back to it.
    for turn in history:
        if isinstance(turn, dict):
            role = turn.get("role", "")
            raw = turn.get("content") or ""
            text = (" ".join(p.get("text","") for p in raw
                             if isinstance(p,dict) and p.get("type")=="text")
                    if isinstance(raw, list) else str(raw))
            if role == "user":
                messages.append({"role":"user","content":text})
            elif role == "assistant":
                _, clean = parse_think_blocks(text)
                messages.append({"role":"assistant","content":clean})
        else:
            # Legacy (user, assistant) tuple/list history.
            try:
                u, a = (turn[0] or None), (turn[1] if len(turn)>1 else None)
            except (IndexError, TypeError):
                continue
            def _txt(v):
                # Flatten a multimodal content list to its text parts.
                if v is None: return None
                if isinstance(v, list):
                    return " ".join(p.get("text","") for p in v
                                    if isinstance(p,dict) and p.get("type")=="text")
                return str(v)
            if u := _txt(u): messages.append({"role":"user","content":u})
            if a := _txt(a):
                _, clean = parse_think_blocks(a)
                messages.append({"role":"assistant","content":clean})

    user_text = build_user_message(message, use_think)

    # Vision: image input handling
    if image_input and MODEL_CAP["vision"]:
        import io
        from PIL import Image as PILImage

        if isinstance(image_input, str) and image_input.startswith("data:"):
            # Already a data URL β€” reuse its base64 payload as-is.
            header, b64_data = image_input.split(",", 1)
            b64 = b64_data
        else:
            # PIL image or array-like β€” re-encode as JPEG then base64.
            buf = io.BytesIO()
            if not isinstance(image_input, PILImage.Image):
                image_input = PILImage.fromarray(image_input)
            image_input.save(buf, format="JPEG")
            b64 = base64.b64encode(buf.getvalue()).decode()

        content = [
            {"type":"image_url","image_url":{"url":f"data:image/jpeg;base64,{b64}"}},
            {"type":"text","text":user_text},
        ]
    else:
        content = user_text
    messages.append({"role":"user","content":content})

    # Stream from SGLang
    try:
        # verify=False: backend may sit behind a self-signed certificate.
        resp = requests.post(SGLANG_URL, json={
            "model": "FINAL-Bench/Darwin-35B-A3B-Opus",
            "messages": messages,
            "max_tokens": max_new_tokens,
            "temperature": temperature,
            "top_p": float(top_p),
            "stream": True,
        }, stream=True, timeout=600, verify=False)

        raw = ""
        # Server-sent events: each payload line is prefixed with "data: ".
        for line in resp.iter_lines(decode_unicode=True):
            if not line or not line.startswith("data: "):
                continue
            payload = line[6:]
            if payload.strip() == "[DONE]":
                break
            try:
                chunk = json.loads(payload)
                delta = chunk.get("choices", [{}])[0].get("delta", {})
                token = delta.get("content", "")
                if token:
                    raw += token
                    yield format_response(raw)
            except (json.JSONDecodeError, IndexError, KeyError):
                # Skip malformed or partial SSE chunks.
                continue

        # Final render once the stream ends.
        if raw:
            yield format_response(raw)

    except requests.exceptions.ConnectionError:
        yield "**❌ SGLang μ„œλ²„ μ—°κ²° μ‹€νŒ¨.** `localhost:7947`에 μ„œλ²„κ°€ μ‹€ν–‰ 쀑인지 ν™•μΈν•˜μ„Έμš”."
    except Exception as exc:
        yield f"**Error:** `{exc}`"
256
+
257
+
258
# ══════════════════════════════════════════════════════════════════════════════
# 4. GRADIO BLOCKS (hidden β€” serves API for frontend)
# ══════════════════════════════════════════════════════════════════════════════
with gr.Blocks(title="Darwin-35B-A3B-Opus") as gradio_demo:
    # All controls are hidden: the custom index.html front-end drives them
    # through the Gradio API (api_name="chat") rather than this UI.
    thinking_toggle = gr.Radio(
        choices=["⚑ Fast Mode (direct answer)",
                 "🧠 Thinking Mode (chain-of-thought reasoning)"],
        value="⚑ Fast Mode (direct answer)",
        visible=False,
    )
    image_input = gr.Textbox(value="", visible=False)
    system_prompt = gr.Textbox(value=PRESETS["general"], visible=False)
    max_new_tokens = gr.Slider(minimum=64, maximum=16384, value=4096, visible=False)
    temperature = gr.Slider(minimum=0.0, maximum=1.5, value=0.6, visible=False)
    top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.9, visible=False)

    gr.ChatInterface(
        fn=generate_reply,
        api_name="chat",
        additional_inputs=[
            thinking_toggle, image_input,
            system_prompt, max_new_tokens, temperature, top_p,
        ],
    )
282
+
283
# ══════════════════════════════════════════════════════════════════════════════
# 5. FASTAPI β€” index.html + HF OAuth + Gradio API
# ══════════════════════════════════════════════════════════════════════════════
import pathlib, secrets

fapp = FastAPI()
# In-memory session store: sid -> user record.
# NOTE(review): not persisted and not shared across workers β€” acceptable
# for a single-process demo Space only.
SESSIONS: dict[str, dict] = {}
HTML = pathlib.Path(__file__).parent / "index.html"

# Hugging Face OAuth app credentials (injected via environment variables).
CLIENT_ID = os.getenv("OAUTH_CLIENT_ID", "")
CLIENT_SECRET = os.getenv("OAUTH_CLIENT_SECRET", "")
SPACE_HOST = os.getenv("SPACE_HOST", "localhost:7860")
REDIRECT_URI = f"https://{SPACE_HOST}/login/callback"

print(f"[OAuth] CLIENT_ID set: {bool(CLIENT_ID)}")
print(f"[OAuth] SPACE_HOST: {SPACE_HOST}")
HF_AUTH_URL = "https://huggingface.co/oauth/authorize"
HF_TOKEN_URL = "https://huggingface.co/oauth/token"
HF_USER_URL = "https://huggingface.co/oauth/userinfo"
SCOPES = os.getenv("OAUTH_SCOPES", "openid profile")

from urllib.parse import urlencode
305
+
306
def _sid(req: Request) -> Optional[str]:
    """Return this request's session cookie value, or None when absent."""
    cookies = req.cookies
    return cookies.get("mc_session")
308
+
309
def _user(req: Request) -> Optional[dict]:
    """Look up the session record for this request, if any."""
    sid = _sid(req)
    if not sid:
        return None
    return SESSIONS.get(sid)
312
+
313
+ @fapp.get("/")
314
+ async def root(request: Request):
315
+ html = HTML.read_text(encoding="utf-8") if HTML.exists() else "<h2>index.html missing</h2>"
316
+ return HTMLResponse(html)
317
+
318
+ @fapp.get("/oauth/user")
319
+ async def oauth_user(request: Request):
320
+ u = _user(request)
321
+ return JSONResponse(u) if u else JSONResponse({"logged_in": False}, status_code=401)
322
+
323
+ @fapp.get("/oauth/login")
324
+ async def oauth_login(request: Request):
325
+ if not CLIENT_ID:
326
+ return RedirectResponse("/?oauth_error=not_configured")
327
+ state = secrets.token_urlsafe(16)
328
+ params = {"response_type":"code","client_id":CLIENT_ID,"redirect_uri":REDIRECT_URI,"scope":SCOPES,"state":state}
329
+ return RedirectResponse(f"{HF_AUTH_URL}?{urlencode(params)}", status_code=302)
330
+
331
+ @fapp.get("/login/callback")
332
+ async def oauth_callback(code: str = "", error: str = "", state: str = ""):
333
+ if error or not code:
334
+ return RedirectResponse("/?auth_error=1")
335
+ basic = base64.b64encode(f"{CLIENT_ID}:{CLIENT_SECRET}".encode()).decode()
336
+ async with httpx.AsyncClient() as client:
337
+ tok = await client.post(HF_TOKEN_URL, data={"grant_type":"authorization_code","code":code,"redirect_uri":REDIRECT_URI},
338
+ headers={"Accept":"application/json","Authorization":f"Basic {basic}"})
339
+ if tok.status_code != 200:
340
+ return RedirectResponse("/?auth_error=1")
341
+ access_token = tok.json().get("access_token", "")
342
+ if not access_token:
343
+ return RedirectResponse("/?auth_error=1")
344
+ uinfo = await client.get(HF_USER_URL, headers={"Authorization":f"Bearer {access_token}"})
345
+ if uinfo.status_code != 200:
346
+ return RedirectResponse("/?auth_error=1")
347
+ user = uinfo.json()
348
+
349
+ sid = secrets.token_urlsafe(32)
350
+ SESSIONS[sid] = {
351
+ "logged_in": True,
352
+ "username": user.get("preferred_username", user.get("name", "User")),
353
+ "name": user.get("name", ""),
354
+ "avatar": user.get("picture", ""),
355
+ "profile": f"https://huggingface.co/{user.get('preferred_username', '')}",
356
+ }
357
+ resp = RedirectResponse("/")
358
+ resp.set_cookie("mc_session", sid, httponly=True, samesite="lax", secure=True, max_age=60*60*24*7)
359
+ return resp
360
+
361
+ @fapp.get("/oauth/logout")
362
+ async def oauth_logout(request: Request):
363
+ sid = _sid(request)
364
+ if sid and sid in SESSIONS: del SESSIONS[sid]
365
+ resp = RedirectResponse("/")
366
+ resp.delete_cookie("mc_session")
367
+ return resp
368
+
369
+ @fapp.get("/health")
370
+ async def health():
371
+ try:
372
+ r = requests.get(f"{SGLANG_BASE}/v1/models", timeout=5, verify=False)
373
+ return {"status":"ok","sglang":"connected"}
374
+ except:
375
+ return {"status":"ok","sglang":"disconnected"}
376
+
377
# ── Web Search API (Brave) ──────────────────────────────────────────────
BRAVE_API_KEY = os.getenv("BRAVE_API_KEY", "")

@fapp.post("/api/search")
async def api_search(request: Request):
    """Proxy a web search to the Brave Search API (top five results)."""
    body = await request.json()
    query = body.get("query", "").strip()
    if not query:
        return JSONResponse({"error": "empty query"}, status_code=400)
    token = BRAVE_API_KEY
    if not token:
        return JSONResponse({"error": "BRAVE_API_KEY not set"}, status_code=500)
    try:
        resp = requests.get(
            "https://api.search.brave.com/res/v1/web/search",
            headers={"X-Subscription-Token": token, "Accept": "application/json"},
            params={"q": query, "count": 5},
            timeout=10,
        )
        resp.raise_for_status()
        hits = resp.json().get("web", {}).get("results", [])
        items = [
            {
                "title": hit.get("title", ""),
                "desc": hit.get("description", ""),
                "url": hit.get("url", ""),
            }
            for hit in hits[:5]
        ]
        return JSONResponse({"results": items})
    except Exception as e:
        return JSONResponse({"error": str(e)}, status_code=500)
407
+
408
# ── PDF Text Extraction ─────────────────────────────────────────────────
@fapp.post("/api/extract-pdf")
async def api_extract_pdf(request: Request):
    """Base64 PDF β†’ text extraction.

    Expects JSON {"data": "<base64 or data-URL>"}; returns up to 8000
    characters of extracted text. Uses PyMuPDF when available, else a
    crude character-filter fallback.
    """
    try:
        body = await request.json()
        b64 = body.get("data", "")
        if "," in b64:
            # Strip a "data:application/pdf;base64," style prefix.
            b64 = b64.split(",", 1)[1]
        import io  # NOTE(review): imported but unused in this handler
        pdf_bytes = base64.b64decode(b64)
        text = ""
        try:
            import fitz  # PyMuPDF
            doc = fitz.open(stream=pdf_bytes, filetype="pdf")
            for page in doc:
                text += page.get_text() + "\n"
        except ImportError:
            # Fallback: simple text extraction
            content = pdf_bytes.decode("utf-8", errors="ignore")
            # Very basic: keeps printable ASCII, Hangul, and Japanese kana;
            # everything else (including raw PDF structure bytes) is dropped.
            text = re.sub(r'[^\x20-\x7E\n\r\uAC00-\uD7A3\u3040-\u309F\u30A0-\u30FF]', '', content)

        text = text.strip()[:8000]  # Max 8000 chars
        return JSONResponse({"text": text, "chars": len(text)})
    except Exception as e:
        return JSONResponse({"error": str(e)}, status_code=500)
435
+
436
# Mount the hidden Gradio app under /gradio; FastAPI serves everything else.
app = gr.mount_gradio_app(fapp, gradio_demo, path="/gradio")

if __name__ == "__main__":
    print(f"[BOOT] Darwin-35B-A3B-Opus Demo Β· SGLang: {SGLANG_URL}", flush=True)
    uvicorn.run(app, host="0.0.0.0", port=7860)