SeaWolf-AI committed on
Commit
39ca988
Β·
verified Β·
1 Parent(s): 1c545b5

Delete app-backup2.py

Browse files
Files changed (1) hide show
  1. app-backup2.py +0 -483
app-backup2.py DELETED
@@ -1,483 +0,0 @@
1
- """
2
- 🧬 Darwin-35B-A3B-Opus β€” Demo Space
3
- Single model Β· SGLang backend Β· Vision support
4
- """
5
- import sys
6
- print(f"[BOOT] Python {sys.version}", flush=True)
7
-
8
- import base64, os, re, json
9
- from typing import Generator, Optional
10
-
11
- # NIPA λ“± 자체 μΈμ¦μ„œ μ—”λ“œν¬μΈνŠΈμš© SSL κ²½κ³  λ¬΄μ‹œ
12
- import urllib3
13
- urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
14
-
15
- try:
16
- import gradio as gr
17
- print(f"[BOOT] gradio {gr.__version__}", flush=True)
18
- except ImportError as e:
19
- print(f"[BOOT] FATAL: {e}", flush=True); sys.exit(1)
20
-
21
- try:
22
- import httpx, uvicorn, requests
23
- from fastapi import FastAPI, Request
24
- from fastapi.responses import HTMLResponse, RedirectResponse, JSONResponse
25
- print("[BOOT] All imports OK", flush=True)
26
- except ImportError as e:
27
- print(f"[BOOT] FATAL: {e}", flush=True); sys.exit(1)
28
-
29
# ══════════════════════════════════════════════════════════════════════════════
# 1. SGLANG BACKEND CONFIG
# ══════════════════════════════════════════════════════════════════════════════
# Primary inference endpoint: an OpenAI-compatible SGLang server.
# DARWIN_API lets deployments point at a remote backend.
SGLANG_BASE = os.getenv("DARWIN_API", "http://localhost:7947")
SGLANG_URL = f"{SGLANG_BASE}/v1/chat/completions"

# Multi-model config
MODEL_NAME = "Darwin-35B-A3B-Opus"
MODEL_ID = "FINAL-Bench/Darwin-35B-A3B-Opus"
# Capability sheet read by generate_reply: "max_tokens" and "temp_max" clamp
# user-supplied sampling params, "vision" gates the image branch.
# "arch"/"active"/"ctx" appear to be display-only strings — confirm.
MODEL_CAP = {
    "arch": "MoE", "active": "3B / 35B total",
    "ctx": "262K", "thinking": True, "vision": True,
    "max_tokens": 16384, "temp_max": 1.5,
}

# System-prompt presets; "general" is the default wired into the hidden
# Gradio system_prompt textbox below.
PRESETS = {
    "general": "You are Darwin-35B-A3B-Opus, a highly capable reasoning model created by VIDRAFT via evolutionary merge. Think step by step for complex questions.",
    "code": "You are an expert software engineer. Write clean, efficient, well-commented code. Explain your approach before writing. Use modern best practices.",
    "math": "You are a world-class mathematician. Break problems step-by-step. Show full working. Use LaTeX where helpful.",
    "creative": "You are a brilliant creative writer. Be imaginative, vivid, and engaging. Adapt tone and style to the request.",
    "translate": "You are a professional translator fluent in 201 languages. Provide accurate, natural-sounding translations with cultural context.",
    "research": "You are a rigorous research analyst. Provide structured, well-reasoned analysis. Identify assumptions and acknowledge uncertainty.",
}
52
-
53
- # ══════════════════════════════════════════════════════════════════════════════
54
- # 2. THINKING MODE HELPERS
55
- # ══════════════════════════════════════════════════════════════════════════════
56
def build_user_message(text: str, thinking: bool) -> str:
    """Return the user turn unchanged.

    The `thinking` flag is deliberately ignored: the NIPA endpoint does not
    support the /think and /no_think control tags, so no tag is appended.
    """
    return text
58
-
59
def parse_think_blocks(text: str) -> tuple[str, str]:
    """Split a response containing a closed <think>...</think> block.

    Returns (chain, answer): the stripped reasoning chain and the text that
    follows the closing tag. With no complete block, returns ("", text).
    Note: any text *before* <think> is discarded, matching the streaming UI's
    expectation that the tag opens the response.
    """
    match = re.search(r"<think>(.*?)</think>\s*", text, re.DOTALL)
    if match is None:
        return "", text
    chain = match.group(1).strip()
    answer = text[match.end():].strip()
    return chain, answer
62
-
63
- def _is_thinking_line(line: str) -> bool:
64
- """ν•œ 쀄이 reasoning/thinking인지 감지"""
65
- l = line.strip()
66
- if not l:
67
- return True # 빈 쀄은 thinking 블둝에 포함
68
- # μ˜μ–΄ reasoning νŒ¨ν„΄
69
- think_starts = [
70
- "The user", "the user", "This is", "this is", "I should", "I need to",
71
- "Let me", "let me", "My task", "my task", "I'll ", "I will",
72
- "Since ", "since ", "Now,", "now,", "So,", "so,", "First,", "first,",
73
- "Okay", "okay", "Alright", "Hmm", "Wait", "Actually",
74
- "The question", "the question", "The input", "the input",
75
- "The request", "the request", "The prompt", "the prompt",
76
- "Thinking Process", "Thinking process", "**Thinking",
77
- "Step ", "step ", "Approach:", "Analysis:", "Reasoning:",
78
- "1. **", "2. **", "3. **", "4. **", "5. **",
79
- ]
80
- for s in think_starts:
81
- if l.startswith(s):
82
- return True
83
- # 글머리 기호 + μ˜μ–΄ reasoning
84
- if l.startswith(("- ", "* ", "β—‹ ")) and any(c.isascii() and c.isalpha() for c in l[:20]):
85
- if not any(ord(c) > 0x1100 for c in l[:30]): # ν•œκΈ€ μ—†μœΌλ©΄ thinking
86
- return True
87
- return False
88
-
89
-
90
def _split_thinking_answer(raw: str) -> tuple:
    """Split a tag-less response into (thinking, answer).

    Scans line by line for the first line that is NOT heuristic thinking and
    either (a) contains non-Latin text (e.g. Korean) near its start, or
    (b) appears after line 2 preceded by two blank lines. Everything before
    that line is treated as thinking. Returns ("", raw) when no boundary is
    found or the boundary is the very first line.
    """
    lines = raw.split("\n")
    boundary = -1

    for idx, line in enumerate(lines):
        if _is_thinking_line(line):
            continue
        # (a) non-ASCII script early in the line → real answer starts here
        if any(ord(ch) > 0x1100 for ch in line.strip()[:10]):
            boundary = idx
            break
        # (b) an English answer separated from the reasoning by two blank lines
        if idx > 2 and all(not lines[j].strip() for j in range(max(0, idx - 2), idx)):
            boundary = idx
            break

    if boundary > 0:
        head = "\n".join(lines[:boundary]).strip()
        tail = "\n".join(lines[boundary:]).strip()
        return head, tail

    return "", raw
114
-
115
-
116
def format_response(raw: str) -> str:
    """Format a (possibly partial) streamed response for the chat UI.

    Three cases, checked in order:
      1. a closed <think>...</think> block → collapsed into <details>;
      2. an opened but unclosed <think> tag → progress placeholder;
      3. tag-less plain-text reasoning (NIPA endpoint) → heuristic split.
    Anything else is returned unchanged.
    """
    # Case 1: completed <think>...</think>
    reasoning, body = parse_think_blocks(raw)
    if reasoning:
        return (
            "<details>\n"
            "<summary>🧠 Reasoning Chain — click to expand</summary>\n\n"
            f"{reasoning}\n\n"
            "</details>\n\n"
            f"{body}"
        )

    # Case 2: <think> opened, not yet closed — still streaming the chain.
    if "<think>" in raw and "</think>" not in raw:
        think_len = len(raw) - raw.index("<think>") - 7
        return f"🧠 Reasoning... ({think_len} chars)"

    # Case 3: plain-text thinking detection (no tags at all).
    stripped = raw.strip()
    first_line = stripped.split("\n")[0] if stripped else ""
    if len(raw) > 20 and _is_thinking_line(first_line):
        reasoning, body = _split_thinking_answer(raw)
        if reasoning:
            if body:
                return (
                    f"<details>\n"
                    f"<summary>🧠 Reasoning Chain ({len(reasoning)} chars)</summary>\n\n"
                    f"{reasoning}\n\n"
                    f"</details>\n\n"
                    f"{body}"
                )
            # Answer has not appeared yet — report progress only.
            return f"🧠 Reasoning... ({len(raw)} chars)"

    return raw
147
-
148
- # ══════════════════════════════════════════════════════════════════════════════
149
- # 3. STREAMING BACKEND β€” SGLang OpenAI-compatible API
150
- # ══════════════════════════════════════════════════════════════════════════════
151
def generate_reply(
    message: str,
    history: list,
    thinking_mode: str,
    image_input,  # data-URI string, PIL.Image, or array — see vision branch; TODO confirm caller types
    system_prompt: str,
    max_new_tokens: int,
    temperature: float,
    top_p: float,
) -> Generator[str, None, None]:
    """Stream a chat completion from the SGLang backend.

    Yields the full formatted response-so-far after each streamed token
    (via format_response), so the UI re-renders progressively. On failure
    yields a single markdown error message instead. Tries the primary
    endpoint first, then an optional H100_API fallback.
    """
    api_url = f"{SGLANG_BASE}/v1/chat/completions"
    # NOTE(review): use_think only feeds build_user_message, which ignores it
    # (endpoint lacks /think support) — effectively unused.
    use_think = "Thinking" in thinking_mode
    # Clamp user-supplied sampling params to the model's capability sheet.
    max_new_tokens = min(int(max_new_tokens), MODEL_CAP["max_tokens"])
    temperature = min(float(temperature), MODEL_CAP["temp_max"])

    messages: list[dict] = []
    if system_prompt.strip():
        messages.append({"role": "system", "content": system_prompt.strip()})

    # Rebuild the OpenAI-style message list from Gradio history. Two layouts
    # are handled: "messages" dicts ({role, content}) and legacy (user, bot)
    # tuples; content may be a list of typed parts, from which only the
    # "text" parts are kept.
    for turn in history:
        if isinstance(turn, dict):
            role = turn.get("role", "")
            raw = turn.get("content") or ""
            text = (" ".join(p.get("text","") for p in raw
                             if isinstance(p,dict) and p.get("type")=="text")
                    if isinstance(raw, list) else str(raw))
            if role == "user":
                messages.append({"role":"user","content":text})
            elif role == "assistant":
                # Strip any <think> chain so it is not re-fed as context.
                _, clean = parse_think_blocks(text)
                messages.append({"role":"assistant","content":clean})
        else:
            # Legacy tuple history: (user, assistant)
            try:
                u, a = (turn[0] or None), (turn[1] if len(turn)>1 else None)
            except (IndexError, TypeError):
                continue
            def _txt(v):
                # Flatten a typed-parts list (or anything else) to plain text.
                if v is None: return None
                if isinstance(v, list):
                    return " ".join(p.get("text","") for p in v
                                    if isinstance(p,dict) and p.get("type")=="text")
                return str(v)
            if u := _txt(u): messages.append({"role":"user","content":u})
            if a := _txt(a):
                _, clean = parse_think_blocks(a)
                messages.append({"role":"assistant","content":clean})

    user_text = build_user_message(message, use_think)

    # Vision: image input handling — wrap the turn as multimodal content.
    if image_input and MODEL_CAP["vision"]:
        import io
        from PIL import Image as PILImage

        if isinstance(image_input, str) and image_input.startswith("data:"):
            # Already a data URI: reuse the base64 payload as-is.
            header, b64_data = image_input.split(",", 1)
            b64 = b64_data
        else:
            # PIL image or array-like: re-encode to JPEG and base64.
            buf = io.BytesIO()
            if not isinstance(image_input, PILImage.Image):
                image_input = PILImage.fromarray(image_input)
            image_input.save(buf, format="JPEG")
            b64 = base64.b64encode(buf.getvalue()).decode()

        content = [
            {"type":"image_url","image_url":{"url":f"data:image/jpeg;base64,{b64}"}},
            {"type":"text","text":user_text},
        ]
    else:
        content = user_text
    messages.append({"role":"user","content":content})

    # Stream from API (with fallback): primary SGLang URL, then H100_API.
    H100_API = os.getenv("H100_API", "")
    api_urls = [api_url]
    if H100_API:
        api_urls.append(f"{H100_API.rstrip('/')}/v1/chat/completions")

    request_body = {
        "model": MODEL_ID,
        "messages": messages,
        "max_tokens": max_new_tokens,
        "temperature": temperature,
        "top_p": float(top_p),
        "stream": True,
    }

    for i, url in enumerate(api_urls):
        try:
            label = "Primary" if i == 0 else "Fallback(H100)"
            # Mask the host in logs; never print the full endpoint.
            masked = url.split("/v1")[0][:20] + "***"
            print(f"[API] {label}: {masked}", flush=True)

            # verify=False: backend may use a self-signed cert (warnings are
            # silenced at import time). (10, 600) = connect / read timeouts.
            resp = requests.post(url, json=request_body,
                                 stream=True, timeout=(10, 600), verify=False)

            if resp.status_code != 200:
                print(f"[API] {label} HTTP {resp.status_code}", flush=True)
                if i < len(api_urls) - 1:
                    continue  # try fallback
                else:
                    yield f"**❌ API 오류 (HTTP {resp.status_code})**"
                    return

            # Parse the SSE stream: "data: {json}" lines until "[DONE]".
            raw = ""
            got_token = False
            for line in resp.iter_lines(decode_unicode=True):
                if not line or not line.startswith("data: "):
                    continue
                payload = line[6:]
                if payload.strip() == "[DONE]":
                    break
                try:
                    chunk = json.loads(payload)
                    delta = chunk.get("choices", [{}])[0].get("delta", {})
                    token = delta.get("content", "")
                    if token:
                        raw += token
                        got_token = True
                        # Yield the WHOLE formatted text so far (UI replaces).
                        yield format_response(raw)
                except (json.JSONDecodeError, IndexError, KeyError):
                    continue  # skip malformed chunks, keep streaming

            if raw:
                # Final re-render after the stream closes.
                yield format_response(raw)

            if got_token:
                print(f"[API] {label} OK — {len(raw)} chars", flush=True)
                return  # success: stop here

            # Zero tokens received: move on to the next API.
            # NOTE(review): on the LAST url this falls through silently with
            # nothing yielded — confirm that is intended.
            if not got_token and i < len(api_urls) - 1:
                print(f"[API] {label} returned no tokens, trying fallback...", flush=True)
                continue

        except (requests.exceptions.ConnectionError,
                requests.exceptions.Timeout,
                requests.exceptions.ReadTimeout) as e:
            print(f"[API] {label} failed: connection error", flush=True)
            if i < len(api_urls) - 1:
                print(f"[API] Switching to fallback...", flush=True)
                continue
            else:
                yield "**❌ 모든 API 연결 실패.** 서버 상태를 확인하세요."
        except Exception as exc:
            # Unexpected error: surface it to the UI and abort all fallbacks.
            yield f"**Error:** `{exc}`"
            return
299
-
300
-
301
- # ══════════════════════════════════════════════════════════════════════════════
302
- # 4. GRADIO BLOCKS (hidden β€” serves API for frontend)
303
- # ══════════════════════════════════════════════════════════════════════════════
304
# Hidden Gradio app: the real frontend is index.html (served by FastAPI);
# this Blocks app only provides the chat API, mounted at /gradio below.
# Every widget is invisible — they exist solely so ChatInterface exposes
# them as per-request additional inputs.
with gr.Blocks(title="Darwin-35B-A3B-Opus") as gradio_demo:
    thinking_toggle = gr.Radio(
        choices=["⚡ Fast Mode (direct answer)",
                 "🧠 Thinking Mode (chain-of-thought reasoning)"],
        value="⚡ Fast Mode (direct answer)",
        visible=False,
    )
    # A Textbox (not gr.Image): the frontend sends images as data-URI strings,
    # decoded inside generate_reply.
    image_input = gr.Textbox(value="", visible=False)
    system_prompt = gr.Textbox(value=PRESETS["general"], visible=False)
    # Ranges mirror MODEL_CAP limits (16384 tokens, temp ≤ 1.5).
    max_new_tokens = gr.Slider(minimum=64, maximum=16384, value=4096, visible=False)
    temperature = gr.Slider(minimum=0.0, maximum=1.5, value=0.6, visible=False)
    top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.9, visible=False)

    gr.ChatInterface(
        fn=generate_reply,
        api_name="chat",  # exposed to the frontend through the mounted /gradio app
        additional_inputs=[
            thinking_toggle, image_input,
            system_prompt, max_new_tokens, temperature, top_p,
        ],
    )
325
-
326
- # ══════════════════════════════════════════════════════════════════════════════
327
- # 5. FASTAPI β€” index.html + HF OAuth + Gradio API
328
- # ══════════════════════════════════════════════════════════════════════════════
329
import pathlib, secrets

fapp = FastAPI()
# In-memory session store: sid → user record. Unbounded and lost on restart;
# fine for a single-process demo Space.
SESSIONS: dict[str, dict] = {}
# The SPA shell served at "/".
HTML = pathlib.Path(__file__).parent / "index.html"

# Hugging Face OAuth app credentials (injected by the Space runtime).
CLIENT_ID = os.getenv("OAUTH_CLIENT_ID", "")
CLIENT_SECRET = os.getenv("OAUTH_CLIENT_SECRET", "")
SPACE_HOST = os.getenv("SPACE_HOST", "localhost:7860")
REDIRECT_URI = f"https://{SPACE_HOST}/login/callback"

print(f"[OAuth] CLIENT_ID set: {bool(CLIENT_ID)}")
print(f"[OAuth] SPACE_HOST: {SPACE_HOST}")
# Standard HF OAuth endpoints (authorize / token exchange / userinfo).
HF_AUTH_URL = "https://huggingface.co/oauth/authorize"
HF_TOKEN_URL = "https://huggingface.co/oauth/token"
HF_USER_URL = "https://huggingface.co/oauth/userinfo"
SCOPES = os.getenv("OAUTH_SCOPES", "openid profile")

from urllib.parse import urlencode
348
-
349
def _sid(req: Request) -> Optional[str]:
    """Return the "mc_session" cookie value for this request, or None."""
    return req.cookies.get("mc_session")
351
-
352
def _user(req: Request) -> Optional[dict]:
    """Resolve the request's session cookie to a session record, or None."""
    sid = _sid(req)
    if not sid:
        return None
    return SESSIONS.get(sid)
355
-
356
@fapp.get("/")
async def root(request: Request):
    """Serve the SPA shell (index.html), or a fallback notice if missing."""
    if HTML.exists():
        html = HTML.read_text(encoding="utf-8")
    else:
        html = "<h2>index.html missing</h2>"
    return HTMLResponse(html)
360
-
361
@fapp.get("/oauth/user")
async def oauth_user(request: Request):
    """Return the logged-in user's session record, or 401 when anonymous."""
    user = _user(request)
    if user:
        return JSONResponse(user)
    return JSONResponse({"logged_in": False}, status_code=401)
365
-
366
@fapp.get("/oauth/login")
async def oauth_login(request: Request):
    """Redirect the browser to the Hugging Face OAuth authorize page."""
    if not CLIENT_ID:
        return RedirectResponse("/?oauth_error=not_configured")
    # NOTE(review): `state` is generated here but never stored or verified in
    # /login/callback, so it does not actually provide CSRF protection — confirm.
    state = secrets.token_urlsafe(16)
    params = {
        "response_type": "code",
        "client_id": CLIENT_ID,
        "redirect_uri": REDIRECT_URI,
        "scope": SCOPES,
        "state": state,
    }
    return RedirectResponse(f"{HF_AUTH_URL}?{urlencode(params)}", status_code=302)
373
-
374
@fapp.get("/login/callback")
async def oauth_callback(code: str = "", error: str = "", state: str = ""):
    """OAuth redirect target: exchange the code for a token, fetch the HF
    user profile, and open a cookie-backed server-side session.

    NOTE(review): `state` is accepted but never compared against the value
    issued in /oauth/login, so CSRF protection is incomplete — confirm.
    """
    if error or not code:
        return RedirectResponse("/?auth_error=1")
    # HF token endpoint authenticates the OAuth client via HTTP Basic.
    basic = base64.b64encode(f"{CLIENT_ID}:{CLIENT_SECRET}".encode()).decode()
    async with httpx.AsyncClient() as client:
        tok = await client.post(HF_TOKEN_URL, data={"grant_type":"authorization_code","code":code,"redirect_uri":REDIRECT_URI},
                                headers={"Accept":"application/json","Authorization":f"Basic {basic}"})
        if tok.status_code != 200:
            return RedirectResponse("/?auth_error=1")
        access_token = tok.json().get("access_token", "")
        if not access_token:
            return RedirectResponse("/?auth_error=1")
        uinfo = await client.get(HF_USER_URL, headers={"Authorization":f"Bearer {access_token}"})
        if uinfo.status_code != 200:
            return RedirectResponse("/?auth_error=1")
        user = uinfo.json()

    # Store only display data server-side; the access token is discarded.
    sid = secrets.token_urlsafe(32)
    SESSIONS[sid] = {
        "logged_in": True,
        "username": user.get("preferred_username", user.get("name", "User")),
        "name": user.get("name", ""),
        "avatar": user.get("picture", ""),
        "profile": f"https://huggingface.co/{user.get('preferred_username', '')}",
    }
    resp = RedirectResponse("/")
    # 7-day cookie; httponly keeps it out of reach of page JavaScript.
    resp.set_cookie("mc_session", sid, httponly=True, samesite="lax", secure=True, max_age=60*60*24*7)
    return resp
403
-
404
@fapp.get("/oauth/logout")
async def oauth_logout(request: Request):
    """Drop the server-side session (if any) and clear the session cookie."""
    sid = _sid(request)
    if sid:
        SESSIONS.pop(sid, None)
    resp = RedirectResponse("/")
    resp.delete_cookie("mc_session")
    return resp
411
-
412
@fapp.get("/health")
async def health():
    """Liveness probe that also reports whether the SGLang backend answers.

    Always returns status "ok"; only the "sglang" field varies, so the Space
    itself is never marked unhealthy by a backend outage.
    """
    try:
        # verify=False: the backend may sit behind a self-signed cert
        # (urllib3 warnings are silenced at import time).
        requests.get(f"{SGLANG_BASE}/v1/models", timeout=5, verify=False)
        return {"status": "ok", "sglang": "connected"}
    except Exception:
        # Was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt; Exception keeps the same best-effort behavior
        # while letting shutdown signals propagate.
        return {"status": "ok", "sglang": "disconnected"}
419
-
420
- # ── Web Search API (Brave) ──────────────────────────────────────────────
421
- BRAVE_API_KEY = os.getenv("BRAVE_API_KEY", "")
422
-
423
@fapp.post("/api/search")
async def api_search(request: Request):
    """Proxy a web search to the Brave Search API.

    Body: {"query": str}. Returns up to 5 results as
    {"results": [{"title", "desc", "url"}, ...]}, or {"error": ...} with
    400 (empty query) / 500 (missing key or upstream failure).

    NOTE(review): requests.get is a blocking call inside an async handler;
    it stalls the event loop for up to 10s — consider a threadpool offload.
    """
    body = await request.json()
    query = body.get("query", "").strip()
    if not query:
        return JSONResponse({"error": "empty query"}, status_code=400)
    key = BRAVE_API_KEY
    if not key:
        return JSONResponse({"error": "BRAVE_API_KEY not set"}, status_code=500)
    try:
        r = requests.get(
            "https://api.search.brave.com/res/v1/web/search",
            headers={"X-Subscription-Token": key, "Accept": "application/json"},
            params={"q": query, "count": 5}, timeout=10,
        )
        r.raise_for_status()
        results = r.json().get("web", {}).get("results", [])
        items = []
        # Defensive [:5] even though count=5 was requested upstream.
        for item in results[:5]:
            items.append({
                "title": item.get("title", ""),
                "desc": item.get("description", ""),
                "url": item.get("url", ""),
            })
        return JSONResponse({"results": items})
    except Exception as e:
        # Boundary handler: report any upstream/parse failure as a 500.
        return JSONResponse({"error": str(e)}, status_code=500)
450
-
451
- # ── PDF Text Extraction ─────────────────────────────────────────────────
452
@fapp.post("/api/extract-pdf")
async def api_extract_pdf(request: Request):
    """Base64 PDF → text extraction.

    Body: {"data": "<base64 or data-URI>"}. Uses PyMuPDF when available;
    otherwise falls back to a crude byte-level scrape that keeps printable
    ASCII plus Hangul/kana ranges. Returns {"text", "chars"} capped at
    8000 characters, or {"error": ...} with status 500.
    """
    try:
        body = await request.json()
        b64 = body.get("data", "")
        # Accept both raw base64 and "data:application/pdf;base64,..." URIs.
        if "," in b64:
            b64 = b64.split(",", 1)[1]
        pdf_bytes = base64.b64decode(b64)
        text = ""
        try:
            import fitz  # PyMuPDF
            doc = fitz.open(stream=pdf_bytes, filetype="pdf")
            try:
                for page in doc:
                    text += page.get_text() + "\n"
            finally:
                doc.close()  # was leaked before; release the document explicitly
        except ImportError:
            # Fallback: simple text extraction from the raw bytes — keep
            # printable ASCII, newlines, Hangul and Japanese kana only.
            content = pdf_bytes.decode("utf-8", errors="ignore")
            text = re.sub(r'[^\x20-\x7E\n\r\uAC00-\uD7A3\u3040-\u309F\u30A0-\u30FF]', '', content)

        text = text.strip()[:8000]  # Max 8000 chars
        return JSONResponse({"text": text, "chars": len(text)})
    except Exception as e:
        # Boundary handler: any decode/parse failure becomes a 500.
        return JSONResponse({"error": str(e)}, status_code=500)
478
-
479
# Mount the hidden Gradio app under /gradio; `fapp` keeps serving index.html,
# OAuth, and the helper APIs at the root. `app` is the combined ASGI app.
app = gr.mount_gradio_app(fapp, gradio_demo, path="/gradio")

if __name__ == "__main__":
    print(f"[BOOT] Darwin-35B-A3B-Opus Demo · SGLang: {SGLANG_URL}", flush=True)
    # 0.0.0.0:7860 is the standard HF Spaces bind address/port.
    uvicorn.run(app, host="0.0.0.0", port=7860)