SeaWolf-AI committed on
Commit
de52729
·
verified ·
1 Parent(s): c4783b1

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +559 -0
app.py ADDED
@@ -0,0 +1,559 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ 🧬 Darwin-35B-A3B-Opus — ZeroGPU Direct Serving
3
+ transformers + @spaces.GPU · Vision support · Streaming
4
+ """
5
+ import sys
6
+ print(f"[BOOT] Python {sys.version}", flush=True)
7
+
8
+ import base64, os, re, json, io
9
+ from typing import Generator, Optional
10
+ from threading import Thread
11
+
12
+ # ── Core imports ──────────────────────────────────────────────────────────
13
+ import torch
14
+ import spaces
15
+ import gradio as gr
16
+ print(f"[BOOT] gradio {gr.__version__}, torch {torch.__version__}", flush=True)
17
+
18
+ from transformers import (
19
+ AutoProcessor,
20
+ AutoModelForImageTextToText,
21
+ AutoModelForCausalLM,
22
+ AutoTokenizer,
23
+ TextIteratorStreamer,
24
+ )
25
+ from PIL import Image
26
+ import requests
27
+ import httpx, uvicorn
28
+ from fastapi import FastAPI, Request
29
+ from fastapi.responses import HTMLResponse, RedirectResponse, JSONResponse
30
+ from urllib.parse import urlencode
31
+ import pathlib, secrets
32
+
33
+ # Suppress urllib3 InsecureRequestWarning messages
34
+ import urllib3
35
+ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
36
+
37
+ # ══════════════════════════════════════════════════════════════════════════════
38
+ # 1. MODEL CONFIG
39
+ # ══════════════════════════════════════════════════════════════════════════════
40
# Hugging Face repository id and the short display name of the served model.
MODEL_ID = "FINAL-Bench/Darwin-35B-A3B-Opus"
MODEL_NAME = "Darwin-35B-A3B-Opus"

# Static capability card for the model. "max_tokens" and "temp_max" are the
# upper bounds that generate_reply() clamps incoming requests to.
MODEL_CAP = {
    "arch": "MoE",
    "active": "3B / 35B total",
    "ctx": "262K",
    "thinking": True,
    "vision": True,
    "max_tokens": 16384,
    "temp_max": 1.5,
}

# System-prompt presets, keyed by task name.
PRESETS = {
    "general": "You are Darwin-35B-A3B-Opus, a highly capable reasoning model created by VIDRAFT via evolutionary merge. Think step by step for complex questions.",
    "code": "You are an expert software engineer. Write clean, efficient, well-commented code. Explain your approach before writing. Use modern best practices.",
    "math": "You are a world-class mathematician. Break problems step-by-step. Show full working. Use LaTeX where helpful.",
    "creative": "You are a brilliant creative writer. Be imaginative, vivid, and engaging. Adapt tone and style to the request.",
    "translate": "You are a professional translator fluent in 201 languages. Provide accurate, natural-sounding translations with cultural context.",
    "research": "You are a rigorous research analyst. Provide structured, well-reasoned analysis. Identify assumptions and acknowledge uncertainty.",
}
56
+
57
+ # ══════════════════════════════════════════════════════════════════════════════
58
+ # 2. MODEL LOADING (ZeroGPU: CPU at import, GPU at inference)
59
+ # ══════════════════════════════════════════════════════════════════════════════
60
print(f"[MODEL] Loading {MODEL_ID} ...", flush=True)

# Module-level state populated below and read by the generation code.
IS_VISION = True  # True while the model is served through the processor (image + text) path
processor = None
tokenizer = None
model = None

# Step 1: prefer the multimodal processor; if it cannot be loaded, fall back
# to a plain tokenizer and serve the model text-only.
try:
    processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
    print("[MODEL] AutoProcessor loaded (vision mode)", flush=True)
except Exception as e:
    print(f"[MODEL] AutoProcessor failed: {e}", flush=True)
    print("[MODEL] Falling back to AutoTokenizer (text-only mode)", flush=True)
    IS_VISION = False
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)

# Step 2: load the weights in bfloat16; on any failure retry once with 4-bit
# NF4 quantization. The same model class is used for both attempts.
ModelClass = AutoModelForImageTextToText if IS_VISION else AutoModelForCausalLM
try:
    model = ModelClass.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        trust_remote_code=True,
    )
    print(f"[MODEL] {ModelClass.__name__} loaded ✓", flush=True)
except Exception as e:
    print(f"[MODEL] bfloat16 load failed: {e}", flush=True)
    print("[MODEL] Retrying with 4-bit quantization...", flush=True)
    from transformers import BitsAndBytesConfig

    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_use_double_quant=True,
    )
    model = ModelClass.from_pretrained(
        MODEL_ID,
        quantization_config=bnb_config,
        device_map="auto",
        trust_remote_code=True,
    )
    print("[MODEL] 4-bit quantized model loaded ✓", flush=True)

# Pick the object used for decoding streamed tokens: the processor's own
# tokenizer when it exposes one, otherwise the processor or the tokenizer.
_tok = processor.tokenizer if (processor and hasattr(processor, 'tokenizer')) else (processor or tokenizer)
print(f"[MODEL] Ready — device: {model.device}, dtype: {model.dtype}", flush=True)
115
+
116
+ # ══════════════════════════════════════════════════════════════════════════════
117
+ # 3. THINKING MODE HELPERS (existing logic preserved)
118
+ # ══════════════════════════════════════════════════════════════════════════════
119
def parse_think_blocks(text: str) -> tuple[str, str]:
    """Split *text* into ``(reasoning, answer)`` around the first <think> block.

    When a complete ``<think>...</think>`` pair is present, the reasoning is
    the stripped content of that pair and the answer is the stripped text
    that follows it (anything before ``<think>`` is discarded). Without a
    complete pair the result is ``("", text)`` with *text* untouched.
    """
    found = re.search(r"<think>(.*?)</think>\s*", text, re.DOTALL)
    if found is None:
        return "", text
    reasoning = found.group(1).strip()
    remainder = text[found.end():].strip()
    return reasoning, remainder
122
+
123
+ def _is_thinking_line(line: str) -> bool:
124
+ l = line.strip()
125
+ if not l:
126
+ return True
127
+ think_starts = [
128
+ "The user", "the user", "This is", "this is", "I should", "I need to",
129
+ "Let me", "let me", "My task", "my task", "I'll ", "I will",
130
+ "Since ", "since ", "Now,", "now,", "So,", "so,", "First,", "first,",
131
+ "Okay", "okay", "Alright", "Hmm", "Wait", "Actually",
132
+ "The question", "the question", "The input", "the input",
133
+ "The request", "the request", "The prompt", "the prompt",
134
+ "Thinking Process", "Thinking process", "**Thinking",
135
+ "Step ", "step ", "Approach:", "Analysis:", "Reasoning:",
136
+ "1. **", "2. **", "3. **", "4. **", "5. **",
137
+ ]
138
+ for s in think_starts:
139
+ if l.startswith(s):
140
+ return True
141
+ if l.startswith(("- ", "* ", "β—‹ ")) and any(c.isascii() and c.isalpha() for c in l[:20]):
142
+ if not any(ord(c) > 0x1100 for c in l[:30]):
143
+ return True
144
+ return False
145
+
146
def _split_thinking_answer(raw: str) -> tuple:
    """Split untagged output into ``(thinking, answer)`` by line heuristics.

    Lines are scanned in order. The answer starts at the first line that
    ``_is_thinking_line`` rejects and that either has a character above
    U+1100 within its first 10 stripped characters, or sits at index > 2
    directly after two blank lines. If no such line exists, or it is the
    very first line, ``("", raw)`` is returned.
    """
    rows = raw.split("\n")
    split_at = -1
    for idx, row in enumerate(rows):
        if _is_thinking_line(row):
            continue
        starts_non_latin = any(ord(ch) > 0x1100 for ch in row.strip()[:10])
        # idx > 2 guarantees the two-line look-behind slice is in range.
        after_blank_gap = idx > 2 and all(
            not previous.strip() for previous in rows[idx - 2:idx]
        )
        if starts_non_latin or after_blank_gap:
            split_at = idx
            break

    if split_at <= 0:
        return "", raw
    head = "\n".join(rows[:split_at]).strip()
    tail = "\n".join(rows[split_at:]).strip()
    return head, tail
163
+
164
def format_response(raw: str) -> str:
    """Render raw model output as chat-ready Markdown/HTML.

    Cases, in order:

    1. A complete, non-empty ``<think>...</think>`` block becomes a
       collapsible ``<details>`` section followed by the answer.
    2. A complete but empty ``<think></think>`` block is dropped and only
       the answer is returned, so the tags never reach the user.
    3. An opened but unclosed ``<think>`` yields a progress placeholder.
    4. Untagged output whose first line looks like reasoning is split
       heuristically into a ``<details>`` section plus answer.
    5. Anything else is returned unchanged.

    Args:
        raw: The text generated so far (may be a partial stream).

    Returns:
        The text to display.
    """
    chain, answer = parse_think_blocks(raw)
    if chain:
        return (
            "<details>\n"
            "<summary>🧠 Reasoning Chain — click to expand</summary>\n\n"
            f"{chain}\n\n"
            "</details>\n\n"
            f"{answer}"
        )
    if answer != raw:
        # parse_think_blocks returns the input untouched when nothing
        # matched, so a differing answer means an empty think block was
        # matched; show just the text after it.
        return answer
    if "<think>" in raw and "</think>" not in raw:
        think_len = len(raw) - raw.index("<think>") - len("<think>")
        return f"🧠 Reasoning... ({think_len} chars)"

    first_line = raw.strip().split("\n")[0] if raw.strip() else ""
    if _is_thinking_line(first_line) and len(raw) > 20:
        thinking, answer = _split_thinking_answer(raw)
        if thinking and answer:
            return (
                f"<details>\n"
                f"<summary>🧠 Reasoning Chain ({len(thinking)} chars)</summary>\n\n"
                f"{thinking}\n\n"
                f"</details>\n\n"
                f"{answer}"
            )
        elif thinking and not answer:
            # NOTE(review): looks unreachable with the current
            # _split_thinking_answer (it only returns thinking text together
            # with a non-blank answer line) - kept as a defensive branch.
            return f"🧠 Reasoning... ({len(raw)} chars)"
    return raw
191
+
192
+ # ══════════════════════════════════════════════════════════════════════════════
193
+ # 4. IMAGE HELPERS
194
+ # ══════════════════════════════════════════════════════════════════════════════
195
def _load_image_from_source(src: str, max_bytes: int = 20 * 1024 * 1024) -> Optional[Image.Image]:
    """Load an RGB PIL image from a base64 ``data:`` URI or an HTTP(S) URL.

    Args:
        src: Either ``data:<mime>;base64,<payload>`` or an ``http://`` /
            ``https://`` URL. Surrounding whitespace is ignored.
        max_bytes: Upper bound on the decoded/downloaded payload size;
            larger payloads are rejected.

    Returns:
        The decoded image converted to RGB, or None when *src* has an
        unsupported form or loading fails for any reason (failures are
        logged, never raised).
    """
    src = (src or "").strip()
    try:
        if src.startswith("data:"):
            _, b64 = src.split(",", 1)
            payload = base64.b64decode(b64)
            if len(payload) > max_bytes:
                raise ValueError(f"image exceeds {max_bytes} bytes")
        elif src.lower().startswith(("http://", "https://")):
            # NOTE(review): the URL is caller-supplied and fetched
            # server-side (SSRF surface). Consider rejecting private or
            # link-local hosts if this endpoint is exposed publicly.
            # Stream the body so an oversized response is abandoned early
            # instead of being buffered whole.
            with requests.get(src, timeout=15, stream=True) as resp:
                resp.raise_for_status()
                buf = bytearray()
                for chunk in resp.iter_content(chunk_size=65536):
                    buf.extend(chunk)
                    if len(buf) > max_bytes:
                        raise ValueError(f"image exceeds {max_bytes} bytes")
            payload = bytes(buf)
        else:
            return None
        return Image.open(io.BytesIO(payload)).convert("RGB")
    except Exception as e:
        # Best effort by design: the caller falls back to text-only input.
        print(f"[IMG] Failed to load image: {e}", flush=True)
    return None
208
+
209
+ # ══════════════════════════════════════════════════════════════════════════════
210
+ # 5. GENERATION — ZeroGPU + TextIteratorStreamer
211
+ # ══════════════════════════════════════════════════════════════════════════════
212
@spaces.GPU(duration=180)
def _run_generation(input_ids, attention_mask, pixel_values, image_grid_thw,
                    max_new_tokens, temperature, top_p, streamer):
    """Run ``model.generate`` on the GPU, streaming tokens into *streamer*.

    Args:
        input_ids: Prompt token ids; moved to ``model.device``.
        attention_mask: Attention mask matching *input_ids*.
        pixel_values: Optional vision tensor, forwarded when not None.
        image_grid_thw: Optional vision grid tensor, forwarded when not None.
        max_new_tokens: Generation length limit.
        temperature: Sampling temperature; values <= 0.01 select greedy
            decoding.
        top_p: Nucleus sampling threshold.
        streamer: Streamer that receives the generated text.

    Raises:
        Exception: Whatever tensor transfer or ``model.generate`` raises.
            The streamer is closed first so a consumer iterating over it
            does not block forever waiting for an end signal.

    NOTE(review): under ZeroGPU the decorated call may run in a separate
    worker process - confirm that a streamer handed in from the caller's
    thread actually reaches the consumer in that setup.
    """
    try:
        device = model.device
        sampling = temperature > 0.01
        gen_kwargs = dict(
            input_ids=input_ids.to(device),
            attention_mask=attention_mask.to(device),
            max_new_tokens=max_new_tokens,
            do_sample=sampling,
            # Greedy decoding gets a neutral temperature.
            temperature=temperature if sampling else 1.0,
            top_p=top_p,
            streamer=streamer,
            use_cache=True,
        )
        # Vision inputs, when present.
        if pixel_values is not None:
            gen_kwargs["pixel_values"] = pixel_values.to(device)
        if image_grid_thw is not None:
            gen_kwargs["image_grid_thw"] = image_grid_thw.to(device)

        with torch.inference_mode():
            model.generate(**gen_kwargs)
    except Exception:
        # generate() only signals end-of-stream on success; do it here so
        # the reader side terminates, then let the error propagate.
        if streamer is not None:
            streamer.end()
        raise
234
+
235
+
236
# Matches the collapsible reasoning block that format_response() puts in
# front of a displayed answer, so it can be dropped when that text comes
# back as chat history.
_REASONING_DETAILS_RE = re.compile(
    r"^\s*<details>\s*<summary>🧠 Reasoning Chain[^<]*</summary>.*?</details>\s*",
    re.DOTALL,
)


def _content_to_text(content) -> Optional[str]:
    """Flatten a chat ``content`` value (None, str or list of parts) to text."""
    if content is None:
        return None
    if isinstance(content, list):
        return " ".join(
            part.get("text", "")
            for part in content
            if isinstance(part, dict) and part.get("type") == "text"
        )
    return str(content)


def _strip_reasoning(text: str) -> str:
    """Remove a <think> block or rendered reasoning section from a reply."""
    _, answer = parse_think_blocks(text)
    return _REASONING_DETAILS_RE.sub("", answer, count=1)


def _history_to_messages(history) -> list[dict]:
    """Convert Gradio history (role dicts or [user, assistant] pairs) to chat messages."""
    messages: list[dict] = []
    for turn in history or []:
        if isinstance(turn, dict):
            role = turn.get("role", "")
            text = _content_to_text(turn.get("content") or "")
            if role == "user":
                messages.append({"role": "user", "content": text})
            elif role == "assistant":
                messages.append({"role": "assistant", "content": _strip_reasoning(text)})
            continue

        try:
            user_part = turn[0] or None
            bot_part = turn[1] if len(turn) > 1 else None
        except (IndexError, TypeError):
            continue
        user_text = _content_to_text(user_part)
        bot_text = _content_to_text(bot_part)
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if bot_text:
            messages.append({"role": "assistant", "content": _strip_reasoning(bot_text)})
    return messages


def generate_reply(
    message: str,
    history: list,
    thinking_mode: str,
    image_input,
    system_prompt: str,
    max_new_tokens: int,
    temperature: float,
    top_p: float,
) -> Generator[str, None, None]:
    """Stream a formatted reply to *message* given the chat *history*.

    Args:
        message: The current user message.
        history: Earlier turns, as role dicts or ``[user, assistant]`` pairs.
        thinking_mode: Value of the mode toggle.
            NOTE(review): currently not used by the generation path -
            confirm whether it should be forwarded to the chat template.
        image_input: Optional image as a ``data:`` URI or URL string.
        system_prompt: Optional system prompt; blank means none.
        max_new_tokens: Requested length, clamped to
            ``[1, MODEL_CAP["max_tokens"]]``.
        temperature: Requested temperature, capped at
            ``MODEL_CAP["temp_max"]``.
        top_p: Nucleus sampling threshold.

    Yields:
        The progressively longer reply rendered by ``format_response``, or
        a single error/warning message when tokenization or generation
        fails or nothing is produced.
    """
    max_new_tokens = max(1, min(int(max_new_tokens), MODEL_CAP["max_tokens"]))
    temperature = min(float(temperature), MODEL_CAP["temp_max"])

    # -- Build the message list --
    messages: list[dict] = []
    system_text = (system_prompt or "").strip()
    if system_text:
        messages.append({"role": "system", "content": system_text})
    messages.extend(_history_to_messages(history))

    # -- Current message (may carry an image) --
    pil_image = None
    if isinstance(image_input, str) and image_input.strip():
        pil_image = _load_image_from_source(image_input)
    has_image = pil_image is not None

    if IS_VISION and has_image:
        # Vision mode: image + text parts.
        messages.append({
            "role": "user",
            "content": [
                {"type": "image", "image": pil_image},
                {"type": "text", "text": message},
            ],
        })
    else:
        messages.append({"role": "user", "content": message})

    # -- Tokenize --
    try:
        if IS_VISION and processor is not None:
            text_prompt = processor.apply_chat_template(
                messages,
                tokenize=False,
                add_generation_prompt=True,
            )
            if has_image:
                inputs = processor(
                    text=[text_prompt],
                    images=[pil_image],
                    return_tensors="pt",
                    padding=True,
                )
            else:
                inputs = processor(
                    text=[text_prompt],
                    return_tensors="pt",
                    padding=True,
                )
        else:
            # Text-only mode.
            text_prompt = tokenizer.apply_chat_template(
                messages,
                tokenize=False,
                add_generation_prompt=True,
            )
            inputs = tokenizer(text_prompt, return_tensors="pt")
    except Exception as e:
        yield f"**❌ Tokenization error:** `{e}`"
        return

    # -- Streamer setup --
    streamer = TextIteratorStreamer(_tok, skip_special_tokens=True, skip_prompt=True)

    # -- Extract tensors --
    input_ids = inputs["input_ids"]
    attention_mask = inputs.get("attention_mask", torch.ones_like(input_ids))
    pixel_values = inputs.get("pixel_values", None)
    image_grid_thw = inputs.get("image_grid_thw", None)

    print(f"[GEN] tokens={input_ids.shape[-1]}, max_new={max_new_tokens}, "
          f"temp={temperature}, vision={has_image}", flush=True)

    # -- Run generation in a background thread --
    worker_errors: list[Exception] = []

    def _generate_in_background() -> None:
        try:
            _run_generation(
                input_ids=input_ids,
                attention_mask=attention_mask,
                pixel_values=pixel_values,
                image_grid_thw=image_grid_thw,
                max_new_tokens=max_new_tokens,
                temperature=temperature,
                top_p=float(top_p),
                streamer=streamer,
            )
        except Exception as exc:
            # Without an end signal the loop below would wait forever.
            worker_errors.append(exc)
            print(f"[GEN] Generation failed: {exc}", flush=True)
            streamer.end()

    thread = Thread(target=_generate_in_background)
    thread.start()

    output = ""
    stream_error: Optional[Exception] = None
    try:
        for chunk in streamer:
            output += chunk
            yield format_response(output)
    except Exception as exc:
        stream_error = exc

    thread.join()

    failure = stream_error or (worker_errors[0] if worker_errors else None)
    if output:
        print(f"[GEN] Done — {len(output)} chars", flush=True)
        yield format_response(output)
    elif failure is not None:
        # Report the real cause instead of the generic "empty response".
        yield f"**❌ Generation error:** `{failure}`"
    else:
        yield "**⚠️ 모델이 빈 응답을 반환했습니다.** 다시 시도해 주세요."
387
+
388
+
389
+ # ══════════════════════════════════════════════════════════════════════════════
390
+ # 6. GRADIO BLOCKS
391
+ # ══════════════════════════════════════════════════════════════════════════════
392
# Headless Gradio app: every control is hidden and exists only so the
# "chat" API endpoint accepts the matching additional inputs.
with gr.Blocks(title="Darwin-35B-A3B-Opus") as gradio_demo:
    thinking_toggle = gr.Radio(
        choices=["⚡ Fast Mode (direct answer)",
                 "🧠 Thinking Mode (chain-of-thought reasoning)"],
        value="⚡ Fast Mode (direct answer)",
        visible=False,
    )
    image_input = gr.Textbox(value="", visible=False)
    system_prompt = gr.Textbox(value=PRESETS["general"], visible=False)
    # Slider maxima come from MODEL_CAP so they stay in sync with the
    # clamping done in generate_reply().
    max_new_tokens = gr.Slider(minimum=64, maximum=MODEL_CAP["max_tokens"], value=4096, visible=False)
    temperature = gr.Slider(minimum=0.0, maximum=MODEL_CAP["temp_max"], value=0.6, visible=False)
    top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.9, visible=False)

    gr.ChatInterface(
        fn=generate_reply,
        api_name="chat",
        additional_inputs=[
            thinking_toggle, image_input,
            system_prompt, max_new_tokens, temperature, top_p,
        ],
    )
413
+
414
+ # ══════════════════════════════════════════════════════════════════════════════
415
+ # 7. FASTAPI — index.html + OAuth + utility API
416
+ # ══════════════════════════════════════════════════════════════════════════════
417
fapp = FastAPI()

# In-memory session store, keyed by the value of the "mc_session" cookie.
SESSIONS: dict[str, dict] = {}

# Front-end page served at "/", expected next to this file.
HTML = pathlib.Path(__file__).with_name("index.html")

# Hugging Face OAuth endpoints.
HF_AUTH_URL = "https://huggingface.co/oauth/authorize"
HF_TOKEN_URL = "https://huggingface.co/oauth/token"
HF_USER_URL = "https://huggingface.co/oauth/userinfo"

# OAuth client settings, read from the environment.
CLIENT_ID = os.environ.get("OAUTH_CLIENT_ID", "")
CLIENT_SECRET = os.environ.get("OAUTH_CLIENT_SECRET", "")
SCOPES = os.environ.get("OAUTH_SCOPES", "openid profile")
SPACE_HOST = os.environ.get("SPACE_HOST", "localhost:7860")
REDIRECT_URI = f"https://{SPACE_HOST}/login/callback"

print(f"[OAuth] CLIENT_ID set: {bool(CLIENT_ID)}")
print(f"[OAuth] SPACE_HOST: {SPACE_HOST}")
432
+
433
def _sid(req: Request) -> Optional[str]:
    """Return the ``mc_session`` cookie value of *req*, or None if absent."""
    cookies = req.cookies
    return cookies.get("mc_session")
435
+
436
def _user(req: Request) -> Optional[dict]:
    """Return the session record for *req*'s session cookie, or None."""
    session_id = _sid(req)
    if not session_id:
        return None
    return SESSIONS.get(session_id)
439
+
440
@fapp.get("/")
async def root(request: Request):
    """Serve index.html, or a short placeholder page when it is missing."""
    if HTML.exists():
        page = HTML.read_text(encoding="utf-8")
    else:
        page = "<h2>index.html missing</h2>"
    return HTMLResponse(page)
444
+
445
@fapp.get("/oauth/user")
async def oauth_user(request: Request):
    """Return the current session record, or 401 with ``logged_in: False``."""
    session = _user(request)
    if not session:
        return JSONResponse({"logged_in": False}, status_code=401)
    return JSONResponse(session)
449
+
450
# Short-lived cookie that ties the OAuth "state" value to the browser that
# started the login, so the callback can reject forged requests.
_STATE_COOKIE = "mc_oauth_state"


def _auth_failed() -> RedirectResponse:
    """Redirect to the front page with an auth error and clear the state cookie."""
    resp = RedirectResponse("/?auth_error=1")
    resp.delete_cookie(_STATE_COOKIE)
    return resp


@fapp.get("/oauth/login")
async def oauth_login(request: Request):
    """Start the Hugging Face OAuth authorization-code flow.

    Redirects to the authorize endpoint with a fresh random ``state`` and
    stores that value in a cookie for verification in the callback. When
    no client id is configured, redirects back with
    ``oauth_error=not_configured``.
    """
    if not CLIENT_ID:
        return RedirectResponse("/?oauth_error=not_configured")
    state = secrets.token_urlsafe(16)
    params = {
        "response_type": "code",
        "client_id": CLIENT_ID,
        "redirect_uri": REDIRECT_URI,
        "scope": SCOPES,
        "state": state,
    }
    resp = RedirectResponse(f"{HF_AUTH_URL}?{urlencode(params)}", status_code=302)
    resp.set_cookie(_STATE_COOKIE, state, httponly=True, samesite="lax", secure=True, max_age=600)
    return resp


@fapp.get("/login/callback")
async def oauth_callback(request: Request, code: str = "", error: str = "", state: str = ""):
    """Finish the OAuth flow: verify state, exchange the code, create a session.

    Args:
        request: Incoming request, used to read the state cookie.
        code: Authorization code from the provider.
        error: Error indicator from the provider, if any.
        state: The ``state`` value echoed back by the provider.

    Returns:
        A redirect to ``/`` with the session cookie set on success, or a
        redirect to ``/?auth_error=1`` when the provider reports an error,
        the state does not match the cookie set at login, or the token or
        user-info request fails.
    """
    if error or not code:
        return _auth_failed()

    # The state must match what oauth_login stored for this browser;
    # otherwise the callback may have been triggered by a third party.
    expected_state = request.cookies.get(_STATE_COOKIE, "")
    if (
        not state
        or not expected_state
        or not secrets.compare_digest(state.encode(), expected_state.encode())
    ):
        return _auth_failed()

    basic = base64.b64encode(f"{CLIENT_ID}:{CLIENT_SECRET}".encode()).decode()
    try:
        async with httpx.AsyncClient(timeout=15) as client:
            tok = await client.post(
                HF_TOKEN_URL,
                data={"grant_type": "authorization_code", "code": code, "redirect_uri": REDIRECT_URI},
                headers={"Accept": "application/json", "Authorization": f"Basic {basic}"},
            )
            if tok.status_code != 200:
                return _auth_failed()
            token_payload = tok.json()
            access_token = token_payload.get("access_token", "") if isinstance(token_payload, dict) else ""
            if not access_token:
                return _auth_failed()
            uinfo = await client.get(HF_USER_URL, headers={"Authorization": f"Bearer {access_token}"})
            if uinfo.status_code != 200:
                return _auth_failed()
            user = uinfo.json()
    except (httpx.HTTPError, ValueError) as exc:
        # Network failure or a non-JSON body: report it as a failed login.
        print(f"[OAuth] callback failed: {exc}", flush=True)
        return _auth_failed()
    if not isinstance(user, dict):
        return _auth_failed()

    sid = secrets.token_urlsafe(32)
    SESSIONS[sid] = {
        "logged_in": True,
        "username": user.get("preferred_username", user.get("name", "User")),
        "name": user.get("name", ""),
        "avatar": user.get("picture", ""),
        "profile": f"https://huggingface.co/{user.get('preferred_username', '')}",
    }
    resp = RedirectResponse("/")
    resp.set_cookie("mc_session", sid, httponly=True, samesite="lax", secure=True, max_age=60*60*24*7)
    resp.delete_cookie(_STATE_COOKIE)
    return resp
487
+
488
@fapp.get("/oauth/logout")
async def oauth_logout(request: Request):
    """Drop the caller's session (if any), clear the cookie, redirect to ``/``."""
    session_id = _sid(request)
    if session_id:
        SESSIONS.pop(session_id, None)
    redirect = RedirectResponse("/")
    redirect.delete_cookie("mc_session")
    return redirect
495
+
496
@fapp.get("/health")
async def health():
    """Report service status plus the loaded model's id, mode, device and dtype."""
    report = {"status": "ok", "model": MODEL_ID, "vision": IS_VISION}
    report["device"] = str(model.device)
    report["dtype"] = str(model.dtype)
    return report
505
+
506
+ # ── Web Search API (Brave) ──
507
BRAVE_API_KEY = os.getenv("BRAVE_API_KEY", "")


@fapp.post("/api/search")
async def api_search(request: Request):
    """Proxy a web search to the Brave Search API.

    Expects a JSON body ``{"query": "<text>"}``.

    Returns:
        ``{"results": [{"title", "desc", "url"}, ...]}`` with at most five
        items on success; 400 for an invalid body or empty query; 500 when
        the API key is not configured or the upstream request fails.
    """
    try:
        body = await request.json()
    except ValueError:
        return JSONResponse({"error": "invalid JSON body"}, status_code=400)

    query = body.get("query", "") if isinstance(body, dict) else ""
    query = query.strip() if isinstance(query, str) else ""
    if not query:
        return JSONResponse({"error": "empty query"}, status_code=400)
    key = BRAVE_API_KEY
    if not key:
        return JSONResponse({"error": "BRAVE_API_KEY not set"}, status_code=500)
    try:
        # Async client so the event loop is not blocked while waiting.
        async with httpx.AsyncClient(timeout=10) as client:
            r = await client.get(
                "https://api.search.brave.com/res/v1/web/search",
                headers={"X-Subscription-Token": key, "Accept": "application/json"},
                params={"q": query, "count": 5},
            )
        r.raise_for_status()
        results = r.json().get("web", {}).get("results", [])
        items = [
            {"title": item.get("title", ""), "desc": item.get("description", ""), "url": item.get("url", "")}
            for item in results[:5]
        ]
        return JSONResponse({"results": items})
    except Exception as e:
        return JSONResponse({"error": str(e)}, status_code=500)
530
+
531
+ # ── PDF Text Extraction ──
532
@fapp.post("/api/extract-pdf")
async def api_extract_pdf(request: Request):
    """Extract text from a base64-encoded PDF.

    Expects a JSON body ``{"data": "<base64 or data: URI>"}``. Uses PyMuPDF
    (``fitz``) when it is installed; otherwise falls back to decoding the
    raw bytes and keeping only printable ASCII, Hangul and kana characters.

    Returns:
        ``{"text": ..., "chars": ...}`` with the text stripped and truncated
        to 8000 characters, or ``{"error": ...}`` with status 500 on failure.
    """
    try:
        body = await request.json()
        b64 = body.get("data", "")
        if "," in b64:
            # Drop a "data:...;base64," prefix if present.
            b64 = b64.split(",", 1)[1]
        pdf_bytes = base64.b64decode(b64)
        try:
            import fitz
        except ImportError:
            content = pdf_bytes.decode("utf-8", errors="ignore")
            text = re.sub(r'[^\x20-\x7E\n\r\uAC00-\uD7A3\u3040-\u309F\u30A0-\u30FF]', '', content)
        else:
            # Context manager closes the document even if a page fails.
            with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
                text = "".join(page.get_text() + "\n" for page in doc)
        text = text.strip()[:8000]
        return JSONResponse({"text": text, "chars": len(text)})
    except Exception as e:
        return JSONResponse({"error": str(e)}, status_code=500)
553
+
554
+ # ── Mount ──
555
# Serve the Gradio app under /gradio on the same FastAPI application.
app = gr.mount_gradio_app(fapp, gradio_demo, path="/gradio")

if __name__ == "__main__":
    print("[BOOT] Darwin-35B-A3B-Opus · ZeroGPU Direct Serving", flush=True)
    uvicorn.run(app, host="0.0.0.0", port=7860)