# app.py — Qwen3.5 MultiChat: FastAPI frontend (index.html + HF OAuth) with a
# hidden Gradio app mounted at /gradio that exposes the streaming chat API.
# ── Bootstrap: import dependencies and fail fast with readable Space logs ─────
import sys

print(f"[BOOT] Python {sys.version}", flush=True)

import base64
import os
import re
from typing import Generator, Optional

# Gradio is imported first on its own so a broken install is unambiguous.
try:
    import gradio as gr
    print(f"[BOOT] gradio {gr.__version__}", flush=True)
except ImportError as e:
    print(f"[BOOT] FATAL: {e}", flush=True)
    sys.exit(1)

# Remaining third-party dependencies; any one missing aborts startup.
try:
    from huggingface_hub import InferenceClient
    import httpx
    import uvicorn
    from fastapi import FastAPI, Request
    from fastapi.responses import HTMLResponse, RedirectResponse, JSONResponse
    print("[BOOT] All imports OK", flush=True)
except ImportError as e:
    print(f"[BOOT] FATAL: {e} β add to requirements.txt", flush=True)
    sys.exit(1)
# ──────────────────────────────────────────────────────────────────────────────
# 1. MODEL CAPABILITY MATRIX
#    Per-model generation limits and UI metadata, keyed by HF model id.
#    Insertion order matters: the first entry doubles as the default model.
# ──────────────────────────────────────────────────────────────────────────────
MODEL_CAPS: dict[str, dict] = {
    "Qwen/Qwen3.5-122B-A10B": {
        "arch": "MoE",
        "active": "10B / 122B total",
        "ctx": "262K β 1M",
        "thinking": True,
        "vision": True,
        "max_tokens": 8192,
        "temp_max": 2.0,
        "top_p": True,
        "color": "#7c3aed",
        "badge": "π Best Overall Β· BFCL 72.2 Β· GPQA 86.6 Β· SWE 72.0",
        "desc": "Top reasoning & agents Β· Complex math Β· Long context",
    },
    "Qwen/Qwen3.5-27B": {
        "arch": "Dense",
        "active": "27B (all active)",
        "ctx": "262K β 1M",
        "thinking": True,
        "vision": True,
        "max_tokens": 8192,
        "temp_max": 2.0,
        "top_p": True,
        "color": "#0d9488",
        "badge": "π― Dense #1 Β· IFEval 95.0 Β· SWE 72.4 Β· PolyMATH 71.2",
        "desc": "Instruction king Β· Creative writing Β· 201 languages",
    },
    "Qwen/Qwen3.5-35B-A3B": {
        "arch": "MoE",
        "active": "3B / 35B total",
        "ctx": "262K β 1M",
        "thinking": True,
        "vision": True,
        "max_tokens": 4096,
        "temp_max": 1.5,
        "top_p": True,
        "color": "#d97706",
        "badge": "β‘ Flash Speed Β· TAU2 81.2 Β· MMLU-Pro 85.3",
        "desc": "Fastest Β· 3B active params Β· ~6Γ faster than 27B",
    },
}

MODEL_IDS = list(MODEL_CAPS)      # dropdown choices, in declaration order
DEFAULT_MODEL = MODEL_IDS[0]      # first declared model is the default
# ──────────────────────────────────────────────────────────────────────────────
# 2. SYSTEM PROMPT PRESETS
#    Selectable system prompts; "general" is the UI default.
# ──────────────────────────────────────────────────────────────────────────────
PRESETS = {
    "general": "You are a helpful, harmless, and honest AI assistant.",
    "code": "You are an expert software engineer. Write clean, efficient, well-commented code. Explain your approach before writing. Use modern best practices.",
    "math": "You are a world-class mathematician. Break problems step-by-step. Show full working. Use LaTeX where helpful.",
    "creative": "You are a brilliant creative writer. Be imaginative, vivid, and engaging. Adapt tone and style to the request.",
    "translate": "You are a professional translator fluent in 201 languages. Provide accurate, natural-sounding translations with cultural context.",
    "research": "You are a rigorous research analyst. Provide structured, well-reasoned analysis. Identify assumptions and acknowledge uncertainty.",
}
# ──────────────────────────────────────────────────────────────────────────────
# 3. THINKING MODE HELPERS
# ──────────────────────────────────────────────────────────────────────────────
def build_user_message(text: str, thinking: bool) -> str:
    """Prefix *text* with the Qwen soft-switch directive for thinking mode."""
    directive = "/think\n" if thinking else "/no_think\n"
    return directive + text
def parse_think_blocks(text: str) -> tuple[str, str]:
    """Split *text* into ``(reasoning, answer)`` around one <think>…</think> block.

    Returns ``("", text)`` unchanged when no complete block is present.
    Both parts are whitespace-stripped.
    """
    match = re.search(r"<think>(.*?)</think>\s*", text, re.DOTALL)
    if match is None:
        return "", text
    return match.group(1).strip(), text[match.end():].strip()


def format_response(raw: str) -> str:
    """Render a raw completion as markdown, folding any reasoning chain
    into a collapsible <details> element above the final answer."""
    chain, answer = parse_think_blocks(raw)
    if not chain:
        return raw
    quoted = "\n".join(f"> {line}" for line in chain.split("\n"))
    return (
        "<details>\n"
        "<summary>π§ Reasoning Chain β click to expand</summary>\n\n"
        f"{quoted}\n\n"
        "</details>\n\n"
        + answer
    )
# ──────────────────────────────────────────────────────────────────────────────
# 4. STREAMING BACKEND
# ──────────────────────────────────────────────────────────────────────────────
def generate_reply(
    message: str,
    history: list,
    model_id: str,
    thinking_mode: str,
    image_input,
    system_prompt: str,
    max_new_tokens: int,
    temperature: float,
    top_p: float,
) -> Generator[str, None, None]:
    """Stream a chat completion for *message* against the selected model.

    Yields progressively longer markdown strings (Gradio streaming contract).
    ``history`` may arrive either as openai-style ``{"role", "content"}`` dicts
    or legacy ``(user, assistant)`` pairs; both are normalized. Assistant turns
    are stripped of their <think> blocks before being replayed as context.
    ``image_input`` may be a base64 data-URL string (JS frontend), a PIL image
    (Gradio UI), or a numpy array (legacy Gradio); it is only attached when the
    selected model supports vision. Backend errors are surfaced as a final
    markdown message instead of raising.
    """
    token = os.getenv("HF_TOKEN")  # may be None; the API call then fails visibly
    client = InferenceClient(token=token, timeout=120)

    cap = MODEL_CAPS[model_id]
    use_think = "Thinking" in thinking_mode and cap["thinking"]
    # Clamp user-supplied knobs to the model's advertised limits.
    max_new_tokens = min(int(max_new_tokens), cap["max_tokens"])
    temperature = min(float(temperature), cap["temp_max"])

    messages: list[dict] = []
    if system_prompt.strip():
        messages.append({"role": "system", "content": system_prompt.strip()})

    def _flatten(value) -> str:
        """Collapse Gradio multimodal content (list of typed parts) to text."""
        if isinstance(value, list):
            return " ".join(
                part.get("text", "")
                for part in value
                if isinstance(part, dict) and part.get("type") == "text"
            )
        return str(value)

    for turn in history:
        if isinstance(turn, dict):  # openai-style message dict
            role = turn.get("role", "")
            text = _flatten(turn.get("content") or "")
            if role == "user":
                messages.append({"role": "user", "content": text})
            elif role == "assistant":
                # Drop the reasoning chain so it is not fed back as context.
                _, clean = parse_think_blocks(text)
                messages.append({"role": "assistant", "content": clean})
        else:  # legacy (user, assistant) tuple
            try:
                user_raw = turn[0] or None
                assistant_raw = turn[1] if len(turn) > 1 else None
            except (IndexError, TypeError):
                continue  # malformed turn — skip rather than crash mid-stream
            if user_raw is not None and (u := _flatten(user_raw)):
                messages.append({"role": "user", "content": u})
            if assistant_raw is not None and (a := _flatten(assistant_raw)):
                _, clean = parse_think_blocks(a)
                messages.append({"role": "assistant", "content": clean})

    user_text = build_user_message(message, use_think)
    if image_input and cap["vision"]:
        import io
        from PIL import Image as PILImage

        if isinstance(image_input, str) and image_input.startswith("data:"):
            # Already a data URL from the JS frontend: pass it through untouched
            # so the real MIME type (png, webp, …) is preserved.
            # FIX: previously the header was stripped and the payload was
            # re-labeled as image/jpeg regardless of the actual format.
            data_url = image_input
        else:
            # PIL image (Gradio UI) or numpy array (legacy): encode as JPEG.
            if not isinstance(image_input, PILImage.Image):
                image_input = PILImage.fromarray(image_input)
            buf = io.BytesIO()
            # JPEG cannot store an alpha channel; normalize to RGB first so
            # RGBA/palette images do not crash the save call.
            image_input.convert("RGB").save(buf, format="JPEG")
            b64 = base64.b64encode(buf.getvalue()).decode()
            data_url = f"data:image/jpeg;base64,{b64}"
        content = [
            {"type": "image_url", "image_url": {"url": data_url}},
            {"type": "text", "text": user_text},
        ]
    else:
        content = user_text
    messages.append({"role": "user", "content": content})

    try:
        stream = client.chat_completion(
            model=model_id,
            messages=messages,
            max_tokens=max_new_tokens,
            temperature=temperature,
            top_p=float(top_p),
            stream=True,
        )
        raw = ""
        for chunk in stream:
            if not chunk.choices:
                continue
            delta = chunk.choices[0].delta
            if not delta or not delta.content:
                continue
            raw += delta.content
            yield format_response(raw)
    except Exception as exc:  # boundary: surface any backend failure in-chat
        yield (
            f"**Error:** `{model_id}`\n\n```\n{exc}\n```\n\n"
            "_Check HF\\_TOKEN or try another model._"
        )
# ──────────────────────────────────────────────────────────────────────────────
# 5. GRADIO BLOCKS (hidden β only serves /gradio/gradio_api/call/chat API)
# ──────────────────────────────────────────────────────────────────────────────
with gr.Blocks(title="Qwen3.5 MultiChat API") as gradio_demo:
    # Every control is invisible: this Blocks app exists solely to expose the
    # ChatInterface API endpoint; the user-facing UI is index.html.
    model_dd = gr.Dropdown(choices=MODEL_IDS, value=DEFAULT_MODEL, visible=False)
    thinking_toggle = gr.Radio(
        choices=[
            "β‘ Fast Mode (direct answer)",
            "π§ Thinking Mode (chain-of-thought reasoning)",
        ],
        value="β‘ Fast Mode (direct answer)",
        visible=False,
    )
    image_input = gr.Textbox(value="", visible=False)  # base64 data URL from JS
    system_prompt = gr.Textbox(value=PRESETS["general"], visible=False)
    max_new_tokens = gr.Slider(minimum=64, maximum=8192, value=1024, visible=False)
    temperature = gr.Slider(minimum=0.0, maximum=2.0, value=0.7, visible=False)
    top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.9, visible=False)

    gr.ChatInterface(
        fn=generate_reply,
        api_name="chat",
        additional_inputs=[
            model_dd,
            thinking_toggle,
            image_input,
            system_prompt,
            max_new_tokens,
            temperature,
            top_p,
        ],
    )
# ──────────────────────────────────────────────────────────────────────────────
# 6. FASTAPI β index.html + HF OAuth + Gradio API
# ──────────────────────────────────────────────────────────────────────────────
import pathlib
import secrets
from urllib.parse import urlencode

fapp = FastAPI()
SESSIONS: dict[str, dict] = {}  # in-memory session store: session_id β user info
HTML = pathlib.Path(__file__).parent / "index.html"

# ── HF OAuth config (auto-injected by HF Spaces when OAuth is enabled) ────────
CLIENT_ID = os.getenv("OAUTH_CLIENT_ID", "")
CLIENT_SECRET = os.getenv("OAUTH_CLIENT_SECRET", "")
SPACE_HOST = os.getenv("SPACE_HOST", "localhost:7860")
REDIRECT_URI = f"https://{SPACE_HOST}/login/callback"

# Log OAuth status at startup so misconfiguration is visible in Space logs.
print(f"[OAuth] CLIENT_ID set: {bool(CLIENT_ID)}")
print(f"[OAuth] CLIENT_SECRET set: {bool(CLIENT_SECRET)}")
print(f"[OAuth] SPACE_HOST: {SPACE_HOST}")
print(f"[OAuth] REDIRECT_URI: {REDIRECT_URI}")

HF_AUTH_URL = "https://huggingface.co/oauth/authorize"
HF_TOKEN_URL = "https://huggingface.co/oauth/token"
HF_USER_URL = "https://huggingface.co/oauth/userinfo"
SCOPES = os.getenv("OAUTH_SCOPES", "openid profile")


def _sid(req: Request) -> Optional[str]:
    """Return the session cookie value for this request, if present."""
    return req.cookies.get("mc_session")


def _user(req: Request) -> Optional[dict]:
    """Look up the logged-in user dict for this request's session, if any."""
    sid = _sid(req)
    return SESSIONS.get(sid) if sid else None
# ── Routes ────────────────────────────────────────────────────────────────────
# FIX: these handlers were defined but never registered on the app (no
# decorators / add_api_route anywhere), so every route 404'd.
@fapp.get("/")
async def root(request: Request):
    """Serve the single-page frontend (index.html next to this file)."""
    html = HTML.read_text(encoding="utf-8") if HTML.exists() else "<h2>index.html missing</h2>"
    return HTMLResponse(html)
# NOTE(review): route path inferred from the handler name — confirm it matches
# the fetch URL used by index.html. FIX: handler was never registered on fapp.
@fapp.get("/oauth/user")
async def oauth_user(request: Request):
    """Return the current session's user info, or 401 when not logged in."""
    u = _user(request)
    if u:
        return JSONResponse(u)
    return JSONResponse({"logged_in": False}, status_code=401)
# FIX: handler was never registered on fapp; path taken from its own log line.
@fapp.get("/oauth/login")
async def oauth_login(request: Request):
    """Start the HF OAuth authorization-code flow with a 302 redirect."""
    print(f"[OAuth] /oauth/login called. CLIENT_ID={bool(CLIENT_ID)}")
    if not CLIENT_ID:
        print("[OAuth] ERROR: OAUTH_CLIENT_ID not set β add hf_oauth: true to README.md")
        return RedirectResponse("/?oauth_error=not_configured")
    # NOTE(review): `state` is generated but not persisted anywhere, so the
    # callback cannot verify it (no CSRF protection) — consider storing it in
    # a short-lived cookie and checking it in /login/callback.
    state = secrets.token_urlsafe(16)
    params = {
        "response_type": "code",
        "client_id": CLIENT_ID,
        "redirect_uri": REDIRECT_URI,
        "scope": SCOPES,
        "state": state,
    }
    url = f"{HF_AUTH_URL}?{urlencode(params)}"
    print(f"[OAuth] Redirecting β {url[:120]}")
    return RedirectResponse(url, status_code=302)
# FIX: handler was never registered on fapp; path must match REDIRECT_URI
# (f"https://{SPACE_HOST}/login/callback") registered with the OAuth provider.
@fapp.get("/login/callback")
async def oauth_callback(code: str = "", error: str = "", state: str = ""):
    """Exchange the OAuth code for a token, fetch user info, create a session.

    On any failure, redirects back to "/" with ?auth_error=1 rather than
    surfacing a raw error page.
    """
    if error or not code:
        print(f"[OAuth] Callback error: {error}")
        return RedirectResponse("/?auth_error=1")
    # NOTE(review): `state` is accepted but never validated against the value
    # issued in /oauth/login — CSRF check is missing.
    # Basic auth as recommended by HF docs
    basic = base64.b64encode(f"{CLIENT_ID}:{CLIENT_SECRET}".encode()).decode()
    async with httpx.AsyncClient() as client:
        # Exchange code for token β use Authorization: Basic header
        tok = await client.post(HF_TOKEN_URL, data={
            "grant_type": "authorization_code",
            "code": code,
            "redirect_uri": REDIRECT_URI,
        }, headers={
            "Accept": "application/json",
            "Authorization": f"Basic {basic}",
        })
        if tok.status_code != 200:
            print(f"[OAuth] Token exchange FAILED: {tok.status_code} {tok.text[:300]}")
            return RedirectResponse("/?auth_error=1")
        access_token = tok.json().get("access_token", "")
        if not access_token:
            print(f"[OAuth] No access_token: {tok.text[:300]}")
            return RedirectResponse("/?auth_error=1")
        # Get user info
        uinfo = await client.get(HF_USER_URL, headers={"Authorization": f"Bearer {access_token}"})
        if uinfo.status_code != 200:
            print(f"[OAuth] Userinfo FAILED: {uinfo.status_code}")
            return RedirectResponse("/?auth_error=1")
        user = uinfo.json()
    print(f"[OAuth] Login OK: {user.get('preferred_username', '?')}")
    # Random opaque session id; user data lives server-side in SESSIONS.
    sid = secrets.token_urlsafe(32)
    SESSIONS[sid] = {
        "logged_in": True,
        "username": user.get("preferred_username", user.get("name", "User")),
        "name": user.get("name", ""),
        "avatar": user.get("picture", ""),
        "profile": f"https://huggingface.co/{user.get('preferred_username', '')}",
    }
    resp = RedirectResponse("/")
    # httponly + secure + lax: cookie invisible to JS, HTTPS-only, 7-day TTL.
    resp.set_cookie("mc_session", sid, httponly=True, samesite="lax", secure=True, max_age=60*60*24*7)
    return resp
# NOTE(review): route path inferred from the handler name — confirm it matches
# index.html. FIX: handler was never registered on fapp.
@fapp.get("/oauth/logout")
async def oauth_logout(request: Request):
    """Destroy the server-side session and clear the cookie, then go home."""
    sid = _sid(request)
    if sid and sid in SESSIONS:
        del SESSIONS[sid]
    resp = RedirectResponse("/")
    resp.delete_cookie("mc_session")
    return resp
# FIX: handler was never registered on fapp; conventional liveness path used —
# confirm against whatever probes the Space.
@fapp.get("/health")
async def health():
    """Liveness probe: always returns {"status": "ok"}."""
    return {"status": "ok"}
# Mount Gradio at /gradio β API at /gradio/gradio_api/call/chat
app = gr.mount_gradio_app(fapp, gradio_demo, path="/gradio")

# ── Launch ────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    print("[BOOT] All components initialized. Starting uvicorn on :7860", flush=True)
    uvicorn.run(app, host="0.0.0.0", port=7860)