# QWEN-3_5-CHAT / app.py -- Hugging Face Space (author: mayafree)
# Commit: "Update app.py" 0454a0a (verified)
# Boot sequence: log the interpreter and key library versions, and import
# dependencies in two guarded stages so that a missing package produces a
# single readable "[BOOT] FATAL" log line in the Space logs instead of a
# bare traceback.
import sys
print(f"[BOOT] Python {sys.version}", flush=True)

import base64
import os
import re
from typing import Generator, Optional

try:
    import gradio as gr
    print(f"[BOOT] gradio {gr.__version__}", flush=True)
except ImportError as e:
    print(f"[BOOT] FATAL: {e}", flush=True)
    sys.exit(1)

try:
    from huggingface_hub import InferenceClient
    import httpx
    import uvicorn
    from fastapi import FastAPI, Request
    from fastapi.responses import HTMLResponse, RedirectResponse, JSONResponse
    print("[BOOT] All imports OK", flush=True)
except ImportError as e:
    print(f"[BOOT] FATAL: {e} β€” add to requirements.txt", flush=True)
    sys.exit(1)
# ══════════════════════════════════════════════════════════════════════════════
# 1. MODEL CAPABILITY MATRIX
# ══════════════════════════════════════════════════════════════════════════════
# Capability matrix for the selectable Qwen3.5 models.
# generate_reply() uses it to clamp max_tokens/temperature and to gate the
# thinking/vision features per model; "color", "badge" and "desc" are pure
# presentation strings consumed by the front-end.
MODEL_CAPS: dict[str, dict] = {
    "Qwen/Qwen3.5-122B-A10B": {
        "arch": "MoE", "active": "10B / 122B total",
        "ctx": "262K β†’ 1M", "thinking": True, "vision": True,
        "max_tokens": 8192, "temp_max": 2.0, "top_p": True,
        "color": "#7c3aed",
        "badge": "πŸ† Best Overall Β· BFCL 72.2 Β· GPQA 86.6 Β· SWE 72.0",
        "desc": "Top reasoning & agents Β· Complex math Β· Long context",
    },
    "Qwen/Qwen3.5-27B": {
        "arch": "Dense", "active": "27B (all active)",
        "ctx": "262K β†’ 1M", "thinking": True, "vision": True,
        "max_tokens": 8192, "temp_max": 2.0, "top_p": True,
        "color": "#0d9488",
        "badge": "🎯 Dense #1 · IFEval 95.0 · SWE 72.4 · PolyMATH 71.2",
        "desc": "Instruction king Β· Creative writing Β· 201 languages",
    },
    "Qwen/Qwen3.5-35B-A3B": {
        "arch": "MoE", "active": "3B / 35B total",
        "ctx": "262K β†’ 1M", "thinking": True, "vision": True,
        "max_tokens": 4096, "temp_max": 1.5, "top_p": True,
        "color": "#d97706",
        "badge": "⚑ Flash Speed · TAU2 81.2 · MMLU-Pro 85.3",
        "desc": "Fastest Β· 3B active params Β· ~6Γ— faster than 27B",
    }
}
MODEL_IDS = list(MODEL_CAPS.keys())   # model ids in declaration order (dropdown choices)
DEFAULT_MODEL = MODEL_IDS[0]          # first entry (the 122B MoE) is the default selection
# ══════════════════════════════════════════════════════════════════════════════
# 2. SYSTEM PROMPT PRESETS
# ══════════════════════════════════════════════════════════════════════════════
# System-prompt presets; keys are the preset ids the front-end sends, values
# are the raw system messages passed through to the model unchanged.
PRESETS = {
    "general": "You are a helpful, harmless, and honest AI assistant.",
    "code": "You are an expert software engineer. Write clean, efficient, well-commented code. Explain your approach before writing. Use modern best practices.",
    "math": "You are a world-class mathematician. Break problems step-by-step. Show full working. Use LaTeX where helpful.",
    "creative": "You are a brilliant creative writer. Be imaginative, vivid, and engaging. Adapt tone and style to the request.",
    "translate": "You are a professional translator fluent in 201 languages. Provide accurate, natural-sounding translations with cultural context.",
    "research": "You are a rigorous research analyst. Provide structured, well-reasoned analysis. Identify assumptions and acknowledge uncertainty.",
}
# ══════════════════════════════════════════════════════════════════════════════
# 3. THINKING MODE HELPERS
# ══════════════════════════════════════════════════════════════════════════════
def build_user_message(text: str, thinking: bool) -> str:
    """Prefix *text* with the Qwen thinking-mode control tag.

    Returns "/think\\n" + text when *thinking* is true, "/no_think\\n" + text
    otherwise.
    """
    if thinking:
        tag = "/think\n"
    else:
        tag = "/no_think\n"
    return tag + text
def parse_think_blocks(text: str) -> tuple[str, str]:
    """Split *text* around its first <think>...</think> block.

    Returns a (reasoning, answer) pair; reasoning is "" when no block is
    present, in which case the answer is the input unchanged.
    """
    match = re.search(r"<think>(.*?)</think>\s*", text, re.DOTALL)
    if match is None:
        return "", text
    reasoning = match.group(1).strip()
    answer = text[match.end():].strip()
    return reasoning, answer


def format_response(raw: str) -> str:
    """Render a raw model reply as markdown.

    Any <think> reasoning chain is folded into a collapsible <details>
    element as a quoted block; replies without a chain pass through as-is.
    """
    chain, answer = parse_think_blocks(raw)
    if not chain:
        return raw
    quoted = "\n".join("> " + line for line in chain.split("\n"))
    header = (
        "<details>\n"
        "<summary>🧠 Reasoning Chain β€” click to expand</summary>\n\n"
    )
    footer = "\n\n</details>\n\n"
    return header + quoted + footer + answer
# ══════════════════════════════════════════════════════════════════════════════
# 4. STREAMING BACKEND
# ══════════════════════════════════════════════════════════════════════════════
def generate_reply(
    message: str,
    history: list,
    model_id: str,
    thinking_mode: str,
    image_input,
    system_prompt: str,
    max_new_tokens: int,
    temperature: float,
    top_p: float,
) -> Generator[str, None, None]:
    """Stream a chat completion for *message*, yielding progressively
    formatted markdown (reasoning chains folded via format_response).

    history may be messages-style dicts or legacy (user, assistant) tuples;
    image_input may be a base64 data-URL string (JS front-end), a PIL image,
    or a numpy array (legacy Gradio). Generation knobs are clamped to the
    per-model limits in MODEL_CAPS. On failure a markdown error message is
    yielded instead of raising.
    """
    token = os.getenv("HF_TOKEN")
    client = InferenceClient(token=token, timeout=120)
    cap = MODEL_CAPS[model_id]
    use_think = "Thinking" in thinking_mode and cap["thinking"]
    # Clamp request parameters to what the chosen model supports.
    max_new_tokens = min(int(max_new_tokens), cap["max_tokens"])
    temperature = min(float(temperature), cap["temp_max"])

    messages: list[dict] = []
    if system_prompt.strip():
        messages.append({"role": "system", "content": system_prompt.strip()})

    def _as_text(value):
        # Normalize Gradio message content (plain str or a list of typed
        # parts) to a single text string; None stays None.
        if value is None:
            return None
        if isinstance(value, list):
            return " ".join(p.get("text", "") for p in value
                            if isinstance(p, dict) and p.get("type") == "text")
        return str(value)

    for turn in history:
        if isinstance(turn, dict):
            # Messages-style history: {"role": ..., "content": ...}
            role = turn.get("role", "")
            text = _as_text(turn.get("content") or "") or ""
            if role == "user":
                messages.append({"role": "user", "content": text})
            elif role == "assistant":
                # Strip old reasoning chains so they don't re-enter the context.
                _, clean = parse_think_blocks(text)
                messages.append({"role": "assistant", "content": clean})
        else:
            # Legacy (user, assistant) tuple history.
            try:
                u, a = (turn[0] or None), (turn[1] if len(turn) > 1 else None)
            except (IndexError, TypeError):
                continue
            if u := _as_text(u):
                messages.append({"role": "user", "content": u})
            if a := _as_text(a):
                _, clean = parse_think_blocks(a)
                messages.append({"role": "assistant", "content": clean})

    user_text = build_user_message(message, use_think)
    if image_input and cap["vision"]:
        import io
        from PIL import Image as PILImage
        if isinstance(image_input, str) and image_input.startswith("data:"):
            # Already a data URL from the JS front-end: pass it through
            # unchanged so the original MIME type (png/webp/...) is kept.
            # (Previously the payload was re-labelled image/jpeg regardless
            # of the actual encoding.)
            image_url = image_input
        else:
            # PIL image (Gradio UI) or numpy array (legacy Gradio).
            buf = io.BytesIO()
            if not isinstance(image_input, PILImage.Image):
                image_input = PILImage.fromarray(image_input)
            if image_input.mode != "RGB":
                # JPEG cannot encode alpha/palette modes (RGBA, P, LA, ...).
                image_input = image_input.convert("RGB")
            image_input.save(buf, format="JPEG")
            b64 = base64.b64encode(buf.getvalue()).decode()
            image_url = f"data:image/jpeg;base64,{b64}"
        content = [
            {"type": "image_url", "image_url": {"url": image_url}},
            {"type": "text", "text": user_text},
        ]
    else:
        content = user_text
    messages.append({"role": "user", "content": content})

    try:
        stream = client.chat_completion(
            model=model_id, messages=messages,
            max_tokens=max_new_tokens, temperature=temperature,
            top_p=float(top_p), stream=True,
        )
        raw = ""
        for chunk in stream:
            if not chunk.choices:
                continue
            delta = chunk.choices[0].delta
            if not delta or not delta.content:
                continue
            raw += delta.content
            # Re-format the accumulated text on every token so the UI can
            # show the reasoning chain collapsing live.
            yield format_response(raw)
    except Exception as exc:
        yield (f"**Error:** `{model_id}`\n\n```\n{exc}\n```\n\n"
               "_Check HF\\_TOKEN or try another model._")
# ══════════════════════════════════════════════════════════════════════════════
# 5. GRADIO BLOCKS (hidden – only serves /gradio/gradio_api/call/chat API)
# ══════════════════════════════════════════════════════════════════════════════
# Hidden Gradio app: every widget is invisible because the real front-end is
# a static index.html; this Blocks instance exists only to expose the
# streaming chat endpoint at /gradio/gradio_api/call/chat.
with gr.Blocks(title="Qwen3.5 MultiChat API") as gradio_demo:
    # These hidden inputs mirror generate_reply's extra parameters (after
    # message/history), in the same order as additional_inputs below.
    model_dd = gr.Dropdown(choices=MODEL_IDS, value=DEFAULT_MODEL, visible=False)
    thinking_toggle = gr.Radio(
        choices=["⚑ Fast Mode (direct answer)",
                 "🧠 Thinking Mode (chain-of-thought reasoning)"],
        value="⚑ Fast Mode (direct answer)",
        visible=False,
    )
    image_input = gr.Textbox(value="", visible=False)  # receives base64 data URL from JS
    system_prompt = gr.Textbox(value=PRESETS["general"], visible=False)
    max_new_tokens = gr.Slider(minimum=64, maximum=8192, value=1024, visible=False)
    temperature = gr.Slider(minimum=0.0, maximum=2.0, value=0.7, visible=False)
    top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.9, visible=False)
    gr.ChatInterface(
        fn=generate_reply,
        api_name="chat",
        additional_inputs=[
            model_dd, thinking_toggle, image_input,
            system_prompt, max_new_tokens, temperature, top_p,
        ],
    )
# ══════════════════════════════════════════════════════════════════════════════
# 6. FASTAPI – index.html + HF OAuth + Gradio API
# ══════════════════════════════════════════════════════════════════════════════
# FastAPI wrapper: serves index.html at "/", handles HF OAuth, and (below)
# mounts the Gradio app under /gradio.
import pathlib, secrets

fapp = FastAPI()
SESSIONS: dict[str, dict] = {}  # session_id -> user info (in-memory; lost on restart)
HTML = pathlib.Path(__file__).parent / "index.html"

# -- HF OAuth config (auto-injected by HF Spaces when OAuth is enabled) --
CLIENT_ID = os.getenv("OAUTH_CLIENT_ID", "")
CLIENT_SECRET = os.getenv("OAUTH_CLIENT_SECRET", "")
SPACE_HOST = os.getenv("SPACE_HOST", "localhost:7860")
REDIRECT_URI = f"https://{SPACE_HOST}/login/callback"

# Startup OAuth status log (booleans only -- never print the secret itself).
print(f"[OAuth] CLIENT_ID set: {bool(CLIENT_ID)}")
print(f"[OAuth] CLIENT_SECRET set: {bool(CLIENT_SECRET)}")
print(f"[OAuth] SPACE_HOST: {SPACE_HOST}")
print(f"[OAuth] REDIRECT_URI: {REDIRECT_URI}")

# Hugging Face OAuth endpoints.
HF_AUTH_URL = "https://huggingface.co/oauth/authorize"
HF_TOKEN_URL = "https://huggingface.co/oauth/token"
HF_USER_URL = "https://huggingface.co/oauth/userinfo"
SCOPES = os.getenv("OAUTH_SCOPES", "openid profile")

from urllib.parse import urlencode
def _sid(req: Request) -> Optional[str]:
    """Return the session id from the request's "mc_session" cookie, if set."""
    cookie_value = req.cookies.get("mc_session")
    return cookie_value
def _user(req: Request) -> Optional[dict]:
    """Resolve the request's session cookie to a stored user record, or None."""
    sid = _sid(req)
    if not sid:
        return None
    return SESSIONS.get(sid)
# ── Routes ────────────────────────────────────────────────────────────────────
@fapp.get("/")
async def root(request: Request):
    """Serve the single-page front-end (index.html next to this file)."""
    if HTML.exists():
        body = HTML.read_text(encoding="utf-8")
    else:
        body = "<h2>index.html missing</h2>"
    return HTMLResponse(body)
@fapp.get("/oauth/user")
async def oauth_user(request: Request):
    """Return the logged-in user's info, or a 401 JSON body when anonymous."""
    user = _user(request)
    if user is None:
        return JSONResponse({"logged_in": False}, status_code=401)
    return JSONResponse(user)
@fapp.get("/oauth/login")
async def oauth_login(request: Request):
    """Start the HF OAuth authorization-code flow by redirecting to HF."""
    print(f"[OAuth] /oauth/login called. CLIENT_ID={bool(CLIENT_ID)}")
    if not CLIENT_ID:
        print("[OAuth] ERROR: OAUTH_CLIENT_ID not set β€” add hf_oauth: true to README.md")
        return RedirectResponse("/?oauth_error=not_configured")
    # NOTE(review): `state` is sent but never persisted or compared in
    # /login/callback β€” confirm whether CSRF verification is wanted.
    query = urlencode({
        "response_type": "code",
        "client_id": CLIENT_ID,
        "redirect_uri": REDIRECT_URI,
        "scope": SCOPES,
        "state": secrets.token_urlsafe(16),
    })
    url = f"{HF_AUTH_URL}?{query}"
    print(f"[OAuth] Redirecting β†’ {url[:120]}")
    return RedirectResponse(url, status_code=302)
@fapp.get("/login/callback")
async def oauth_callback(code: str = "", error: str = "", state: str = ""):
    """Complete the OAuth flow: exchange the code for a token, fetch the
    user's profile, create a server-side session and set its cookie.

    NOTE(review): `state` is accepted but never compared against the value
    issued in /oauth/login -- confirm whether CSRF verification is wanted.
    Any failure redirects back to "/" with ?auth_error=1.
    """
    if error or not code:
        print(f"[OAuth] Callback error: {error}")
        return RedirectResponse("/?auth_error=1")
    # Basic auth as recommended by HF docs
    basic = base64.b64encode(f"{CLIENT_ID}:{CLIENT_SECRET}".encode()).decode()
    async with httpx.AsyncClient() as client:
        # Exchange code for token β€” use Authorization: Basic header
        tok = await client.post(HF_TOKEN_URL, data={
            "grant_type": "authorization_code",
            "code": code,
            "redirect_uri": REDIRECT_URI,
        }, headers={
            "Accept": "application/json",
            "Authorization": f"Basic {basic}",
        })
        if tok.status_code != 200:
            print(f"[OAuth] Token exchange FAILED: {tok.status_code} {tok.text[:300]}")
            return RedirectResponse("/?auth_error=1")
        access_token = tok.json().get("access_token", "")
        if not access_token:
            print(f"[OAuth] No access_token: {tok.text[:300]}")
            return RedirectResponse("/?auth_error=1")
        # Get user info
        uinfo = await client.get(HF_USER_URL, headers={"Authorization": f"Bearer {access_token}"})
        if uinfo.status_code != 200:
            print(f"[OAuth] Userinfo FAILED: {uinfo.status_code}")
            return RedirectResponse("/?auth_error=1")
        user = uinfo.json()
        print(f"[OAuth] Login OK: {user.get('preferred_username', '?')}")
        # Store the profile server-side; the browser only holds an opaque id.
        sid = secrets.token_urlsafe(32)
        SESSIONS[sid] = {
            "logged_in": True,
            "username": user.get("preferred_username", user.get("name", "User")),
            "name": user.get("name", ""),
            "avatar": user.get("picture", ""),
            "profile": f"https://huggingface.co/{user.get('preferred_username', '')}",
        }
        resp = RedirectResponse("/")
        # One-week httponly session cookie; secure is safe because Spaces serve HTTPS.
        resp.set_cookie("mc_session", sid, httponly=True, samesite="lax", secure=True, max_age=60*60*24*7)
        return resp
@fapp.get("/oauth/logout")
async def oauth_logout(request: Request):
    """Drop the server-side session (if any) and clear the browser cookie."""
    sid = _sid(request)
    if sid and sid in SESSIONS:
        SESSIONS.pop(sid)
    resp = RedirectResponse("/")
    resp.delete_cookie("mc_session")
    return resp
@fapp.get("/health")
async def health():
    """Liveness probe for the Space runtime."""
    payload = {"status": "ok"}
    return payload
# Mount Gradio at /gradio β†’ API at /gradio/gradio_api/call/chat
# `app` is the ASGI entry point uvicorn serves.
app = gr.mount_gradio_app(fapp, gradio_demo, path="/gradio")

# -- Launch --
if __name__ == "__main__":
    # Port 7860 is the default port HF Spaces expects the app to listen on.
    print("[BOOT] All components initialized. Starting uvicorn on :7860", flush=True)
    uvicorn.run(app, host="0.0.0.0", port=7860)