Spaces:

akhaliq
/

MiniMax-M3

Running

App Files Files Community

MiniMax-M3 / app.py

akhaliq HF Staff

feat: MiniMax-M3 multimodal chat with gradio.Server + custom UI

09a378c 5 days ago

Raw

History Blame Contribute Delete

2.53 kB

	import os
	import json
	from openai import OpenAI
	from gradio import Server
	from fastapi.responses import HTMLResponse, StreamingResponse
	from fastapi import Request

	# ── OpenAI-compatible client pointing at HF Router ──────────────────────────
	client = OpenAI(
	    # Falls back to an empty string when HF_TOKEN is not set in the environment.
	    api_key=os.getenv("HF_TOKEN", ""),
	    base_url="https://router.huggingface.co/v1",
	    # Extra header attached to every request made through this client.
	    default_headers={"X-HF-Bill-To": "huggingface"},
	)

	# Model identifier passed as `model=` on each chat completion request.
	MODEL = "MiniMaxAI/MiniMax-M3:novita"

	# Application object on which the HTTP routes below are registered.
	app = Server()


	# ── Serve the custom frontend ────────────────────────────────────────────────
	@app.get("/")
	async def homepage():
	    """Return the contents of ``index.html`` (next to this file) as an HTML response."""
	    here = os.path.dirname(os.path.abspath(__file__))
	    with open(os.path.join(here, "index.html"), "r", encoding="utf-8") as page:
	        return HTMLResponse(content=page.read())


	# ── Streaming chat endpoint (SSE) ────────────────────────────────────────────
	@app.post("/chat")
	async def chat_stream(request: Request):
	    """Stream a chat completion back to the caller as Server-Sent Events.

	    Expects a JSON body of the form::

	        {
	          "messages": [
	            {"role": "user", "content": "..."},
	            {"role": "user", "content": [
	              {"type": "text", "text": "..."},
	              {"type": "image_url", "image_url": {"url": "..."}}
	            ]}
	          ]
	        }

	    ``content`` is either a plain string (text-only) or a list of typed
	    parts (multimodal). The ``messages`` list is forwarded to the model
	    unchanged; a missing ``messages`` key is treated as an empty list.

	    Returns:
	        A ``text/event-stream`` response. Every non-empty token is sent as
	        ``data: {"token": "..."}``; a failure while talking to the model is
	        sent as ``data: {"error": "..."}``. In both cases the stream ends
	        with ``data: [DONE]``.
	    """
	    body = await request.json()
	    messages = body.get("messages", [])

	    # Deliberately a plain (synchronous) generator: `client` is the blocking
	    # OpenAI client, so iterating its stream inside an `async def` would stall
	    # the event loop for the whole completion. StreamingResponse iterates
	    # non-async iterables in a worker thread instead.
	    def generate():
	        try:
	            stream = client.chat.completions.create(
	                model=MODEL,
	                messages=messages,
	                stream=True,
	            )
	            for chunk in stream:
	                # A chunk may carry an empty `choices` list; indexing [0]
	                # would raise and cut the stream short with an error event.
	                if not chunk.choices:
	                    continue
	                content = chunk.choices[0].delta.content
	                if content:
	                    payload = json.dumps({"token": content})
	                    yield f"data: {payload}\n\n"
	        except Exception as e:
	            # Top-level boundary of the stream: report the failure in-band so
	            # the frontend can display it instead of seeing a dropped connection.
	            yield f"data: {json.dumps({'error': str(e)})}\n\n"
	        # Terminator is sent after both the success and the error path.
	        yield "data: [DONE]\n\n"

	    return StreamingResponse(
	        generate(),
	        media_type="text/event-stream",
	        headers={
	            "Cache-Control": "no-cache",
	            # Ask buffering reverse proxies to pass events through immediately.
	            "X-Accel-Buffering": "no",
	        },
	    )


	# Start the server at import/run time (there is no __main__ guard here).
	# NOTE(review): show_error=True presumably surfaces server-side errors to the
	# client, as with Gradio's launch() — confirm for Server.
	app.launch(show_error=True)