MiniMax-M3 / app.py
akhaliq's picture
akhaliq HF Staff
feat: MiniMax-M3 multimodal chat with gradio.Server + custom UI
09a378c
Raw
History Blame Contribute Delete
2.53 kB
import os
import json
from openai import OpenAI
from gradio import Server
from fastapi.responses import HTMLResponse, StreamingResponse
from fastapi import Request
# ── OpenAI-compatible client pointing at HF Router ──────────────────────────
# The API key is read from the HF_TOKEN environment variable; an empty string
# is used when the variable is unset.
client = OpenAI(
    api_key=os.getenv("HF_TOKEN", ""),
    base_url="https://router.huggingface.co/v1",
    default_headers={"X-HF-Bill-To": "huggingface"},
)

# Model identifier passed as `model=` on every chat completion request.
MODEL = "MiniMaxAI/MiniMax-M3:novita"

# Server instance on which the route handlers in this file are registered.
app = Server()
# ── Serve the custom frontend ────────────────────────────────────────────────
@app.get("/")
async def homepage():
    """Return the contents of ``index.html`` (located next to this file) as HTML."""
    here = os.path.dirname(os.path.abspath(__file__))
    with open(os.path.join(here, "index.html"), encoding="utf-8") as page:
        return HTMLResponse(content=page.read())
# ── Streaming chat endpoint (SSE) ────────────────────────────────────────────
@app.post("/chat")
async def chat_stream(request: Request):
    """Stream a chat completion to the client as Server-Sent Events (SSE).

    Expected JSON request body::

        {
          "messages": [
            {"role": "user", "content": "..."},
            {"role": "user", "content": [
              {"type": "text", "text": "..."},
              {"type": "image_url", "image_url": {"url": "..."}}
            ]}
          ]
        }

    A message's ``content`` is either a plain string (text-only) or a list of
    typed parts (multimodal). ``messages`` is forwarded unchanged to the
    chat completions API.

    Events emitted on the stream, each as a ``data: ...`` line:

    * ``{"token": "<text>"}`` for every non-empty content delta,
    * ``{"error": "<message>"}`` if the body is unusable or the upstream
      call fails,
    * ``[DONE]`` as the final event in every case.

    Returns:
        StreamingResponse with media type ``text/event-stream``.
    """
    # A malformed body is reported through the same SSE error channel as
    # upstream failures instead of escaping as an unhandled exception.
    try:
        body = await request.json()
    except ValueError:  # json.JSONDecodeError / UnicodeDecodeError
        body = None
    messages = body.get("messages", []) if isinstance(body, dict) else None

    def sse_event(payload):
        """Serialize *payload* as one SSE ``data:`` event."""
        return f"data: {json.dumps(payload)}\n\n"

    # Deliberately a plain (non-async) generator: the OpenAI client used here
    # is synchronous, and StreamingResponse iterates non-async iterables in a
    # worker thread, so waiting on upstream chunks does not block the event loop.
    def generate():
        try:
            if messages is None:
                raise ValueError(
                    'Request body must be a JSON object with a "messages" list.'
                )
            stream = client.chat.completions.create(
                model=MODEL,
                messages=messages,
                stream=True,
            )
            for chunk in stream:
                # Some chunks may carry no choices; indexing [0] on them would
                # raise IndexError and abort an otherwise healthy stream.
                if not chunk.choices:
                    continue
                content = chunk.choices[0].delta.content
                if content:
                    yield sse_event({"token": content})
        except Exception as e:
            # Top-level boundary for the stream: surface the failure to the
            # client as an event rather than silently dropping the connection.
            yield sse_event({"error": str(e)})
        yield "data: [DONE]\n\n"

    return StreamingResponse(
        generate(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "X-Accel-Buffering": "no",
        },
    )
# Start the server at import/run time. NOTE(review): show_error=True is passed
# straight to launch(); presumably it surfaces server-side errors to the
# client as in Gradio's other launch() APIs — confirm for Server.
app.launch(show_error=True)