Spaces:
Running
Running
| import os | |
| import json | |
| from openai import OpenAI | |
| from gradio import Server | |
| from fastapi.responses import HTMLResponse, StreamingResponse | |
| from fastapi import Request | |
| # ── OpenAI-compatible client pointing at HF Router ────────────────────────── | |
# Model identifier passed as `model=` on every chat-completion request.
MODEL = "MiniMaxAI/MiniMax-M3:novita"

# OpenAI-compatible client aimed at the Hugging Face router endpoint. The API
# key comes from the HF_TOKEN environment variable and falls back to an empty
# string when the variable is unset; the extra header is sent on every request.
client = OpenAI(
    api_key=os.getenv("HF_TOKEN", ""),
    base_url="https://router.huggingface.co/v1",
    default_headers={"X-HF-Bill-To": "huggingface"},
)

# Application object that is launched at the bottom of the file.
app = Server()
| # ── Serve the custom frontend ─────────────────────────────────────────────── | |
async def homepage():
    """Return the custom frontend page.

    Reads ``index.html`` from the directory containing this source file
    (decoded as UTF-8) and wraps its text in an ``HTMLResponse``.

    NOTE(review): no route decorator is visible on this handler in this view
    of the file; confirm how it is registered with ``app``.
    """
    # Resolve relative to this file rather than the current working directory.
    here = os.path.dirname(os.path.abspath(__file__))
    with open(os.path.join(here, "index.html"), encoding="utf-8") as page:
        return HTMLResponse(content=page.read())
| # ── Streaming chat endpoint (SSE) ─────────────────────────────────────────── | |
async def chat_stream(request: Request):
    """Stream a chat completion back to the caller as Server-Sent Events.

    The request body must be a JSON object whose ``messages`` list is
    forwarded unchanged to the chat-completions API. A message's ``content``
    may be a plain string (text-only)::

        {"role": "user", "content": "..."}

    or a list of typed parts (multimodal)::

        {"role": "user", "content": [
            {"type": "text", "text": "..."},
            {"type": "image_url", "image_url": {"url": "..."}}
        ]}

    A missing ``messages`` key is treated as an empty list.

    Args:
        request: Incoming HTTP request carrying the JSON body described above.

    Returns:
        A ``text/event-stream`` ``StreamingResponse``. Every non-empty content
        delta is emitted as ``data: {"token": "..."}``; a failure while
        talking to the model is emitted as ``data: {"error": "..."}``; the
        stream always ends with ``data: [DONE]``.

    NOTE(review): no route decorator is visible on this handler in this view
    of the file; confirm how it is registered with ``app``.
    """
    body = await request.json()
    messages = body.get("messages", [])

    # Deliberately a plain (non-async) generator: ``client`` is the
    # synchronous OpenAI client and its stream is consumed with a blocking
    # ``for`` loop. StreamingResponse iterates non-async generators in a
    # worker thread, so waiting on the upstream model does not stall the
    # event loop for other requests.
    def generate():
        try:
            stream = client.chat.completions.create(
                model=MODEL,
                messages=messages,
                stream=True,
            )
            for chunk in stream:
                # Chunks without any choices (e.g. usage-only chunks) carry
                # no text; skip them instead of raising IndexError.
                if not chunk.choices:
                    continue
                content = chunk.choices[0].delta.content
                if content:
                    payload = json.dumps({"token": content})
                    yield f"data: {payload}\n\n"
        except Exception as e:
            # Boundary handler: the response has already started, so report
            # the failure in-band as an SSE event rather than raising.
            yield f"data: {json.dumps({'error': str(e)})}\n\n"
        # Terminator is sent on both the success and the error path.
        yield "data: [DONE]\n\n"

    return StreamingResponse(
        generate(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            # Asks buffering reverse proxies to pass events through as sent.
            "X-Accel-Buffering": "no",
        },
    )
| # Launch the `app` instance created above. This runs unconditionally at module level (no `__main__` guard). | |
| # NOTE(review): `show_error=True` presumably surfaces server-side errors to the client; confirm against the Gradio docs. | |
| app.launch(show_error=True) | |