"""OpenAI-compatible HTTP facade over a Gradio queue hosted on Hugging Face.

Exposes ``/v1/models`` and ``/v1/chat/completions``. Each chat request is
flattened into a single text prompt, submitted to the Gradio queue at
``HF_BASE``, and the final ``process_completed`` event is translated back into
an OpenAI-style response (either a JSON body or a one-chunk SSE stream).
"""

import json
import logging
import random
import string
import time
import uuid

import requests
import uvicorn
from fastapi import FastAPI, Request
from fastapi.concurrency import run_in_threadpool
from fastapi.responses import StreamingResponse, JSONResponse

logger = logging.getLogger(__name__)

app = FastAPI()

# =========================================================
# CONFIG
# =========================================================

HF_BASE = "https://akhaliq-kimi-k2-6.hf.space"

HEADERS = {
    "accept": "*/*",
    "content-type": "application/json",
    "origin": HF_BASE,
    "referer": f"{HF_BASE}/?__theme=system",
    "user-agent": "Mozilla/5.0",
    "x-gradio-user": "api",
}

MODEL_ID = "kimi-k2"

# Timeouts (seconds) passed to ``requests`` for the two upstream calls.
JOIN_TIMEOUT = 120
STREAM_TIMEOUT = 600


# =========================================================
# HELPERS
# =========================================================

def random_session():
    """Return a random 12-character lowercase alphanumeric session hash."""
    alphabet = string.ascii_lowercase + string.digits
    return "".join(random.choices(alphabet, k=12))


def _error_response(status_code, message, error_type):
    """Build a JSON error response using an OpenAI-style ``error`` envelope."""
    return JSONResponse(
        status_code=status_code,
        content={"error": {"message": message, "type": error_type}},
    )


def _content_to_text(content):
    """Flatten a message ``content`` value into plain text.

    Strings pass through unchanged and ``None`` becomes an empty string. A
    list is treated as a sequence of content parts: bare strings and dicts
    with ``"type": "text"`` contribute their text; any other part is dropped
    because the upstream call only accepts a text prompt.
    """
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        pieces = []
        for part in content:
            if isinstance(part, str):
                pieces.append(part)
            elif isinstance(part, dict) and part.get("type") == "text":
                pieces.append(str(part.get("text", "")))
        return "".join(pieces)
    return str(content)


def _build_prompt(messages):
    """Render chat messages as ``ROLE: content`` lines, one per message."""
    lines = []
    for message in messages:
        role = str(message.get("role", "user")).upper()
        text = _content_to_text(message.get("content", ""))
        lines.append(f"{role}: {text}\n")
    return "".join(lines)


def _join_queue(prompt, session_hash):
    """Submit ``prompt`` to the Gradio queue under ``session_hash``.

    Blocking. Raises ``requests.RequestException`` on transport errors or a
    non-2xx upstream status.
    """
    payload = {
        "data": [prompt, [], None],
        "event_data": None,
        "fn_index": 0,
        "trigger_id": None,
        "session_hash": session_hash,
    }
    response = requests.post(
        f"{HF_BASE}/gradio_api/queue/join?__theme=system",
        headers=HEADERS,
        json=payload,
        timeout=JOIN_TIMEOUT,
    )
    response.raise_for_status()


def _parse_sse_line(raw_line):
    """Decode one SSE line and return its JSON object, or ``None``.

    ``None`` is returned for lines that are not ``data:`` fields, are not
    valid UTF-8, are not valid JSON, or whose JSON value is not an object.
    """
    try:
        decoded = raw_line.decode("utf-8")
    except UnicodeDecodeError:
        return None
    if not decoded.startswith("data:"):
        return None
    try:
        parsed = json.loads(decoded[5:].strip())
    except json.JSONDecodeError:
        return None
    return parsed if isinstance(parsed, dict) else None


def _wait_for_output(session_hash):
    """Read the queue's event stream until ``process_completed`` arrives.

    Blocking. Returns ``output.data[0]`` from the completion event, or
    ``None`` when the stream ends without one or the event carries no usable
    output. Raises ``requests.RequestException`` on transport errors or a
    non-2xx upstream status.

    NOTE(review): events other than ``process_completed`` are ignored, so no
    partial output is surfaced - confirm whether upstream emits usable
    incremental events.
    """
    with requests.get(
        f"{HF_BASE}/gradio_api/queue/data",
        params={"session_hash": session_hash},
        headers={**HEADERS, "accept": "text/event-stream"},
        stream=True,
        timeout=STREAM_TIMEOUT,
    ) as response:
        response.raise_for_status()
        for raw_line in response.iter_lines():
            if not raw_line:
                continue
            event = _parse_sse_line(raw_line)
            if event is None or event.get("msg") != "process_completed":
                continue
            # This is the terminal event: stop here even when it has no
            # output instead of reading until the connection drops.
            try:
                return event["output"]["data"][0]
            except (KeyError, IndexError, TypeError):
                logger.warning("process_completed event without output: %r", event)
                return None
    return None


def _stream_completion(session_hash):
    """Yield the SSE body for a streamed chat completion.

    This is deliberately a *sync* generator: it performs blocking network
    reads, and a sync iterator handed to ``StreamingResponse`` is consumed in
    a worker thread rather than on the event loop.
    """
    completion_id = f"chatcmpl-{uuid.uuid4().hex}"
    try:
        output = _wait_for_output(session_hash)
    except requests.RequestException:
        logger.exception("Upstream event stream failed")
        output = None

    if output is None:
        # Response headers are already sent, so report the failure in-band.
        error = {
            "error": {
                "message": "Upstream did not return a completion.",
                "type": "upstream_error",
            }
        }
        yield f"data: {json.dumps(error)}\n\n"
    else:
        chunk = {
            "id": completion_id,
            "object": "chat.completion.chunk",
            "created": int(time.time()),
            "model": MODEL_ID,
            "choices": [
                {
                    "index": 0,
                    "delta": {"content": output},
                    "finish_reason": "stop",
                }
            ],
        }
        yield f"data: {json.dumps(chunk)}\n\n"

    # Always terminate the stream so clients do not wait indefinitely.
    yield "data: [DONE]\n\n"


# =========================================================
# ROOT
# =========================================================

@app.get("/")
async def root():
    """Report service status."""
    return {
        "status": "ok",
        "provider": "Kimi K2",
        "openai_compatible": True,
    }


# =========================================================
# MODELS
# =========================================================

@app.get("/v1/models")
async def models():
    """List the single model exposed by this proxy."""
    return {
        "object": "list",
        "data": [
            {
                "id": MODEL_ID,
                "object": "model",
                "owned_by": "custom",
            }
        ],
    }


# =========================================================
# CHAT COMPLETIONS
# =========================================================

@app.post("/v1/chat/completions")
async def chat_completions(request: Request):
    """Handle an OpenAI-style chat completion request.

    Reads ``messages`` and ``stream`` from the JSON body, submits the
    flattened prompt upstream, and returns either a ``chat.completion`` JSON
    body or (when ``stream`` is truthy) a ``text/event-stream`` response.

    Returns 400 for a malformed request body and 502 when the upstream queue
    cannot be joined or yields no completion.
    """
    try:
        body = await request.json()
    except ValueError:
        # Covers both invalid JSON and undecodable request bytes.
        return _error_response(
            400, "Request body must be valid JSON.", "invalid_request_error"
        )
    if not isinstance(body, dict):
        return _error_response(
            400, "Request body must be a JSON object.", "invalid_request_error"
        )

    messages = body.get("messages", [])
    if not isinstance(messages, list) or not all(
        isinstance(message, dict) for message in messages
    ):
        return _error_response(
            400, "'messages' must be a list of objects.", "invalid_request_error"
        )
    stream = body.get("stream", False)

    prompt = _build_prompt(messages)
    session_hash = random_session()

    # ``requests`` is blocking; run it off the event loop.
    try:
        await run_in_threadpool(_join_queue, prompt, session_hash)
    except requests.RequestException:
        logger.exception("Failed to join upstream queue")
        return _error_response(
            502, "Failed to join upstream queue.", "upstream_error"
        )

    # =====================================================
    # STREAM MODE
    # =====================================================

    if stream:
        return StreamingResponse(
            _stream_completion(session_hash),
            media_type="text/event-stream",
        )

    # =====================================================
    # NON STREAM MODE
    # =====================================================

    try:
        full_output = await run_in_threadpool(_wait_for_output, session_hash)
    except requests.RequestException:
        logger.exception("Upstream event stream failed")
        full_output = None

    if full_output is None:
        return _error_response(
            502, "Upstream did not return a completion.", "upstream_error"
        )

    return JSONResponse({
        "id": f"chatcmpl-{uuid.uuid4().hex}",
        "object": "chat.completion",
        "created": int(time.time()),
        "model": MODEL_ID,
        "choices": [
            {
                "index": 0,
                "message": {
                    "role": "assistant",
                    "content": full_output,
                },
                "finish_reason": "stop",
            }
        ],
        # Token accounting is not available from upstream.
        "usage": {
            "prompt_tokens": 0,
            "completion_tokens": 0,
            "total_tokens": 0,
        },
    })


# =========================================================
# START
# =========================================================

if __name__ == "__main__":
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=7860,
    )