import json
import random
import string
import time
import uuid

import requests
import uvicorn
from fastapi import FastAPI, Request
from fastapi.concurrency import run_in_threadpool
from fastapi.responses import StreamingResponse, JSONResponse
|
|
# ASGI application object; the route decorators in this module register
# their handlers on it, and the __main__ guard hands it to uvicorn.
app = FastAPI()
|
|
| |
| |
| |
|
|
# Base URL of the upstream hf.space application; every upstream endpoint
# used by this module is derived from it.
HF_BASE = "https://akhaliq-kimi-k2-6.hf.space"

# Default headers for upstream HTTP calls. "origin" and "referer" are both
# derived from HF_BASE so they always point at the same host.
HEADERS = {
    "accept": "*/*",
    "content-type": "application/json",
    "origin": HF_BASE,
    "referer": HF_BASE + "/?__theme=system",
    "user-agent": "Mozilla/5.0",
    "x-gradio-user": "api",
}
|
|
| |
| |
| |
|
|
def random_session():
    """Return a random 12-character string of lowercase letters and digits.

    The value is drawn with the module-level ``random`` generator, so it is
    an identifier only and must not be treated as a secret.
    """
    alphabet = string.ascii_lowercase + string.digits
    picked = random.choices(alphabet, k=12)
    return "".join(picked)
|
|
| |
| |
| |
|
|
@app.get("/")
async def root():
    """Return a fixed status document describing this service."""
    status_document = dict(
        status="ok",
        provider="Kimi K2",
        openai_compatible=True,
    )
    return status_document
|
|
| |
| |
| |
|
|
@app.get("/v1/models")
async def models():
    """Return the static model listing, which holds a single ``kimi-k2`` entry."""
    model_entry = {
        "id": "kimi-k2",
        "object": "model",
        "owned_by": "custom",
    }
    return {"object": "list", "data": [model_entry]}
|
|
| |
| |
| |
|
|
class _UpstreamError(RuntimeError):
    """Raised when the upstream queue yields no usable completion."""


def _error_response(status_code, message, error_type):
    """Build a JSON error response with an ``{"error": {...}}`` body."""
    return JSONResponse(
        {"error": {"message": message, "type": error_type}},
        status_code=status_code,
    )


def _message_text(content):
    """Flatten one message's ``content`` value into plain text.

    ``None`` becomes an empty string and a list of content parts is reduced
    to the concatenated ``text`` of its ``{"type": "text"}`` parts; anything
    else is passed through ``str`` (a no-op for ordinary string content).
    """
    if content is None:
        return ""
    if isinstance(content, list):
        return "".join(
            str(part.get("text", ""))
            for part in content
            if isinstance(part, dict) and part.get("type") == "text"
        )
    return str(content)


def _build_prompt(messages):
    """Render chat messages as ``ROLE: text`` lines, one line per message."""
    lines = []
    for msg in messages:
        role = str(msg.get("role", "user")).upper()
        text = _message_text(msg.get("content", ""))
        lines.append(f"{role}: {text}\n")
    return "".join(lines)


def _join_queue(payload):
    """POST ``payload`` to the upstream queue-join endpoint (blocking).

    Raises:
        requests.RequestException: on transport failure or a non-2xx status.
    """
    response = requests.post(
        f"{HF_BASE}/gradio_api/queue/join?__theme=system",
        headers=HEADERS,
        json=payload,
        timeout=120,
    )
    response.raise_for_status()


def _await_completion(session_hash):
    """Read the upstream event stream until ``process_completed`` (blocking).

    Returns:
        The first element of the completed event's ``output.data`` list.

    Raises:
        requests.RequestException: on transport failure or a non-2xx status.
        _UpstreamError: if the completed event lacks the expected payload, or
            the stream ends without ever reporting completion.
    """
    with requests.get(
        f"{HF_BASE}/gradio_api/queue/data?session_hash={session_hash}",
        headers={**HEADERS, "accept": "text/event-stream"},
        stream=True,
        timeout=600,
    ) as r:
        r.raise_for_status()

        for line in r.iter_lines():
            if not line:
                continue

            # Lines that are not well-formed "data: <json>" events are
            # skipped; only these two specific decode failures are tolerated.
            try:
                decoded = line.decode("utf-8")
            except UnicodeDecodeError:
                continue
            if not decoded.startswith("data:"):
                continue
            try:
                parsed = json.loads(decoded[5:].strip())
            except json.JSONDecodeError:
                continue

            if not isinstance(parsed, dict):
                continue
            if parsed.get("msg") != "process_completed":
                continue

            try:
                return parsed["output"]["data"][0]
            except (KeyError, IndexError, TypeError) as exc:
                raise _UpstreamError(
                    "Upstream reported completion without output data."
                ) from exc

    raise _UpstreamError("Upstream stream ended before completion.")


@app.post("/v1/chat/completions")
async def chat_completions(request: Request):
    """Handle an OpenAI-style chat completion request via the upstream queue.

    The request's ``messages`` are flattened into a single prompt, submitted
    to the upstream queue under a fresh session hash, and the completed
    output is returned either as one JSON ``chat.completion`` object or,
    when ``stream`` is truthy, as a server-sent-event stream holding one
    ``chat.completion.chunk`` followed by ``[DONE]``.

    Returns:
        400 if the body is not a JSON object or ``messages`` is not a list
        of objects; 502 if the upstream join or (non-streaming) completion
        fails; otherwise the completion response. In streaming mode an
        upstream failure is reported as an ``{"error": ...}`` event, and
        ``[DONE]`` is always the final event.
    """
    try:
        body = await request.json()
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError both subclass ValueError.
        return _error_response(
            400, "Request body must be valid JSON.", "invalid_request_error"
        )
    if not isinstance(body, dict):
        return _error_response(
            400, "Request body must be a JSON object.", "invalid_request_error"
        )

    messages = body.get("messages", [])
    if not isinstance(messages, list) or not all(
        isinstance(m, dict) for m in messages
    ):
        return _error_response(
            400, "'messages' must be a list of objects.", "invalid_request_error"
        )
    stream = body.get("stream", False)

    prompt = _build_prompt(messages)
    session_hash = random_session()

    payload = {
        "data": [prompt, [], None],
        "event_data": None,
        "fn_index": 0,
        "trigger_id": None,
        "session_hash": session_hash,
    }

    # requests is blocking; run it in the threadpool so the event loop stays
    # free to serve other connections while the upstream call is in flight.
    try:
        await run_in_threadpool(_join_queue, payload)
    except requests.RequestException as exc:
        return _error_response(
            502, f"Upstream queue join failed: {exc}", "upstream_error"
        )

    if stream:

        # Deliberately a plain (sync) generator: StreamingResponse iterates
        # sync iterables in a threadpool, so the blocking read inside
        # _await_completion does not stall the event loop.
        def generate():
            completion_id = f"chatcmpl-{uuid.uuid4().hex}"
            try:
                output = _await_completion(session_hash)
            except (requests.RequestException, _UpstreamError) as exc:
                error = {"error": {"message": str(exc), "type": "upstream_error"}}
                yield f"data: {json.dumps(error)}\n\n"
            else:
                chunk = {
                    "id": completion_id,
                    "object": "chat.completion.chunk",
                    "created": int(time.time()),
                    "model": "kimi-k2",
                    "choices": [
                        {
                            "index": 0,
                            "delta": {"content": output},
                            "finish_reason": "stop",
                        }
                    ],
                }
                yield f"data: {json.dumps(chunk)}\n\n"
            # Always terminate the stream so clients never wait forever.
            yield "data: [DONE]\n\n"

        return StreamingResponse(generate(), media_type="text/event-stream")

    try:
        full_output = await run_in_threadpool(_await_completion, session_hash)
    except (requests.RequestException, _UpstreamError) as exc:
        return _error_response(
            502, f"Upstream completion failed: {exc}", "upstream_error"
        )

    return JSONResponse({
        "id": f"chatcmpl-{uuid.uuid4().hex}",
        "object": "chat.completion",
        "created": int(time.time()),
        "model": "kimi-k2",
        "choices": [
            {
                "index": 0,
                "message": {
                    "role": "assistant",
                    "content": full_output,
                },
                "finish_reason": "stop",
            }
        ],
        # Token counts are not available from the upstream, so they are zero.
        "usage": {
            "prompt_tokens": 0,
            "completion_tokens": 0,
            "total_tokens": 0,
        },
    })
|
|
| |
| |
| |
|
|
if __name__ == "__main__":
    # When executed as a script, serve the app on all interfaces, port 7860.
    server_options = {"host": "0.0.0.0", "port": 7860}
    uvicorn.run(app, **server_options)