# Kimi / app.py
# Hugging Face Space file by imspsycho; commit 90b1f52 ("Create app.py", verified), 7.38 kB.
import json
import random
import string
import time
import uuid

import requests
import uvicorn
from fastapi import FastAPI, Request
from fastapi.concurrency import run_in_threadpool
from fastapi.responses import StreamingResponse, JSONResponse
# ASGI application object; the route decorators in this file register their
# handlers on it, and it is what uvicorn serves when the file is run as a script.
app = FastAPI()
# =========================================================
# CONFIG
# =========================================================
# Base URL of the upstream Hugging Face Space; every queue endpoint used in
# this file is built by appending a path to it.
HF_BASE = "https://akhaliq-kimi-k2-6.hf.space"

# Headers attached to every upstream request. The origin/referer pair points
# back at the Space itself.
HEADERS = {
    "accept": "*/*",
    "content-type": "application/json",
    "origin": HF_BASE,
    "referer": HF_BASE + "/?__theme=system",
    "user-agent": "Mozilla/5.0",
    "x-gradio-user": "api",
}
# =========================================================
# HELPERS
# =========================================================
def random_session():
    """Return a random 12-character string of lowercase letters and digits.

    The value is used as the ``session_hash`` that ties a queue-join request
    to the matching queue-data stream.
    """
    alphabet = string.ascii_lowercase + string.digits
    picked = random.choices(alphabet, k=12)
    return "".join(picked)
# =========================================================
# ROOT
# =========================================================
@app.get("/")
async def root():
    """Return a small status document identifying this proxy."""
    return dict(
        status="ok",
        provider="Kimi K2",
        openai_compatible=True,
    )
# =========================================================
# MODELS
# =========================================================
@app.get("/v1/models")
async def models():
    """Return the model listing: a single ``kimi-k2`` entry."""
    kimi_model = {
        "id": "kimi-k2",
        "object": "model",
        "owned_by": "custom",
    }
    return {"object": "list", "data": [kimi_model]}
# =========================================================
# CHAT COMPLETIONS
# =========================================================
class _UpstreamError(Exception):
    """Raised when the upstream Gradio queue fails or yields no usable output."""


def _error_response(status_code, message, error_type):
    """Build a JSON error response with an ``{"error": {...}}`` body."""
    return JSONResponse(
        status_code=status_code,
        content={"error": {"message": message, "type": error_type}},
    )


def _content_to_text(content):
    """Flatten a message ``content`` value into plain text.

    Strings pass through unchanged and ``None`` becomes an empty string.
    A list is treated as content parts: the ``text`` of every text part is
    kept (joined with newlines) and non-text parts are dropped, since the
    upstream call only takes a text prompt.
    """
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        texts = []
        for part in content:
            if isinstance(part, str):
                texts.append(part)
            elif isinstance(part, dict) and part.get("type", "text") == "text":
                texts.append(str(part.get("text", "")))
        return "\n".join(texts)
    return str(content)


def _build_prompt(messages):
    """Render the chat history as ``ROLE: content`` lines, one per message."""
    return "".join(
        f"{str(msg.get('role') or 'user').upper()}: "
        f"{_content_to_text(msg.get('content', ''))}\n"
        for msg in messages
    )


def _join_queue(prompt, session_hash):
    """Submit the prompt to the upstream queue; raise ``_UpstreamError`` on failure."""
    payload = {
        # NOTE(review): the meaning of the second and third inputs is not
        # visible in this file -- confirm against the Space's API.
        "data": [prompt, [], None],
        "event_data": None,
        "fn_index": 0,
        "trigger_id": None,
        "session_hash": session_hash,
    }
    try:
        response = requests.post(
            f"{HF_BASE}/gradio_api/queue/join?__theme=system",
            headers=HEADERS,
            json=payload,
            timeout=120,
        )
        # A rejected join would otherwise leave the data stream with nothing
        # to deliver, so surface it immediately.
        response.raise_for_status()
    except requests.RequestException as exc:
        raise _UpstreamError(f"Failed to join upstream queue: {exc}") from exc


def _parse_sse_line(line):
    """Decode one raw SSE line into its JSON object, or ``None`` if not usable.

    Blank lines, non-``data:`` lines, undecodable bytes and non-JSON payloads
    are skipped on purpose: only JSON object events matter to the caller.
    """
    if not line:
        return None
    try:
        decoded = line.decode("utf-8")
    except UnicodeDecodeError:
        return None
    if not decoded.startswith("data:"):
        return None
    try:
        event = json.loads(decoded[5:].strip())
    except json.JSONDecodeError:
        return None
    return event if isinstance(event, dict) else None


def _wait_for_output(session_hash):
    """Block until the queue reports ``process_completed`` and return its output.

    Returns:
        The first element of the completed event's ``output.data``.

    Raises:
        _UpstreamError: on a network/HTTP failure, when the completion event
            carries no output data, or when the stream ends without one.
    """
    try:
        with requests.get(
            f"{HF_BASE}/gradio_api/queue/data?session_hash={session_hash}",
            headers={**HEADERS, "accept": "text/event-stream"},
            stream=True,
            timeout=600,
        ) as response:
            response.raise_for_status()
            for line in response.iter_lines():
                event = _parse_sse_line(line)
                if event is None or event.get("msg") != "process_completed":
                    continue
                try:
                    return event["output"]["data"][0]
                except (KeyError, IndexError, TypeError):
                    raise _UpstreamError(
                        "Upstream completed without output data."
                    ) from None
    except requests.RequestException as exc:
        raise _UpstreamError(f"Failed to read upstream stream: {exc}") from exc
    raise _UpstreamError("Upstream stream ended before completion.")


def _stream_completion(session_hash):
    """Yield the SSE body for a streamed completion.

    This is deliberately a *sync* generator: Starlette iterates sync
    iterables in a worker thread, so the blocking upstream read does not
    stall the event loop. The stream always ends with ``[DONE]``, even when
    the upstream call fails (an ``error`` event is emitted first).
    """
    completion_id = f"chatcmpl-{uuid.uuid4().hex}"
    try:
        output = _wait_for_output(session_hash)
    except _UpstreamError as exc:
        error_event = {"error": {"message": str(exc), "type": "upstream_error"}}
        yield f"data: {json.dumps(error_event)}\n\n"
    else:
        chunk = {
            "id": completion_id,
            "object": "chat.completion.chunk",
            "created": int(time.time()),
            "model": "kimi-k2",
            "choices": [
                {
                    "index": 0,
                    "delta": {"content": output},
                    "finish_reason": "stop",
                }
            ],
        }
        yield f"data: {json.dumps(chunk)}\n\n"
    yield "data: [DONE]\n\n"


@app.post("/v1/chat/completions")
async def chat_completions(request: Request):
    """Chat-completions endpoint backed by the upstream Gradio queue.

    Reads ``messages`` and ``stream`` from the JSON body, flattens the
    messages into a single prompt, submits it to the upstream queue under a
    fresh session hash and waits for the ``process_completed`` event.

    Returns:
        * ``stream`` truthy: a ``text/event-stream`` response carrying one
          ``chat.completion.chunk`` with the full output, then ``[DONE]``.
        * otherwise: a ``chat.completion`` JSON document (usage counts are
          always reported as zero).
        * HTTP 400 for a malformed request body, HTTP 502 when the upstream
          queue cannot be joined or produces no output.
    """
    try:
        body = await request.json()
    except ValueError:
        # Covers both invalid JSON and undecodable bytes.
        return _error_response(
            400, "Request body must be valid JSON.", "invalid_request_error"
        )
    if not isinstance(body, dict):
        return _error_response(
            400, "Request body must be a JSON object.", "invalid_request_error"
        )

    messages = body.get("messages", [])
    if not isinstance(messages, list) or not all(
        isinstance(msg, dict) for msg in messages
    ):
        return _error_response(
            400, "'messages' must be a list of objects.", "invalid_request_error"
        )
    stream = body.get("stream", False)

    prompt = _build_prompt(messages)
    session_hash = random_session()

    # requests is blocking; run it off the event loop so other requests keep
    # being served while this one waits.
    try:
        await run_in_threadpool(_join_queue, prompt, session_hash)
    except _UpstreamError as exc:
        return _error_response(502, str(exc), "upstream_error")

    if stream:
        return StreamingResponse(
            _stream_completion(session_hash),
            media_type="text/event-stream",
        )

    try:
        full_output = await run_in_threadpool(_wait_for_output, session_hash)
    except _UpstreamError as exc:
        return _error_response(502, str(exc), "upstream_error")

    return JSONResponse({
        "id": f"chatcmpl-{uuid.uuid4().hex}",
        "object": "chat.completion",
        "created": int(time.time()),
        "model": "kimi-k2",
        "choices": [
            {
                "index": 0,
                "message": {
                    "role": "assistant",
                    "content": full_output,
                },
                "finish_reason": "stop",
            }
        ],
        "usage": {
            "prompt_tokens": 0,
            "completion_tokens": 0,
            "total_tokens": 0,
        },
    })
# =========================================================
# START
# =========================================================
if __name__ == "__main__":
    # Run as a script: serve the app on all interfaces, port 7860.
    uvicorn.run(app, host="0.0.0.0", port=7860)