Spaces:

imspsycho
/

Kimi

Sleeping

App Files Files Community

Kimi / app.py

imspsycho

Create app.py

90b1f52 verified about 1 month ago

Raw

History Blame Contribute Delete

7.38 kB

	import asyncio
	import json
	import random
	import string
	import time
	import uuid

	import requests
	import uvicorn
	from fastapi import FastAPI, Request
	from fastapi.responses import StreamingResponse, JSONResponse

	# Application instance; the route decorators in this module register on it.
	app = FastAPI()

	# =========================================================
	# CONFIG
	# =========================================================

	# Base URL of the upstream Gradio Space that requests are forwarded to.
	HF_BASE = "https://akhaliq-kimi-k2-6.hf.space"

	# Default headers sent with every upstream request. Calls that read the
	# queue's event stream override "accept" with "text/event-stream".
	HEADERS = {
	    # "*/*" is the wildcard media range; a bare "/" is not a valid Accept value.
	    "accept": "*/*",
	    "content-type": "application/json",
	    "origin": HF_BASE,
	    "referer": f"{HF_BASE}/?__theme=system",
	    "user-agent": "Mozilla/5.0",
	    "x-gradio-user": "api",
	}

	# =========================================================
	# HELPERS
	# =========================================================

	def random_session(length=12):
	    """Return a random identifier made of lowercase letters and digits.

	    Args:
	        length: Number of characters to generate. Defaults to 12, the
	            length this function always produced before it was a parameter.

	    Returns:
	        A string of ``length`` characters drawn (with replacement) from
	        ``string.ascii_lowercase + string.digits``.
	    """
	    alphabet = string.ascii_lowercase + string.digits
	    return "".join(random.choices(alphabet, k=length))

	# =========================================================
	# ROOT
	# =========================================================

	@app.get("/")
	async def root():
	    """Return a small static status document for the root path."""
	    status_payload = {
	        "status": "ok",
	        "provider": "Kimi K2",
	        "openai_compatible": True,
	    }
	    return status_payload

	# =========================================================
	# MODELS
	# =========================================================

	@app.get("/v1/models")
	async def models():
	    """Return the model listing: a single ``kimi-k2`` entry."""
	    kimi_entry = {
	        "id": "kimi-k2",
	        "object": "model",
	        "owned_by": "custom",
	    }
	    return {"object": "list", "data": [kimi_entry]}

	# =========================================================
	# CHAT COMPLETIONS
	# =========================================================

	def _message_text(content):
	    """Flatten one message's ``content`` value into plain text.

	    Strings pass through unchanged and ``None`` becomes an empty string.
	    A list is treated as a sequence of content parts: dict parts whose
	    ``type`` is ``"text"`` (or absent) contribute their ``text`` value, other
	    dict parts are skipped, and non-dict items are stringified.
	    Any other value is converted with ``str``.
	    """
	    if content is None:
	        return ""
	    if isinstance(content, str):
	        return content
	    if isinstance(content, list):
	        pieces = []
	        for part in content:
	            if isinstance(part, dict):
	                if part.get("type", "text") == "text":
	                    pieces.append(str(part.get("text", "")))
	            else:
	                pieces.append(str(part))
	        return "".join(pieces)
	    return str(content)


	def _build_prompt(messages):
	    """Render chat messages as ``ROLE: content`` lines, one per message."""
	    lines = []
	    for msg in messages:
	        if not isinstance(msg, dict):
	            # Skip malformed entries instead of failing the whole request.
	            continue
	        role = str(msg.get("role") or "user")
	        lines.append(f"{role.upper()}: {_message_text(msg.get('content'))}\n")
	    return "".join(lines)


	def _error_response(status_code, message, error_type):
	    """Build a JSON error response with an ``error`` object in the body."""
	    return JSONResponse(
	        status_code=status_code,
	        content={"error": {"message": message, "type": error_type}},
	    )


	def _join_queue(prompt, session_hash):
	    """Submit ``prompt`` to the upstream queue under ``session_hash``.

	    Blocking; call it from a worker thread.

	    Raises:
	        requests.RequestException: on network failure or a non-2xx reply.
	    """
	    payload = {
	        "data": [prompt, [], None],
	        "event_data": None,
	        "fn_index": 0,
	        "trigger_id": None,
	        "session_hash": session_hash,
	    }
	    response = requests.post(
	        f"{HF_BASE}/gradio_api/queue/join?__theme=system",
	        headers=HEADERS,
	        json=payload,
	        timeout=120,
	    )
	    response.raise_for_status()


	def _read_completion(session_hash):
	    """Read the upstream event stream until the job reports completion.

	    Blocking; call it from a worker thread.

	    Returns:
	        The first element of the ``process_completed`` event's output data,
	        or ``None`` if the stream ends without such an event or the event
	        carries no output data.

	    Raises:
	        requests.RequestException: on network failure or a non-2xx reply.
	    """
	    with requests.get(
	        f"{HF_BASE}/gradio_api/queue/data?session_hash={session_hash}",
	        headers={**HEADERS, "accept": "text/event-stream"},
	        stream=True,
	        timeout=600,
	    ) as upstream:
	        upstream.raise_for_status()
	        for raw_line in upstream.iter_lines():
	            if not raw_line:
	                continue
	            try:
	                decoded = raw_line.decode("utf-8")
	            except UnicodeDecodeError:
	                continue
	            if not decoded.startswith("data:"):
	                continue
	            try:
	                event = json.loads(decoded[5:].strip())
	            except json.JSONDecodeError:
	                # Not a JSON event; ignore it and keep reading.
	                continue
	            if not isinstance(event, dict):
	                continue
	            if event.get("msg") != "process_completed":
	                continue
	            try:
	                return event["output"]["data"][0]
	            except (KeyError, IndexError, TypeError):
	                # Completion event without usable output data; presumably
	                # an upstream-side failure. Let the caller report it.
	                return None
	    return None


	def _stream_completion(session_hash):
	    """Yield the completion as server-sent events, always ending with ``[DONE]``.

	    This is deliberately a plain (synchronous) generator: the upstream read
	    blocks, and Starlette iterates synchronous generators in a worker thread
	    so the event loop stays free. The ``yield`` statements sit outside every
	    ``try`` block so that closing the generator (client disconnect) is never
	    swallowed.
	    """
	    completion_id = f"chatcmpl-{uuid.uuid4().hex}"
	    failure = "Upstream finished without returning a completion"
	    output = None
	    try:
	        output = _read_completion(session_hash)
	    except requests.RequestException as exc:
	        failure = f"Upstream request failed: {exc}"

	    if output is None:
	        error_event = {"error": {"message": failure, "type": "upstream_error"}}
	        yield f"data: {json.dumps(error_event)}\n\n"
	    else:
	        chunk = {
	            "id": completion_id,
	            "object": "chat.completion.chunk",
	            "created": int(time.time()),
	            "model": "kimi-k2",
	            "choices": [
	                {
	                    "index": 0,
	                    "delta": {"content": output},
	                    "finish_reason": "stop",
	                }
	            ],
	        }
	        yield f"data: {json.dumps(chunk)}\n\n"

	    yield "data: [DONE]\n\n"


	@app.post("/v1/chat/completions")
	async def chat_completions(request: Request):
	    """Handle a chat-completion request by forwarding it to the upstream Space.

	    The request's ``messages`` are flattened into a single ``ROLE: content``
	    prompt, submitted to the upstream queue under a fresh session hash, and
	    the completed output is returned either as one JSON document or, when the
	    request sets a truthy ``stream``, as a server-sent event stream.

	    Returns:
	        * 400 JSON error if the body is not a JSON object or ``messages`` is
	          not a list.
	        * 502 JSON error if the upstream queue cannot be joined, or (in
	          non-stream mode) no completion is obtained.
	        * ``StreamingResponse`` (``text/event-stream``) in stream mode.
	        * ``JSONResponse`` with a ``chat.completion`` object otherwise; the
	          ``usage`` counters are always zero because tokens are not counted.
	    """
	    try:
	        body = await request.json()
	    except ValueError:
	        return _error_response(
	            400, "Request body must be valid JSON", "invalid_request_error"
	        )
	    if not isinstance(body, dict):
	        return _error_response(
	            400, "Request body must be a JSON object", "invalid_request_error"
	        )

	    messages = body.get("messages") or []
	    if not isinstance(messages, list):
	        return _error_response(
	            400, "'messages' must be a list", "invalid_request_error"
	        )
	    stream = body.get("stream", False)

	    prompt = _build_prompt(messages)
	    session_hash = random_session()

	    # requests is blocking; run it in the default executor so other requests
	    # are still served while we wait on the upstream.
	    loop = asyncio.get_running_loop()
	    try:
	        await loop.run_in_executor(None, _join_queue, prompt, session_hash)
	    except requests.RequestException as exc:
	        return _error_response(
	            502, f"Failed to join upstream queue: {exc}", "upstream_error"
	        )

	    if stream:
	        return StreamingResponse(
	            _stream_completion(session_hash),
	            media_type="text/event-stream",
	        )

	    try:
	        full_output = await loop.run_in_executor(
	            None, _read_completion, session_hash
	        )
	    except requests.RequestException as exc:
	        return _error_response(
	            502, f"Upstream request failed: {exc}", "upstream_error"
	        )
	    if full_output is None:
	        return _error_response(
	            502,
	            "Upstream finished without returning a completion",
	            "upstream_error",
	        )

	    return JSONResponse({
	        "id": f"chatcmpl-{uuid.uuid4().hex}",
	        "object": "chat.completion",
	        "created": int(time.time()),
	        "model": "kimi-k2",
	        "choices": [
	            {
	                "index": 0,
	                "message": {
	                    "role": "assistant",
	                    "content": full_output,
	                },
	                "finish_reason": "stop",
	            }
	        ],
	        "usage": {
	            "prompt_tokens": 0,
	            "completion_tokens": 0,
	            "total_tokens": 0,
	        },
	    })

	# =========================================================
	# START
	# =========================================================

	if __name__ == "__main__":
	    # Run the app with uvicorn when executed as a script: all interfaces, port 7860.
	    uvicorn.run(app, host="0.0.0.0", port=7860)