# Source: Hugging Face Space app.py by AryanRathod3097 (commit e1d1986, verified)
import os
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
import torch
# Read the Hugging Face access token from the Space's secret store.
# NOTE(review): if HF_TOKEN is unset this is None, and the gated model
# download below will fail with an auth error — confirm the secret exists.
hf_token = os.environ.get("HF_TOKEN")

# Load tokenizer and model.
# `use_auth_token` is deprecated in transformers — the supported keyword
# is `token` (the old name is removed in newer releases).
tokenizer = AutoTokenizer.from_pretrained(
    "moonshotai/Kimi-K2-Instruct",
    token=hf_token,
    trust_remote_code=True,
)
model = AutoModelForCausalLM.from_pretrained(
    "moonshotai/Kimi-K2-Instruct",
    trust_remote_code=True,
    torch_dtype=torch.float16,  # half precision to reduce memory footprint
    low_cpu_mem_usage=True,     # stream weights instead of materializing twice
    token=hf_token,
).eval()  # inference only — disable dropout etc.

# Streams decoded tokens to stdout as they are generated; skips echoing
# the prompt and any special tokens.
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
# Format and chat
def format_prompt(history, user_input):
    """Render the chat transcript as a single plain-text prompt.

    `history` is a list of (user_message, ai_reply) pairs; the returned
    string is the system line, the prior turns, then the new user message
    followed by a trailing "AI:" cue for the model to complete.
    """
    system_prompt = "You are Kimi, a helpful and conversational AI assistant."
    turns = [f"User: {u}\nAI: {a}" for u, a in history]
    # Join as three segments so an empty history still yields the same
    # blank line the original layout produced.
    return "\n".join([system_prompt, "\n".join(turns), f"User: {user_input}\nAI:"])
def chat(user_input, history):
    """Generate one assistant reply and append the turn to the history.

    Parameters:
        user_input: the new user message.
        history: list of (user, ai) tuples, or None on the first turn.

    Returns:
        (history, history) — duplicated because Gradio wires the same
        value into both the Chatbot display and the State component.
    """
    history = history or []
    prompt = format_prompt(history, user_input)
    inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=512,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id,
        )
    # Decode only the newly generated tokens. The previous approach decoded
    # the whole sequence and split on "AI:", which silently truncated the
    # reply to the tail after the LAST "AI:" whenever the model's own
    # answer happened to contain that marker.
    prompt_len = inputs["input_ids"].shape[1]
    response = tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True).strip()
    history.append((user_input, response))
    return history, history
# UI
# Build the Gradio UI: a chatbot pane, a message box, and a send button.
with gr.Blocks(css="footer {visibility: hidden}") as demo:
    gr.Markdown("# 🤖 Kimi-K2 AI Assistant\nChat naturally with Kimi!")
    chatbot = gr.Chatbot(height=400)
    with gr.Row():
        user_input = gr.Textbox(placeholder="Type your message...", scale=10)
        submit_btn = gr.Button("Send", scale=2)
    # Conversation state: list of (user, ai) tuples shared across turns.
    state = gr.State([])
    # After each send, chain a follow-up step that clears the textbox —
    # previously the typed message stayed in the box after submission.
    submit_btn.click(chat, [user_input, state], [chatbot, state]).then(
        lambda: "", None, user_input
    )
    user_input.submit(chat, [user_input, state], [chatbot, state]).then(
        lambda: "", None, user_input
    )
demo.launch()