# Source: Hugging Face Space app.py by AryanRathod3097 (commit e1d1986, verified)
import os
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
import torch
# Read the Hugging Face access token from the Space's secret store.
# NOTE(review): if HF_TOKEN is unset this is None, and the gated model
# download below will fail with an auth error — confirm the secret exists.
hf_token = os.environ.get("HF_TOKEN")

# Load tokenizer and model.
# `use_auth_token` is deprecated in transformers — the supported keyword
# is `token` (the old name is removed in newer releases).
tokenizer = AutoTokenizer.from_pretrained(
    "moonshotai/Kimi-K2-Instruct",
    token=hf_token,
    trust_remote_code=True,
)
model = AutoModelForCausalLM.from_pretrained(
    "moonshotai/Kimi-K2-Instruct",
    trust_remote_code=True,
    torch_dtype=torch.float16,  # half precision to reduce memory footprint
    low_cpu_mem_usage=True,     # stream weights instead of materializing twice
    token=hf_token,
).eval()  # inference only — disable dropout etc.

# Streams decoded tokens to stdout as they are generated; skips echoing
# the prompt and any special tokens.
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
# Format and chat
def format_prompt(history, user_input):
    """Render the chat transcript as a single plain-text prompt.

    `history` is a list of (user_message, ai_reply) pairs; the returned
    string is the system line, the prior turns, then the new user message
    followed by a trailing "AI:" cue for the model to complete.
    """
    system_prompt = "You are Kimi, a helpful and conversational AI assistant."
    turns = [f"User: {u}\nAI: {a}" for u, a in history]
    # Join as three segments so an empty history still yields the same
    # blank line the original layout produced.
    return "\n".join([system_prompt, "\n".join(turns), f"User: {user_input}\nAI:"])
def chat(user_input, history):
    """Generate one assistant reply and append the turn to the history.

    Parameters:
        user_input: the new user message.
        history: list of (user, ai) tuples, or None on the first turn.

    Returns:
        (history, history) — duplicated because Gradio wires the same
        value into both the Chatbot display and the State component.
    """
    history = history or []
    prompt = format_prompt(history, user_input)
    inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=512,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id,
        )
    # Decode only the newly generated tokens. The previous approach decoded
    # the whole sequence and split on "AI:", which silently truncated the
    # reply to the tail after the LAST "AI:" whenever the model's own
    # answer happened to contain that marker.
    prompt_len = inputs["input_ids"].shape[1]
    response = tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True).strip()
    history.append((user_input, response))
    return history, history
# UI
# Build the Gradio UI: a chatbot pane, a message box, and a send button.
with gr.Blocks(css="footer {visibility: hidden}") as demo:
    gr.Markdown("# 🤖 Kimi-K2 AI Assistant\nChat naturally with Kimi!")
    chatbot = gr.Chatbot(height=400)
    with gr.Row():
        user_input = gr.Textbox(placeholder="Type your message...", scale=10)
        submit_btn = gr.Button("Send", scale=2)
    # Conversation state: list of (user, ai) tuples shared across turns.
    state = gr.State([])
    # After each send, chain a follow-up step that clears the textbox —
    # previously the typed message stayed in the box after submission.
    submit_btn.click(chat, [user_input, state], [chatbot, state]).then(
        lambda: "", None, user_input
    )
    user_input.submit(chat, [user_input, state], [chatbot, state]).then(
        lambda: "", None, user_input
    )
demo.launch()