import os
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
import torch

# Read the Hugging Face access token from the HF_TOKEN environment variable (Space secret)
hf_token = os.environ.get("HF_TOKEN")

# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(
    "moonshotai/Kimi-K2-Instruct",
    token=hf_token,  # `use_auth_token` is deprecated; recent transformers releases use `token`
    trust_remote_code=True
)

model = AutoModelForCausalLM.from_pretrained(
    "moonshotai/Kimi-K2-Instruct",
    trust_remote_code=True,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
    token=hf_token
).eval()
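# A hedged alternative for large checkpoints (sketch only, assumes the `accelerate`
# package is installed and has not been verified for this particular model): let
# transformers place the weight shards across available devices automatically
# instead of loading everything into a single process:
# model = AutoModelForCausalLM.from_pretrained(
#     "moonshotai/Kimi-K2-Instruct",
#     trust_remote_code=True,
#     torch_dtype=torch.float16,
#     device_map="auto",
#     token=hf_token,
# ).eval()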

# Note: this streamer is created but never passed to model.generate below, so the
# Gradio handler returns the full reply in one piece (non-streaming).
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
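# If token-by-token streaming into the UI is wanted, a minimal sketch (assuming
# transformers' TextIteratorStreamer and a generator-style Gradio handler; `inputs`
# here refers to the tokenized prompt built inside chat()):
# from transformers import TextIteratorStreamer
# from threading import Thread
# iter_streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
# Thread(target=model.generate, kwargs=dict(**inputs, max_new_tokens=512, streamer=iter_streamer)).start()
# for new_text in iter_streamer:
#     ...  # append new_text to the partial reply and yield the updated history to the Chatbot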

# Prompt formatting and chat handler
def format_prompt(history, user_input):
    system_prompt = "You are Kimi, a helpful and conversational AI assistant."
    history_text = "\n".join([f"User: {u}\nAI: {a}" for u, a in history])
    return f"{system_prompt}\n{history_text}\nUser: {user_input}\nAI:"

def chat(user_input, history):
    history = history or []
    prompt = format_prompt(history, user_input)
    inputs = tokenizer(prompt, return_tensors="pt").to("cpu")

    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=512,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id,
        )
    # The decoded output contains the whole prompt; keep only the text after the
    # final "AI:" marker, which is the newly generated reply.
    response = tokenizer.decode(output[0], skip_special_tokens=True).split("AI:")[-1].strip()
    history.append((user_input, response))
    return history, history

# UI
with gr.Blocks(css="footer {visibility: hidden}") as demo:
    gr.Markdown("# 🤖 Kimi-K2 AI Assistant\nChat naturally with Kimi!")

    chatbot = gr.Chatbot(height=400)
    with gr.Row():
        user_input = gr.Textbox(placeholder="Type your message...", scale=10)
        submit_btn = gr.Button("Send", scale=2)

    state = gr.State([])

    # Both the Send button and pressing Enter in the textbox run the same handler
    submit_btn.click(chat, [user_input, state], [chatbot, state])
    user_input.submit(chat, [user_input, state], [chatbot, state])

demo.launch()
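# When running outside a Hugging Face Space, standard Gradio options can be passed
# explicitly (sketch only, not required inside a Space), e.g.:
# demo.queue().launch(server_name="0.0.0.0", server_port=7860)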