# Hugging Face Space — last recorded status: Runtime error (see review notes below).
import os

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer

# Read the Hugging Face access token from the Space's secret store.
# May be None if the HF_TOKEN secret is not configured.
hf_token = os.environ.get("HF_TOKEN")
# Load tokenizer and model.
# NOTE: `use_auth_token=` is deprecated in transformers in favor of `token=`
# (and is removed in newer releases), so `token=` is used here.
tokenizer = AutoTokenizer.from_pretrained(
    "moonshotai/Kimi-K2-Instruct",
    token=hf_token,
    trust_remote_code=True,
)

# NOTE(review): Kimi-K2-Instruct is an extremely large MoE model; loading it
# in fp16 on a CPU Space will almost certainly exhaust memory — presumably
# the cause of the recorded "Runtime error". Confirm the hardware tier, or
# consider `device_map="auto"` with accelerate / a hosted-inference endpoint.
model = AutoModelForCausalLM.from_pretrained(
    "moonshotai/Kimi-K2-Instruct",
    trust_remote_code=True,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,   # stream weights in to reduce peak RAM during load
    token=hf_token,
).eval()  # inference mode: disables dropout etc.

# Streams generated text token-by-token; prompt tokens and special tokens
# are suppressed from the stream.
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
| # Format and chat | |
| def format_prompt(history, user_input): | |
| system_prompt = "You are Kimi, a helpful and conversational AI assistant." | |
| history_text = "\n".join([f"User: {u}\nAI: {a}" for u, a in history]) | |
| return f"{system_prompt}\n{history_text}\nUser: {user_input}\nAI:" | |
def chat(user_input, history):
    """Generate one assistant reply and append the turn to the history.

    Args:
        user_input: The new user message.
        history: List of (user, ai) tuples, or None on the first turn.

    Returns:
        (history, history) — the same updated list twice, feeding both the
        Chatbot display and the State component.
    """
    history = history or []
    prompt = format_prompt(history, user_input)
    # Follow the model's actual placement instead of hard-coding "cpu",
    # so this keeps working if the model is moved to a GPU.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=512,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id,
        )
    # Decode only the newly generated tokens. The previous approach decoded
    # the full sequence and split on "AI:", which is fragile: the echoed
    # prompt contains "AI:" markers and the reply itself might too.
    prompt_len = inputs["input_ids"].shape[-1]
    response = tokenizer.decode(
        output[0][prompt_len:], skip_special_tokens=True
    ).strip()
    history.append((user_input, response))
    return history, history
# UI: a minimal chat layout — header, chat window, input row, hidden footer.
with gr.Blocks(css="footer {visibility: hidden}") as demo:
    gr.Markdown("# 🤖 Kimi-K2 AI Assistant\nChat naturally with Kimi!")
    chatbot = gr.Chatbot(height=400)
    with gr.Row():
        user_input = gr.Textbox(placeholder="Type your message...", scale=10)
        submit_btn = gr.Button("Send", scale=2)
    # Holds the (user, ai) tuple history across turns.
    state = gr.State([])

    # Both the button click and pressing Enter send the message; each then
    # clears the textbox, which the original wiring never did.
    submit_btn.click(chat, [user_input, state], [chatbot, state]).then(
        lambda: "", None, user_input
    )
    user_input.submit(chat, [user_input, state], [chatbot, state]).then(
        lambda: "", None, user_input
    )

demo.launch()