Spaces:

AryanRathod3097
/

Kimi-K2-Instruct

Runtime error

App Files Files Community

AryanRathod3097 committed on Jul 19, 2025

Commit

e1d1986

verified ·

1 Parent(s): 5c9e4f8

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -47

app.py CHANGED Viewed

@@ -1,46 +1,36 @@
 import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
 import torch
-model = None
-tokenizer = None
-streamer = None
-# This function is run after token is submitted
-def load_model(token):
-    global model, tokenizer, streamer
-    try:
-        tokenizer = AutoTokenizer.from_pretrained(
-            "moonshotai/Kimi-K2-Instruct",
-            use_auth_token=token,
-            trust_remote_code=True
-        )
-        model = AutoModelForCausalLM.from_pretrained(
-            "moonshotai/Kimi-K2-Instruct",
-            trust_remote_code=True,
-            torch_dtype=torch.float16,
-            low_cpu_mem_usage=True,
-            use_auth_token=token
-        ).eval()
-        streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
-        return gr.update(visible=False), gr.update(visible=True), "✅ Model loaded! Start chatting.", token
-    except Exception as e:
-        return gr.update(visible=True), gr.update(visible=False), f"❌ Error: {str(e)}", ""
-# Format chat
 def format_prompt(history, user_input):
     system_prompt = "You are Kimi, a helpful and conversational AI assistant."
     history_text = "\n".join([f"User: {u}\nAI: {a}" for u, a in history])
     return f"{system_prompt}\n{history_text}\nUser: {user_input}\nAI:"
-# Main chat function
-def chat(user_input, history, token):
-    if model is None or tokenizer is None:
-        return history, history, "❌ Model not loaded yet. Please enter your token."
     prompt = format_prompt(history, user_input)
     inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
@@ -53,30 +43,22 @@ def chat(user_input, history, token):
             top_p=0.9,
             pad_token_id=tokenizer.eos_token_id,
         )
     response = tokenizer.decode(output[0], skip_special_tokens=True).split("AI:")[-1].strip()
     history.append((user_input, response))
-    return history, history, ""
-# Build Gradio UI
 with gr.Blocks(css="footer {visibility: hidden}") as demo:
-    gr.Markdown("## 🤖 Kimi-K2 AI Chat\nEnter your Hugging Face token to start chatting.")
-    token_box = gr.Textbox(label="🔐 Hugging Face Token", type="password", placeholder="Paste your Hugging Face Access Token here")
-    load_button = gr.Button("🔓 Load Model")
-    token_status = gr.Textbox(visible=False)
-    chatbot = gr.Chatbot(label="Kimi-K2 Chat", visible=False)
     state = gr.State([])
-    saved_token = gr.State("")
-    with gr.Row(visible=False) as chat_row:
-        msg = gr.Textbox(placeholder="Type your message...", scale=10)
-        send = gr.Button("Send", scale=2)
-    # Connect logic
-    load_button.click(load_model, inputs=token_box, outputs=[token_box, chat_row, token_status, saved_token])
-    send.click(chat, inputs=[msg, state, saved_token], outputs=[chatbot, state, token_status])
-    msg.submit(chat, inputs=[msg, state, saved_token], outputs=[chatbot, state, token_status])
 demo.launch()

+import os
 import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
 import torch
+# Automatically load token from secret
+hf_token = os.environ.get("HF_TOKEN")
+# Load model
+tokenizer = AutoTokenizer.from_pretrained(
+    "moonshotai/Kimi-K2-Instruct",
+    use_auth_token=hf_token,
+    trust_remote_code=True
+)
+model = AutoModelForCausalLM.from_pretrained(
+    "moonshotai/Kimi-K2-Instruct",
+    trust_remote_code=True,
+    torch_dtype=torch.float16,
+    low_cpu_mem_usage=True,
+    use_auth_token=hf_token
+).eval()
+streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
+# Format and chat
 def format_prompt(history, user_input):
     """Build the plain-text prompt sent to the model.

     The prompt is the fixed system line, one "User:/AI:" pair per entry of
     ``history`` (an iterable of ``(user, ai)`` pairs), the new user message,
     and a trailing "AI:" cue for the model to continue from.
     """
     past_turns = []
     for user_msg, ai_msg in history:
         past_turns.append(f"User: {user_msg}\nAI: {ai_msg}")
     pieces = [
         "You are Kimi, a helpful and conversational AI assistant.",
         # Joined history stays as one piece, so an empty history still
         # contributes an empty line.
         "\n".join(past_turns),
         f"User: {user_input}",
         "AI:",
     ]
     return "\n".join(pieces)
+def chat(user_input, history):
+    history = history or []
     prompt = format_prompt(history, user_input)
     inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
             top_p=0.9,
             pad_token_id=tokenizer.eos_token_id,
         )
     response = tokenizer.decode(output[0], skip_special_tokens=True).split("AI:")[-1].strip()
     history.append((user_input, response))
+    return history, history
+# UI
 with gr.Blocks(css="footer {visibility: hidden}") as demo:
+    gr.Markdown("# 🤖 Kimi-K2 AI Assistant\nChat naturally with Kimi!")
+    chatbot = gr.Chatbot(height=400)
+    with gr.Row():
+        user_input = gr.Textbox(placeholder="Type your message...", scale=10)
+        submit_btn = gr.Button("Send", scale=2)
     state = gr.State([])
+    submit_btn.click(chat, [user_input, state], [chatbot, state])
+    user_input.submit(chat, [user_input, state], [chatbot, state])
 demo.launch()