Spaces:

EGYADMIN
/

kimi-k2-thinking-dev

Paused

App Files Files Community

EGYADMIN committed 24 days ago

Commit

9d2d217

verified ·

1 Parent(s): 0d3e6aa

Add application file to load Kimi-K2-Thinking model

Browse files

Files changed (1) hide show

app.py +58 -0

app.py ADDED Viewed

	@@ -0,0 +1,58 @@

+import gradio as gr
+import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import os
+# Startup diagnostics: report whether torch can use CUDA and how many GPUs it sees.
+print("Starting model loading...")
+print(f"CUDA available: {torch.cuda.is_available()}")
+print(f"GPU count: {torch.cuda.device_count()}")
+# Model configuration: identifier handed to both from_pretrained() calls below.
+model_name = "moonshotai/Kimi-K2-Thinking"
+print(f"Loading model: {model_name}")
+# Load tokenizer.
+# trust_remote_code=True permits Python code shipped with the model repository to
+# run during loading, so model_name should only ever point at a trusted source.
+tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+# Load model with automatic device mapping for multi-GPU support.
+# Weights are requested in float16 and placement is left to device_map="auto".
+# NOTE(review): confirm float16 suits this checkpoint and that the available
+# hardware has enough memory for it; neither is checked here.
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    torch_dtype=torch.float16,
+    device_map="auto",
+    trust_remote_code=True
+)
+print("Model loaded successfully!")
+# Log the resulting placement so multi-GPU sharding can be checked from the logs.
+# Presumably hf_device_map is populated because device_map was given; verify.
+print(f"Model device map: {model.hf_device_map}")
+def generate_response(prompt, max_length=512, temperature=0.7):
+    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+    with torch.no_grad():
+        outputs = model.generate(
+            **inputs,
+            max_length=max_length,
+            temperature=temperature,
+            do_sample=True,
+            top_p=0.9
+        )
+    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    return response
+# Gradio interface: a single-function UI around generate_response.
+# The three input components are listed in the same order as the function's
+# parameters (prompt, max_length, temperature), and the slider defaults
+# (512 and 0.7) match the function's own defaults.
+iface = gr.Interface(
+    fn=generate_response,
+    inputs=[
+        gr.Textbox(lines=5, placeholder="Enter your prompt here...", label="Prompt"),
+        gr.Slider(minimum=128, maximum=2048, value=512, step=128, label="Max Length"),
+        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature")
+    ],
+    outputs=gr.Textbox(lines=10, label="Generated Response"),
+    title="Kimi-K2-Thinking Model",
+    description="Development environment for Kimi-K2-Thinking model with GPU acceleration"
+)
+# Start the web server only when run as a script, not when imported.
+# Binding to 0.0.0.0 listens on all network interfaces, on port 7860.
+if __name__ == "__main__":
+    iface.launch(server_name="0.0.0.0", server_port=7860)