EGYADMIN committed on
Commit
9d2d217
·
verified ·
1 Parent(s): 0d3e6aa

Add application file to load Kimi-K2-Thinking model

Browse files
Files changed (1) hide show
  1. app.py +58 -0
app.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import AutoTokenizer, AutoModelForCausalLM
4
+ import os
5
+
6
# --- Model bootstrap -------------------------------------------------------
# Runs at import time: report the GPU situation, then pull the checkpoint.
model_name = "moonshotai/Kimi-K2-Thinking"

print("Starting model loading...")
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"GPU count: {torch.cuda.device_count()}")
print(f"Loading model: {model_name}")

# Tokenizer first; trust_remote_code is required for this custom architecture.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# device_map="auto" lets accelerate shard the weights across every visible
# GPU (and CPU/offload if needed); fp16 halves the memory footprint.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    device_map="auto",
    torch_dtype=torch.float16,
)

print("Model loaded successfully!")
print(f"Model device map: {model.hf_device_map}")
29
def generate_response(prompt, max_length=512, temperature=0.7):
    """Generate a completion for *prompt* using the globally loaded model.

    Args:
        prompt: Input text to complete.
        max_length: Maximum number of NEW tokens to generate. Passed to
            ``model.generate`` as ``max_new_tokens`` — the original code
            passed it as ``max_length``, which caps the TOTAL sequence
            (prompt included), so long prompts produced truncated or
            empty replies.
        temperature: Sampling temperature; higher values are more random.

    Returns:
        The generated continuation as a string. The prompt tokens are
        sliced off before decoding, so the reply no longer echoes the
        user's input back at the start.
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            # max_new_tokens budgets only the reply, independent of prompt size.
            max_new_tokens=max_length,
            temperature=temperature,
            do_sample=True,
            top_p=0.9,
        )

    # outputs[0] = prompt tokens + new tokens; keep only what was generated.
    prompt_len = inputs["input_ids"].shape[-1]
    new_tokens = outputs[0][prompt_len:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)
43
+
44
# --- Gradio UI -------------------------------------------------------------
# Three inputs map positionally onto generate_response(prompt, max_length,
# temperature); the single textbox output shows the decoded reply.
prompt_input = gr.Textbox(lines=5, placeholder="Enter your prompt here...", label="Prompt")
length_input = gr.Slider(minimum=128, maximum=2048, value=512, step=128, label="Max Length")
temperature_input = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature")
response_output = gr.Textbox(lines=10, label="Generated Response")

iface = gr.Interface(
    fn=generate_response,
    inputs=[prompt_input, length_input, temperature_input],
    outputs=response_output,
    title="Kimi-K2-Thinking Model",
    description="Development environment for Kimi-K2-Thinking model with GPU acceleration",
)

if __name__ == "__main__":
    # Bind to all interfaces on the conventional Spaces port.
    iface.launch(server_name="0.0.0.0", server_port=7860)