EGYADMIN committed on
Commit
1e6a29d
·
verified ·
1 Parent(s): cfa3f95

Switch to quantized model RedHatAI/Kimi-K2-Instruct-quantized.w4a16

Browse files
Files changed (1) hide show
  1. app.py +75 -80
app.py CHANGED
@@ -3,55 +3,55 @@ import os
3
  from huggingface_hub import InferenceClient
4
 
5
  # Model configuration - Using Inference API
6
- MODEL_NAME = "moonshotai/Kimi-K2-Instruct"
7
  DEFAULT_SYSTEM_PROMPT = "You are Kimi, an AI assistant created by Moonshot AI. You are helpful, harmless, and honest."
8
 
9
  # Initialize Inference Client
10
  client = None
11
 
12
  def init_client():
13
- """Initialize the Hugging Face Inference Client"""""
14
- global client
15
- hf_token = os.environ.get("HF_TOKEN")
16
- if hf_token:
17
- client = InferenceClient(token=hf_token)
18
- print("Inference client initialized successfully")
19
- return True
20
- else:
21
  print("Warning: HF_TOKEN not found. Please set it in Space secrets.")
22
- return False
23
 
24
  def generate_response(message, history, system_prompt, max_tokens, temperature):
25
- """Generate response using Hugging Face Inference API"""""
26
  global client
27
-
28
  if client is None:
29
- if not init_client():
30
- return "Error: HF_TOKEN not configured. Please add it in Space settings."
31
-
32
- try:
33
- # Build messages
34
- messages = [{"role": "system", "content": system_prompt or DEFAULT_SYSTEM_PROMPT}]
35
-
36
- for h in history:
37
- if h[0]:
38
- messages.append({"role": "user", "content": h[0]})
39
- if h[1]:
40
- messages.append({"role": "assistant", "content": h[1]})
41
-
42
- messages.append({"role": "user", "content": message})
43
-
44
  # Call Inference API
45
  response = client.chat_completion(
46
- model=MODEL_NAME,
47
- messages=messages,
48
- max_tokens=int(max_tokens),
49
- temperature=float(temperature)
50
  )
51
-
52
  return response.choices[0].message.content
53
-
54
- except Exception as e:
55
  return f"Error: {str(e)}"
56
 
57
  # Create interface
@@ -62,66 +62,61 @@ print(f"Using Inference API with model: {MODEL_NAME}")
62
  client_ready = init_client()
63
 
64
  with gr.Blocks(title="Kimi-K2 Chat", theme=gr.themes.Soft()) as iface:
65
- gr.Markdown("""
66
- # 🤖 Kimi-K2 Instruct Chat
67
- **Powered by Hugging Face Inference API**
68
-
69
- This space uses the Kimi-K2-Instruct model via API for efficient inference.
70
- """"")
71
-
72
  if not client_ready:
73
- gr.Markdown("⚠️ **Warning:** HF_TOKEN not found. Please configure it in Space secrets.")
74
-
75
  chatbot = gr.Chatbot(height=450, label="Chat")
76
-
77
  with gr.Row():
78
- msg = gr.Textbox(
79
- placeholder="Type your message here...",
80
- label="Your Message",
81
- scale=4,
82
- lines=2
83
- )
84
  submit_btn = gr.Button("Send 🚀", variant="primary", scale=1)
85
-
86
  with gr.Accordion("⚙️ Settings", open=False):
87
- system_prompt = gr.Textbox(
88
- value=DEFAULT_SYSTEM_PROMPT,
89
- label="System Prompt",
90
- lines=2
91
- )
92
- with gr.Row():
93
- max_tokens = gr.Slider(
94
- minimum=64,
95
- maximum=2048,
96
- value=512,
97
- step=64,
98
- label="Max Tokens"
99
  )
 
 
 
 
 
 
 
 
100
  temperature = gr.Slider(
101
- minimum=0.1,
102
- maximum=2.0,
103
- value=0.7,
104
- step=0.1,
105
- label="Temperature"
106
  )
107
-
108
  clear_btn = gr.Button("🗑️ Clear Chat")
109
-
110
  def respond(message, history, system_prompt, max_tokens, temperature):
111
- if not message.strip():
112
- return "", history
113
- response = generate_response(message, history, system_prompt, max_tokens, temperature)
114
  history.append((message, response))
115
  return "", history
116
-
117
  msg.submit(respond, [msg, chatbot, system_prompt, max_tokens, temperature], [msg, chatbot])
118
  submit_btn.click(respond, [msg, chatbot, system_prompt, max_tokens, temperature], [msg, chatbot])
119
  clear_btn.click(lambda: [], None, chatbot)
120
 
121
  if __name__ == "__main__":
122
- iface.launch(server_name="0.0.0.0", server_port=7860)
123
- )
124
- )
125
- )
126
- )
127
- )
 
3
  from huggingface_hub import InferenceClient
4
 
5
  # Model configuration - Using Inference API
6
+ MODEL_NAME = "RedHatAI/Kimi-K2-Instruct-quantized.w4a16"
7
  DEFAULT_SYSTEM_PROMPT = "You are Kimi, an AI assistant created by Moonshot AI. You are helpful, harmless, and honest."
8
 
9
  # Initialize Inference Client
10
  client = None
11
 
12
def init_client():
    """Initialize the module-level Hugging Face InferenceClient.

    Reads the ``HF_TOKEN`` environment variable; when present, creates an
    authenticated ``InferenceClient`` and stores it in the global ``client``.

    Returns:
        bool: True when the client was created, False when HF_TOKEN is unset
        (``client`` is left as None so a later call can retry).
    """
    global client
    hf_token = os.environ.get("HF_TOKEN")
    if hf_token:
        client = InferenceClient(token=hf_token)
        print("Inference client initialized successfully")
        return True
    # No token configured: warn instead of raising so the Space UI still loads.
    print("Warning: HF_TOKEN not found. Please set it in Space secrets.")
    return False
23
 
24
def generate_response(message, history, system_prompt, max_tokens, temperature):
    """Generate a chat completion via the Hugging Face Inference API.

    Args:
        message: Latest user message.
        history: List of (user, assistant) pairs from the chat UI; empty
            halves of a pair are skipped.
        system_prompt: System prompt; falls back to DEFAULT_SYSTEM_PROMPT
            when empty/None.
        max_tokens: Maximum tokens to generate (coerced to int).
        temperature: Sampling temperature (coerced to float).

    Returns:
        str: The assistant reply, or an "Error: ..." string on any failure
        (missing token, API/network error) so the UI never crashes.
    """
    global client

    # Lazily (re)initialize the client; bail out with a user-visible message
    # when no token is configured.
    if client is None:
        if not init_client():
            return "Error: HF_TOKEN not configured. Please add it in Space settings."

    try:
        # Build the OpenAI-style message list: system prompt, prior turns,
        # then the new user message.
        messages = [{"role": "system", "content": system_prompt or DEFAULT_SYSTEM_PROMPT}]
        for h in history:
            if h[0]:
                messages.append({"role": "user", "content": h[0]})
            if h[1]:
                messages.append({"role": "assistant", "content": h[1]})
        messages.append({"role": "user", "content": message})

        # Call Inference API
        response = client.chat_completion(
            model=MODEL_NAME,
            messages=messages,
            max_tokens=int(max_tokens),
            temperature=float(temperature),
        )
        return response.choices[0].message.content
    except Exception as e:
        # Surface API/network failures to the chat window instead of raising.
        return f"Error: {str(e)}"
56
 
57
# Create interface
client_ready = init_client()

with gr.Blocks(title="Kimi-K2 Chat", theme=gr.themes.Soft()) as iface:
    # Header / description banner.
    gr.Markdown("""
    # 🤖 Kimi-K2 Instruct Chat
    **Powered by Hugging Face Inference API**

    This space uses the Kimi-K2-Instruct quantized model via API for efficient inference.
    """)

    # Shown only when init_client() could not find an HF_TOKEN.
    if not client_ready:
        gr.Markdown("⚠️ **Warning:** HF_TOKEN not found. Please configure it in Space secrets.")

    chatbot = gr.Chatbot(height=450, label="Chat")

    with gr.Row():
        msg = gr.Textbox(
            placeholder="Type your message here...",
            label="Your Message",
            scale=4,
            lines=2,
        )
        submit_btn = gr.Button("Send 🚀", variant="primary", scale=1)

    # Generation controls, collapsed by default.
    with gr.Accordion("⚙️ Settings", open=False):
        system_prompt = gr.Textbox(
            value=DEFAULT_SYSTEM_PROMPT,
            label="System Prompt",
            lines=2,
        )
        with gr.Row():
            max_tokens = gr.Slider(
                minimum=64,
                maximum=2048,
                value=512,
                step=64,
                label="Max Tokens",
            )
            temperature = gr.Slider(
                minimum=0.1,
                maximum=2.0,
                value=0.7,
                step=0.1,
                label="Temperature",
            )

    clear_btn = gr.Button("🗑️ Clear Chat")

    def respond(message, history, system_prompt, max_tokens, temperature):
        # Ignore empty submissions; otherwise append the new exchange and
        # clear the input box.
        if not message.strip():
            return "", history
        response = generate_response(message, history, system_prompt, max_tokens, temperature)
        history.append((message, response))
        return "", history

    # Enter key and Send button share the same handler; Clear resets history.
    msg.submit(respond, [msg, chatbot, system_prompt, max_tokens, temperature], [msg, chatbot])
    submit_btn.click(respond, [msg, chatbot, system_prompt, max_tokens, temperature], [msg, chatbot])
    clear_btn.click(lambda: [], None, chatbot)

if __name__ == "__main__":
    iface.launch(server_name="0.0.0.0", server_port=7860)