Spaces:

EGYADMIN
/

kimi-k2-thinking-dev

Paused

App Files Community

EGYADMIN committed on 29 days ago

Commit

1e6a29d

verified ·

1 Parent(s): cfa3f95

Switch to quantized model RedHatAI/Kimi-K2-Instruct-quantized.w4a16

Browse files

Files changed (1) hide show

app.py +75 -80

app.py CHANGED Viewed

@@ -3,55 +3,55 @@ import os
 from huggingface_hub import InferenceClient
 # Model configuration - Using Inference API
-MODEL_NAME = "moonshotai/Kimi-K2-Instruct"
 DEFAULT_SYSTEM_PROMPT = "You are Kimi, an AI assistant created by Moonshot AI. You are helpful, harmless, and honest."
 # Initialize Inference Client
 client = None
 def init_client():
-        """Initialize the Hugging Face Inference Client"""""
-        global client
-        hf_token = os.environ.get("HF_TOKEN")
-        if hf_token:
-                    client = InferenceClient(token=hf_token)
-                    print("Inference client initialized successfully")
-                    return True
-else:
         print("Warning: HF_TOKEN not found. Please set it in Space secrets.")
-            return False
 def generate_response(message, history, system_prompt, max_tokens, temperature):
-        """Generate response using Hugging Face Inference API"""""
     global client
     if client is None:
-                if not init_client():
-                                return "Error: HF_TOKEN not configured. Please add it in Space settings."
-            try:
-                        # Build messages
-                        messages = [{"role": "system", "content": system_prompt or DEFAULT_SYSTEM_PROMPT}]
-                        for h in history:
-                                        if h[0]:
-                                                            messages.append({"role": "user", "content": h[0]})
-                                                        if h[1]:
-                                                            messages.append({"role": "assistant", "content": h[1]})
-                                    messages.append({"role": "user", "content": message})
         # Call Inference API
         response = client.chat_completion(
-                        model=MODEL_NAME,
-                        messages=messages,
-                        max_tokens=int(max_tokens),
-                        temperature=float(temperature)
         )
         return response.choices[0].message.content
-except Exception as e:
         return f"Error: {str(e)}"
 # Create interface
@@ -62,66 +62,61 @@ print(f"Using Inference API with model: {MODEL_NAME}")
 client_ready = init_client()
 with gr.Blocks(title="Kimi-K2 Chat", theme=gr.themes.Soft()) as iface:
-        gr.Markdown("""
-            # 🤖 Kimi-K2 Instruct Chat
-                **Powered by Hugging Face Inference API**
-                        This space uses the Kimi-K2-Instruct model via API for efficient inference.
-                            """"")
     if not client_ready:
-                gr.Markdown("⚠️ **Warning:** HF_TOKEN not found. Please configure it in Space secrets.")
     chatbot = gr.Chatbot(height=450, label="Chat")
     with gr.Row():
-                msg = gr.Textbox(
-                                placeholder="Type your message here...",
-                                label="Your Message",
-                                scale=4,
-                                lines=2
-                )
         submit_btn = gr.Button("Send 🚀", variant="primary", scale=1)
     with gr.Accordion("⚙️ Settings", open=False):
-                system_prompt = gr.Textbox(
-                                value=DEFAULT_SYSTEM_PROMPT,
-                                label="System Prompt",
-                                lines=2
-                )
-        with gr.Row():
-                        max_tokens = gr.Slider(
-                            minimum=64,
-                                            maximum=2048,
-                            value=512,
-                            step=64,
-                            label="Max Tokens"
         )
             temperature = gr.Slider(
-                                minimum=0.1,
-                                maximum=2.0,
-                                value=0.7,
-                                step=0.1,
-                                label="Temperature"
             )
     clear_btn = gr.Button("🗑️ Clear Chat")
     def respond(message, history, system_prompt, max_tokens, temperature):
-                if not message.strip():
-                                return "", history
-                            response = generate_response(message, history, system_prompt, max_tokens, temperature)
         history.append((message, response))
         return "", history
     msg.submit(respond, [msg, chatbot, system_prompt, max_tokens, temperature], [msg, chatbot])
     submit_btn.click(respond, [msg, chatbot, system_prompt, max_tokens, temperature], [msg, chatbot])
     clear_btn.click(lambda: [], None, chatbot)
 if __name__ == "__main__":
-        iface.launch(server_name="0.0.0.0", server_port=7860)
-            )
-                        )
-                )
-                )
-        )

 from huggingface_hub import InferenceClient
 # Model configuration - Using Inference API
+MODEL_NAME = "RedHatAI/Kimi-K2-Instruct-quantized.w4a16"
 DEFAULT_SYSTEM_PROMPT = "You are Kimi, an AI assistant created by Moonshot AI. You are helpful, harmless, and honest."
 # Initialize Inference Client
 client = None
 def init_client():
+    """Initialize the Hugging Face Inference Client"""""
+    global client
+    hf_token = os.environ.get("HF_TOKEN")
+    if hf_token:
+        client = InferenceClient(token=hf_token)
+        print("Inference client initialized successfully")
+        return True
+    else:
         print("Warning: HF_TOKEN not found. Please set it in Space secrets.")
+        return False
 def generate_response(message, history, system_prompt, max_tokens, temperature):
+    """Generate response using Hugging Face Inference API"""""
     global client
     if client is None:
+        if not init_client():
+            return "Error: HF_TOKEN not configured. Please add it in Space settings."
+    try:
+        # Build messages
+        messages = [{"role": "system", "content": system_prompt or DEFAULT_SYSTEM_PROMPT}]
+        for h in history:
+            if h[0]:
+                messages.append({"role": "user", "content": h[0]})
+            if h[1]:
+                messages.append({"role": "assistant", "content": h[1]})
+        messages.append({"role": "user", "content": message})
         # Call Inference API
         response = client.chat_completion(
+            model=MODEL_NAME,
+            messages=messages,
+            max_tokens=int(max_tokens),
+            temperature=float(temperature)
         )
         return response.choices[0].message.content
+    except Exception as e:
         return f"Error: {str(e)}"
 # Create interface
 client_ready = init_client()
 with gr.Blocks(title="Kimi-K2 Chat", theme=gr.themes.Soft()) as iface:
+    gr.Markdown("""
+    # 🤖 Kimi-K2 Instruct Chat
+    **Powered by Hugging Face Inference API**
+    This space uses the Kimi-K2-Instruct quantized model via API for efficient inference.
+    """)
     if not client_ready:
+        gr.Markdown("⚠️ **Warning:** HF_TOKEN not found. Please configure it in Space secrets.")
     chatbot = gr.Chatbot(height=450, label="Chat")
     with gr.Row():
+        msg = gr.Textbox(
+            placeholder="Type your message here...",
+            label="Your Message",
+            scale=4,
+            lines=2
+        )
         submit_btn = gr.Button("Send 🚀", variant="primary", scale=1)
     with gr.Accordion("⚙️ Settings", open=False):
+        system_prompt = gr.Textbox(
+            value=DEFAULT_SYSTEM_PROMPT,
+            label="System Prompt",
+            lines=2
         )
+        with gr.Row():
+            max_tokens = gr.Slider(
+                minimum=64,
+                maximum=2048,
+                value=512,
+                step=64,
+                label="Max Tokens"
+            )
             temperature = gr.Slider(
+                minimum=0.1,
+                maximum=2.0,
+                value=0.7,
+                step=0.1,
+                label="Temperature"
             )
     clear_btn = gr.Button("🗑️ Clear Chat")
     def respond(message, history, system_prompt, max_tokens, temperature):
+        if not message.strip():
+            return "", history
+        response = generate_response(message, history, system_prompt, max_tokens, temperature)
         history.append((message, response))
         return "", history
     msg.submit(respond, [msg, chatbot, system_prompt, max_tokens, temperature], [msg, chatbot])
     submit_btn.click(respond, [msg, chatbot, system_prompt, max_tokens, temperature], [msg, chatbot])
     clear_btn.click(lambda: [], None, chatbot)
 if __name__ == "__main__":
+    iface.launch(server_name="0.0.0.0", server_port=7860)