"""Gradio chat Space backed by the Hugging Face Inference API.

Fixes over the previous revision:
- Repaired a syntax error: a warning-string literal was split across a raw
  newline (unterminated string).
- Cleaned malformed docstrings (stray extra quotes).
- Aligned all user-facing text with the actual model (Mistral-7B-Instruct),
  which was previously mislabeled as "Kimi-K2".
"""

import os

import gradio as gr
from huggingface_hub import InferenceClient

# Model configuration — served remotely via the Inference API (no local weights).
MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.3"
DEFAULT_SYSTEM_PROMPT = "You are a helpful AI assistant powered by Mistral."

# Lazily-initialized singleton client; None until init_client() succeeds.
client = None


def init_client():
    """Initialize the Hugging Face Inference Client.

    Reads the HF_TOKEN environment variable (configured in Space secrets).

    Returns:
        bool: True when the client was created, False when the token is missing.
    """
    global client
    hf_token = os.environ.get("HF_TOKEN")
    if hf_token:
        client = InferenceClient(token=hf_token)
        print("Inference client initialized successfully")
        return True
    print("Warning: HF_TOKEN not found. Please set it in Space secrets.")
    return False


def generate_response(message, history, system_prompt, max_tokens, temperature):
    """Generate an assistant reply using the Hugging Face Inference API.

    Args:
        message: Latest user message.
        history: List of (user, assistant) message pairs from the Chatbot.
        system_prompt: System prompt; falls back to DEFAULT_SYSTEM_PROMPT if empty.
        max_tokens: Maximum tokens to generate (coerced to int).
        temperature: Sampling temperature (coerced to float).

    Returns:
        str: The model's reply, or a human-readable error message on failure.
    """
    global client
    # Retry client creation on first use in case the token was added after startup.
    if client is None and not init_client():
        return "Error: HF_TOKEN not configured. Please add it in Space settings."

    try:
        # Rebuild the full conversation as OpenAI-style chat messages.
        messages = [
            {"role": "system", "content": system_prompt or DEFAULT_SYSTEM_PROMPT}
        ]
        for user_turn, assistant_turn in history:
            if user_turn:
                messages.append({"role": "user", "content": user_turn})
            if assistant_turn:
                messages.append({"role": "assistant", "content": assistant_turn})
        messages.append({"role": "user", "content": message})

        response = client.chat_completion(
            model=MODEL_NAME,
            messages=messages,
            max_tokens=int(max_tokens),
            temperature=float(temperature),
        )
        return response.choices[0].message.content
    except Exception as e:
        # Surface API failures (rate limits, auth, model loading) in the chat UI
        # instead of crashing the Space.
        return f"Error: {str(e)}"


# --- Interface construction (runs at import time, as Spaces expect) ---
print(f"===== {MODEL_NAME} Chat Space =====")
print(f"Using Inference API with model: {MODEL_NAME}")

# Initialize client at startup so the UI can warn about a missing token.
client_ready = init_client()

with gr.Blocks(title="Mistral-7B Chat", theme=gr.themes.Soft()) as iface:
    gr.Markdown(
        """
        # 🤖 Mistral-7B Instruct Chat
        **Powered by Hugging Face Inference API**

        This Space calls the Mistral-7B-Instruct model via API for efficient inference.
        """
    )
    if not client_ready:
        gr.Markdown(
            "⚠️ **Warning:** HF_TOKEN not found. "
            "Please configure it in Space secrets."
        )

    chatbot = gr.Chatbot(height=450, label="Chat")
    with gr.Row():
        msg = gr.Textbox(
            placeholder="Type your message here...",
            label="Your Message",
            scale=4,
            lines=2,
        )
        submit_btn = gr.Button("Send 🚀", variant="primary", scale=1)

    with gr.Accordion("⚙️ Settings", open=False):
        system_prompt = gr.Textbox(
            value=DEFAULT_SYSTEM_PROMPT,
            label="System Prompt",
            lines=2,
        )
        with gr.Row():
            max_tokens = gr.Slider(
                minimum=64, maximum=2048, value=512, step=64, label="Max Tokens"
            )
            temperature = gr.Slider(
                minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"
            )

    clear_btn = gr.Button("🗑️ Clear Chat")

    def respond(message, history, system_prompt, max_tokens, temperature):
        """Event handler: append the (user, assistant) turn and clear the textbox."""
        # Ignore whitespace-only submissions.
        if not message.strip():
            return "", history
        response = generate_response(
            message, history, system_prompt, max_tokens, temperature
        )
        history.append((message, response))
        return "", history

    handler_inputs = [msg, chatbot, system_prompt, max_tokens, temperature]
    msg.submit(respond, handler_inputs, [msg, chatbot])
    submit_btn.click(respond, handler_inputs, [msg, chatbot])
    clear_btn.click(lambda: [], None, chatbot)

if __name__ == "__main__":
    iface.launch(server_name="0.0.0.0", server_port=7860)