| | import gradio as gr |
| | import os |
| | from huggingface_hub import InferenceClient |
| |
|
| | |
# Model id sent with every chat-completion request to the HF Inference API.
MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.3"
# Fallback system prompt used when the UI's system-prompt field is empty.
DEFAULT_SYSTEM_PROMPT = "You are a helpful AI assistant powered by Mistral."
# Lazily-created InferenceClient; populated by init_client() once HF_TOKEN is available.
client = None
| |
|
def init_client():
    """Initialize the module-level Hugging Face ``InferenceClient``.

    Reads the ``HF_TOKEN`` environment variable (configured as a Space
    secret) and, when present, creates the client stored in the module
    global ``client``.

    Returns:
        bool: True if the client was created, False when HF_TOKEN is unset.
    """
    global client
    hf_token = os.environ.get("HF_TOKEN")
    # Guard clause: without a token there is nothing to initialize.
    if not hf_token:
        print("Warning: HF_TOKEN not found. Please set it in Space secrets.")
        return False
    client = InferenceClient(token=hf_token)
    print("Inference client initialized successfully")
    return True
| |
|
def generate_response(message, history, system_prompt, max_tokens, temperature):
    """Generate an assistant reply via the Hugging Face Inference API.

    Args:
        message: Latest user message.
        history: List of ``(user, assistant)`` pairs from the Gradio chatbot;
            either element may be falsy and is then skipped.
        system_prompt: System prompt; falls back to DEFAULT_SYSTEM_PROMPT when empty.
        max_tokens: Maximum tokens to generate (coerced to int).
        temperature: Sampling temperature (coerced to float).

    Returns:
        str: The assistant's reply, or an ``"Error: ..."`` string on failure
        (the UI displays it instead of crashing the Space).
    """
    global client

    # Lazily initialize the client on first use.
    if client is None and not init_client():
        return "Error: HF_TOKEN not configured. Please add it in Space settings."

    # Build the OpenAI-style message list: system prompt, prior turns, new message.
    messages = [{"role": "system", "content": system_prompt or DEFAULT_SYSTEM_PROMPT}]
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})

    # Keep the try body to just the network call; message building cannot raise
    # API errors and was needlessly inside the original try block.
    try:
        response = client.chat_completion(
            model=MODEL_NAME,
            messages=messages,
            max_tokens=int(max_tokens),
            temperature=float(temperature),
        )
        return response.choices[0].message.content
    except Exception as e:
        # Surface API/network failures to the chat window rather than raising.
        return f"Error: {str(e)}"
| |
|
| | |
# Startup banner — derived from MODEL_NAME so the log always reflects the
# model actually in use (the original hard-coded "Kimi K2" label contradicted
# the configured Mistral model).
print(f"===== {MODEL_NAME} Chat =====")
print(f"Using Inference API with model: {MODEL_NAME}")

# Eagerly initialize the client so the UI can warn when HF_TOKEN is missing.
client_ready = init_client()
| |
|
# --- Gradio UI ---------------------------------------------------------------
# NOTE(review): the original labels said "Kimi-K2" while MODEL_NAME is a
# Mistral checkpoint, and the emoji were mojibake (UTF-8 read through a Thai
# codepage). The title/description now interpolate MODEL_NAME and the emoji
# are restored to their presumably intended characters.
with gr.Blocks(title="Mistral Chat", theme=gr.themes.Soft()) as iface:
    gr.Markdown(f"""
    # 🤖 Mistral Instruct Chat
    **Powered by Hugging Face Inference API**

    This Space talks to `{MODEL_NAME}` via the Inference API for efficient serving.
    """)

    # Shown when init_client() failed at import time (missing HF_TOKEN secret).
    if not client_ready:
        gr.Markdown("⚠️ **Warning:** HF_TOKEN not found. Please configure it in Space secrets.")

    chatbot = gr.Chatbot(height=450, label="Chat")

    with gr.Row():
        msg = gr.Textbox(
            placeholder="Type your message here...",
            label="Your Message",
            scale=4,
            lines=2,
        )
        submit_btn = gr.Button("Send 🚀", variant="primary", scale=1)

    with gr.Accordion("⚙️ Settings", open=False):
        system_prompt = gr.Textbox(
            value=DEFAULT_SYSTEM_PROMPT,
            label="System Prompt",
            lines=2,
        )
        with gr.Row():
            max_tokens = gr.Slider(
                minimum=64,
                maximum=2048,
                value=512,
                step=64,
                label="Max Tokens",
            )
            temperature = gr.Slider(
                minimum=0.1,
                maximum=2.0,
                value=0.7,
                step=0.1,
                label="Temperature",
            )

    clear_btn = gr.Button("🗑️ Clear Chat")

    def respond(message, history, system_prompt, max_tokens, temperature):
        """Handle one user turn: query the API and append the exchange to history."""
        # Ignore empty / whitespace-only submissions.
        if not message.strip():
            return "", history
        response = generate_response(message, history, system_prompt, max_tokens, temperature)
        history.append((message, response))
        # First output clears the textbox; second refreshes the chatbot.
        return "", history

    # Enter key and the Send button share the same handler and wiring.
    msg.submit(respond, [msg, chatbot, system_prompt, max_tokens, temperature], [msg, chatbot])
    submit_btn.click(respond, [msg, chatbot, system_prompt, max_tokens, temperature], [msg, chatbot])
    clear_btn.click(lambda: [], None, chatbot)
| |
|
if __name__ == "__main__":
    # Bind to all interfaces on port 7860 — the port HF Spaces expects.
    iface.launch(server_name="0.0.0.0", server_port=7860)