import gradio as gr
import os
from huggingface_hub import InferenceClient
# Model configuration - Using Inference API
# Repo id of the chat model requested from the hosted Inference API.
MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.3"
# Fallback system prompt used when the UI's system-prompt field is empty.
DEFAULT_SYSTEM_PROMPT = "You are a helpful AI assistant powered by Mistral."
# Module-level InferenceClient; created lazily by init_client().
client = None
def init_client():
    """Initialize the global Hugging Face Inference Client.

    Reads the ``HF_TOKEN`` environment variable; without it the hosted
    Inference API cannot be authenticated.

    Returns:
        bool: True if the client was created, False if HF_TOKEN is missing.
    """
    global client
    hf_token = os.environ.get("HF_TOKEN")
    # Guard clause: no token means no client — warn and bail out early.
    if not hf_token:
        print("Warning: HF_TOKEN not found. Please set it in Space secrets.")
        return False
    client = InferenceClient(token=hf_token)
    print("Inference client initialized successfully")
    return True
def generate_response(message, history, system_prompt, max_tokens, temperature):
    """Generate a chat completion via the Hugging Face Inference API.

    Args:
        message: Latest user message.
        history: List of (user, assistant) pairs from the Chatbot component.
        system_prompt: System prompt; falls back to DEFAULT_SYSTEM_PROMPT
            when empty/None.
        max_tokens: Maximum tokens to generate (coerced to int).
        temperature: Sampling temperature (coerced to float).

    Returns:
        str: The assistant's reply text, or an "Error: ..." string on failure.
    """
    global client
    # Lazily (re)initialize in case startup init failed or was skipped.
    if client is None:
        if not init_client():
            return "Error: HF_TOKEN not configured. Please add it in Space settings."
    try:
        # Rebuild the full conversation as chat-completion messages.
        messages = [{"role": "system", "content": system_prompt or DEFAULT_SYSTEM_PROMPT}]
        for user_turn, bot_turn in history:
            if user_turn:
                messages.append({"role": "user", "content": user_turn})
            if bot_turn:
                messages.append({"role": "assistant", "content": bot_turn})
        messages.append({"role": "user", "content": message})
        # Call Inference API
        response = client.chat_completion(
            model=MODEL_NAME,
            messages=messages,
            max_tokens=int(max_tokens),
            temperature=float(temperature),
        )
        return response.choices[0].message.content
    except Exception as e:
        # Surface API/network errors to the UI instead of crashing the app.
        return f"Error: {str(e)}"
# Create interface
# NOTE(review): the original banner said "Kimi K2 Thinking Dev" although the
# configured model is Mistral — the banner now matches MODEL_NAME.
print("===== Mistral Chat Dev =====")
print(f"Using Inference API with model: {MODEL_NAME}")

# Initialize client at startup so the UI can warn if HF_TOKEN is missing.
client_ready = init_client()
# Build the Gradio Blocks UI.
# NOTE(review): the original UI copy advertised "Kimi-K2 Instruct" while
# MODEL_NAME is a Mistral model; the text below reflects the configured model.
with gr.Blocks(title="Mistral Chat", theme=gr.themes.Soft()) as iface:
    gr.Markdown(f"""
    # 🤖 Mistral-7B Instruct Chat
    **Powered by Hugging Face Inference API**
    This space uses `{MODEL_NAME}` via the Inference API for efficient inference.
    """)
    # Warn immediately when the token was missing at startup.
    if not client_ready:
        gr.Markdown("⚠️ **Warning:** HF_TOKEN not found. Please configure it in Space secrets.")

    chatbot = gr.Chatbot(height=450, label="Chat")
    with gr.Row():
        msg = gr.Textbox(
            placeholder="Type your message here...",
            label="Your Message",
            scale=4,
            lines=2,
        )
        submit_btn = gr.Button("Send 🚀", variant="primary", scale=1)

    with gr.Accordion("⚙️ Settings", open=False):
        system_prompt = gr.Textbox(
            value=DEFAULT_SYSTEM_PROMPT,
            label="System Prompt",
            lines=2,
        )
        with gr.Row():
            max_tokens = gr.Slider(
                minimum=64,
                maximum=2048,
                value=512,
                step=64,
                label="Max Tokens",
            )
            temperature = gr.Slider(
                minimum=0.1,
                maximum=2.0,
                value=0.7,
                step=0.1,
                label="Temperature",
            )
    clear_btn = gr.Button("🗑️ Clear Chat")

    def respond(message, history, system_prompt, max_tokens, temperature):
        """Submit handler: append (message, reply) to the chat history.

        Returns ("", history) so the textbox clears and the Chatbot updates.
        """
        # Ignore empty / whitespace-only submissions.
        if not message.strip():
            return "", history
        response = generate_response(message, history, system_prompt, max_tokens, temperature)
        history.append((message, response))
        return "", history

    # Enter key and button both trigger the same handler.
    msg.submit(respond, [msg, chatbot, system_prompt, max_tokens, temperature], [msg, chatbot])
    submit_btn.click(respond, [msg, chatbot, system_prompt, max_tokens, temperature], [msg, chatbot])
    clear_btn.click(lambda: [], None, chatbot)
if __name__ == "__main__":
    # Bind to all interfaces on the standard Hugging Face Spaces port.
    iface.launch(server_name="0.0.0.0", server_port=7860)