# Hugging Face Space app (commit 8520334, verified).
# Fix: Complete MODEL_NAME string (add ".3")
import gradio as gr
import os
from huggingface_hub import InferenceClient
# ---- Model configuration (served through the HF Inference API) ----
MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.3"
DEFAULT_SYSTEM_PROMPT = "You are a helpful AI assistant powered by Mistral."

# Shared InferenceClient instance; created lazily by init_client().
client = None
def init_client():
    """Initialize the global Hugging Face InferenceClient.

    Reads the ``HF_TOKEN`` environment variable. On success, creates the
    shared ``client`` and returns True; otherwise leaves ``client``
    untouched, prints a warning, and returns False.

    Returns:
        bool: True if the client was initialized, False otherwise.
    """
    global client
    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        # Without a token the Inference API cannot be called at all.
        print("Warning: HF_TOKEN not found. Please set it in Space secrets.")
        return False
    client = InferenceClient(token=hf_token)
    print("Inference client initialized successfully")
    return True
def generate_response(message, history, system_prompt, max_tokens, temperature):
    """Generate a chat reply via the Hugging Face Inference API.

    Args:
        message: The new user message.
        history: Prior (user, assistant) turns from the Gradio chatbot.
        system_prompt: System prompt; falls back to DEFAULT_SYSTEM_PROMPT
            when empty or None.
        max_tokens: Maximum tokens to sample (coerced to int).
        temperature: Sampling temperature (coerced to float).

    Returns:
        str: The assistant reply, or an "Error: ..." string on failure.
    """
    global client
    # Lazily (re)initialize if startup initialization did not happen.
    if client is None and not init_client():
        return "Error: HF_TOKEN not configured. Please add it in Space settings."
    try:
        # Rebuild the full conversation in chat-completion message format.
        messages = [{"role": "system", "content": system_prompt or DEFAULT_SYSTEM_PROMPT}]
        for turn in history:
            # Each turn is a (user, assistant) pair; skip empty halves.
            if turn[0]:
                messages.append({"role": "user", "content": turn[0]})
            if turn[1]:
                messages.append({"role": "assistant", "content": turn[1]})
        messages.append({"role": "user", "content": message})

        response = client.chat_completion(
            model=MODEL_NAME,
            messages=messages,
            max_tokens=int(max_tokens),
            temperature=float(temperature),
        )
        return response.choices[0].message.content
    except Exception as e:
        # Surface API failures to the UI instead of crashing the app.
        return f"Error: {str(e)}"
# ---- Startup ----
# NOTE(review): the original banner said "Kimi K2 Thinking Dev" although
# MODEL_NAME is a Mistral model; the banner is now derived from MODEL_NAME
# so the two cannot diverge again.
print(f"===== {MODEL_NAME} Chat Space =====")
print(f"Using Inference API with model: {MODEL_NAME}")

# Initialize the client at startup so the UI can warn about a missing token.
client_ready = init_client()
# ---- Gradio UI ----
# NOTE(review): UI labels previously said "Kimi-K2" although the backend model
# is MODEL_NAME (a Mistral model); labels are now derived from MODEL_NAME.
# Mojibake emoji (UTF-8 decoded as cp874) have also been restored.
model_short_name = MODEL_NAME.split("/")[-1]

with gr.Blocks(title=f"{model_short_name} Chat", theme=gr.themes.Soft()) as iface:
    gr.Markdown(
        f"""
# 🤖 {model_short_name} Chat
**Powered by Hugging Face Inference API**
This space uses the {MODEL_NAME} model via API for efficient inference.
"""
    )
    if not client_ready:
        gr.Markdown("⚠️ **Warning:** HF_TOKEN not found. Please configure it in Space secrets.")

    chatbot = gr.Chatbot(height=450, label="Chat")
    with gr.Row():
        msg = gr.Textbox(
            placeholder="Type your message here...",
            label="Your Message",
            scale=4,
            lines=2,
        )
        submit_btn = gr.Button("Send 🚀", variant="primary", scale=1)

    with gr.Accordion("⚙️ Settings", open=False):
        system_prompt = gr.Textbox(
            value=DEFAULT_SYSTEM_PROMPT,
            label="System Prompt",
            lines=2,
        )
        with gr.Row():
            max_tokens = gr.Slider(
                minimum=64,
                maximum=2048,
                value=512,
                step=64,
                label="Max Tokens",
            )
            temperature = gr.Slider(
                minimum=0.1,
                maximum=2.0,
                value=0.7,
                step=0.1,
                label="Temperature",
            )

    clear_btn = gr.Button("🗑️ Clear Chat")

    def respond(message, history, system_prompt, max_tokens, temperature):
        """Handle one submit: append the (user, assistant) pair to the chat."""
        if not message.strip():
            # Ignore empty submissions; leave the history unchanged.
            return "", history
        response = generate_response(message, history, system_prompt, max_tokens, temperature)
        history.append((message, response))
        # First output clears the textbox; second updates the chatbot.
        return "", history

    # Wire both Enter-to-send and the button to the same handler.
    msg.submit(respond, [msg, chatbot, system_prompt, max_tokens, temperature], [msg, chatbot])
    submit_btn.click(respond, [msg, chatbot, system_prompt, max_tokens, temperature], [msg, chatbot])
    clear_btn.click(lambda: [], None, chatbot)
if __name__ == "__main__":
    # 0.0.0.0:7860 is the standard binding for a Hugging Face Space.
    iface.launch(server_name="0.0.0.0", server_port=7860)