| | import gradio as gr |
| | import os |
| | from huggingface_hub import InferenceClient |
| |
|
| | |
# Model id sent with every chat-completion request to the HF Inference API.
MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.3"
# Fallback system prompt used when the UI's system-prompt field is empty.
DEFAULT_SYSTEM_PROMPT = "You are a helpful AI assistant powered by Mistral."
# Lazily-created InferenceClient; populated by init_client() once HF_TOKEN is available.
client = None
| |
|
def init_client():
    """Initialize the module-level Hugging Face ``InferenceClient``.

    Reads the ``HF_TOKEN`` environment variable (configured as a Space
    secret) and, when present, creates the client stored in the module
    global ``client``.

    Returns:
        bool: True if the client was created, False when HF_TOKEN is unset.
    """
    global client
    hf_token = os.environ.get("HF_TOKEN")
    # Guard clause: without a token there is nothing to initialize.
    if not hf_token:
        print("Warning: HF_TOKEN not found. Please set it in Space secrets.")
        return False
    client = InferenceClient(token=hf_token)
    print("Inference client initialized successfully")
    return True
| |
|
def generate_response(message, history, system_prompt, max_tokens, temperature):
    """Generate an assistant reply via the Hugging Face Inference API.

    Args:
        message: Latest user message.
        history: List of ``(user, assistant)`` pairs from the Gradio chatbot;
            either element may be falsy and is then skipped.
        system_prompt: System prompt; falls back to DEFAULT_SYSTEM_PROMPT when empty.
        max_tokens: Maximum tokens to generate (coerced to int).
        temperature: Sampling temperature (coerced to float).

    Returns:
        str: The assistant's reply, or an ``"Error: ..."`` string on failure
        (the UI displays it instead of crashing the Space).
    """
    global client

    # Lazily initialize the client on first use.
    if client is None and not init_client():
        return "Error: HF_TOKEN not configured. Please add it in Space settings."

    # Build the OpenAI-style message list: system prompt, prior turns, new message.
    messages = [{"role": "system", "content": system_prompt or DEFAULT_SYSTEM_PROMPT}]
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})

    # Keep the try body to just the network call; message building cannot raise
    # API errors and was needlessly inside the original try block.
    try:
        response = client.chat_completion(
            model=MODEL_NAME,
            messages=messages,
            max_tokens=int(max_tokens),
            temperature=float(temperature),
        )
        return response.choices[0].message.content
    except Exception as e:
        # Surface API/network failures to the chat window rather than raising.
        return f"Error: {str(e)}"
| |
|
| | |
# Startup banner — derived from MODEL_NAME so the log always reflects the
# model actually in use (the original hard-coded "Kimi K2" label contradicted
# the configured Mistral model).
print(f"===== {MODEL_NAME} Chat =====")
print(f"Using Inference API with model: {MODEL_NAME}")

# Eagerly initialize the client so the UI can warn when HF_TOKEN is missing.
client_ready = init_client()
| |
|
# --- Gradio UI ---------------------------------------------------------------
# NOTE(review): the original labels said "Kimi-K2" while MODEL_NAME is a
# Mistral checkpoint, and the emoji were mojibake (UTF-8 read through a Thai
# codepage). The title/description now interpolate MODEL_NAME and the emoji
# are restored to their presumably intended characters.
with gr.Blocks(title="Mistral Chat", theme=gr.themes.Soft()) as iface:
    gr.Markdown(f"""
    # 🤖 Mistral Instruct Chat
    **Powered by Hugging Face Inference API**

    This Space talks to `{MODEL_NAME}` via the Inference API for efficient serving.
    """)

    # Shown when init_client() failed at import time (missing HF_TOKEN secret).
    if not client_ready:
        gr.Markdown("⚠️ **Warning:** HF_TOKEN not found. Please configure it in Space secrets.")

    chatbot = gr.Chatbot(height=450, label="Chat")

    with gr.Row():
        msg = gr.Textbox(
            placeholder="Type your message here...",
            label="Your Message",
            scale=4,
            lines=2,
        )
        submit_btn = gr.Button("Send 🚀", variant="primary", scale=1)

    with gr.Accordion("⚙️ Settings", open=False):
        system_prompt = gr.Textbox(
            value=DEFAULT_SYSTEM_PROMPT,
            label="System Prompt",
            lines=2,
        )
        with gr.Row():
            max_tokens = gr.Slider(
                minimum=64,
                maximum=2048,
                value=512,
                step=64,
                label="Max Tokens",
            )
            temperature = gr.Slider(
                minimum=0.1,
                maximum=2.0,
                value=0.7,
                step=0.1,
                label="Temperature",
            )

    clear_btn = gr.Button("🗑️ Clear Chat")

    def respond(message, history, system_prompt, max_tokens, temperature):
        """Handle one user turn: query the API and append the exchange to history."""
        # Ignore empty / whitespace-only submissions.
        if not message.strip():
            return "", history
        response = generate_response(message, history, system_prompt, max_tokens, temperature)
        history.append((message, response))
        # First output clears the textbox; second refreshes the chatbot.
        return "", history

    # Enter key and the Send button share the same handler and wiring.
    msg.submit(respond, [msg, chatbot, system_prompt, max_tokens, temperature], [msg, chatbot])
    submit_btn.click(respond, [msg, chatbot, system_prompt, max_tokens, temperature], [msg, chatbot])
    clear_btn.click(lambda: [], None, chatbot)
| |
|
if __name__ == "__main__":
    # Bind to all interfaces on port 7860 — the port HF Spaces expects.
    iface.launch(server_name="0.0.0.0", server_port=7860)