MultiModelCoder

Running

Update app.py

146c824 verified 4 months ago

1.91 kB

	import gradio as gr
	from huggingface_hub import InferenceClient

	# Hugging Face Hub id of the chat model (Qwen 2.5 Coder, 32B instruct variant);
	# used to construct the InferenceClient in respond() and shown in the sidebar.
	model_id = "Qwen/Qwen2.5-Coder-32B-Instruct"

	def _history_to_messages(history):
	    """Convert chat history into a flat list of role/content dicts."""
	    converted = []
	    for turn in history or []:
	        if isinstance(turn, dict):
	            # Dict-style ("messages") entry: forward only role and content.
	            # NOTE(review): assumes any extra keys on the entry are UI-only
	            # and not wanted by the chat API - confirm against the Gradio
	            # version in use.
	            converted.append(
	                {"role": turn["role"], "content": turn["content"]}
	            )
	            continue

	        # Pair-style entry: (user message, assistant message). Either slot
	        # may be None, in which case there is nothing to send for it.
	        user_msg, bot_msg = turn
	        if user_msg is not None:
	            converted.append({"role": "user", "content": user_msg})
	        if bot_msg is not None:
	            converted.append({"role": "assistant", "content": bot_msg})
	    return converted


	def respond(message: str, history: list, system_message: str, temperature: float):
	    """Stream a chat completion for ``message`` from the configured model.

	    This is a generator: after every non-empty streamed chunk it yields the
	    full response text accumulated so far.

	    Args:
	        message: The current user message.
	        history: Previous turns, either as (user, assistant) pairs or as
	            dicts with "role" and "content" keys. ``None`` or an empty
	            list means no prior turns.
	        system_message: Content of the leading system message.
	        temperature: Sampling temperature forwarded to the model.

	    Yields:
	        The accumulated response text, or a single "Error: ..." string if
	        the request or the stream raises.
	    """
	    # A new client is created on every call.
	    # NOTE(review): no token is passed explicitly; presumably credentials
	    # come from the environment - confirm.
	    client = InferenceClient(model_id)

	    # System prompt first, then prior turns, then the new user message.
	    messages = [{"role": "system", "content": system_message}]
	    messages.extend(_history_to_messages(history))
	    messages.append({"role": "user", "content": message})

	    response_text = ""
	    try:
	        stream = client.chat_completion(
	            messages,
	            max_tokens=2048,
	            stream=True,
	            temperature=temperature,
	            top_p=0.9,
	        )
	        for chunk in stream:
	            # Skip chunks that carry no choices; indexing [0] on them would
	            # raise and replace the already-streamed answer with an error.
	            if not chunk.choices:
	                continue
	            content = chunk.choices[0].delta.content
	            if content:
	                response_text += content
	                yield response_text
	    except Exception as e:
	        # Top-level boundary for the UI: show the failure in the chat
	        # window instead of raising.
	        yield f"Error: {e}. The model might be busy."

	# Assemble the Gradio interface
	with gr.Blocks(fill_height=True) as demo:
	    # Sidebar: title, the active model id, and the sign-in button.
	    with gr.Sidebar():
	        gr.Markdown("# AI Coding Assistant")
	        gr.Markdown(f"Running {model_id}")
	        gr.LoginButton("Sign in")

	    # Extra controls handed to ChatInterface as additional_inputs; they line
	    # up with respond's system_message and temperature parameters.
	    system_box = gr.Textbox(
	        value="You are a helpful assistant.",
	        label="System Instruction",
	        lines=2,
	    )
	    temperature_slider = gr.Slider(
	        minimum=0.1,
	        maximum=2.0,
	        value=0.7,
	        step=0.1,
	        label="Temperature",
	    )

	    gr.ChatInterface(
	        respond,
	        additional_inputs=[system_box, temperature_slider],
	    )

	# Start the app.
	demo.launch()