MultiModelCoder

Sleeping

Update app.py

92a045a verified about 2 months ago

1.54 kB

	import gradio as gr
	from huggingface_hub import InferenceClient

	# Hub id of the chat model; it is passed to InferenceClient in respond()
	# and shown in the sidebar text.
	# We use the 32B Coder model which is generally available on the free API
	model_id = "Qwen/Qwen2.5-Coder-32B-Instruct"

	def _history_to_messages(history):
	    """Convert chat history into a list of role/content message dicts.

	    Accepts either ``(user, assistant)`` pairs or dicts that already carry
	    ``"role"`` and ``"content"`` keys. Missing (``None``) contents are
	    skipped so the request never contains an empty turn.
	    """
	    messages = []
	    for turn in history or []:
	        if isinstance(turn, dict):
	            # Already in role/content form: keep only the two keys the
	            # API needs and drop any extra metadata.
	            role = turn.get("role")
	            content = turn.get("content")
	            if role and content is not None:
	                messages.append({"role": role, "content": content})
	            continue

	        # Pair form: (user message, assistant reply).
	        user_msg, bot_msg = turn
	        if user_msg is not None:
	            messages.append({"role": "user", "content": user_msg})
	        if bot_msg is not None:
	            messages.append({"role": "assistant", "content": bot_msg})
	    return messages


	def respond(message, history):
	    """Stream a chat completion for ``message`` given the prior ``history``.

	    Args:
	        message: The newest user message.
	        history: Earlier turns, either as ``(user, assistant)`` pairs or as
	            ``{"role": ..., "content": ...}`` dicts; ``None`` is treated as
	            an empty history.

	    Yields:
	        The reply text accumulated so far, once per streamed chunk that
	        carries text. If the request raises, an ``"Error: ..."`` string is
	        yielded instead.
	    """
	    # Initialize the client inside the function to handle sessions correctly
	    client = InferenceClient(model_id)

	    # Build the message history for the API, ending with the new message
	    messages = _history_to_messages(history)
	    messages.append({"role": "user", "content": message})

	    response_text = ""
	    try:
	        # Stream the response
	        stream = client.chat_completion(
	            messages,
	            max_tokens=2048,
	            stream=True,
	            temperature=0.7,
	        )
	        for chunk in stream:
	            # An empty choices list would make choices[0] raise IndexError
	            if not chunk.choices:
	                continue
	            content = chunk.choices[0].delta.content
	            if content:
	                response_text += content
	                yield response_text
	    except Exception as e:
	        # Surface the failure in the chat window instead of crashing the UI
	        yield f"Error: {str(e)}. The model might be busy or too large for the current free tier."

	# Assemble the page: an informational sidebar next to the chat panel.
	with gr.Blocks(fill_height=True) as demo:
	    with gr.Sidebar():
	        # Sidebar text, rendered top to bottom as separate Markdown blocks.
	        sidebar_notes = (
	            "# AI Coding Assistant",
	            f"Running {model_id}",
	            "If you see an error, the free API might be overloaded. Try again in a minute.",
	        )
	        for note in sidebar_notes:
	            gr.Markdown(note)
	        gr.LoginButton("Sign in")

	    # Chat panel driven by the streaming respond() generator.
	    gr.ChatInterface(respond)

	# Start the app.
	demo.launch()