# app.py — recipe-suggestion chatbot (Hugging Face Space by commonlemon,
# commit 9b5d97e, verified). Web-page residue converted to a comment so
# the file parses as Python.
import gradio as gr
from huggingface_hub import InferenceClient #InferenceClient class
# Module-level inference client pinned to Qwen2.5-7B-Instruct; shared by
# every call to respond() below.
client = InferenceClient("Qwen/Qwen2.5-7B-Instruct")
def respond(message, history):
    """Stream a recipe suggestion for the user's latest message.

    Parameters
    ----------
    message : str
        The newest user message from the chat box.
    history : list
        Prior turns supplied by gr.ChatInterface. Depending on the
        interface's ``type`` setting this is either a list of
        openai-style dicts ({"role": ..., "content": ...}) or a list of
        [user, assistant] pairs — both are normalized below.

    Yields
    ------
    str
        The accumulated assistant reply so far, one chunk at a time,
        so the UI renders the answer token-by-token.
    """
    messages = [
        {
            "role": "system",
            "content": """You are a recipe assistant who suggests simple recipies that take less than 30 minutes based on the
ingredients the user has and their dietary restrictions."""
        }
    ]
    # Normalize history: accept both dict-style turns and legacy
    # [user, assistant] tuple pairs so the handler works regardless of
    # the ChatInterface history format.
    for turn in history or []:
        if isinstance(turn, dict):
            messages.append(turn)
        else:
            user_text, bot_text = turn
            if user_text:
                messages.append({"role": "user", "content": user_text})
            if bot_text:
                messages.append({"role": "assistant", "content": bot_text})
    messages.append({"role": "user", "content": message})

    # Stream the completion: yield the growing response as soon as each
    # chunk arrives instead of returning everything at once.
    response = ""
    for chunk in client.chat_completion(
        messages,
        max_tokens=500,
        temperature=0.5,
        stream=True,
    ):
        # The final/keep-alive stream chunks can carry delta.content=None;
        # guard so `response += token` never raises TypeError. Note the
        # loop variable no longer shadows the `message` parameter.
        token = chunk.choices[0].delta.content
        if token:
            response += token
            yield response
# Build the chat UI. type="messages" makes Gradio hand `history` to
# respond() as openai-style role/content dicts — the format respond()
# forwards to the model.
chatbot = gr.ChatInterface(
    respond,
    type="messages",
    title="",
    description="",
)

# Guard the launch so importing this module (e.g. in tests or another
# Space) does not start a server as a side effect.
if __name__ == "__main__":
    chatbot.launch()