# app.py — recipe-suggestion chatbot (Hugging Face Space by commonlemon,
# commit 9b5d97e, verified). Web-page residue converted to a comment so
# the file parses as Python.
import gradio as gr
from huggingface_hub import InferenceClient #InferenceClient class
# Module-level inference client pinned to Qwen2.5-7B-Instruct; shared by
# every call to respond() below.
client = InferenceClient("Qwen/Qwen2.5-7B-Instruct")
def respond(message, history):
    """Stream a recipe suggestion for the user's latest message.

    Parameters
    ----------
    message : str
        The newest user message from the chat box.
    history : list
        Prior turns supplied by gr.ChatInterface. Depending on the
        interface's ``type`` setting this is either a list of
        openai-style dicts ({"role": ..., "content": ...}) or a list of
        [user, assistant] pairs — both are normalized below.

    Yields
    ------
    str
        The accumulated assistant reply so far, one chunk at a time,
        so the UI renders the answer token-by-token.
    """
    messages = [
        {
            "role": "system",
            "content": """You are a recipe assistant who suggests simple recipies that take less than 30 minutes based on the
ingredients the user has and their dietary restrictions."""
        }
    ]
    # Normalize history: accept both dict-style turns and legacy
    # [user, assistant] tuple pairs so the handler works regardless of
    # the ChatInterface history format.
    for turn in history or []:
        if isinstance(turn, dict):
            messages.append(turn)
        else:
            user_text, bot_text = turn
            if user_text:
                messages.append({"role": "user", "content": user_text})
            if bot_text:
                messages.append({"role": "assistant", "content": bot_text})
    messages.append({"role": "user", "content": message})

    # Stream the completion: yield the growing response as soon as each
    # chunk arrives instead of returning everything at once.
    response = ""
    for chunk in client.chat_completion(
        messages,
        max_tokens=500,
        temperature=0.5,
        stream=True,
    ):
        # The final/keep-alive stream chunks can carry delta.content=None;
        # guard so `response += token` never raises TypeError. Note the
        # loop variable no longer shadows the `message` parameter.
        token = chunk.choices[0].delta.content
        if token:
            response += token
            yield response
# Build the chat UI. type="messages" makes Gradio hand `history` to
# respond() as openai-style role/content dicts — the format respond()
# forwards to the model.
chatbot = gr.ChatInterface(
    respond,
    type="messages",
    title="",
    description="",
)

# Guard the launch so importing this module (e.g. in tests or another
# Space) does not start a server as a side effect.
if __name__ == "__main__":
    chatbot.launch()