import gradio as gr
from huggingface_hub import InferenceClient  # hosted inference API client

# Client for the hosted Qwen 2.5 7B Instruct chat model.
client = InferenceClient("Qwen/Qwen2.5-7B-Instruct")


def respond(message, history):
    """Stream a recipe suggestion for the latest user message.

    Parameters
    ----------
    message : str
        The newest user message.
    history : list | None
        Prior turns as ``{"role": ..., "content": ...}`` dicts
        (assumes the OpenAI-style "messages" history format —
        TODO confirm against the ChatInterface configuration).

    Yields
    ------
    str
        The accumulated assistant reply so far, one stream chunk at a
        time, so the UI renders tokens as they arrive instead of
        waiting for the full completion.
    """
    messages = [
        {
            "role": "system",
            "content": """You are a recipe assistant who suggests simple recipies that take less than 30 minutes based on the ingredients the user has and their dietary restrictions.""",
        }
    ]
    if history:
        messages.extend(history)
    messages.append({"role": "user", "content": message})

    response = ""
    # Renamed loop variable (was `message`): it shadowed the function
    # parameter of the same name.
    for chunk in client.chat_completion(
        messages,
        max_tokens=500,
        temperature=0.5,
        stream=True,
    ):
        token = chunk.choices[0].delta.content
        # The final stream chunk can carry content=None; guard so
        # `response += token` doesn't raise TypeError at end-of-stream.
        if token:
            response += token
        yield response


# Chat UI wired to the streaming generator.
chatbot = gr.ChatInterface(respond, title="", description="")
chatbot.launch()