import gradio as gr
import os
from huggingface_hub import InferenceClient

# Access token is read from the "HF_Token" environment variable; it is None
# when the variable is not set.
hf_token = os.environ.get("HF_Token")

# Shared inference client bound to the chat model used by the app.
client = InferenceClient(
    model="Qwen/Qwen2.5-7B-Instruct",
    token=hf_token,
)

def _history_to_messages(history):
    """Convert a Gradio chat history into chat-completion message dicts.

    Three item shapes are handled:

    * dicts -- only the ``role`` and ``content`` keys are kept, so any extra
      keys attached by the UI are not forwarded to the model endpoint;
    * two-item lists/tuples -- treated as a ``(user, assistant)`` pair, with
      ``None`` entries skipped (assumed to be Gradio's pair-style history;
      verify against the installed Gradio version);
    * anything else -- passed through unchanged.

    Args:
        history: Iterable of prior turns, or a falsy value for "no history".

    Returns:
        A new list of message items suitable for extending the prompt.
    """
    converted = []
    for turn in history or []:
        if isinstance(turn, dict):
            converted.append(
                {key: turn[key] for key in ("role", "content") if key in turn}
            )
        elif isinstance(turn, (list, tuple)) and len(turn) == 2:
            user_text, assistant_text = turn
            if user_text is not None:
                converted.append({"role": "user", "content": user_text})
            if assistant_text is not None:
                converted.append({"role": "assistant", "content": assistant_text})
        else:
            converted.append(turn)
    return converted


def respond(message, history):
    """Stream the model's reply to ``message`` given the prior ``history``.

    The prompt is a fixed system message, followed by the normalized history,
    followed by the new user message. The completion is requested with
    ``stream=True`` and the accumulated text is yielded after every non-empty
    token, so each yielded value is the full reply so far.

    Args:
        message: The new user message.
        history: Prior conversation turns, or a falsy value if there are none.

    Yields:
        The reply text accumulated so far.
    """
    messages = [{"role": "system", "content": "You are a caring grandma passing down her family recipes and teaching a niece to cook your favorite dishes."}]
    messages.extend(_history_to_messages(history))
    messages.append({"role": "user", "content": message})

    response = ""
    # The loop variable is deliberately not called `message`, so the
    # parameter is never shadowed.
    for chunk in client.chat_completion(
        messages,
        max_tokens=500,
        temperature=0.5,
        stream=True,
    ):
        # Skip chunks that carry no choices instead of raising IndexError.
        if not chunk.choices:
            continue

        token = chunk.choices[0].delta.content
        if token:
            response += token
            yield response

# Build the chat UI around the `respond` handler, then start serving it.
chatbot = gr.ChatInterface(
    fn=respond,
    description="A granny that'll help you learn to cook",
)

chatbot.launch()