import gradio as gr
from huggingface_hub import InferenceClient

# Shared client used by the chat handler; the model id is passed to
# huggingface_hub's InferenceClient as its first positional argument.
_MODEL_ID = "Qwen/Qwen2.5-7B-Instruct"
client = InferenceClient(_MODEL_ID)

def _history_to_messages(history):
    """Return prior chat turns as a list of chat-completion messages.

    Dict entries are passed through unchanged. Two-item list/tuple entries
    are treated as ``(user, assistant)`` pairs and expanded into one message
    per non-``None`` side.
    NOTE(review): the pair form is assumed to be Gradio's legacy "tuples"
    history format -- confirm against the installed Gradio version.
    """
    converted = []
    for entry in history or []:
        if isinstance(entry, (list, tuple)) and len(entry) == 2:
            user_text, assistant_text = entry
            if user_text is not None:
                converted.append({"role": "user", "content": user_text})
            if assistant_text is not None:
                converted.append(
                    {"role": "assistant", "content": assistant_text}
                )
        else:
            # Already a message (e.g. a role/content dict): forward as-is.
            converted.append(entry)
    return converted


def respond(message, history):
    """Stream the assistant's reply to *message* as it is generated.

    Args:
        message: The latest user message.
        history: Earlier turns of the conversation, or a falsy value when
            there are none.

    Yields:
        The reply accumulated so far, once per received piece of text. If
        the stream carries no text at all, a single empty string is yielded
        so the caller always receives at least one value.
    """
    system_prompt = (
        "You are a friendly chatbot that gives movie reccomendations. "
        "All films should be suitable for kids under 18 years old. "
        "Limit responses under 100 words."
    )

    messages = [{"role": "system", "content": system_prompt}]
    messages.extend(_history_to_messages(history))
    messages.append({"role": "user", "content": message})

    response = ""

    for chunk in client.chat_completion(messages, stream=True):
        # Some stream chunks carry no choices or no text (content is None);
        # concatenating those would raise, so skip them.
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        if not token:
            continue
        response += token
        # Yield inside the loop so the partial reply is emitted as it grows.
        yield response

    if not response:
        yield response

# Build the chat UI around the `respond` handler and start serving it.
chatbot = gr.ChatInterface(fn=respond)

chatbot.launch()