import gradio as gr
import random
from huggingface_hub import InferenceClient

# Identifier of the hosted model that generates the chatbot's replies.
MODEL_ID = "Qwen/Qwen2.5-72B-Instruct"

# Shared inference client used by the chat handler(s) in this module.
client = InferenceClient(MODEL_ID)

def echo(message, history):
    """Return one canned reply picked at random.

    Both ``message`` and ``history`` are accepted so the function fits the
    chat-handler signature, but neither influences the answer.
    """
    canned_replies = (
        "yes",
        "no",
        "silly ahh question",
        "not likely",
        "ask again",
        "absolutely",
        "YASSS",
    )
    return random.choice(canned_replies)

def respond(message, history):
    """Stream the model's reply to ``message``, yielding the text so far.

    Args:
        message: The latest user message.
        history: Earlier conversation turns, appended verbatim after the
            system prompt. May be empty or ``None``. Presumably a list of
            ``{"role": ..., "content": ...}`` dicts, matching the
            ``type="messages"`` chat interface -- confirm against the caller.

    Yields:
        The accumulated reply text, growing each time a non-empty piece of
        content arrives from the streaming completion.
    """
    messages = [{"role": "system", "content": "You are a sassy chatbot from the 1800s."}]
    if history:
        messages.extend(history)
    messages.append({"role": "user", "content": message})

    response = ""
    # The loop variable is named `chunk` so it does not rebind the `message`
    # parameter.
    for chunk in client.chat_completion(
        messages,
        max_tokens=100,
        stream=True,
    ):
        # Some stream chunks carry no choices at all; there is nothing to add.
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        # `content` can be None or empty (e.g. on the closing chunk);
        # concatenating None onto a str would raise TypeError mid-stream.
        if not token:
            continue
        response += token
        yield response
    
# Wire the streaming `respond` handler into a Gradio chat UI using the
# "messages" history format, then start serving it.
chatbot = gr.ChatInterface(fn=respond, type="messages")
chatbot.launch()