import gradio as gr
import random
from huggingface_hub import InferenceClient


# Shared inference client used by the chat handler below.
client = InferenceClient(model="Qwen/Qwen2.5-72B-Instruct")

def respond(message, history):
    """Stream the model's reply to ``message`` as a growing string.

    Builds the conversation as a system prompt, followed by any prior
    ``history`` entries, followed by the new user ``message``, and sends it
    to the module-level ``client`` with streaming enabled.

    Args:
        message: The latest user message text.
        history: Earlier conversation entries appended verbatim after the
            system prompt; may be empty or ``None``.

    Yields:
        The reply accumulated so far, once per received chunk that carries
        text, so the caller can render the answer progressively.
    """
    messages = [{"role": "system", "content": "You are an angry chatbot."}]
    if history:
        messages.extend(history)
    messages.append({"role": "user", "content": message})

    response = ""
    # The loop variable must not be called `message`: that would shadow the
    # function parameter.
    for chunk in client.chat_completion(
        messages,
        max_tokens=100,
        stream=True,
    ):
        # Guard against chunks that carry no choices or no text delta.
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        if not token:
            continue
        response += token
        yield response


def echo(message, history):
    """Return one canned answer chosen at random.

    Both ``message`` and ``history`` are accepted so the function has the
    same signature as ``respond``, but neither is used.

    Returns:
        One of the fixed strings in ``choices``.
    """
    choices = ["yes", "no", "sure", "absolutely", "of course not", "by no means"]
    return random.choice(choices)

# Chat UI wired to the `respond` handler, using the "messages" history format.
chatbot = gr.ChatInterface(fn=respond, type="messages")

# Start the app with debug mode enabled.
chatbot.launch(debug=True)