import gradio as gr 
import random 
from huggingface_hub import InferenceClient

# Shared inference client used by `respond` for chat completions.
# To try a different LLM, replace the model repo id passed below.
client = InferenceClient("Qwen/Qwen2.5-72B-Instruct")

def respond(message, history):
    """Stream the model's reply to ``message`` as a growing string.

    Builds the conversation as: a fixed system prompt, then the prior
    ``history`` entries (if any), then the new user ``message``. The
    conversation is sent to the module-level ``client`` with streaming
    enabled, and the accumulated reply text is yielded after each new
    piece of text arrives.

    Args:
        message: The latest user input.
        history: Earlier conversation entries; they are appended to the
            request as-is, so they are expected to already be
            ``{"role": ..., "content": ...}`` style dicts. May be empty
            or ``None``.

    Yields:
        The reply accumulated so far, once per streamed chunk that carried
        text.
    """
    messages = [{"role": "system", "content": "You are irritated and annoyed with anything the user says. You don't want to be talked to at all. You are rude and cold"}]
    if history:
        messages.extend(history)

    messages.append({"role": "user", "content": message})

    response = ""
    # The loop variable is deliberately NOT called `message`, so the
    # function parameter is not shadowed while streaming.
    for chunk in client.chat_completion(
        messages,
        max_tokens=150,
        stream=True,
        temperature=0,
        top_p=.9,
    ):
        # Some stream chunks carry no choices at all; skip them.
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        # `delta.content` may be None or empty on some chunks; concatenating
        # None onto a str would raise TypeError.
        if not token:
            continue
        response += token
        yield response

    # `response` is a plain string here (not a completion object), so log
    # it directly instead of indexing into it like a dict.
    print(response.strip())
        
def random_message(message, history):
    """Return one canned reply chosen at random.

    ``message`` and ``history`` are accepted so the function has the same
    two-argument shape as ``respond``, but neither is read.
    """
    canned_replies = (
        "Mhm",
        "nah fam",
        "I don't like your tone, buddy rewrite that.",
        "Uhm, sure, ig.",
        "What a good question! So-",
        "I know what you are.",
        "I'm uncomfortable with the energy we've created today.",
        "I like that question! Ask someone else.",
    )
    return random.choice(canned_replies)
    
# Chat UI wired to the streaming `respond` handler; history is exchanged in
# the role/content "messages" format.
chatbot = gr.ChatInterface(
    respond,
    type="messages",
    # Well-formed nesting: <center> wraps the bold first line and the second
    # line, and every tag that is opened is also closed.
    description="<center><strong>Don't talk to me please.</strong><br>go away.</center>",
    title="I'm not a chatbot",
)

# Start the web app with Gradio's debug mode enabled.
chatbot.launch(debug=True)