"""Minimal Gradio chatbot backed by a Hugging Face Inference endpoint."""
import random

import gradio as gr
from huggingface_hub import InferenceClient

# Hosted LLM used for chat completions -- swap the model id to change the LLM.
client = InferenceClient("Qwen/Qwen2.5-72B-Instruct")


def respond(message, history):
    """Stream a chat reply for *message*, yielding the partial text so far.

    Parameters
    ----------
    message : str
        The user's latest message.
    history : list[dict] | None
        Prior turns in OpenAI-style {"role": ..., "content": ...} dicts
        (gr.ChatInterface with type="messages" supplies this shape).

    Yields
    ------
    str
        The accumulated assistant reply after each streamed token.
    """
    # System prompt sets the persona -- edit the content to change the personality.
    messages = [{"role": "system", "content": "You are a sassy chatbot."}]
    if history:
        messages.extend(history)
    messages.append({"role": "user", "content": message})

    response = ""
    # Loop variable renamed to `chunk`: the original reused `message`,
    # shadowing the function parameter.
    for chunk in client.chat_completion(
        messages,
        max_tokens=500,  # change reply length using max_tokens
        stream=True,
        # temperature=0.3,
        # top_p=0.3,
    ):
        token = chunk.choices[0].delta.content
        # Streamed deltas can be None (e.g. on the final chunk); skip those
        # rather than raising TypeError on `response += None`.
        if token:
            response += token
            yield response
    # BUGFIX: the original did response['choices'][0]['message']['content'],
    # but `response` is a plain str here, so that always raised TypeError.
    # The accumulated text itself is what we want to log.
    print(response.strip())


def random_message(message, history):
    """Magic-8-ball style responder: ignore the input, return a canned answer.

    Drop-in alternative to `respond` for gr.ChatInterface (same signature).
    """
    choices = [
        "yes.", "no.", "it is certain", "without a doubt", "outlook good",
        "ask again later", "better not tell you now", "very doubtful",
        "don't count on it", "my sources say no", "outlook not so good",
        "very doubtful", "reply hazy, try again", "cannot predict now",
    ]
    chat_answer = random.choice(choices)
    return chat_answer


print("Hello, World!")  # startup marker

# Chat UI wired to the streaming responder; "messages" keeps history in
# OpenAI-style role/content dicts.
chatbot = gr.ChatInterface(respond, type="messages")
chatbot.launch(debug=True)