import gradio as gr
import random
from huggingface_hub import InferenceClient

# Inference client used for every chat request; swap the model id below to
# use a different LLM.
client = InferenceClient(model="HuggingFaceH4/zephyr-7b-beta")

def respond(message, history):
    """Stream the model's reply to ``message`` for the chat interface.

    The request is built as a system prompt, followed by the prior
    ``history`` (when there is any), followed by the new user ``message``.
    It is sent to ``client.chat_completion`` with streaming enabled.

    Args:
        message: The latest user message.
        history: Earlier chat turns, forwarded to the model unchanged. May be
            empty or ``None``, in which case only the system prompt and the
            new message are sent.

    Yields:
        The reply text accumulated so far, once per streamed chunk.
    """
    # Edit the system prompt to change the personality of the chatbot.
    messages = [{"role": "system", "content": "You are extremely professional and smart that gives short replies."}]
    if history:
        messages.extend(history)

    messages.append({"role": "user", "content": message})

    response = ""
    # Tunables: max_tokens limits the length of the response; temperature
    # (between 0 and 2) and top_p (between 0 and 1) may also be passed to
    # chat_completion to adjust sampling.
    for chunk in client.chat_completion(messages, max_tokens=100, stream=True):
        # The loop variable is deliberately not named `message`, so the
        # user-message parameter is never overwritten while streaming.
        choices = chunk.choices
        # A chunk with an empty `choices` list would raise IndexError on
        # `choices[0]`; treat it as carrying no new text instead.
        token = choices[0].delta.content if choices else None
        if token:
            response += token
        yield response

def random_message(message, history):
    """Return one canned answer picked at random.

    Both ``message`` and ``history`` are accepted so the function has the
    same two-argument signature as ``respond``, but neither is read.
    """
    canned_answers = (
        "it is likely",
        "absolutely not",
        "try again",
        "without a doubt",
        "outlook good",
        "signs point to it",
        "very unlikely",
        "quite doubtful",
    )
    return random.choice(canned_answers)
    
# Build the chat UI around `respond`, using the "messages" history format.
chatbot = gr.ChatInterface(fn=respond, type="messages")

# Start the app with debug mode enabled.
chatbot.launch(debug=True)