"""Minimal Gradio chat UI backed by the zephyr-7b-beta model via Hugging Face inference."""
from huggingface_hub import InferenceClient
import gradio as gr
import os

# Requires the HF_TOKEN environment variable for authenticated inference.
client = InferenceClient(model="HuggingFaceH4/zephyr-7b-beta", token=os.getenv("HF_TOKEN"))

SYSTEM_PROMPT = {"role": "system", "content": "You are a helpful assistant."}


def chatbot(input, history):
    """Chat callback for gr.ChatInterface.

    Args:
        input: The user's latest message (str).
        history: Prior turns as supplied by ChatInterface — either the
            default list of [user, assistant] pairs, or a list of
            {"role", "content"} dicts when type="messages" is used.

    Returns:
        The assistant's reply text (str).
    """
    # BUG FIX: the original overwrote `history` with a fresh list holding only
    # the system prompt, so the model never saw earlier turns and the chat had
    # no memory. Rebuild the full conversation from the supplied history.
    messages = [SYSTEM_PROMPT]
    for turn in history:
        if isinstance(turn, dict):
            # gradio "messages" format: already a role/content dict.
            messages.append({"role": turn["role"], "content": turn["content"]})
        else:
            # Default tuple format: (user_message, assistant_message).
            user_msg, assistant_msg = turn
            messages.append({"role": "user", "content": user_msg})
            if assistant_msg is not None:
                messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": input})

    output = client.chat_completion(
        messages=messages,
        max_tokens=256,
        temperature=0.7,
    )
    # ChatInterface appends the returned string to its own history; no need to
    # track it locally (the original's post-call history update was dead code).
    return output.choices[0].message.content


demo = gr.ChatInterface(chatbot, title="Chatbot", fill_height=True)

# Guard the server launch so importing this module has no side effects
# beyond building `client` and `demo`.
if __name__ == "__main__":
    demo.launch()