"""Minimal Gradio chat UI backed by the zephyr-7b-beta model via Hugging Face inference."""
from huggingface_hub import InferenceClient
import gradio as gr
import os

# Requires the HF_TOKEN environment variable for authenticated inference.
client = InferenceClient(model="HuggingFaceH4/zephyr-7b-beta", token=os.getenv("HF_TOKEN"))

SYSTEM_PROMPT = {"role": "system", "content": "You are a helpful assistant."}


def chatbot(input, history):
    """Chat callback for gr.ChatInterface.

    Args:
        input: The user's latest message (str).
        history: Prior turns as supplied by ChatInterface — either the
            default list of [user, assistant] pairs, or a list of
            {"role", "content"} dicts when type="messages" is used.

    Returns:
        The assistant's reply text (str).
    """
    # BUG FIX: the original overwrote `history` with a fresh list holding only
    # the system prompt, so the model never saw earlier turns and the chat had
    # no memory. Rebuild the full conversation from the supplied history.
    messages = [SYSTEM_PROMPT]
    for turn in history:
        if isinstance(turn, dict):
            # gradio "messages" format: already a role/content dict.
            messages.append({"role": turn["role"], "content": turn["content"]})
        else:
            # Default tuple format: (user_message, assistant_message).
            user_msg, assistant_msg = turn
            messages.append({"role": "user", "content": user_msg})
            if assistant_msg is not None:
                messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": input})

    output = client.chat_completion(
        messages=messages,
        max_tokens=256,
        temperature=0.7,
    )
    # ChatInterface appends the returned string to its own history; no need to
    # track it locally (the original's post-call history update was dead code).
    return output.choices[0].message.content


demo = gr.ChatInterface(chatbot, title="Chatbot", fill_height=True)

# Guard the server launch so importing this module has no side effects
# beyond building `client` and `demo`.
if __name__ == "__main__":
    demo.launch()