"""Minimal Gradio chat UI backed by Zephyr-7B-beta via the HF Inference API."""
import gradio as gr
from huggingface_hub import InferenceClient

# One shared client for the hosted Zephyr model; reused across all chat turns.
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")


def respond(message, history):
    """Return the assistant's reply for a single chat turn.

    Args:
        message: The user's latest message (str).
        history: Prior turns as a list of ``{"role", "content"}`` dicts
            (Gradio's ``type='messages'`` format); may be empty or ``None``.

    Returns:
        The model's reply text with surrounding whitespace stripped.
    """
    messages = [{"role": "system", "content": "You're a friendly chatbot"}]
    # extend() of an empty list is a no-op, so no conditional is needed;
    # `or []` additionally guards against history being None.
    messages.extend(history or [])
    messages.append({"role": "user", "content": message})

    response = client.chat_completion(
        messages,
        max_tokens=100,
        temperature=0.2,
    )
    # chat_completion returns a ChatCompletionOutput; attribute access is the
    # documented interface (dict-style subscripting is legacy behavior).
    return response.choices[0].message.content.strip()


chatbot = gr.ChatInterface(respond, type="messages")

if __name__ == "__main__":
    # Guarded so importing this module does not start the web server.
    chatbot.launch()