import gradio as gr
from huggingface_hub import InferenceClient

# Hosted inference endpoint used for all completions.
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")


def respond(message, history):
    """Stream a color-theory chatbot reply for the latest user message.

    Parameters
    ----------
    message : str
        The user's newest message.
    history : list[dict] | None
        Prior turns in OpenAI-style ``{"role", "content"}`` dicts
        (Gradio ``type="messages"`` format).

    Yields
    ------
    str
        The partial response text, growing as tokens stream in.
    """
    system_message = (
        "you are a color theory chatbot that helps people evaluate color schemes"
    )
    messages = [{"role": "system", "content": system_message}]
    # BUG FIX: the original called message.extend(history) — `message` is a
    # str, which crashed with AttributeError and never sent the history.
    if history:
        messages.extend(history)
    messages.append({"role": "user", "content": message})

    response = ""
    # Renamed loop variable (was `message`, shadowing the parameter).
    for chunk in client.chat_completion(
        messages,
        max_tokens=150,
        temperature=0.8,
        top_p=0.5,
        stream=True,
    ):
        token = chunk.choices[0].delta.content
        # The final stream chunk may carry content=None; skip it rather
        # than raising TypeError on str + None.
        if token:
            response += token
        yield response


if __name__ == "__main__":
    chatbot = gr.ChatInterface(respond, type="messages")
    chatbot.launch(debug=True)