import gradio as gr
from huggingface_hub import InferenceClient


def respond(
    message,
    history: list[dict[str, str]],
    system_message,
    hf_token: gr.OAuthToken,
):
    """Stream an assistant reply for *message* given the chat *history*.

    Yields the accumulated response text after each streamed chunk so the
    Gradio UI can render the answer incrementally.

    Args:
        message: The user's latest message text.
        history: Prior turns as ``{"role": ..., "content": ...}`` dicts
            (``type="messages"`` format used by ``gr.ChatInterface``).
        system_message: System prompt placed first in the message list.
        hf_token: OAuth token injected by Gradio's login flow; its ``.token``
            authenticates the Hugging Face Inference API call.
    """
    client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")

    messages = [{"role": "system", "content": system_message}]
    messages.extend(history)
    messages.append({"role": "user", "content": message})

    response = ""
    # NOTE: loop variable renamed from `message` — the original shadowed the
    # user-message parameter, clobbering it for the rest of the function.
    for chunk in client.chat_completion(
        messages,
        max_tokens=512,
        stream=True,
        temperature=0.7,
        top_p=0.95,
    ):
        choices = chunk.choices
        # Some stream chunks carry no choices or an empty delta; skip those
        # but still yield so the UI stays in sync with the stream.
        if choices and choices[0].delta.content:
            response += choices[0].delta.content
        yield response


# Chat UI in OpenAI-style "messages" format; the system prompt is exposed
# as an editable extra input.
chatbot = gr.ChatInterface(
    respond,
    type="messages",
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
    ],
)

# Full-height flex layout so the chat area fills the viewport, with a
# tighter cap on small (mobile) screens.
with gr.Blocks(css="""
body {
    margin: 0;
    padding: 0;
    font-family: system-ui, sans-serif;
}
.gradio-container {
    height: 100vh;
    width: 100%;
    display: flex;
    flex-direction: column;
}
.gr-blocks {
    flex: 1;
    display: flex;
    flex-direction: column;
}
.gr-chatbot {
    flex: 1;
    overflow-y: auto;
    max-height: calc(100vh - 120px);
}
@media (max-width: 768px) {
    .gradio-container, .gr-blocks {
        padding: 0;
        margin: 0;
    }
    .gr-chatbot {
        max-height: calc(100vh - 100px);
    }
}
""") as demo:
    with gr.Sidebar():
        # Login supplies the OAuth token consumed by respond()'s hf_token.
        gr.LoginButton()
    chatbot.render()


if __name__ == "__main__":
    demo.launch()