import gradio as gr
from huggingface_hub import InferenceClient

# Model served through the Hugging Face Inference API.
MODEL_ID = "meta-llama/Llama-3.2-1B-Instruct"

# System prompt that makes the model answer in a fixed
# Concept / Analogy / Reflection structure instead of giving the solution.
SYSTEM_MESSAGE = (
    "You are a Socratic Tutor. When the user asks a question, do NOT just give the answer. "
    "Follow this format strictly:\n"
    "1. **Concept**: Explain the core idea simply as if you would explain it to a 7 year old child.\n"
    "2. **Analogy**: Use a real-world analogy to make it clear.\n"
    "3. **Reflection**: End your response with a question that forces the user to apply what they just learned. This question should not be answered by you but given back to the user.\n"
    "Never give the full solution immediately. Guide them."
)


def respond(
    message: str,
    history: list[dict[str, str]],
    temperature: float,
    top_p: float,
    hf_token: gr.OAuthToken,
):
    """Stream a Socratic-tutor reply to ``message`` from the hosted chat model.

    The conversation sent to the model is the system prompt, followed by
    ``history``, followed by the new user ``message``.

    For more information on `huggingface_hub` Inference API support, see:
    https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference

    Args:
        message: The user's latest chat message.
        history: Earlier turns as ``{"role": ..., "content": ...}`` dicts;
            appended to the request unchanged.
        temperature: Sampling temperature forwarded to ``chat_completion``.
        top_p: Nucleus-sampling threshold forwarded to ``chat_completion``.
        hf_token: OAuth token object; its ``token`` attribute authenticates
            the ``InferenceClient``.

    Yields:
        The response text accumulated so far, once per streamed chunk, so the
        UI can render the reply incrementally.
    """
    client = InferenceClient(token=hf_token.token, model=MODEL_ID)

    messages = [{"role": "system", "content": SYSTEM_MESSAGE}]
    messages.extend(history)
    messages.append({"role": "user", "content": message})

    response = ""

    # The loop variable is named `chunk` so it does not shadow the `message`
    # parameter.
    for chunk in client.chat_completion(
        messages,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        choices = chunk.choices
        token = ""
        # A chunk may carry no choices or an empty delta; treat it as "".
        if choices and choices[0].delta.content:
            token = choices[0].delta.content

        response += token
        yield response


# For information on how to customize the ChatInterface, peruse the gradio docs:
# https://www.gradio.app/docs/chatinterface
chatbot = gr.ChatInterface(
    respond,
    type="messages",
    additional_inputs=[
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)

# Page layout: a login button in the sidebar, with the chat interface
# rendered in the main area.
with gr.Blocks() as demo:
    with gr.Sidebar():
        gr.LoginButton()
    chatbot.render()


if __name__ == "__main__":
    demo.launch()