import gradio as gr
from huggingface_hub import InferenceClient
from transformers import pipeline

client = InferenceClient("distilgpt2")

# Default to remote mode so the local model isn't loaded at startup
run_local = False
pipe = None  # Only initialized when switching to local mode


def respond(message):
    global pipe
    if run_local:
        # Run locally; the first call is slow because the model has to load
        if pipe is None:
            pipe = pipeline("text-generation", model="distilgpt2", device="cpu")
        return pipe(message, max_new_tokens=50, do_sample=True)[0]["generated_text"]
    else:
        # Run remotely through the Inference API, which skips local model loading
        return client.text_generation(message, max_new_tokens=50)


def set_mode(selected_mode):
    global run_local, pipe
    run_local = (selected_mode == "Local")
    pipe = None  # Drop any loaded pipeline so its memory is freed on mode change
    return f"Switched to {'Local' if run_local else 'Remote'} mode"


def update_chat(history, message):
    response = respond(message)
    history.append((message, response))
    return history, ""  # Clear the input box after sending


with gr.Blocks() as demo:
    mode = gr.Radio(
        choices=["Remote", "Local"],
        value="Remote",
        label="Select Mode",
        interactive=True,
    )
    status = gr.Textbox(label="Status", interactive=False)  # Shows the mode-switch confirmation
    chatbot = gr.Chatbot()
    message_input = gr.Textbox(label="Enter your message", placeholder="Type your message here...")
    send_button = gr.Button("Send")

    mode.change(set_mode, mode, status)
    send_button.click(update_chat, [chatbot, message_input], [chatbot, message_input])

demo.launch()
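
# Usage note (a sketch of assumptions, not guaranteed by the script itself):
# running the demo requires gradio, huggingface_hub, transformers, and a
# backend such as torch to be installed, e.g.
#   pip install gradio huggingface_hub transformers torch
#   python app.py
# Remote mode additionally assumes "distilgpt2" is served by the Hugging Face
# serverless Inference API; if it isn't, pass a currently hosted model name
# (and a token, if required) to InferenceClient instead.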