"""Gradio demo: streaming text completion from open-source LLMs via the HF Inference API."""

import gradio as gr
from huggingface_hub import InferenceClient

# Three open-source models to choose from (dropdown label -> model repo id).
MODEL_CHOICES = {
    "Mistral 7B Instruct (mistralai/Mistral-7B-Instruct-v0.2)": "mistralai/Mistral-7B-Instruct-v0.2",
    "Falcon 7B Instruct (tiiuae/falcon-7b-instruct)": "tiiuae/falcon-7b-instruct",
    "Zephyr 7B Beta (HuggingFaceH4/zephyr-7b-beta)": "HuggingFaceH4/zephyr-7b-beta",
}


def complete_text(prompt, max_tokens, temperature, top_p, model_choice, hf_token):
    """
    Generate a text completion from a Hugging Face model, streamed chunk by chunk.

    Parameters
    ----------
    prompt : str
        Text to complete. Empty or whitespace-only prompts are rejected.
    max_tokens : int | float
        Maximum number of new tokens to generate (coerced to int).
    temperature : float
        Sampling temperature forwarded to ``text_generation``.
    top_p : float
        Nucleus-sampling parameter forwarded to ``text_generation``.
    model_choice : str
        A key of ``MODEL_CHOICES`` (the dropdown label shown in the UI).
    hf_token : object | None
        Token object supplied by ``gr.LoginButton``; must expose a truthy
        ``token`` attribute for generation to proceed.

    Yields
    ------
    str
        The completion accumulated so far (so the output textbox grows in
        place), or a user-facing warning/error message.
    """
    # Reject empty / whitespace-only prompts up front instead of calling the API.
    if not prompt or not prompt.strip():
        yield "⚠️ Please enter a prompt."
        return

    # The Inference API needs an access token; the login button supplies an
    # object carrying one in its ``token`` attribute when the user is logged in.
    if not hf_token or not getattr(hf_token, "token", None):
        yield "🔐 Please login with your Hugging Face account (see left sidebar)."
        return

    # Defensive lookup: an unexpected dropdown value would otherwise raise
    # KeyError out of the event handler instead of showing a message.
    model_id = MODEL_CHOICES.get(model_choice)
    if model_id is None:
        yield f"❌ Error while generating: unknown model choice {model_choice!r}"
        return

    client = InferenceClient(model=model_id, token=hf_token.token)
    generated = ""
    try:
        for chunk in client.text_generation(
            prompt=prompt,
            max_new_tokens=int(max_tokens),
            temperature=float(temperature),
            top_p=float(top_p),
            repetition_penalty=1.0,
            stream=True,
        ):
            # Yield the running concatenation so the UI streams the text in place.
            generated += chunk
            yield generated
    except Exception as e:  # UI boundary: surface the failure instead of crashing.
        yield f"❌ Error while generating: {e}"


with gr.Blocks() as demo:
    gr.Markdown("## ✍️ Text Completion Demo (Open-Source LLMs via Hugging Face Inference API)")
    gr.Markdown(
        "Pick a model, enter a prompt, and stream completions. "
        "Some models require logging in (click **Login** in the sidebar)."
    )
    with gr.Row():
        with gr.Column(scale=2):
            prompt = gr.Textbox(label="Prompt", placeholder="Type the beginning of your text...", lines=4)
            max_tokens = gr.Slider(1, 1024, value=100, step=1, label="Max new tokens")
            temperature = gr.Slider(0.0, 2.0, value=0.7, step=0.1, label="Temperature")
            top_p = gr.Slider(0.1, 1.0, value=1.0, step=0.05, label="Top-p")
            model_choice = gr.Dropdown(
                choices=list(MODEL_CHOICES.keys()),
                value=next(iter(MODEL_CHOICES)),  # first entry is the default
                label="Choose a model",
            )
            submit = gr.Button("Generate Completion")
        with gr.Column(scale=3):
            with gr.Sidebar():
                login = gr.LoginButton()  # supplies OAuth token
            output = gr.Textbox(label="Generated Completion", lines=15)

    # Function has 6 args, so inputs list must have exactly 6 items
    submit.click(
        fn=complete_text,
        inputs=[prompt, max_tokens, temperature, top_p, model_choice, login],
        outputs=output,
    )

if __name__ == "__main__":
    demo.launch()