import gradio as gr
from huggingface_hub import InferenceClient

# Three open-source models to choose from
MODEL_CHOICES = {
    "Mistral 7B Instruct (mistralai/Mistral-7B-Instruct-v0.2)": "mistralai/Mistral-7B-Instruct-v0.2",
    "Falcon 7B Instruct (tiiuae/falcon-7b-instruct)": "tiiuae/falcon-7b-instruct",
    "Zephyr 7B Beta (HuggingFaceH4/zephyr-7b-beta)": "HuggingFaceH4/zephyr-7b-beta",
}
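# Each entry maps a human-readable dropdown label to the model's repo id on the
# Hugging Face Hub; any other text-generation model served by the Inference API
# could be substituted here.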


def complete_text(prompt, max_tokens, temperature, top_p, model_choice, hf_token: gr.OAuthToken | None = None):
    """
    Generate a text completion from a Hugging Face model, streamed chunk by chunk.
    """
    if not prompt:
        yield "⚠️ Please enter a prompt."
        return
    # hf_token is injected by Gradio via the gr.OAuthToken annotation; it is
    # None when no user is logged in.
    if hf_token is None or not hf_token.token:
        yield "🔐 Please login with your Hugging Face account (see left sidebar)."
        return

    model_id = MODEL_CHOICES[model_choice]
    client = InferenceClient(model=model_id, token=hf_token.token)
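    # The user's own OAuth access token authenticates the request, so the Space
    # needs no server-side API secret.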

    generated = ""
    try:
        for chunk in client.text_generation(
            prompt=prompt,
            max_new_tokens=int(max_tokens),
            temperature=float(temperature),
            top_p=float(top_p),
            repetition_penalty=1.0,
            stream=True,
        ):
            generated += chunk
            yield generated
    except Exception as e:
        yield f"❌ Error while generating: {e}"
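
# For reference (a sketch, not executed here): the same call without streaming
# returns the whole completion at once as a single string, e.g.
#     text = client.text_generation(prompt, max_new_tokens=100, stream=False)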


with gr.Blocks() as demo:
    gr.Markdown("## ✍️ Text Completion Demo (Open-Source LLMs via Hugging Face Inference API)")
    gr.Markdown(
        "Pick a model, enter a prompt, and stream completions. "
        "You must be logged in so the demo can call the Inference API with your "
        "token (use the **Sign in with Hugging Face** button in the sidebar)."
    )

    with gr.Row():
        with gr.Column(scale=2):
            prompt = gr.Textbox(label="Prompt", placeholder="Type the beginning of your text...", lines=4)
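            # Sampling controls: "Max new tokens" caps the completion length,
            # while temperature and top-p trade off determinism vs. diversity.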
            max_tokens = gr.Slider(1, 1024, value=100, step=1, label="Max new tokens")
            temperature = gr.Slider(0.0, 2.0, value=0.7, step=0.1, label="Temperature")
            top_p = gr.Slider(0.1, 1.0, value=1.0, step=0.05, label="Top-p")
            model_choice = gr.Dropdown(
                choices=list(MODEL_CHOICES.keys()),
                value=list(MODEL_CHOICES.keys())[0],
                label="Choose a model",
            )
            submit = gr.Button("Generate Completion")
        with gr.Column(scale=3):
            output = gr.Textbox(label="Generated Completion", lines=15)

    # The sidebar holds the "Sign in with Hugging Face" button; the resulting
    # OAuth token reaches complete_text through its gr.OAuthToken parameter.
    with gr.Sidebar():
        gr.LoginButton()
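
    # Note: OAuth login only works on Hugging Face Spaces with `hf_oauth: true`
    # set in the README metadata; when running locally, hf_token will be None.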

    # complete_text takes five UI inputs; the sixth parameter (hf_token) is
    # injected automatically because of its gr.OAuthToken type annotation.
    submit.click(
        fn=complete_text,
        inputs=[prompt, max_tokens, temperature, top_p, model_choice],
        outputs=output,
    )

if __name__ == "__main__":
    demo.launch()