Spaces:
Sleeping
| import gradio as gr | |
| from huggingface_hub import InferenceClient | |
# Open-source base models offered in the UI: maps the human-readable
# dropdown label "Name (repo_id)" to the Hugging Face repo id.
MODEL_CHOICES = {
    f"{label} ({repo_id})": repo_id
    for label, repo_id in (
        ("Mistral 7B Instruct", "mistralai/Mistral-7B-Instruct-v0.2"),
        ("Falcon 7B Instruct", "tiiuae/falcon-7b-instruct"),
        ("LLaMA-2 7B Chat", "meta-llama/Llama-2-7b-chat-hf"),
    )
}
def complete_text(prompt, max_tokens, temperature, top_p, model_choice, hf_token: gr.OAuthToken):
    """Stream a plain-text completion from a Hugging Face-hosted open-source LLM.

    Args:
        prompt: Text to complete.
        max_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature from the UI slider (0.0 means greedy).
        top_p: Nucleus-sampling probability mass.
        model_choice: Display label selected in the dropdown; a key of MODEL_CHOICES.
        hf_token: OAuth token injected by Gradio on login (required for gated
            models such as LLaMA-2).

    Yields:
        The accumulated completion text after each streamed chunk, or a
        user-facing warning/error message.
    """
    if not hf_token or not hf_token.token:
        yield "⚠️ Please log in with your Hugging Face account (needed for gated models like LLaMA-2)."
        return
    if not prompt or not prompt.strip():
        yield "⚠️ Please enter a prompt first."
        return
    # .get() guards against a stale/unknown dropdown value instead of raising KeyError.
    model_id = MODEL_CHOICES.get(model_choice)
    if model_id is None:
        yield f"⚠️ Unknown model choice: {model_choice!r}"
        return
    client = InferenceClient(model=model_id, token=hf_token.token)
    # The HF text-generation endpoint rejects temperature == 0.0; passing None
    # requests greedy decoding instead, which is what a 0 slider value intends.
    response_text = ""
    try:
        stream = client.text_generation(
            prompt,
            max_new_tokens=max_tokens,
            temperature=temperature if temperature > 0 else None,
            top_p=top_p,
            stream=True,
            repetition_penalty=1.0,
        )
        for chunk in stream:
            response_text += chunk
            yield response_text
    except Exception as err:
        # Surface API failures (gated access, rate limits, model loading) in the
        # UI rather than crashing the streaming generator mid-response.
        yield f"{response_text}\n\n❌ Inference error: {err}"
with gr.Blocks() as demo:
    gr.Markdown("## ✍️ Text Completion Demo with Open-Source Base LLMs")
    gr.Markdown(
        "Pick a model hosted on Hugging Face, enter a prompt, adjust decoding parameters, "
        "and watch the model complete your text."
    )
    # The login button starts the Hugging Face OAuth flow; after login, Gradio
    # injects the user's token into complete_text via its `hf_token: gr.OAuthToken`
    # type annotation.
    gr.LoginButton()

    with gr.Row():
        with gr.Column(scale=2):
            prompt = gr.Textbox(
                label="Prompt",
                placeholder="Type the beginning of your text...",
                lines=4,
            )
            max_tokens = gr.Slider(
                minimum=1, maximum=1024, value=100, step=1, label="Max tokens"
            )
            temperature = gr.Slider(
                minimum=0.0, maximum=2.0, value=0.7, step=0.1, label="Temperature"
            )
            top_p = gr.Slider(
                minimum=0.1, maximum=1.0, value=1.0, step=0.05, label="Top-p"
            )
            model_choice = gr.Dropdown(
                choices=list(MODEL_CHOICES.keys()),
                value=list(MODEL_CHOICES.keys())[0],
                label="Choose a model",
            )
            submit = gr.Button("Generate Completion")
        with gr.Column(scale=3):
            output = gr.Textbox(
                label="Generated Completion",
                lines=15,
            )

    # NOTE: gr.OAuthToken must NOT appear in `inputs` — it is a marker class,
    # not a component. Gradio supplies the token automatically based on the
    # annotation on complete_text's final parameter.
    submit.click(
        fn=complete_text,
        inputs=[prompt, max_tokens, temperature, top_p, model_choice],
        outputs=output,
    )

if __name__ == "__main__":
    demo.launch()