import gradio as gr
from huggingface_hub import InferenceClient

# Three open-source models to choose from
MODEL_CHOICES = {
    "Mistral 7B Instruct (mistralai/Mistral-7B-Instruct-v0.2)": "mistralai/Mistral-7B-Instruct-v0.2",
    "Falcon 7B Instruct (tiiuae/falcon-7b-instruct)": "tiiuae/falcon-7b-instruct",
    "Zephyr 7B Beta (HuggingFaceH4/zephyr-7b-beta)": "HuggingFaceH4/zephyr-7b-beta",
}

def complete_text(prompt, max_tokens, temperature, top_p, model_choice, hf_token):
    """
    Generate a text completion from a Hugging Face model, streamed chunk by chunk.
    """
    if not prompt:
        yield "⚠️ Please enter a prompt."
        return
    if not hf_token or not getattr(hf_token, "token", None):
        yield "🔐 Please login with your Hugging Face account (see left sidebar)."
        return
    model_id = MODEL_CHOICES[model_choice]
    client = InferenceClient(model=model_id, token=hf_token.token)
    generated = ""
    try:
        for chunk in client.text_generation(
            prompt=prompt,
            max_new_tokens=int(max_tokens),
            temperature=float(temperature),
            top_p=float(top_p),
            repetition_penalty=1.0,
            stream=True,
        ):
            generated += chunk
            yield generated
    except Exception as e:
        yield f"❌ Error while generating: {e}"
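
# Illustrative sketch (not part of the original app): the same streaming call that
# complete_text() wraps, used directly with a personal access token. Assumes a valid
# token is exported as the HF_TOKEN environment variable; the model id is one of the
# entries from MODEL_CHOICES above.
#
#   import os
#   client = InferenceClient(model="HuggingFaceH4/zephyr-7b-beta", token=os.environ["HF_TOKEN"])
#   for chunk in client.text_generation("Once upon a time", max_new_tokens=50, stream=True):
#       print(chunk, end="", flush=True)
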
with gr.Blocks() as demo:
    gr.Markdown("## ✍️ Text Completion Demo (Open-Source LLMs via Hugging Face Inference API)")
    gr.Markdown(
        "Pick a model, enter a prompt, and stream completions. "
        "Some models require logging in (click **Login** in the sidebar)."
    )
    with gr.Row():
        with gr.Column(scale=2):
            prompt = gr.Textbox(label="Prompt", placeholder="Type the beginning of your text...", lines=4)
            max_tokens = gr.Slider(1, 1024, value=100, step=1, label="Max new tokens")
            temperature = gr.Slider(0.0, 2.0, value=0.7, step=0.1, label="Temperature")
            top_p = gr.Slider(0.1, 1.0, value=1.0, step=0.05, label="Top-p")
            model_choice = gr.Dropdown(
                choices=list(MODEL_CHOICES.keys()),
                value=list(MODEL_CHOICES.keys())[0],
                label="Choose a model",
            )
            submit = gr.Button("Generate Completion")
        with gr.Column(scale=3):
            with gr.Sidebar():
                login = gr.LoginButton()  # supplies OAuth token
            output = gr.Textbox(label="Generated Completion", lines=15)

    # Function has 6 args, so inputs list must have exactly 6 items
    submit.click(
        fn=complete_text,
        inputs=[prompt, max_tokens, temperature, top_p, model_choice, login],
        outputs=output,
    )

if __name__ == "__main__":
    demo.launch()