# Demo_OS_BaseLLM / app.py
# Author: Nullpointer-KK — Hugging Face Space, commit 1a4d54f ("Update app.py")
import gradio as gr
from huggingface_hub import InferenceClient
# Open-source models offered in the dropdown: (display name, HF repo id).
_MODELS = [
    ("Mistral 7B Instruct", "mistralai/Mistral-7B-Instruct-v0.2"),
    ("Falcon 7B Instruct", "tiiuae/falcon-7b-instruct"),
    ("Zephyr 7B Beta", "HuggingFaceH4/zephyr-7b-beta"),
]
# Dropdown label -> model repo id; the label embeds the repo id for clarity.
MODEL_CHOICES = {f"{name} ({repo})": repo for name, repo in _MODELS}
def complete_text(prompt, max_tokens, temperature, top_p, model_choice, hf_token):
    """Stream a text completion from the selected Hugging Face model.

    Args:
        prompt: Text to complete; empty or whitespace-only prompts are rejected.
        max_tokens: Maximum number of new tokens to generate (coerced to int).
        temperature: Sampling temperature (coerced to float).
        top_p: Nucleus-sampling probability mass (coerced to float).
        model_choice: Display label; must be a key of MODEL_CHOICES.
        hf_token: OAuth token object supplied by gr.LoginButton (exposes a
            ``.token`` attribute), or None when the user is not logged in.

    Yields:
        The completion accumulated so far (one yield per streamed chunk), or a
        single warning/error message string.
    """
    # Reject empty AND whitespace-only prompts before hitting the API.
    if not prompt or not prompt.strip():
        yield "⚠️ Please enter a prompt."
        return
    # gr.LoginButton passes an OAuth token object; None or a missing/empty
    # .token attribute means the user has not logged in yet.
    if not hf_token or not getattr(hf_token, "token", None):
        # Fixed mojibake in the original message ("πŸ”" was a mis-decoded 🔐).
        yield "🔐 Please login with your Hugging Face account (see left sidebar)."
        return
    model_id = MODEL_CHOICES[model_choice]
    client = InferenceClient(model=model_id, token=hf_token.token)
    generated = ""
    try:
        # stream=True yields incremental text chunks; re-yield the running
        # concatenation so Gradio updates the output box progressively.
        for chunk in client.text_generation(
            prompt=prompt,
            max_new_tokens=int(max_tokens),
            temperature=float(temperature),
            top_p=float(top_p),
            repetition_penalty=1.0,
            stream=True,
        ):
            generated += chunk
            yield generated
    except Exception as e:  # UI boundary: surface API/network errors as text
        yield f"❌ Error while generating: {e}"
# UI layout: left column holds the prompt + sampling controls, right column
# holds the streamed output; the login button lives in a sidebar.
with gr.Blocks() as demo:
    gr.Markdown("## ✍️ Text Completion Demo (Open-Source LLMs via Hugging Face Inference API)")
    gr.Markdown(
        "Pick a model, enter a prompt, and stream completions. "
        "Some models require logging in (click **Login** in the sidebar)."
    )
    with gr.Row():
        with gr.Column(scale=2):
            # Input controls: prompt text plus the usual sampling knobs.
            prompt = gr.Textbox(label="Prompt", placeholder="Type the beginning of your text...", lines=4)
            max_tokens = gr.Slider(1, 1024, value=100, step=1, label="Max new tokens")
            temperature = gr.Slider(0.0, 2.0, value=0.7, step=0.1, label="Temperature")
            top_p = gr.Slider(0.1, 1.0, value=1.0, step=0.05, label="Top-p")
            model_choice = gr.Dropdown(
                choices=list(MODEL_CHOICES.keys()),
                value=list(MODEL_CHOICES.keys())[0],  # default to the first model
                label="Choose a model",
            )
            submit = gr.Button("Generate Completion")
        with gr.Column(scale=3):
            with gr.Sidebar():
                login = gr.LoginButton()  # supplies OAuth token
            output = gr.Textbox(label="Generated Completion", lines=15)
    # Function has 6 args, so inputs list must have exactly 6 items.
    # complete_text is a generator, so Gradio streams each yielded value
    # into the output textbox.
    submit.click(
        fn=complete_text,
        inputs=[prompt, max_tokens, temperature, top_p, model_choice, login],
        outputs=output,
    )

# Launch the app only when run as a script (Spaces also executes this path).
if __name__ == "__main__":
    demo.launch()