# Demo_OS_BaseLLM / app.py
# Hugging Face Space by Nullpointer-KK — "Update app.py", commit 3af1594 (verified), 2.75 kB
import gradio as gr
from huggingface_hub import InferenceClient
# Available open-source base models (completion style)
# Maps a human-readable dropdown label -> Hugging Face model repo id.
MODEL_CHOICES = {
    "Mistral 7B Instruct (mistralai/Mistral-7B-Instruct-v0.2)": "mistralai/Mistral-7B-Instruct-v0.2",
    "Falcon 7B Instruct (tiiuae/falcon-7b-instruct)": "tiiuae/falcon-7b-instruct",
    # NOTE: gated repo — requires the user to be logged in and approved for access.
    "LLaMA-2 7B Chat (meta-llama/Llama-2-7b-chat-hf)": "meta-llama/Llama-2-7b-chat-hf",
}
def complete_text(prompt, max_tokens, temperature, top_p, model_choice, hf_token: gr.OAuthToken):
    """
    Stream a plain-text completion from a Hugging Face-hosted open-source LLM.

    Args:
        prompt: Text for the model to continue.
        max_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature (higher = more random).
        top_p: Nucleus-sampling probability mass.
        model_choice: Dropdown label; expected to be a key of MODEL_CHOICES.
        hf_token: OAuth token injected by Gradio once the user logs in
            (required for gated models such as LLaMA-2).

    Yields:
        The completion accumulated so far — Gradio re-renders the output
        textbox on every yield, producing a token-by-token streaming effect.
    """
    if not hf_token or not hf_token.token:
        yield "⚠️ Please log in with your Hugging Face account (for gated models like LLaMA-2)."
        return
    # Guarded lookup: a stale/unknown dropdown value should produce a message,
    # not an unhandled KeyError inside the event handler.
    model_id = MODEL_CHOICES.get(model_choice)
    if model_id is None:
        yield f"⚠️ Unknown model choice: {model_choice}"
        return
    client = InferenceClient(model=model_id, token=hf_token.token)
    response_text = ""
    try:
        stream = client.text_generation(
            prompt,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            stream=True,
            repetition_penalty=1.0,
        )
        # With stream=True (and details left False) each event is a str chunk.
        for chunk in stream:
            response_text += chunk
            yield response_text
    except Exception as err:
        # Surface API errors (auth failures, rate limits, invalid parameters
        # such as temperature=0) in the UI instead of crashing the handler.
        yield response_text + f"\n\n❌ Inference error: {err}"
with gr.Blocks() as demo:
    gr.Markdown("## ✍️ Text Completion Demo with Open-Source Base LLMs")
    gr.Markdown(
        "Pick a model hosted on Hugging Face, enter a prompt, adjust decoding parameters, "
        "and watch the model complete your text."
    )
    # OAuth sign-in: Gradio populates the `hf_token: gr.OAuthToken` parameter
    # of complete_text() from this login session.
    gr.LoginButton()
    with gr.Row():
        with gr.Column(scale=2):
            prompt = gr.Textbox(
                label="Prompt",
                placeholder="Type the beginning of your text...",
                lines=4,
            )
            max_tokens = gr.Slider(
                minimum=1, maximum=1024, value=100, step=1, label="Max tokens"
            )
            temperature = gr.Slider(
                minimum=0.0, maximum=2.0, value=0.7, step=0.1, label="Temperature"
            )
            top_p = gr.Slider(
                minimum=0.1, maximum=1.0, value=1.0, step=0.05, label="Top-p"
            )
            model_choice = gr.Dropdown(
                choices=list(MODEL_CHOICES.keys()),
                value=list(MODEL_CHOICES.keys())[0],
                label="Choose a model",
            )
            submit = gr.Button("Generate Completion")
        with gr.Column(scale=3):
            output = gr.Textbox(
                label="Generated Completion",
                lines=15,
            )
    # BUG FIX: gr.OAuthToken is not a component and must NOT appear in
    # `inputs` — Gradio injects the token automatically based on the
    # `hf_token: gr.OAuthToken` annotation on the handler function.
    submit.click(
        fn=complete_text,
        inputs=[prompt, max_tokens, temperature, top_p, model_choice],
        outputs=output,
    )

if __name__ == "__main__":
    demo.launch()