# Hugging Face Space by kairusama — "Update app.py" (commit 74afe25, verified)
# app.py
import gradio as gr
from transformers import pipeline
# ---- Load model via pipeline ----
# Instruction-tuned GPT-2 variant (Alpaca-style prompt format).
MODEL_NAME = "vicgalle/gpt2-open-instruct-v1"
# device_map="auto" places the model on an accelerator when one is available
# (requires the `accelerate` package), otherwise falls back to CPU.
pipe = pipeline("text-generation", model=MODEL_NAME, device_map="auto")
# ---- Inference function ----
def generate_response(instruction, max_new_tokens=150, temperature=0.7, top_k=50, top_p=0.9, rep_pty=1.2):
    """Generate an instruction-following completion from the Open-Instruct GPT-2 model.

    Args:
        instruction: User task/question inserted into the Alpaca-style prompt.
        max_new_tokens: Maximum number of tokens sampled after the prompt.
        temperature: Softmax temperature; higher values increase randomness.
        top_k: Keep only the k most likely tokens at each sampling step.
        top_p: Nucleus-sampling cumulative-probability threshold.
        rep_pty: Repetition penalty; 1.0 means no penalty.

    Returns:
        The model's response text with the prompt scaffolding stripped.
    """
    # Alpaca-style prompt template the model was fine-tuned on.
    system_prompt = f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Instruction:
{instruction}
### Response:
"""
    output = pipe(
        system_prompt,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
        do_sample=True,
        # GPT-2 has no pad token; reuse EOS to avoid the tokenizer warning.
        pad_token_id=pipe.tokenizer.eos_token_id,
        repetition_penalty=rep_pty,
    )
    # The pipeline echoes the full prompt; keep only what follows "### Response:".
    text = output[0]["generated_text"]
    response = text.split("### Response:")[-1]
    # GPT-2 has no stop sequence for this format, so after answering it often
    # starts a fresh "### Instruction:" block — truncate at the first new marker.
    response = response.split("###")[0]
    return response.strip()
# ---- Gradio UI ----
# ---- Gradio UI ----
with gr.Blocks() as demo:
    gr.Markdown("# 🛸 GPT-2 Open Instruct Playground\nFine-tuned with Open Instruct v1!")
    with gr.Row():
        # Wide column: prompt entry and the generated text.
        with gr.Column(scale=4):
            instruction_tb = gr.Textbox(label="Instruction", value="What is the capital city of France?", lines=6)
            response_tb = gr.Textbox(label="Model Output", lines=10)
        # Narrow column: run button plus sampling hyperparameters.
        with gr.Column(scale=1):
            run_btn = gr.Button("Generate ✨")
            tokens_sl = gr.Slider(minimum=50, maximum=500, value=150, step=10, label="Max new tokens")
            temp_sl = gr.Slider(minimum=0.1, maximum=1.5, value=0.7, step=0.05, label="Temperature")
            topk_sl = gr.Slider(minimum=10, maximum=100, value=50, step=5, label="Top-K sampling")
            topp_sl = gr.Slider(minimum=0.1, maximum=1.0, value=0.9, step=0.05, label="Top-P (nucleus) sampling")
            rep_sl = gr.Slider(label="Repetition Penalty", info="Token repetition penalty. 1.0 means no penalty.", minimum=1.0, maximum=2.0, step=0.01, value=1.2)
    # Wire the button to the inference function; slider order must match
    # generate_response's parameter order.
    run_btn.click(
        fn=generate_response,
        inputs=[instruction_tb, tokens_sl, temp_sl, topk_sl, topp_sl, rep_sl],
        outputs=response_tb,
    )
# ---- Launch ----
if __name__ == "__main__":
    # Start the Gradio server only when run as a script (not on import).
    demo.launch()