|
|
import gradio as gr |
|
|
import torch |
|
|
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline |
|
|
|
|
|
|
|
|
# Small instruction-tuned model (~0.5B params) so the demo is usable on CPU.
MODEL_ID = "Qwen/Qwen2.5-0.5B-Instruct"


# Load tokenizer and model once at import time; both are shared by all requests.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)


model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float32,  # full precision: safest default for CPU inference
    low_cpu_mem_usage=True  # reduces peak RAM while loading the weights
)


# Some tokenizers ship without a pad token; reuse EOS so padding-aware code paths work.
if tokenizer.pad_token_id is None and tokenizer.eos_token_id is not None:
    tokenizer.pad_token = tokenizer.eos_token


# Single shared text-generation pipeline used by both decoding strategies below.
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer
)
|
|
|
|
|
|
|
|
|
|
|
def generate_sampling(prompt, max_new_tokens=96, temperature=0.6, top_p=0.9, repetition_penalty=1.1, ngram=3):
    """Generate a completion with stochastic (temperature / nucleus) sampling.

    Args:
        prompt: User instruction; blank/None input short-circuits with a hint.
        max_new_tokens: Upper bound on generated tokens (coerced to int).
        temperature: Softmax temperature. Values <= 0 are clamped to a tiny
            positive floor, because ``do_sample=True`` requires temperature > 0.
        top_p: Nucleus-sampling cumulative-probability cutoff.
        repetition_penalty: Values > 1.0 discourage repeating earlier tokens.
        ngram: ``no_repeat_ngram_size``; 0 disables the constraint.

    Returns:
        The generated text, or a human-readable message for empty input or a
        generation failure. This function never raises (Gradio callback).
    """
    if not prompt or not prompt.strip():
        return "Please enter an instruction (e.g., 'Explain why the sky is blue in one short paragraph.')"
    try:
        # Bug fix: the UI slider allows temperature == 0.0, which makes
        # transformers raise when do_sample=True. Clamp to a tiny floor so
        # the user gets a (near-greedy) answer instead of an error message.
        temp = max(float(temperature), 1e-3)
        out = pipe(
            prompt.strip(),
            max_new_tokens=int(max_new_tokens),
            do_sample=True,
            temperature=temp,
            top_p=float(top_p),
            repetition_penalty=float(repetition_penalty),
            no_repeat_ngram_size=int(ngram),
            return_full_text=False  # return only the newly generated tokens
        )
        return out[0]["generated_text"]
    except Exception as e:
        # Surface the failure in the output box rather than crashing the app.
        return f"⚠️ Sampling error: {e}"
|
|
|
|
|
def generate_deterministic(prompt, max_new_tokens=96, num_beams=4, length_penalty=0.9, ngram=3):
    """Generate a reproducible completion using beam search (no sampling).

    Blank/None prompts short-circuit with a usage hint; any generation
    failure is returned as a readable message instead of raising, so the
    Gradio callback never crashes the app.
    """
    text = (prompt or "").strip()
    if not text:
        return "Please enter an instruction (e.g., 'Explain why the sky is blue in one short paragraph.')"
    try:
        result = pipe(
            text,
            max_new_tokens=int(max_new_tokens),
            num_beams=int(num_beams),
            early_stopping=True,  # stop once enough finished beam hypotheses exist
            length_penalty=float(length_penalty),
            no_repeat_ngram_size=int(ngram),
            return_full_text=False,  # return only the newly generated tokens
        )
        return result[0]["generated_text"]
    except Exception as exc:
        return f"⚠️ Deterministic error: {exc}"
|
|
|
|
|
def generate_both(prompt,
                  s_max_new=96, s_temp=0.6, s_topp=0.9, s_rep=1.1, s_ngram=3,
                  d_max_new=96, d_beams=4, d_lenpen=0.9, d_ngram=3):
    """Run both decoding strategies on the same prompt.

    Returns a ``(sampling_text, deterministic_text)`` pair so one Gradio
    click can fill both output boxes. Each half reports its own errors as
    text, so a failure on one side never blocks the other.
    """
    left = generate_sampling(prompt, s_max_new, s_temp, s_topp, s_rep, s_ngram)
    right = generate_deterministic(prompt, d_max_new, d_beams, d_lenpen, d_ngram)
    return left, right
|
|
|
|
|
# Build the demo UI. Component-creation order defines the page layout, so the
# structure below is deliberate: header, shared prompt, two control columns,
# action buttons, wiring, then discussion footer.
with gr.Blocks(fill_height=True, analytics_enabled=False) as demo:
    # Header / usage notes.
    gr.Markdown(
        "# 🧪 Mini LLM Playground — Side-by-Side Decoding\n"
        "Enter one instruction below. The app generates **two answers** using:\n"
        "- **Sampling** (left): temperature & top-p for creativity\n"
        "- **Deterministic** (right): beam search for stability\n\n"
        "_Tip: keep outputs short on CPU (≤ 96 tokens). This is an educational demo; it may be incorrect._"
    )

    # One prompt box shared by both decoding strategies.
    with gr.Row():
        prompt = gr.Textbox(
            label="Instruction",
            lines=4,
            placeholder="Explain in one short paragraph: Why is the sky blue?"
        )

    # Side-by-side columns: sampling controls on the left, beam search on the right.
    with gr.Row():

        # Left column: stochastic sampling controls + output box.
        with gr.Column():
            gr.Markdown("### 🎲 Sampling (temperature / top-p)")
            with gr.Row():
                s_max_new = gr.Slider(32, 192, value=96, step=8, label="Max new tokens")
            with gr.Row():
                s_temp = gr.Slider(0.0, 1.5, value=0.6, step=0.05, label="Temperature")
                s_topp = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p")
            with gr.Row():
                s_rep = gr.Slider(1.0, 2.0, value=1.1, step=0.05, label="Repetition penalty")
                s_ngram = gr.Slider(0, 6, value=3, step=1, label="no_repeat_ngram_size")
            sampling_out = gr.Textbox(label="Sampling output", lines=10)

        # Right column: beam-search controls + output box.
        with gr.Column():
            gr.Markdown("### 🧭 Deterministic (beam search)")
            with gr.Row():
                d_max_new = gr.Slider(32, 192, value=96, step=8, label="Max new tokens")
            with gr.Row():
                d_beams = gr.Slider(1, 8, value=4, step=1, label="Num beams")
                d_lenpen = gr.Slider(0.6, 1.4, value=0.9, step=0.05, label="Length penalty")
            with gr.Row():
                d_ngram = gr.Slider(0, 6, value=3, step=1, label="no_repeat_ngram_size")
            deterministic_out = gr.Textbox(label="Deterministic output", lines=10)

    # Action buttons: run both strategies at once, or each side individually.
    with gr.Row():
        run_both = gr.Button("Generate Both", variant="primary")
        run_left = gr.Button("Generate Left Only (Sampling)")
        run_right = gr.Button("Generate Right Only (Deterministic)")

    # Wire each button to its generation function; slider values are passed
    # positionally in the order the functions declare their parameters.
    run_both.click(
        fn=generate_both,
        inputs=[prompt,
                s_max_new, s_temp, s_topp, s_rep, s_ngram,
                d_max_new, d_beams, d_lenpen, d_ngram],
        outputs=[sampling_out, deterministic_out]
    )

    run_left.click(
        fn=generate_sampling,
        inputs=[prompt, s_max_new, s_temp, s_topp, s_rep, s_ngram],
        outputs=sampling_out
    )

    run_right.click(
        fn=generate_deterministic,
        inputs=[prompt, d_max_new, d_beams, d_lenpen, d_ngram],
        outputs=deterministic_out
    )

    # Footer: discussion prompts for comparing the two decoding strategies.
    gr.Markdown(
        "#### Compare & Contrast (discussion prompts)\n"
        "- Which side feels **more factual** or **more concise**?\n"
        "- Which side feels **more varied** or **more creative**?\n"
        "- For a study guide, which would you pick? For brainstorming?\n"
    )
|
|
|
|
|
if __name__ == "__main__":
    # Start the Gradio server only when executed directly (not on import).
    demo.launch()