import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# Tiny, modern instruct model that can (patiently) run on CPU
MODEL_ID = "Qwen/Qwen2.5-0.5B-Instruct"

# Load tokenizer + model at import time (module-level side effect:
# downloads the weights on first run, then loads them into RAM).
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float32,  # CPU-safe; on GPU you could use torch.float16/bfloat16
    low_cpu_mem_usage=True  # helps reduce peak RAM on load
)

# Make sure a pad token exists (avoids warnings on generation);
# reusing EOS as PAD is the standard workaround for decoder-only models.
if tokenizer.pad_token_id is None and tokenizer.eos_token_id is not None:
    tokenizer.pad_token = tokenizer.eos_token

# Wrap with a text-generation pipeline; shared by all generation functions below.
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer
)
# --- Decoding functions ---
def generate_sampling(prompt, max_new_tokens=96, temperature=0.6, top_p=0.9, repetition_penalty=1.1, ngram=3):
    """Generate a completion with stochastic (temperature / top-p) decoding.

    Args:
        prompt: User instruction; blank/None input returns a usage hint.
        max_new_tokens: Cap on newly generated tokens (kept small for CPU).
        temperature: Softmax temperature. 0 falls back to greedy decoding,
            because transformers rejects do_sample=True with temperature=0
            (the UI slider's minimum is 0.0, which previously surfaced as
            a "Sampling error" message).
        top_p: Nucleus-sampling cumulative-probability cutoff.
        repetition_penalty: Values > 1.0 discourage repeated tokens.
        ngram: no_repeat_ngram_size; 0 disables the constraint.

    Returns:
        The generated text, or a human-readable error string (errors are
        shown in the output textbox rather than crashing the UI).
    """
    if not prompt or not prompt.strip():
        return "Please enter an instruction (e.g., 'Explain why the sky is blue in one short paragraph.')"
    try:
        # temperature == 0 is undefined for sampling; use greedy decoding instead.
        do_sample = float(temperature) > 0.0
        gen_kwargs = {
            "max_new_tokens": int(max_new_tokens),
            "do_sample": do_sample,
            "repetition_penalty": float(repetition_penalty),
            "no_repeat_ngram_size": int(ngram),
            "return_full_text": False,
        }
        if do_sample:
            # Only pass sampling knobs when sampling is on (avoids warnings).
            gen_kwargs["temperature"] = float(temperature)
            gen_kwargs["top_p"] = float(top_p)
        out = pipe(prompt.strip(), **gen_kwargs)
        return out[0]["generated_text"]
    except Exception as e:
        # Deliberate best-effort: show the failure in the UI textbox.
        return f"⚠️ Sampling error: {e}"
def generate_deterministic(prompt, max_new_tokens=96, num_beams=4, length_penalty=0.9, ngram=3):
    """Produce a stable, reproducible completion via beam search (no sampling).

    Args:
        prompt: User instruction; blank/None input returns a usage hint.
        max_new_tokens: Cap on newly generated tokens (kept small for CPU).
        num_beams: Number of beams; 1 degenerates to greedy decoding.
        length_penalty: <1.0 favors shorter beams, >1.0 favors longer ones.
        ngram: no_repeat_ngram_size; 0 disables the constraint.

    Returns:
        The generated text, or a human-readable error string for the UI.
    """
    cleaned = prompt.strip() if prompt else ""
    if not cleaned:
        return "Please enter an instruction (e.g., 'Explain why the sky is blue in one short paragraph.')"
    try:
        results = pipe(
            cleaned,
            max_new_tokens=int(max_new_tokens),
            num_beams=int(num_beams),
            early_stopping=True,
            length_penalty=float(length_penalty),
            no_repeat_ngram_size=int(ngram),
            return_full_text=False
        )
        return results[0]["generated_text"]
    except Exception as err:
        # Errors are rendered in the output textbox rather than raised.
        return f"⚠️ Deterministic error: {err}"
def generate_both(prompt,
                  s_max_new=96, s_temp=0.6, s_topp=0.9, s_rep=1.1, s_ngram=3,
                  d_max_new=96, d_beams=4, d_lenpen=0.9, d_ngram=3):
    """Run both decoders on the same prompt.

    Returns:
        A (sampling_text, deterministic_text) pair for the two output boxes.
    """
    return (
        generate_sampling(prompt, s_max_new, s_temp, s_topp, s_rep, s_ngram),
        generate_deterministic(prompt, d_max_new, d_beams, d_lenpen, d_ngram),
    )
# --- UI: two-column playground comparing the two decoding strategies ---
with gr.Blocks(fill_height=True, analytics_enabled=False) as demo:
    gr.Markdown(
        "# 🧪 Mini LLM Playground — Side-by-Side Decoding\n"
        "Enter one instruction below. The app generates **two answers** using:\n"
        "- **Sampling** (left): temperature & top-p for creativity\n"
        "- **Deterministic** (right): beam search for stability\n\n"
        "_Tip: keep outputs short on CPU (≤ 96 tokens). This is an educational demo; it may be incorrect._"
    )
    with gr.Row():
        # Single shared prompt that feeds both decoders
        prompt = gr.Textbox(
            label="Instruction",
            lines=4,
            placeholder="Explain in one short paragraph: Why is the sky blue?"
        )
    with gr.Row():
        # Left column: Sampling controls + output
        with gr.Column():
            gr.Markdown("### 🎲 Sampling (temperature / top-p)")
            with gr.Row():
                s_max_new = gr.Slider(32, 192, value=96, step=8, label="Max new tokens")
            with gr.Row():
                s_temp = gr.Slider(0.0, 1.5, value=0.6, step=0.05, label="Temperature")
                s_topp = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p")
            with gr.Row():
                s_rep = gr.Slider(1.0, 2.0, value=1.1, step=0.05, label="Repetition penalty")
                s_ngram = gr.Slider(0, 6, value=3, step=1, label="no_repeat_ngram_size")
            sampling_out = gr.Textbox(label="Sampling output", lines=10)
        # Right column: Deterministic controls + output
        with gr.Column():
            gr.Markdown("### 🧭 Deterministic (beam search)")
            with gr.Row():
                d_max_new = gr.Slider(32, 192, value=96, step=8, label="Max new tokens")
            with gr.Row():
                d_beams = gr.Slider(1, 8, value=4, step=1, label="Num beams")
                d_lenpen = gr.Slider(0.6, 1.4, value=0.9, step=0.05, label="Length penalty")
            with gr.Row():
                d_ngram = gr.Slider(0, 6, value=3, step=1, label="no_repeat_ngram_size")
            deterministic_out = gr.Textbox(label="Deterministic output", lines=10)
    with gr.Row():
        run_both = gr.Button("Generate Both", variant="primary")
        run_left = gr.Button("Generate Left Only (Sampling)")
        run_right = gr.Button("Generate Right Only (Deterministic)")
    # Wire buttons: slider component order must match each generator's
    # positional parameter order.
    run_both.click(
        fn=generate_both,
        inputs=[prompt,
                s_max_new, s_temp, s_topp, s_rep, s_ngram,
                d_max_new, d_beams, d_lenpen, d_ngram],
        outputs=[sampling_out, deterministic_out]
    )
    run_left.click(
        fn=generate_sampling,
        inputs=[prompt, s_max_new, s_temp, s_topp, s_rep, s_ngram],
        outputs=sampling_out
    )
    run_right.click(
        fn=generate_deterministic,
        inputs=[prompt, d_max_new, d_beams, d_lenpen, d_ngram],
        outputs=deterministic_out
    )
    gr.Markdown(
        "#### Compare & Contrast (discussion prompts)\n"
        "- Which side feels **more factual** or **more concise**?\n"
        "- Which side feels **more varied** or **more creative**?\n"
        "- For a study guide, which would you pick? For brainstorming?\n"
    )

# Start the local Gradio server only when run as a script (not on import)
if __name__ == "__main__":
    demo.launch()