# ITC388 / app.py
# Author: DrDavis — "Update app.py" (commit c1c5ac0, verified)
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
# Tiny, modern instruct model that can (patiently) run on CPU.
MODEL_ID = "Qwen/Qwen2.5-0.5B-Instruct"

# Load tokenizer + model once at import time (module-level side effect:
# downloads/loads the weights on first run, which can take a while on CPU).
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float32,  # CPU-safe; on GPU you could use torch.float16/bfloat16
    low_cpu_mem_usage=True      # helps reduce peak RAM on load
)

# Make sure a pad token exists (avoids warnings on generation).
if tokenizer.pad_token_id is None and tokenizer.eos_token_id is not None:
    tokenizer.pad_token = tokenizer.eos_token

# Wrap model + tokenizer in a text-generation pipeline shared by all handlers below.
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer
)
# --- Decoding functions ---
def generate_sampling(prompt, max_new_tokens=96, temperature=0.6, top_p=0.9, repetition_penalty=1.1, ngram=3):
    """Generate text with stochastic (temperature / top-p) decoding.

    Args:
        prompt: User instruction; blank/whitespace input returns a help message.
        max_new_tokens: Cap on generated tokens (cast to int).
        temperature: Softmax temperature; 0 falls back to greedy decoding.
        top_p: Nucleus-sampling probability mass.
        repetition_penalty: >1.0 discourages repeating tokens.
        ngram: no_repeat_ngram_size passed to the generator.

    Returns:
        The generated text, or an error string prefixed with "⚠️" on failure.
    """
    if not prompt or not prompt.strip():
        return "Please enter an instruction (e.g., 'Explain why the sky is blue in one short paragraph.')"
    try:
        temperature = float(temperature)
        # BUG FIX: the UI slider allows temperature == 0.0, but transformers
        # rejects do_sample=True with a non-positive temperature
        # ("temperature has to be strictly positive"). Fall back to greedy
        # decoding in that case instead of surfacing the exception.
        do_sample = temperature > 0.0
        gen_kwargs = {
            "max_new_tokens": int(max_new_tokens),
            "do_sample": do_sample,
            "repetition_penalty": float(repetition_penalty),
            "no_repeat_ngram_size": int(ngram),
            "return_full_text": False,  # only return the completion, not the prompt
        }
        if do_sample:
            gen_kwargs["temperature"] = temperature
            gen_kwargs["top_p"] = float(top_p)
        out = pipe(prompt.strip(), **gen_kwargs)
        return out[0]["generated_text"]
    except Exception as e:
        # Surface generation errors in the output box rather than crashing the UI.
        return f"⚠️ Sampling error: {e}"
def generate_deterministic(prompt, max_new_tokens=96, num_beams=4, length_penalty=0.9, ngram=3):
    """Generate a stable, reproducible answer via beam search (no sampling).

    Args:
        prompt: User instruction; blank/whitespace input returns a help message.
        max_new_tokens: Cap on generated tokens (cast to int).
        num_beams: Number of beams to search over.
        length_penalty: <1.0 favors shorter outputs, >1.0 longer ones.
        ngram: no_repeat_ngram_size passed to the generator.

    Returns:
        The generated text, or an error string prefixed with "⚠️" on failure.
    """
    text = (prompt or "").strip()
    if not text:
        return "Please enter an instruction (e.g., 'Explain why the sky is blue in one short paragraph.')"
    try:
        results = pipe(
            text,
            max_new_tokens=int(max_new_tokens),
            num_beams=int(num_beams),
            early_stopping=True,            # stop when all beams finish
            length_penalty=float(length_penalty),
            no_repeat_ngram_size=int(ngram),
            return_full_text=False,         # only return the completion, not the prompt
        )
        return results[0]["generated_text"]
    except Exception as e:
        # Surface generation errors in the output box rather than crashing the UI.
        return f"⚠️ Deterministic error: {e}"
def generate_both(prompt,
                  s_max_new=96, s_temp=0.6, s_topp=0.9, s_rep=1.1, s_ngram=3,
                  d_max_new=96, d_beams=4, d_lenpen=0.9, d_ngram=3):
    """Run both decoders on the same prompt.

    Returns a (sampling_text, deterministic_text) pair, matching the order of
    the two output textboxes wired up in the UI.
    """
    left = generate_sampling(prompt, s_max_new, s_temp, s_topp, s_rep, s_ngram)
    right = generate_deterministic(prompt, d_max_new, d_beams, d_lenpen, d_ngram)
    return left, right
# --- UI: one prompt, two side-by-side decoder outputs ---
with gr.Blocks(fill_height=True, analytics_enabled=False) as demo:
    gr.Markdown(
        "# 🧪 Mini LLM Playground — Side-by-Side Decoding\n"
        "Enter one instruction below. The app generates **two answers** using:\n"
        "- **Sampling** (left): temperature & top-p for creativity\n"
        "- **Deterministic** (right): beam search for stability\n\n"
        "_Tip: keep outputs short on CPU (≤ 96 tokens). This is an educational demo; it may be incorrect._"
    )
    with gr.Row():
        prompt = gr.Textbox(
            label="Instruction",
            lines=4,
            placeholder="Explain in one short paragraph: Why is the sky blue?"
        )
    with gr.Row():
        # Left column: Sampling controls + output
        with gr.Column():
            gr.Markdown("### 🎲 Sampling (temperature / top-p)")
            with gr.Row():
                s_max_new = gr.Slider(32, 192, value=96, step=8, label="Max new tokens")
            with gr.Row():
                s_temp = gr.Slider(0.0, 1.5, value=0.6, step=0.05, label="Temperature")
                s_topp = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p")
            with gr.Row():
                s_rep = gr.Slider(1.0, 2.0, value=1.1, step=0.05, label="Repetition penalty")
                s_ngram = gr.Slider(0, 6, value=3, step=1, label="no_repeat_ngram_size")
            sampling_out = gr.Textbox(label="Sampling output", lines=10)
        # Right column: Deterministic controls + output
        with gr.Column():
            gr.Markdown("### 🧭 Deterministic (beam search)")
            with gr.Row():
                d_max_new = gr.Slider(32, 192, value=96, step=8, label="Max new tokens")
            with gr.Row():
                d_beams = gr.Slider(1, 8, value=4, step=1, label="Num beams")
                d_lenpen = gr.Slider(0.6, 1.4, value=0.9, step=0.05, label="Length penalty")
            with gr.Row():
                d_ngram = gr.Slider(0, 6, value=3, step=1, label="no_repeat_ngram_size")
            deterministic_out = gr.Textbox(label="Deterministic output", lines=10)
    with gr.Row():
        run_both = gr.Button("Generate Both", variant="primary")
        run_left = gr.Button("Generate Left Only (Sampling)")
        run_right = gr.Button("Generate Right Only (Deterministic)")
    # Wire buttons. Gradio passes `inputs` positionally, so the order of each
    # list below must match the corresponding handler's parameter order.
    run_both.click(
        fn=generate_both,
        inputs=[prompt,
                s_max_new, s_temp, s_topp, s_rep, s_ngram,
                d_max_new, d_beams, d_lenpen, d_ngram],
        outputs=[sampling_out, deterministic_out]
    )
    run_left.click(
        fn=generate_sampling,
        inputs=[prompt, s_max_new, s_temp, s_topp, s_rep, s_ngram],
        outputs=sampling_out
    )
    run_right.click(
        fn=generate_deterministic,
        inputs=[prompt, d_max_new, d_beams, d_lenpen, d_ngram],
        outputs=deterministic_out
    )
    gr.Markdown(
        "#### Compare & Contrast (discussion prompts)\n"
        "- Which side feels **more factual** or **more concise**?\n"
        "- Which side feels **more varied** or **more creative**?\n"
        "- For a study guide, which would you pick? For brainstorming?\n"
    )
# Start the Gradio server when run as a script (the Spaces entry point).
if __name__ == "__main__":
    demo.launch()