# ITC388 / app.py
# Author: DrDavis — "Update app.py" (commit c1c5ac0, verified)
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
# Tiny, modern instruct model that can (patiently) run on CPU.
MODEL_ID = "Qwen/Qwen2.5-0.5B-Instruct"

# Load tokenizer + model once at import time (module-level side effect:
# downloads/loads the weights on first run, which can take a while on CPU).
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float32,  # CPU-safe; on GPU you could use torch.float16/bfloat16
    low_cpu_mem_usage=True      # helps reduce peak RAM on load
)

# Make sure a pad token exists (avoids warnings on generation).
if tokenizer.pad_token_id is None and tokenizer.eos_token_id is not None:
    tokenizer.pad_token = tokenizer.eos_token

# Wrap model + tokenizer in a text-generation pipeline shared by all handlers below.
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer
)
# --- Decoding functions ---
def generate_sampling(prompt, max_new_tokens=96, temperature=0.6, top_p=0.9, repetition_penalty=1.1, ngram=3):
    """Generate text with stochastic (temperature / top-p) decoding.

    Args:
        prompt: User instruction; blank/whitespace input returns a help message.
        max_new_tokens: Cap on generated tokens (cast to int).
        temperature: Softmax temperature; 0 falls back to greedy decoding.
        top_p: Nucleus-sampling probability mass.
        repetition_penalty: >1.0 discourages repeating tokens.
        ngram: no_repeat_ngram_size passed to the generator.

    Returns:
        The generated text, or an error string prefixed with "⚠️" on failure.
    """
    if not prompt or not prompt.strip():
        return "Please enter an instruction (e.g., 'Explain why the sky is blue in one short paragraph.')"
    try:
        temperature = float(temperature)
        # BUG FIX: the UI slider allows temperature == 0.0, but transformers
        # rejects do_sample=True with a non-positive temperature
        # ("temperature has to be strictly positive"). Fall back to greedy
        # decoding in that case instead of surfacing the exception.
        do_sample = temperature > 0.0
        gen_kwargs = {
            "max_new_tokens": int(max_new_tokens),
            "do_sample": do_sample,
            "repetition_penalty": float(repetition_penalty),
            "no_repeat_ngram_size": int(ngram),
            "return_full_text": False,  # only return the completion, not the prompt
        }
        if do_sample:
            gen_kwargs["temperature"] = temperature
            gen_kwargs["top_p"] = float(top_p)
        out = pipe(prompt.strip(), **gen_kwargs)
        return out[0]["generated_text"]
    except Exception as e:
        # Surface generation errors in the output box rather than crashing the UI.
        return f"⚠️ Sampling error: {e}"
def generate_deterministic(prompt, max_new_tokens=96, num_beams=4, length_penalty=0.9, ngram=3):
    """Generate a stable, reproducible answer via beam search (no sampling).

    Args:
        prompt: User instruction; blank/whitespace input returns a help message.
        max_new_tokens: Cap on generated tokens (cast to int).
        num_beams: Number of beams to search over.
        length_penalty: <1.0 favors shorter outputs, >1.0 longer ones.
        ngram: no_repeat_ngram_size passed to the generator.

    Returns:
        The generated text, or an error string prefixed with "⚠️" on failure.
    """
    text = (prompt or "").strip()
    if not text:
        return "Please enter an instruction (e.g., 'Explain why the sky is blue in one short paragraph.')"
    try:
        results = pipe(
            text,
            max_new_tokens=int(max_new_tokens),
            num_beams=int(num_beams),
            early_stopping=True,            # stop when all beams finish
            length_penalty=float(length_penalty),
            no_repeat_ngram_size=int(ngram),
            return_full_text=False,         # only return the completion, not the prompt
        )
        return results[0]["generated_text"]
    except Exception as e:
        # Surface generation errors in the output box rather than crashing the UI.
        return f"⚠️ Deterministic error: {e}"
def generate_both(prompt,
                  s_max_new=96, s_temp=0.6, s_topp=0.9, s_rep=1.1, s_ngram=3,
                  d_max_new=96, d_beams=4, d_lenpen=0.9, d_ngram=3):
    """Run both decoders on the same prompt.

    Returns a (sampling_text, deterministic_text) pair, matching the order of
    the two output textboxes wired up in the UI.
    """
    left = generate_sampling(prompt, s_max_new, s_temp, s_topp, s_rep, s_ngram)
    right = generate_deterministic(prompt, d_max_new, d_beams, d_lenpen, d_ngram)
    return left, right
# --- UI: one prompt, two side-by-side decoder outputs ---
with gr.Blocks(fill_height=True, analytics_enabled=False) as demo:
    gr.Markdown(
        "# 🧪 Mini LLM Playground — Side-by-Side Decoding\n"
        "Enter one instruction below. The app generates **two answers** using:\n"
        "- **Sampling** (left): temperature & top-p for creativity\n"
        "- **Deterministic** (right): beam search for stability\n\n"
        "_Tip: keep outputs short on CPU (≤ 96 tokens). This is an educational demo; it may be incorrect._"
    )
    with gr.Row():
        prompt = gr.Textbox(
            label="Instruction",
            lines=4,
            placeholder="Explain in one short paragraph: Why is the sky blue?"
        )
    with gr.Row():
        # Left column: Sampling controls + output
        with gr.Column():
            gr.Markdown("### 🎲 Sampling (temperature / top-p)")
            with gr.Row():
                s_max_new = gr.Slider(32, 192, value=96, step=8, label="Max new tokens")
            with gr.Row():
                s_temp = gr.Slider(0.0, 1.5, value=0.6, step=0.05, label="Temperature")
                s_topp = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p")
            with gr.Row():
                s_rep = gr.Slider(1.0, 2.0, value=1.1, step=0.05, label="Repetition penalty")
                s_ngram = gr.Slider(0, 6, value=3, step=1, label="no_repeat_ngram_size")
            sampling_out = gr.Textbox(label="Sampling output", lines=10)
        # Right column: Deterministic controls + output
        with gr.Column():
            gr.Markdown("### 🧭 Deterministic (beam search)")
            with gr.Row():
                d_max_new = gr.Slider(32, 192, value=96, step=8, label="Max new tokens")
            with gr.Row():
                d_beams = gr.Slider(1, 8, value=4, step=1, label="Num beams")
                d_lenpen = gr.Slider(0.6, 1.4, value=0.9, step=0.05, label="Length penalty")
            with gr.Row():
                d_ngram = gr.Slider(0, 6, value=3, step=1, label="no_repeat_ngram_size")
            deterministic_out = gr.Textbox(label="Deterministic output", lines=10)
    with gr.Row():
        run_both = gr.Button("Generate Both", variant="primary")
        run_left = gr.Button("Generate Left Only (Sampling)")
        run_right = gr.Button("Generate Right Only (Deterministic)")
    # Wire buttons. Gradio passes `inputs` positionally, so the order of each
    # list below must match the corresponding handler's parameter order.
    run_both.click(
        fn=generate_both,
        inputs=[prompt,
                s_max_new, s_temp, s_topp, s_rep, s_ngram,
                d_max_new, d_beams, d_lenpen, d_ngram],
        outputs=[sampling_out, deterministic_out]
    )
    run_left.click(
        fn=generate_sampling,
        inputs=[prompt, s_max_new, s_temp, s_topp, s_rep, s_ngram],
        outputs=sampling_out
    )
    run_right.click(
        fn=generate_deterministic,
        inputs=[prompt, d_max_new, d_beams, d_lenpen, d_ngram],
        outputs=deterministic_out
    )
    gr.Markdown(
        "#### Compare & Contrast (discussion prompts)\n"
        "- Which side feels **more factual** or **more concise**?\n"
        "- Which side feels **more varied** or **more creative**?\n"
        "- For a study guide, which would you pick? For brainstorming?\n"
    )
# Start the Gradio server when run as a script (the Spaces entry point).
if __name__ == "__main__":
    demo.launch()