DrDavis committed on
Commit
f1107a5
·
verified ·
1 Parent(s): 457a713

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +115 -37
app.py CHANGED
@@ -24,47 +24,125 @@ pipe = pipeline(
24
  tokenizer=tokenizer
25
  )
26
 
27
- def infer(prompt, max_new_tokens=128, temperature=0.7, top_p=0.9):
28
- """Single-turn chat-style inference with Qwen 0.5B Instruct."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  if not prompt or not prompt.strip():
30
- return "Please type something to generate."
31
-
32
- # Use Qwen's chat template for better instruct-style behavior
33
- messages = [
34
- {"role": "system", "content": "You are a helpful, concise assistant for beginners learning about LLMs."},
35
- {"role": "user", "content": prompt.strip()}
36
- ]
37
- chat_prompt = tokenizer.apply_chat_template(
38
- messages,
39
- tokenize=False,
40
- add_generation_prompt=True # appends assistant prefix as the generation start
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  )
42
 
43
- # Generation with light anti-repetition guards
44
- outputs = pipe(
45
- chat_prompt,
46
- max_new_tokens=int(max_new_tokens),
47
- do_sample=True,
48
- temperature=float(temperature),
49
- top_p=float(top_p),
50
- no_repeat_ngram_size=3, # prevents short n-gram loops
51
- repetition_penalty=1.1, # gentle nudge against repeating phrases
52
- return_full_text=False # only return the assistant's new text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  )
54
 
55
- return outputs[0]["generated_text"]
56
-
57
- demo = gr.Interface(
58
- fn=infer,
59
- inputs=[
60
- gr.Textbox(lines=3, label="Instruction", placeholder="Explain in one paragraph: Why is the sky blue?"),
61
- gr.Slider(16, 256, 128, step=8, label="Max new tokens"),
62
- gr.Slider(0.0, 1.5, 0.7, step=0.05, label="Temperature"),
63
- gr.Slider(0.1, 1.0, 0.9, step=0.05, label="Top-p"),
64
- ],
65
- outputs=gr.Textbox(lines=10, label="Output"),
66
- title="Mini LLM (Local) — Qwen 2.5 (0.5B) Instruct"
67
- )
 
 
 
 
 
68
 
69
  if __name__ == "__main__":
70
- demo.launch()
 
24
  tokenizer=tokenizer
25
  )
26
 
27
+
28
# --- Decoding functions ---
def generate_sampling(prompt, max_new_tokens=96, temperature=0.6, top_p=0.9, repetition_penalty=1.1, ngram=3):
    """Generate one completion using stochastic (temperature / top-p) decoding.

    Args:
        prompt: Instruction text from the UI; blank/whitespace input short-circuits.
        max_new_tokens: Cap on generated tokens (kept small for CPU demos).
        temperature: Softmax temperature; 0 falls back to greedy decoding.
        top_p: Nucleus-sampling probability mass.
        repetition_penalty: Values > 1.0 discourage repeating phrases.
        ngram: Passed through as no_repeat_ngram_size to block short loops.

    Returns:
        The generated text, or a human-readable error string — the Gradio UI
        displays whatever we return, so this function never raises.
    """
    if not prompt or not prompt.strip():
        return "Please enter an instruction (e.g., 'Explain why the sky is blue in one short paragraph.')"
    # transformers rejects do_sample=True with temperature == 0 ("temperature
    # has to be a strictly positive float"), but the UI slider's range starts
    # at 0.0 — treat that case as deterministic greedy decoding instead of
    # surfacing a confusing backend error.
    do_sample = float(temperature) > 0.0
    try:
        out = pipe(
            prompt.strip(),
            max_new_tokens=int(max_new_tokens),
            do_sample=do_sample,
            # Sampling knobs are irrelevant under greedy decoding; None lets
            # the generation config fall back to its defaults without warning.
            temperature=float(temperature) if do_sample else None,
            top_p=float(top_p) if do_sample else None,
            repetition_penalty=float(repetition_penalty),
            no_repeat_ngram_size=int(ngram),
            return_full_text=False
        )
        return out[0]["generated_text"]
    except Exception as e:
        # Best-effort demo: show the failure in the output box rather than crash.
        return f"⚠️ Sampling error: {e}"
46
+
47
def generate_deterministic(prompt, max_new_tokens=96, num_beams=4, length_penalty=0.9, ngram=3):
    """Generate one completion with beam search: repeatable output for a prompt.

    Args:
        prompt: Instruction text from the UI; blank/whitespace input short-circuits.
        max_new_tokens: Cap on generated tokens (kept small for CPU demos).
        num_beams: Beam width for the search.
        length_penalty: < 1.0 favors shorter candidates, > 1.0 longer ones.
        ngram: Passed through as no_repeat_ngram_size to block short loops.

    Returns:
        The generated text, or a human-readable error string — the Gradio UI
        displays whatever we return, so this function never raises.
    """
    if not prompt or not prompt.strip():
        return "Please enter an instruction (e.g., 'Explain why the sky is blue in one short paragraph.')"
    try:
        # Collect every generate() argument in one place, then call the pipeline.
        gen_kwargs = {
            "max_new_tokens": int(max_new_tokens),
            "num_beams": int(num_beams),
            "early_stopping": True,          # stop once num_beams candidates finish
            "length_penalty": float(length_penalty),
            "no_repeat_ngram_size": int(ngram),
            "return_full_text": False,       # only the newly generated text
        }
        result = pipe(prompt.strip(), **gen_kwargs)
        return result[0]["generated_text"]
    except Exception as e:
        # Best-effort demo: show the failure in the output box rather than crash.
        return f"⚠️ Deterministic error: {e}"
63
+
64
def generate_both(prompt,
                  s_max_new=96, s_temp=0.6, s_topp=0.9, s_rep=1.1, s_ngram=3,
                  d_max_new=96, d_beams=4, d_lenpen=0.9, d_ngram=3):
    """Run both decoders on the same prompt.

    Returns:
        A (sampling_text, deterministic_text) pair, matching the two output
        boxes wired up in the Gradio UI.
    """
    return (
        generate_sampling(prompt, s_max_new, s_temp, s_topp, s_rep, s_ngram),
        generate_deterministic(prompt, d_max_new, d_beams, d_lenpen, d_ngram),
    )
71
+
72
# Build the side-by-side comparison UI. Layout: one shared prompt box, then
# two columns (sampling controls/output on the left, beam-search controls/
# output on the right), three trigger buttons, and a closing discussion note.
with gr.Blocks(fill_height=True, analytics_enabled=False) as demo:
    gr.Markdown(
        "# 🧪 Mini LLM Playground — Side-by-Side Decoding\n"
        "Enter one instruction below. The app generates **two answers** using:\n"
        "- **Sampling** (left): temperature & top-p for creativity\n"
        "- **Deterministic** (right): beam search for stability\n\n"
        "_Tip: keep outputs short on CPU (≤ 96 tokens). This is an educational demo; it may be incorrect._"
    )

    # Single prompt shared by both decoders.
    with gr.Row():
        prompt = gr.Textbox(
            label="Instruction",
            lines=4,
            placeholder="Explain in one short paragraph: Why is the sky blue?"
        )

    with gr.Row():
        # Left column: Sampling controls + output.
        # Slider defaults mirror generate_sampling's parameter defaults.
        with gr.Column():
            gr.Markdown("### 🎲 Sampling (temperature / top-p)")
            with gr.Row():
                s_max_new = gr.Slider(32, 192, value=96, step=8, label="Max new tokens")
            with gr.Row():
                s_temp = gr.Slider(0.0, 1.5, value=0.6, step=0.05, label="Temperature")
                s_topp = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p")
            with gr.Row():
                s_rep = gr.Slider(1.0, 2.0, value=1.1, step=0.05, label="Repetition penalty")
                s_ngram = gr.Slider(0, 6, value=3, step=1, label="no_repeat_ngram_size")
            sampling_out = gr.Textbox(label="Sampling output", lines=10)

        # Right column: Deterministic controls + output.
        # Slider defaults mirror generate_deterministic's parameter defaults.
        with gr.Column():
            gr.Markdown("### 🧭 Deterministic (beam search)")
            with gr.Row():
                d_max_new = gr.Slider(32, 192, value=96, step=8, label="Max new tokens")
            with gr.Row():
                d_beams = gr.Slider(1, 8, value=4, step=1, label="Num beams")
                d_lenpen = gr.Slider(0.6, 1.4, value=0.9, step=0.05, label="Length penalty")
            with gr.Row():
                d_ngram = gr.Slider(0, 6, value=3, step=1, label="no_repeat_ngram_size")
            deterministic_out = gr.Textbox(label="Deterministic output", lines=10)

    with gr.Row():
        run_both = gr.Button("Generate Both", variant="primary")
        run_left = gr.Button("Generate Left Only (Sampling)")
        run_right = gr.Button("Generate Right Only (Deterministic)")

    # Wire buttons: input order must match each function's positional signature.
    run_both.click(
        fn=generate_both,
        inputs=[prompt,
                s_max_new, s_temp, s_topp, s_rep, s_ngram,
                d_max_new, d_beams, d_lenpen, d_ngram],
        outputs=[sampling_out, deterministic_out]
    )

    run_left.click(
        fn=generate_sampling,
        inputs=[prompt, s_max_new, s_temp, s_topp, s_rep, s_ngram],
        outputs=sampling_out
    )

    run_right.click(
        fn=generate_deterministic,
        inputs=[prompt, d_max_new, d_beams, d_lenpen, d_ngram],
        outputs=deterministic_out
    )

    gr.Markdown(
        "#### Compare & Contrast (discussion prompts)\n"
        "- Which side feels **more factual** or **more concise**?\n"
        "- Which side feels **more varied** or **more creative**?\n"
        "- For a study guide, which would you pick? For brainstorming?\n"
    )
146
 
147
if __name__ == "__main__":
    # Start the Gradio server only when run as a script (not on import).
    demo.launch()