File size: 5,912 Bytes
dd37249
04c0cc5
 
dd37249
04c0cc5
 
dd37249
04c0cc5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c1c5ac0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
04c0cc5
c1c5ac0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
04c0cc5
 
c1c5ac0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dea71b0
04c0cc5
c1c5ac0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dea71b0
 
c1c5ac0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
"""Gradio demo: side-by-side comparison of sampling vs. beam-search decoding.

Loads a small instruct model once at import time and exposes two decoding
strategies over the same prompt through a two-column Gradio UI.
"""
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# Tiny, modern instruct model that can (patiently) run on CPU
MODEL_ID = "Qwen/Qwen2.5-0.5B-Instruct"

# Load tokenizer + model (downloads from the Hugging Face Hub on first run)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float32,      # CPU-safe; on GPU you could use torch.float16/bfloat16
    low_cpu_mem_usage=True          # helps reduce peak RAM on load
)

# Make sure a pad token exists (avoids warnings on generation)
# NOTE: reuses EOS as PAD, the common convention for decoder-only models.
if tokenizer.pad_token_id is None and tokenizer.eos_token_id is not None:
    tokenizer.pad_token = tokenizer.eos_token

# Wrap with a text-generation pipeline; both decoding functions below share it.
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer
)


# --- Decoding functions ---
def generate_sampling(prompt, max_new_tokens=96, temperature=0.6, top_p=0.9, repetition_penalty=1.1, ngram=3):
    """Generate a completion with stochastic (temperature / top-p) decoding.

    Args:
        prompt: User instruction; blank input returns a usage hint instead of generating.
        max_new_tokens: Cap on generated tokens (prompt text is excluded from the
            output via ``return_full_text=False``).
        temperature: Softmax temperature. Values <= 0 fall back to greedy decoding,
            because transformers rejects ``do_sample=True`` with ``temperature=0``
            (and the UI slider allows 0.0).
        top_p: Nucleus-sampling probability mass (only used when sampling).
        repetition_penalty: Values > 1.0 discourage repeating earlier tokens.
        ngram: ``no_repeat_ngram_size``; 0 disables the constraint.

    Returns:
        The generated text, or a human-readable error string — the Gradio UI
        displays whatever is returned, so exceptions are not propagated.
    """
    if not prompt or not prompt.strip():
        return "Please enter an instruction (e.g., 'Explain why the sky is blue in one short paragraph.')"
    try:
        temperature = float(temperature)
        # temperature == 0.0 means "no randomness": switch to greedy decoding
        # instead of letting transformers raise a ValueError.
        do_sample = temperature > 0.0
        gen_kwargs = dict(
            max_new_tokens=int(max_new_tokens),
            do_sample=do_sample,
            repetition_penalty=float(repetition_penalty),
            no_repeat_ngram_size=int(ngram),
            return_full_text=False,
        )
        if do_sample:
            gen_kwargs["temperature"] = temperature
            gen_kwargs["top_p"] = float(top_p)
        out = pipe(prompt.strip(), **gen_kwargs)
        return out[0]["generated_text"]
    except Exception as e:
        return f"⚠️ Sampling error: {e}"

def generate_deterministic(prompt, max_new_tokens=96, num_beams=4, length_penalty=0.9, ngram=3):
    """Generate a completion with beam search (reproducible: no sampling).

    Returns the generated text, or a human-readable error string so the
    Gradio UI can display failures instead of crashing the request.
    """
    cleaned = (prompt or "").strip()
    if not cleaned:
        return "Please enter an instruction (e.g., 'Explain why the sky is blue in one short paragraph.')"
    try:
        results = pipe(
            cleaned,
            max_new_tokens=int(max_new_tokens),
            num_beams=int(num_beams),
            early_stopping=True,                     # stop once all beams reach EOS
            length_penalty=float(length_penalty),    # < 1.0 nudges toward shorter answers
            no_repeat_ngram_size=int(ngram),
            return_full_text=False
        )
        return results[0]["generated_text"]
    except Exception as e:
        return f"⚠️ Deterministic error: {e}"

def generate_both(prompt,
                  s_max_new=96, s_temp=0.6, s_topp=0.9, s_rep=1.1, s_ngram=3,
                  d_max_new=96, d_beams=4, d_lenpen=0.9, d_ngram=3):
    """Run both decoders on the same prompt; returns (sampling, deterministic)."""
    return (
        generate_sampling(prompt, s_max_new, s_temp, s_topp, s_rep, s_ngram),
        generate_deterministic(prompt, d_max_new, d_beams, d_lenpen, d_ngram),
    )

# --- UI layout: two columns (sampling vs. beam search) sharing one prompt ---
with gr.Blocks(fill_height=True, analytics_enabled=False) as demo:
    gr.Markdown(
        "# 🧪 Mini LLM Playground — Side-by-Side Decoding\n"
        "Enter one instruction below. The app generates **two answers** using:\n"
        "- **Sampling** (left): temperature & top-p for creativity\n"
        "- **Deterministic** (right): beam search for stability\n\n"
        "_Tip: keep outputs short on CPU (≤ 96 tokens). This is an educational demo; it may be incorrect._"
    )

    # Shared prompt box feeding both decoders.
    with gr.Row():
        prompt = gr.Textbox(
            label="Instruction",
            lines=4,
            placeholder="Explain in one short paragraph: Why is the sky blue?"
        )

    with gr.Row():
        # Left column: Sampling controls + output
        with gr.Column():
            gr.Markdown("### 🎲 Sampling (temperature / top-p)")
            with gr.Row():
                s_max_new = gr.Slider(32, 192, value=96, step=8, label="Max new tokens")
            with gr.Row():
                # NOTE(review): the slider allows temperature=0.0, which
                # do_sample=True rejects — the handler surfaces the error string.
                s_temp = gr.Slider(0.0, 1.5, value=0.6, step=0.05, label="Temperature")
                s_topp = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p")
            with gr.Row():
                s_rep = gr.Slider(1.0, 2.0, value=1.1, step=0.05, label="Repetition penalty")
                s_ngram = gr.Slider(0, 6, value=3, step=1, label="no_repeat_ngram_size")
            sampling_out = gr.Textbox(label="Sampling output", lines=10)

        # Right column: Deterministic controls + output
        with gr.Column():
            gr.Markdown("### 🧭 Deterministic (beam search)")
            with gr.Row():
                d_max_new = gr.Slider(32, 192, value=96, step=8, label="Max new tokens")
            with gr.Row():
                d_beams = gr.Slider(1, 8, value=4, step=1, label="Num beams")
                d_lenpen = gr.Slider(0.6, 1.4, value=0.9, step=0.05, label="Length penalty")
            with gr.Row():
                d_ngram = gr.Slider(0, 6, value=3, step=1, label="no_repeat_ngram_size")
            deterministic_out = gr.Textbox(label="Deterministic output", lines=10)

    # Action buttons: run both decoders, or each side independently.
    with gr.Row():
        run_both = gr.Button("Generate Both", variant="primary")
        run_left = gr.Button("Generate Left Only (Sampling)")
        run_right = gr.Button("Generate Right Only (Deterministic)")

    # Wire buttons — input order must match each handler's parameter order.
    run_both.click(
        fn=generate_both,
        inputs=[prompt,
                s_max_new, s_temp, s_topp, s_rep, s_ngram,
                d_max_new, d_beams, d_lenpen, d_ngram],
        outputs=[sampling_out, deterministic_out]
    )

    run_left.click(
        fn=generate_sampling,
        inputs=[prompt, s_max_new, s_temp, s_topp, s_rep, s_ngram],
        outputs=sampling_out
    )

    run_right.click(
        fn=generate_deterministic,
        inputs=[prompt, d_max_new, d_beams, d_lenpen, d_ngram],
        outputs=deterministic_out
    )

    gr.Markdown(
        "#### Compare & Contrast (discussion prompts)\n"
        "- Which side feels **more factual** or **more concise**?\n"
        "- Which side feels **more varied** or **more creative**?\n"
        "- For a study guide, which would you pick? For brainstorming?\n"
    )

# Launch the Gradio server only when executed as a script (not on import).
if __name__ == "__main__":
    demo.launch()