DrDavis committed on
Commit
f1107a5
·
verified ·
1 Parent(s): 457a713

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +115 -37
app.py CHANGED
@@ -24,47 +24,125 @@ pipe = pipeline(
24
  tokenizer=tokenizer
25
  )
26
 
27
- def infer(prompt, max_new_tokens=128, temperature=0.7, top_p=0.9):
28
- """Single-turn chat-style inference with Qwen 0.5B Instruct."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  if not prompt or not prompt.strip():
30
- return "Please type something to generate."
31
-
32
- # Use Qwen's chat template for better instruct-style behavior
33
- messages = [
34
- {"role": "system", "content": "You are a helpful, concise assistant for beginners learning about LLMs."},
35
- {"role": "user", "content": prompt.strip()}
36
- ]
37
- chat_prompt = tokenizer.apply_chat_template(
38
- messages,
39
- tokenize=False,
40
- add_generation_prompt=True # appends assistant prefix as the generation start
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  )
42
 
43
- # Generation with light anti-repetition guards
44
- outputs = pipe(
45
- chat_prompt,
46
- max_new_tokens=int(max_new_tokens),
47
- do_sample=True,
48
- temperature=float(temperature),
49
- top_p=float(top_p),
50
- no_repeat_ngram_size=3, # prevents short n-gram loops
51
- repetition_penalty=1.1, # gentle nudge against repeating phrases
52
- return_full_text=False # only return the assistant's new text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  )
54
 
55
- return outputs[0]["generated_text"]
56
-
57
- demo = gr.Interface(
58
- fn=infer,
59
- inputs=[
60
- gr.Textbox(lines=3, label="Instruction", placeholder="Explain in one paragraph: Why is the sky blue?"),
61
- gr.Slider(16, 256, 128, step=8, label="Max new tokens"),
62
- gr.Slider(0.0, 1.5, 0.7, step=0.05, label="Temperature"),
63
- gr.Slider(0.1, 1.0, 0.9, step=0.05, label="Top-p"),
64
- ],
65
- outputs=gr.Textbox(lines=10, label="Output"),
66
- title="Mini LLM (Local) — Qwen 2.5 (0.5B) Instruct"
67
- )
 
 
 
 
 
68
 
69
  if __name__ == "__main__":
70
- demo.launch()
 
24
  tokenizer=tokenizer
25
  )
26
 
27
+
28
# --- Decoding functions ---
def generate_sampling(prompt, max_new_tokens=96, temperature=0.6, top_p=0.9, repetition_penalty=1.1, ngram=3):
    """Generate one completion using stochastic (temperature / top-p) decoding.

    Args:
        prompt: Instruction text from the UI; blank/whitespace input short-circuits.
        max_new_tokens: Cap on generated tokens (kept small for CPU demos).
        temperature: Softmax temperature; 0 falls back to greedy decoding.
        top_p: Nucleus-sampling probability mass.
        repetition_penalty: Values > 1.0 discourage repeating phrases.
        ngram: Passed through as no_repeat_ngram_size to block short loops.

    Returns:
        The generated text, or a human-readable error string — the Gradio UI
        displays whatever we return, so this function never raises.
    """
    if not prompt or not prompt.strip():
        return "Please enter an instruction (e.g., 'Explain why the sky is blue in one short paragraph.')"
    # transformers rejects do_sample=True with temperature == 0 ("temperature
    # has to be a strictly positive float"), but the UI slider's range starts
    # at 0.0 — treat that case as deterministic greedy decoding instead of
    # surfacing a confusing backend error.
    do_sample = float(temperature) > 0.0
    try:
        out = pipe(
            prompt.strip(),
            max_new_tokens=int(max_new_tokens),
            do_sample=do_sample,
            # Sampling knobs are irrelevant under greedy decoding; None lets
            # the generation config fall back to its defaults without warning.
            temperature=float(temperature) if do_sample else None,
            top_p=float(top_p) if do_sample else None,
            repetition_penalty=float(repetition_penalty),
            no_repeat_ngram_size=int(ngram),
            return_full_text=False
        )
        return out[0]["generated_text"]
    except Exception as e:
        # Best-effort demo: show the failure in the output box rather than crash.
        return f"⚠️ Sampling error: {e}"
46
+
47
def generate_deterministic(prompt, max_new_tokens=96, num_beams=4, length_penalty=0.9, ngram=3):
    """Generate one completion with beam search: repeatable output for a prompt.

    Args:
        prompt: Instruction text from the UI; blank/whitespace input short-circuits.
        max_new_tokens: Cap on generated tokens (kept small for CPU demos).
        num_beams: Beam width for the search.
        length_penalty: < 1.0 favors shorter candidates, > 1.0 longer ones.
        ngram: Passed through as no_repeat_ngram_size to block short loops.

    Returns:
        The generated text, or a human-readable error string — the Gradio UI
        displays whatever we return, so this function never raises.
    """
    if not prompt or not prompt.strip():
        return "Please enter an instruction (e.g., 'Explain why the sky is blue in one short paragraph.')"
    try:
        # Collect every generate() argument in one place, then call the pipeline.
        gen_kwargs = {
            "max_new_tokens": int(max_new_tokens),
            "num_beams": int(num_beams),
            "early_stopping": True,          # stop once num_beams candidates finish
            "length_penalty": float(length_penalty),
            "no_repeat_ngram_size": int(ngram),
            "return_full_text": False,       # only the newly generated text
        }
        result = pipe(prompt.strip(), **gen_kwargs)
        return result[0]["generated_text"]
    except Exception as e:
        # Best-effort demo: show the failure in the output box rather than crash.
        return f"⚠️ Deterministic error: {e}"
63
+
64
def generate_both(prompt,
                  s_max_new=96, s_temp=0.6, s_topp=0.9, s_rep=1.1, s_ngram=3,
                  d_max_new=96, d_beams=4, d_lenpen=0.9, d_ngram=3):
    """Run both decoders on the same prompt.

    Returns:
        A (sampling_text, deterministic_text) pair, matching the two output
        boxes wired up in the Gradio UI.
    """
    return (
        generate_sampling(prompt, s_max_new, s_temp, s_topp, s_rep, s_ngram),
        generate_deterministic(prompt, d_max_new, d_beams, d_lenpen, d_ngram),
    )
71
+
72
# Build the side-by-side comparison UI. Layout: one shared prompt box, then
# two columns (sampling controls/output on the left, beam-search controls/
# output on the right), three trigger buttons, and a closing discussion note.
with gr.Blocks(fill_height=True, analytics_enabled=False) as demo:
    gr.Markdown(
        "# 🧪 Mini LLM Playground — Side-by-Side Decoding\n"
        "Enter one instruction below. The app generates **two answers** using:\n"
        "- **Sampling** (left): temperature & top-p for creativity\n"
        "- **Deterministic** (right): beam search for stability\n\n"
        "_Tip: keep outputs short on CPU (≤ 96 tokens). This is an educational demo; it may be incorrect._"
    )

    # Single prompt shared by both decoders.
    with gr.Row():
        prompt = gr.Textbox(
            label="Instruction",
            lines=4,
            placeholder="Explain in one short paragraph: Why is the sky blue?"
        )

    with gr.Row():
        # Left column: Sampling controls + output.
        # Slider defaults mirror generate_sampling's parameter defaults.
        with gr.Column():
            gr.Markdown("### 🎲 Sampling (temperature / top-p)")
            with gr.Row():
                s_max_new = gr.Slider(32, 192, value=96, step=8, label="Max new tokens")
            with gr.Row():
                s_temp = gr.Slider(0.0, 1.5, value=0.6, step=0.05, label="Temperature")
                s_topp = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p")
            with gr.Row():
                s_rep = gr.Slider(1.0, 2.0, value=1.1, step=0.05, label="Repetition penalty")
                s_ngram = gr.Slider(0, 6, value=3, step=1, label="no_repeat_ngram_size")
            sampling_out = gr.Textbox(label="Sampling output", lines=10)

        # Right column: Deterministic controls + output.
        # Slider defaults mirror generate_deterministic's parameter defaults.
        with gr.Column():
            gr.Markdown("### 🧭 Deterministic (beam search)")
            with gr.Row():
                d_max_new = gr.Slider(32, 192, value=96, step=8, label="Max new tokens")
            with gr.Row():
                d_beams = gr.Slider(1, 8, value=4, step=1, label="Num beams")
                d_lenpen = gr.Slider(0.6, 1.4, value=0.9, step=0.05, label="Length penalty")
            with gr.Row():
                d_ngram = gr.Slider(0, 6, value=3, step=1, label="no_repeat_ngram_size")
            deterministic_out = gr.Textbox(label="Deterministic output", lines=10)

    with gr.Row():
        run_both = gr.Button("Generate Both", variant="primary")
        run_left = gr.Button("Generate Left Only (Sampling)")
        run_right = gr.Button("Generate Right Only (Deterministic)")

    # Wire buttons: input order must match each function's positional signature.
    run_both.click(
        fn=generate_both,
        inputs=[prompt,
                s_max_new, s_temp, s_topp, s_rep, s_ngram,
                d_max_new, d_beams, d_lenpen, d_ngram],
        outputs=[sampling_out, deterministic_out]
    )

    run_left.click(
        fn=generate_sampling,
        inputs=[prompt, s_max_new, s_temp, s_topp, s_rep, s_ngram],
        outputs=sampling_out
    )

    run_right.click(
        fn=generate_deterministic,
        inputs=[prompt, d_max_new, d_beams, d_lenpen, d_ngram],
        outputs=deterministic_out
    )

    gr.Markdown(
        "#### Compare & Contrast (discussion prompts)\n"
        "- Which side feels **more factual** or **more concise**?\n"
        "- Which side feels **more varied** or **more creative**?\n"
        "- For a study guide, which would you pick? For brainstorming?\n"
    )
146
 
147
if __name__ == "__main__":
    # Start the Gradio server only when run as a script (not on import).
    demo.launch()