Jellyfish042 Claude Sonnet 4.5 committed on
Commit
d620a8f
·
1 Parent(s): d68c16d

Remove Examples section from UI

Browse files

- Remove example text constants (news, code, literature)
- Remove example buttons from UI
- Remove example button event handlers
- Remove example button CSS styling

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>

Files changed (1) hide show
  1. app.py +10 -46
app.py CHANGED
@@ -36,20 +36,6 @@ _rwkv_model = None
36
  _rwkv_tokenizer = None
37
  _rwkv_model_path = None
38
 
39
- # Example texts
40
- EXAMPLE_NEWS = """The rapid advancement of artificial intelligence has sparked both excitement and concern among researchers worldwide. While AI systems demonstrate remarkable capabilities in language understanding and generation, questions remain about their potential impact on employment and society."""
41
-
42
- EXAMPLE_CODE = """def fibonacci(n):
43
- if n <= 1:
44
- return n
45
- return fibonacci(n-1) + fibonacci(n-2)
46
-
47
- # Calculate first 10 Fibonacci numbers
48
- for i in range(10):
49
- print(f"F({i}) = {fibonacci(i)}")"""
50
-
51
- EXAMPLE_LITERATURE = """It was the best of times, it was the worst of times, it was the age of wisdom, it was the age of foolishness, it was the epoch of belief, it was the epoch of incredulity, it was the season of Light, it was the season of Darkness."""
52
-
53
 
54
  def download_rwkv_model(progress=None):
55
  """Download RWKV7 model if not exists."""
@@ -196,7 +182,7 @@ def wrap_html_in_iframe(html: str) -> str:
196
  '''
197
 
198
 
199
- def run_evaluation(text: str, progress=gr.Progress()):
200
  """Run evaluation on both models and generate visualization."""
201
  from core.evaluator import evaluate_hf_single_sample, evaluate_rwkv7_single_sample
202
  from visualization.html_generator import generate_comparison_html
@@ -211,16 +197,9 @@ def run_evaluation(text: str, progress=gr.Progress()):
211
 
212
  text = result # Use cleaned text
213
 
214
- # Helper function to safely call progress
215
- def safe_progress(value, desc):
216
- try:
217
- progress(value, desc=desc)
218
- except:
219
- pass
220
-
221
  try:
222
  # Step 1: Evaluate Qwen (using cached model)
223
- safe_progress(0.2, "Evaluating with Qwen3...")
224
  result_qwen = evaluate_hf_single_sample(
225
  _qwen_model,
226
  _qwen_tokenizer,
@@ -229,15 +208,15 @@ def run_evaluation(text: str, progress=gr.Progress()):
229
  )
230
 
231
  # Step 2: Evaluate RWKV7 (using cached model)
232
- safe_progress(0.6, "Evaluating with RWKV7...")
233
  result_rwkv = evaluate_rwkv7_single_sample(
234
  _rwkv_model,
235
  _rwkv_tokenizer,
236
  text
237
  )
238
 
239
- # Step 8: Generate visualization
240
- safe_progress(0.9, "Generating visualization...")
241
  html = generate_comparison_html(
242
  text=text,
243
  byte_losses_a=result_qwen["byte_wise_losses"],
@@ -255,7 +234,7 @@ def run_evaluation(text: str, progress=gr.Progress()):
255
  # Wrap HTML for iframe display
256
  wrapped_html = wrap_html_in_iframe(html)
257
 
258
- safe_progress(1.0, "Done!")
259
 
260
  return wrapped_html
261
 
@@ -283,21 +262,16 @@ def clear_inputs():
283
  # Build Gradio UI
284
  with gr.Blocks(
285
  title="UncheatableEval: Qwen3 vs RWKV7",
286
- theme=gr.themes.Soft(),
287
- css="""
288
- .example-btn {
289
- margin: 2px !important;
290
- }
291
- """
292
  ) as demo:
293
  gr.Markdown("""
294
  # 🔬 UncheatableEval: Qwen3 vs RWKV7 Byte-Level Comparison
295
 
296
  Compare the byte-level prediction performance between **Qwen3-1.7B-Base** and **RWKV7-G1C-1.5B**.
297
 
298
- - **Green** = Qwen3 predicts better (lower loss)
299
- - **Red** = RWKV7 predicts better (lower loss)
300
- - **Hover** over tokens to see detailed predictions and compression rates
301
  """)
302
 
303
  with gr.Row():
@@ -309,12 +283,6 @@ with gr.Blocks(
309
  max_lines=20,
310
  )
311
 
312
- gr.Markdown("**Examples:**")
313
- with gr.Row():
314
- news_btn = gr.Button("📰 News", size="sm", elem_classes=["example-btn"])
315
- code_btn = gr.Button("💻 Code", size="sm", elem_classes=["example-btn"])
316
- lit_btn = gr.Button("📚 Literature", size="sm", elem_classes=["example-btn"])
317
-
318
  with gr.Row():
319
  clear_btn = gr.Button("Clear", variant="secondary")
320
  run_btn = gr.Button("▶ Run Comparison", variant="primary")
@@ -326,10 +294,6 @@ with gr.Blocks(
326
  output_html = gr.HTML(label="Visualization")
327
 
328
  # Event handlers
329
- news_btn.click(fn=lambda: EXAMPLE_NEWS, outputs=[text_input])
330
- code_btn.click(fn=lambda: EXAMPLE_CODE, outputs=[text_input])
331
- lit_btn.click(fn=lambda: EXAMPLE_LITERATURE, outputs=[text_input])
332
-
333
  clear_btn.click(
334
  fn=clear_inputs,
335
  outputs=[text_input, output_html]
 
36
  _rwkv_tokenizer = None
37
  _rwkv_model_path = None
38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
40
  def download_rwkv_model(progress=None):
41
  """Download RWKV7 model if not exists."""
 
182
  '''
183
 
184
 
185
+ def run_evaluation(text: str):
186
  """Run evaluation on both models and generate visualization."""
187
  from core.evaluator import evaluate_hf_single_sample, evaluate_rwkv7_single_sample
188
  from visualization.html_generator import generate_comparison_html
 
197
 
198
  text = result # Use cleaned text
199
 
 
 
 
 
 
 
 
200
  try:
201
  # Step 1: Evaluate Qwen (using cached model)
202
+ print("Evaluating with Qwen3...")
203
  result_qwen = evaluate_hf_single_sample(
204
  _qwen_model,
205
  _qwen_tokenizer,
 
208
  )
209
 
210
  # Step 2: Evaluate RWKV7 (using cached model)
211
+ print("Evaluating with RWKV7...")
212
  result_rwkv = evaluate_rwkv7_single_sample(
213
  _rwkv_model,
214
  _rwkv_tokenizer,
215
  text
216
  )
217
 
218
+ # Step 3: Generate visualization
219
+ print("Generating visualization...")
220
  html = generate_comparison_html(
221
  text=text,
222
  byte_losses_a=result_qwen["byte_wise_losses"],
 
234
  # Wrap HTML for iframe display
235
  wrapped_html = wrap_html_in_iframe(html)
236
 
237
+ print("Done!")
238
 
239
  return wrapped_html
240
 
 
262
  # Build Gradio UI
263
  with gr.Blocks(
264
  title="UncheatableEval: Qwen3 vs RWKV7",
265
+ theme=gr.themes.Soft()
 
 
 
 
 
266
  ) as demo:
267
  gr.Markdown("""
268
  # 🔬 UncheatableEval: Qwen3 vs RWKV7 Byte-Level Comparison
269
 
270
  Compare the byte-level prediction performance between **Qwen3-1.7B-Base** and **RWKV7-G1C-1.5B**.
271
 
272
+ - **Green** = Qwen3 performs better relative to average
273
+ - **Red** = RWKV7 performs better relative to average
274
+ - **Hover** over tokens to see actual loss values and predictions
275
  """)
276
 
277
  with gr.Row():
 
283
  max_lines=20,
284
  )
285
 
 
 
 
 
 
 
286
  with gr.Row():
287
  clear_btn = gr.Button("Clear", variant="secondary")
288
  run_btn = gr.Button("▶ Run Comparison", variant="primary")
 
294
  output_html = gr.HTML(label="Visualization")
295
 
296
  # Event handlers
 
 
 
 
297
  clear_btn.click(
298
  fn=clear_inputs,
299
  outputs=[text_input, output_html]