Spaces:

Jellyfish042
/

Compression-Lens

Running

Jellyfish042 Claude Sonnet 4.5 committed on Jan 18

Commit

d620a8f

1 Parent(s): d68c16d

Remove Examples section from UI

- Remove example text constants (news, code, literature)
- Remove example buttons from UI
- Remove example button event handlers
- Remove example button CSS styling

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>

Files changed (1) hide show

app.py +10 -46

app.py CHANGED Viewed

@@ -36,20 +36,6 @@ _rwkv_model = None
 _rwkv_tokenizer = None
 _rwkv_model_path = None
-# Example texts
-EXAMPLE_NEWS = """The rapid advancement of artificial intelligence has sparked both excitement and concern among researchers worldwide. While AI systems demonstrate remarkable capabilities in language understanding and generation, questions remain about their potential impact on employment and society."""
-EXAMPLE_CODE = """def fibonacci(n):
-    if n <= 1:
-        return n
-    return fibonacci(n-1) + fibonacci(n-2)
-# Calculate first 10 Fibonacci numbers
-for i in range(10):
-    print(f"F({i}) = {fibonacci(i)}")"""
-EXAMPLE_LITERATURE = """It was the best of times, it was the worst of times, it was the age of wisdom, it was the age of foolishness, it was the epoch of belief, it was the epoch of incredulity, it was the season of Light, it was the season of Darkness."""
 def download_rwkv_model(progress=None):
     """Download RWKV7 model if not exists."""
@@ -196,7 +182,7 @@ def wrap_html_in_iframe(html: str) -> str:
     '''
-def run_evaluation(text: str, progress=gr.Progress()):
     """Run evaluation on both models and generate visualization."""
     from core.evaluator import evaluate_hf_single_sample, evaluate_rwkv7_single_sample
     from visualization.html_generator import generate_comparison_html
@@ -211,16 +197,9 @@ def run_evaluation(text: str, progress=gr.Progress()):
     text = result  # Use cleaned text
-    # Helper function to safely call progress
-    def safe_progress(value, desc):
-        try:
-            progress(value, desc=desc)
-        except:
-            pass
     try:
         # Step 1: Evaluate Qwen (using cached model)
-        safe_progress(0.2, "Evaluating with Qwen3...")
         result_qwen = evaluate_hf_single_sample(
             _qwen_model,
             _qwen_tokenizer,
@@ -229,15 +208,15 @@ def run_evaluation(text: str, progress=gr.Progress()):
         )
         # Step 2: Evaluate RWKV7 (using cached model)
-        safe_progress(0.6, "Evaluating with RWKV7...")
         result_rwkv = evaluate_rwkv7_single_sample(
             _rwkv_model,
             _rwkv_tokenizer,
             text
         )
-        # Step 8: Generate visualization
-        safe_progress(0.9, "Generating visualization...")
         html = generate_comparison_html(
             text=text,
             byte_losses_a=result_qwen["byte_wise_losses"],
@@ -255,7 +234,7 @@ def run_evaluation(text: str, progress=gr.Progress()):
         # Wrap HTML for iframe display
         wrapped_html = wrap_html_in_iframe(html)
-        safe_progress(1.0, "Done!")
         return wrapped_html
@@ -283,21 +262,16 @@ def clear_inputs():
 # Build Gradio UI
 with gr.Blocks(
     title="UncheatableEval: Qwen3 vs RWKV7",
-    theme=gr.themes.Soft(),
-    css="""
-    .example-btn {
-        margin: 2px !important;
-    }
-    """
 ) as demo:
     gr.Markdown("""
     # 🔬 UncheatableEval: Qwen3 vs RWKV7 Byte-Level Comparison
     Compare the byte-level prediction performance between **Qwen3-1.7B-Base** and **RWKV7-G1C-1.5B**.
-    - **Green** = Qwen3 predicts better (lower loss)
-    - **Red** = RWKV7 predicts better (lower loss)
-    - **Hover** over tokens to see detailed predictions and compression rates
     """)
     with gr.Row():
@@ -309,12 +283,6 @@ with gr.Blocks(
                 max_lines=20,
             )
-            gr.Markdown("**Examples:**")
-            with gr.Row():
-                news_btn = gr.Button("📰 News", size="sm", elem_classes=["example-btn"])
-                code_btn = gr.Button("💻 Code", size="sm", elem_classes=["example-btn"])
-                lit_btn = gr.Button("📚 Literature", size="sm", elem_classes=["example-btn"])
             with gr.Row():
                 clear_btn = gr.Button("Clear", variant="secondary")
                 run_btn = gr.Button("▶ Run Comparison", variant="primary")
@@ -326,10 +294,6 @@ with gr.Blocks(
             output_html = gr.HTML(label="Visualization")
     # Event handlers
-    news_btn.click(fn=lambda: EXAMPLE_NEWS, outputs=[text_input])
-    code_btn.click(fn=lambda: EXAMPLE_CODE, outputs=[text_input])
-    lit_btn.click(fn=lambda: EXAMPLE_LITERATURE, outputs=[text_input])
     clear_btn.click(
         fn=clear_inputs,
         outputs=[text_input, output_html]

 _rwkv_tokenizer = None
 _rwkv_model_path = None
 def download_rwkv_model(progress=None):
     """Download RWKV7 model if not exists."""
     '''
+def run_evaluation(text: str):
     """Run evaluation on both models and generate visualization."""
     from core.evaluator import evaluate_hf_single_sample, evaluate_rwkv7_single_sample
     from visualization.html_generator import generate_comparison_html
     text = result  # Use cleaned text
     try:
         # Step 1: Evaluate Qwen (using cached model)
+        print("Evaluating with Qwen3...")
         result_qwen = evaluate_hf_single_sample(
             _qwen_model,
             _qwen_tokenizer,
         )
         # Step 2: Evaluate RWKV7 (using cached model)
+        print("Evaluating with RWKV7...")
         result_rwkv = evaluate_rwkv7_single_sample(
             _rwkv_model,
             _rwkv_tokenizer,
             text
         )
+        # Step 3: Generate visualization
+        print("Generating visualization...")
         html = generate_comparison_html(
             text=text,
             byte_losses_a=result_qwen["byte_wise_losses"],
         # Wrap HTML for iframe display
         wrapped_html = wrap_html_in_iframe(html)
+        print("Done!")
         return wrapped_html
 # Build Gradio UI
 with gr.Blocks(
     title="UncheatableEval: Qwen3 vs RWKV7",
+    theme=gr.themes.Soft()
 ) as demo:
     gr.Markdown("""
     # 🔬 UncheatableEval: Qwen3 vs RWKV7 Byte-Level Comparison
     Compare the byte-level prediction performance between **Qwen3-1.7B-Base** and **RWKV7-G1C-1.5B**.
+    - **Green** = Qwen3 performs better relative to average
+    - **Red** = RWKV7 performs better relative to average
+    - **Hover** over tokens to see actual loss values and predictions
     """)
     with gr.Row():
                 max_lines=20,
             )
             with gr.Row():
                 clear_btn = gr.Button("Clear", variant="secondary")
                 run_btn = gr.Button("▶ Run Comparison", variant="primary")
             output_html = gr.HTML(label="Visualization")
     # Event handlers
     clear_btn.click(
         fn=clear_inputs,
         outputs=[text_input, output_html]