Spaces:
Running
Running
Commit
·
d620a8f
1
Parent(s):
d68c16d
Remove Examples section from UI
Browse files
- Remove example text constants (news, code, literature)
- Remove example buttons from UI
- Remove example button event handlers
- Remove example button CSS styling
Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
app.py
CHANGED
|
@@ -36,20 +36,6 @@ _rwkv_model = None
|
|
| 36 |
_rwkv_tokenizer = None
|
| 37 |
_rwkv_model_path = None
|
| 38 |
|
| 39 |
-
# Example texts
|
| 40 |
-
EXAMPLE_NEWS = """The rapid advancement of artificial intelligence has sparked both excitement and concern among researchers worldwide. While AI systems demonstrate remarkable capabilities in language understanding and generation, questions remain about their potential impact on employment and society."""
|
| 41 |
-
|
| 42 |
-
EXAMPLE_CODE = """def fibonacci(n):
|
| 43 |
-
if n <= 1:
|
| 44 |
-
return n
|
| 45 |
-
return fibonacci(n-1) + fibonacci(n-2)
|
| 46 |
-
|
| 47 |
-
# Calculate first 10 Fibonacci numbers
|
| 48 |
-
for i in range(10):
|
| 49 |
-
print(f"F({i}) = {fibonacci(i)}")"""
|
| 50 |
-
|
| 51 |
-
EXAMPLE_LITERATURE = """It was the best of times, it was the worst of times, it was the age of wisdom, it was the age of foolishness, it was the epoch of belief, it was the epoch of incredulity, it was the season of Light, it was the season of Darkness."""
|
| 52 |
-
|
| 53 |
|
| 54 |
def download_rwkv_model(progress=None):
|
| 55 |
"""Download RWKV7 model if not exists."""
|
|
@@ -196,7 +182,7 @@ def wrap_html_in_iframe(html: str) -> str:
|
|
| 196 |
'''
|
| 197 |
|
| 198 |
|
| 199 |
-
def run_evaluation(text: str, progress=gr.Progress()):
|
| 200 |
"""Run evaluation on both models and generate visualization."""
|
| 201 |
from core.evaluator import evaluate_hf_single_sample, evaluate_rwkv7_single_sample
|
| 202 |
from visualization.html_generator import generate_comparison_html
|
|
@@ -211,16 +197,9 @@ def run_evaluation(text: str, progress=gr.Progress()):
|
|
| 211 |
|
| 212 |
text = result # Use cleaned text
|
| 213 |
|
| 214 |
-
# Helper function to safely call progress
|
| 215 |
-
def safe_progress(value, desc):
|
| 216 |
-
try:
|
| 217 |
-
progress(value, desc=desc)
|
| 218 |
-
except:
|
| 219 |
-
pass
|
| 220 |
-
|
| 221 |
try:
|
| 222 |
# Step 1: Evaluate Qwen (using cached model)
|
| 223 |
-
|
| 224 |
result_qwen = evaluate_hf_single_sample(
|
| 225 |
_qwen_model,
|
| 226 |
_qwen_tokenizer,
|
|
@@ -229,15 +208,15 @@ def run_evaluation(text: str, progress=gr.Progress()):
|
|
| 229 |
)
|
| 230 |
|
| 231 |
# Step 2: Evaluate RWKV7 (using cached model)
|
| 232 |
-
|
| 233 |
result_rwkv = evaluate_rwkv7_single_sample(
|
| 234 |
_rwkv_model,
|
| 235 |
_rwkv_tokenizer,
|
| 236 |
text
|
| 237 |
)
|
| 238 |
|
| 239 |
-
# Step
|
| 240 |
-
|
| 241 |
html = generate_comparison_html(
|
| 242 |
text=text,
|
| 243 |
byte_losses_a=result_qwen["byte_wise_losses"],
|
|
@@ -255,7 +234,7 @@ def run_evaluation(text: str, progress=gr.Progress()):
|
|
| 255 |
# Wrap HTML for iframe display
|
| 256 |
wrapped_html = wrap_html_in_iframe(html)
|
| 257 |
|
| 258 |
-
|
| 259 |
|
| 260 |
return wrapped_html
|
| 261 |
|
|
@@ -283,21 +262,16 @@ def clear_inputs():
|
|
| 283 |
# Build Gradio UI
|
| 284 |
with gr.Blocks(
|
| 285 |
title="UncheatableEval: Qwen3 vs RWKV7",
|
| 286 |
-
theme=gr.themes.Soft(),
|
| 287 |
-
css="""
|
| 288 |
-
.example-btn {
|
| 289 |
-
margin: 2px !important;
|
| 290 |
-
}
|
| 291 |
-
"""
|
| 292 |
) as demo:
|
| 293 |
gr.Markdown("""
|
| 294 |
# 🔬 UncheatableEval: Qwen3 vs RWKV7 Byte-Level Comparison
|
| 295 |
|
| 296 |
Compare the byte-level prediction performance between **Qwen3-1.7B-Base** and **RWKV7-G1C-1.5B**.
|
| 297 |
|
| 298 |
-
- **Green** = Qwen3
|
| 299 |
-
- **Red** = RWKV7
|
| 300 |
-
- **Hover** over tokens to see
|
| 301 |
""")
|
| 302 |
|
| 303 |
with gr.Row():
|
|
@@ -309,12 +283,6 @@ with gr.Blocks(
|
|
| 309 |
max_lines=20,
|
| 310 |
)
|
| 311 |
|
| 312 |
-
gr.Markdown("**Examples:**")
|
| 313 |
-
with gr.Row():
|
| 314 |
-
news_btn = gr.Button("📰 News", size="sm", elem_classes=["example-btn"])
|
| 315 |
-
code_btn = gr.Button("💻 Code", size="sm", elem_classes=["example-btn"])
|
| 316 |
-
lit_btn = gr.Button("📚 Literature", size="sm", elem_classes=["example-btn"])
|
| 317 |
-
|
| 318 |
with gr.Row():
|
| 319 |
clear_btn = gr.Button("Clear", variant="secondary")
|
| 320 |
run_btn = gr.Button("▶ Run Comparison", variant="primary")
|
|
@@ -326,10 +294,6 @@ with gr.Blocks(
|
|
| 326 |
output_html = gr.HTML(label="Visualization")
|
| 327 |
|
| 328 |
# Event handlers
|
| 329 |
-
news_btn.click(fn=lambda: EXAMPLE_NEWS, outputs=[text_input])
|
| 330 |
-
code_btn.click(fn=lambda: EXAMPLE_CODE, outputs=[text_input])
|
| 331 |
-
lit_btn.click(fn=lambda: EXAMPLE_LITERATURE, outputs=[text_input])
|
| 332 |
-
|
| 333 |
clear_btn.click(
|
| 334 |
fn=clear_inputs,
|
| 335 |
outputs=[text_input, output_html]
|
|
|
|
| 36 |
_rwkv_tokenizer = None
|
| 37 |
_rwkv_model_path = None
|
| 38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
|
| 40 |
def download_rwkv_model(progress=None):
|
| 41 |
"""Download RWKV7 model if not exists."""
|
|
|
|
| 182 |
'''
|
| 183 |
|
| 184 |
|
| 185 |
+
def run_evaluation(text: str):
|
| 186 |
"""Run evaluation on both models and generate visualization."""
|
| 187 |
from core.evaluator import evaluate_hf_single_sample, evaluate_rwkv7_single_sample
|
| 188 |
from visualization.html_generator import generate_comparison_html
|
|
|
|
| 197 |
|
| 198 |
text = result # Use cleaned text
|
| 199 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 200 |
try:
|
| 201 |
# Step 1: Evaluate Qwen (using cached model)
|
| 202 |
+
print("Evaluating with Qwen3...")
|
| 203 |
result_qwen = evaluate_hf_single_sample(
|
| 204 |
_qwen_model,
|
| 205 |
_qwen_tokenizer,
|
|
|
|
| 208 |
)
|
| 209 |
|
| 210 |
# Step 2: Evaluate RWKV7 (using cached model)
|
| 211 |
+
print("Evaluating with RWKV7...")
|
| 212 |
result_rwkv = evaluate_rwkv7_single_sample(
|
| 213 |
_rwkv_model,
|
| 214 |
_rwkv_tokenizer,
|
| 215 |
text
|
| 216 |
)
|
| 217 |
|
| 218 |
+
# Step 3: Generate visualization
|
| 219 |
+
print("Generating visualization...")
|
| 220 |
html = generate_comparison_html(
|
| 221 |
text=text,
|
| 222 |
byte_losses_a=result_qwen["byte_wise_losses"],
|
|
|
|
| 234 |
# Wrap HTML for iframe display
|
| 235 |
wrapped_html = wrap_html_in_iframe(html)
|
| 236 |
|
| 237 |
+
print("Done!")
|
| 238 |
|
| 239 |
return wrapped_html
|
| 240 |
|
|
|
|
| 262 |
# Build Gradio UI
|
| 263 |
with gr.Blocks(
|
| 264 |
title="UncheatableEval: Qwen3 vs RWKV7",
|
| 265 |
+
theme=gr.themes.Soft()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 266 |
) as demo:
|
| 267 |
gr.Markdown("""
|
| 268 |
# 🔬 UncheatableEval: Qwen3 vs RWKV7 Byte-Level Comparison
|
| 269 |
|
| 270 |
Compare the byte-level prediction performance between **Qwen3-1.7B-Base** and **RWKV7-G1C-1.5B**.
|
| 271 |
|
| 272 |
+
- **Green** = Qwen3 performs better relative to average
|
| 273 |
+
- **Red** = RWKV7 performs better relative to average
|
| 274 |
+
- **Hover** over tokens to see actual loss values and predictions
|
| 275 |
""")
|
| 276 |
|
| 277 |
with gr.Row():
|
|
|
|
| 283 |
max_lines=20,
|
| 284 |
)
|
| 285 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 286 |
with gr.Row():
|
| 287 |
clear_btn = gr.Button("Clear", variant="secondary")
|
| 288 |
run_btn = gr.Button("▶ Run Comparison", variant="primary")
|
|
|
|
| 294 |
output_html = gr.HTML(label="Visualization")
|
| 295 |
|
| 296 |
# Event handlers
|
|
|
|
|
|
|
|
|
|
|
|
|
| 297 |
clear_btn.click(
|
| 298 |
fn=clear_inputs,
|
| 299 |
outputs=[text_input, output_html]
|