Spaces:
Sleeping
Sleeping
Commit
·
6fcf271
1
Parent(s):
98b6961
Improve UI and fix multi-token display in tooltips
Browse files
- Increase max text length from 4000 to 8192 characters
- Rename model labels to use generic Model A/B instead of hardcoded names
- Fix tooltip to show all tokens in a byte range (not just one)
- Remove "UncheatableEval" title from HTML output
- Update data attributes from data-qwen/data-rwkv to data-model-a/data-model-b
Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
- app.py +1 -1
- visualization/html_generator.py +12 -31
app.py
CHANGED
|
@@ -26,7 +26,7 @@ MODELS_DIR = SCRIPT_DIR / "models"
|
|
| 26 |
SUPPORT_DIR = SCRIPT_DIR / "support"
|
| 27 |
|
| 28 |
# Text length limits
|
| 29 |
-
MAX_TEXT_LENGTH = 4000
|
| 30 |
MIN_TEXT_LENGTH = 1
|
| 31 |
|
| 32 |
# Global model cache
|
|
|
|
| 26 |
SUPPORT_DIR = SCRIPT_DIR / "support"
|
| 27 |
|
| 28 |
# Text length limits
|
| 29 |
+
MAX_TEXT_LENGTH = 8192
|
| 30 |
MIN_TEXT_LENGTH = 1
|
| 31 |
|
| 32 |
# Global model cache
|
visualization/html_generator.py
CHANGED
|
@@ -353,30 +353,12 @@ def generate_comparison_html(
|
|
| 353 |
model_a_token_idx = find_token_for_byte(byte_start, model_a_token_ranges)
|
| 354 |
model_b_token_idx = find_token_for_byte(byte_start, model_b_token_ranges)
|
| 355 |
|
| 356 |
-
# Build token info strings showing
|
| 357 |
-
|
| 358 |
-
|
| 359 |
-
|
| 360 |
-
# For Model A (Qwen), show the actual token ID if available
|
| 361 |
-
if model_a_token_idx is not None and topk_predictions_a and model_a_token_idx < len(topk_predictions_a):
|
| 362 |
-
actual_token_id = topk_predictions_a[model_a_token_idx][0]
|
| 363 |
-
token_text_decoded = decode_token(actual_token_id, tokenizer_a, model_type_a)
|
| 364 |
-
qwen_info_parts.append(f"[{actual_token_id}] {repr(token_text_decoded)}")
|
| 365 |
-
else:
|
| 366 |
-
# Fallback to display tokenizer info
|
| 367 |
-
qwen_info_parts = [f"[{idx}] {repr(s)}" for idx, s in token["qwen_tokens"]]
|
| 368 |
-
|
| 369 |
-
# For Model B (RWKV), show the actual token ID if available
|
| 370 |
-
if model_b_token_idx is not None and topk_predictions_b and model_b_token_idx < len(topk_predictions_b):
|
| 371 |
-
actual_token_id = topk_predictions_b[model_b_token_idx][0]
|
| 372 |
-
token_text_decoded = decode_token(actual_token_id, tokenizer_b, model_type_b)
|
| 373 |
-
rwkv_info_parts.append(f"[{actual_token_id}] {repr(token_text_decoded)}")
|
| 374 |
-
else:
|
| 375 |
-
# Fallback to display tokenizer info
|
| 376 |
-
rwkv_info_parts = [f"[{idx}] {repr(s)}" for idx, s in token["rwkv_tokens"]]
|
| 377 |
|
| 378 |
-
|
| 379 |
-
|
| 380 |
|
| 381 |
raw_bytes = list(text_bytes[byte_start:byte_end])
|
| 382 |
losses_a = byte_losses_a[byte_start:byte_end]
|
|
@@ -443,8 +425,8 @@ def generate_comparison_html(
|
|
| 443 |
|
| 444 |
token_span_content = "".join(token_html_parts)
|
| 445 |
data_attrs = (
|
| 446 |
-
f'data-
|
| 447 |
-
f'data-
|
| 448 |
f'data-bytes="{escape_for_attr(bytes_str)}" '
|
| 449 |
f'data-compression-a="{escape_for_attr(compression_a_str)}" '
|
| 450 |
f'data-compression-b="{escape_for_attr(compression_b_str)}" '
|
|
@@ -475,7 +457,7 @@ def generate_comparison_html(
|
|
| 475 |
<html>
|
| 476 |
<head>
|
| 477 |
<meta charset="UTF-8">
|
| 478 |
-
<title>
|
| 479 |
<style>
|
| 480 |
body {{
|
| 481 |
font-family: Consolas, 'Courier New', monospace;
|
|
@@ -671,7 +653,6 @@ def generate_comparison_html(
|
|
| 671 |
<svg id="svg-overlay"></svg>
|
| 672 |
<div id="tooltip"></div>
|
| 673 |
<div class="header">
|
| 674 |
-
<h1>UncheatableEval - Byte-wise Loss Comparison</h1>
|
| 675 |
<div class="meta">
|
| 676 |
<div>Model A: {model_a_name}</div>
|
| 677 |
<div>Model B: {model_b_name}</div>
|
|
@@ -781,8 +762,8 @@ def generate_comparison_html(
|
|
| 781 |
|
| 782 |
tokenSpans.forEach(token => {{
|
| 783 |
token.addEventListener('mouseenter', (e) => {{
|
| 784 |
-
const
|
| 785 |
-
const
|
| 786 |
const bytes = token.getAttribute('data-bytes') || '';
|
| 787 |
const compressionA = token.getAttribute('data-compression-a') || '';
|
| 788 |
const compressionB = token.getAttribute('data-compression-b') || '';
|
|
@@ -829,8 +810,8 @@ def generate_comparison_html(
|
|
| 829 |
<div><span class="label">Compression A:</span> <span class="loss-a">${{compressionA || '(empty)'}}</span></div>
|
| 830 |
<div><span class="label">Compression B:</span> <span class="loss-b">${{compressionB || '(empty)'}}</span></div>
|
| 831 |
<hr style="border-color: #555; margin: 6px 0;">
|
| 832 |
-
<div><span class="label">
|
| 833 |
-
<div><span class="label">
|
| 834 |
`;
|
| 835 |
if (top5A || top5B) {{
|
| 836 |
tooltipHtml += '<div class="topk-section"><div class="topk-container">';
|
|
|
|
| 353 |
model_a_token_idx = find_token_for_byte(byte_start, model_a_token_ranges)
|
| 354 |
model_b_token_idx = find_token_for_byte(byte_start, model_b_token_ranges)
|
| 355 |
|
| 356 |
+
# Build token info strings showing all tokens in this byte range
|
| 357 |
+
# Model A (RWKV7) - show all tokens that overlap with this byte range
|
| 358 |
+
model_a_info = ", ".join([f"[{idx}] {repr(s)}" for idx, s in token["rwkv_tokens"]])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 359 |
|
| 360 |
+
# Model B (Qwen3) - show all tokens that overlap with this byte range
|
| 361 |
+
model_b_info = ", ".join([f"[{idx}] {repr(s)}" for idx, s in token["qwen_tokens"]])
|
| 362 |
|
| 363 |
raw_bytes = list(text_bytes[byte_start:byte_end])
|
| 364 |
losses_a = byte_losses_a[byte_start:byte_end]
|
|
|
|
| 425 |
|
| 426 |
token_span_content = "".join(token_html_parts)
|
| 427 |
data_attrs = (
|
| 428 |
+
f'data-model-a="{escape_for_attr(model_a_info)}" '
|
| 429 |
+
f'data-model-b="{escape_for_attr(model_b_info)}" '
|
| 430 |
f'data-bytes="{escape_for_attr(bytes_str)}" '
|
| 431 |
f'data-compression-a="{escape_for_attr(compression_a_str)}" '
|
| 432 |
f'data-compression-b="{escape_for_attr(compression_b_str)}" '
|
|
|
|
| 457 |
<html>
|
| 458 |
<head>
|
| 459 |
<meta charset="UTF-8">
|
| 460 |
+
<title>Model Comparison</title>
|
| 461 |
<style>
|
| 462 |
body {{
|
| 463 |
font-family: Consolas, 'Courier New', monospace;
|
|
|
|
| 653 |
<svg id="svg-overlay"></svg>
|
| 654 |
<div id="tooltip"></div>
|
| 655 |
<div class="header">
|
|
|
|
| 656 |
<div class="meta">
|
| 657 |
<div>Model A: {model_a_name}</div>
|
| 658 |
<div>Model B: {model_b_name}</div>
|
|
|
|
| 762 |
|
| 763 |
tokenSpans.forEach(token => {{
|
| 764 |
token.addEventListener('mouseenter', (e) => {{
|
| 765 |
+
const modelA = token.getAttribute('data-model-a') || 'N/A';
|
| 766 |
+
const modelB = token.getAttribute('data-model-b') || 'N/A';
|
| 767 |
const bytes = token.getAttribute('data-bytes') || '';
|
| 768 |
const compressionA = token.getAttribute('data-compression-a') || '';
|
| 769 |
const compressionB = token.getAttribute('data-compression-b') || '';
|
|
|
|
| 810 |
<div><span class="label">Compression A:</span> <span class="loss-a">${{compressionA || '(empty)'}}</span></div>
|
| 811 |
<div><span class="label">Compression B:</span> <span class="loss-b">${{compressionB || '(empty)'}}</span></div>
|
| 812 |
<hr style="border-color: #555; margin: 6px 0;">
|
| 813 |
+
<div><span class="label">Model A:</span> <span class="model-a">${{modelA || '(empty)'}}</span></div>
|
| 814 |
+
<div><span class="label">Model B:</span> <span class="model-b">${{modelB || '(empty)'}}</span></div>
|
| 815 |
`;
|
| 816 |
if (top5A || top5B) {{
|
| 817 |
tooltipHtml += '<div class="topk-section"><div class="topk-container">';
|