Jellyfish042 commited on
Commit
350392a
·
1 Parent(s): 25a9607

Checkpoint before tooltip palette update

Browse files
.claude/settings.local.json CHANGED
@@ -9,7 +9,9 @@
9
  "Bash(git commit -m \"$\\(cat <<''EOF''\nFix Gradio compatibility for HuggingFace Spaces\n\n- Upgrade gradio to >=5.0.0 to fix API schema bug\n- Add server_name and server_port to demo.launch\\(\\)\n\nCo-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>\nEOF\n\\)\")",
10
  "Bash(git commit:*)",
11
  "Bash(git reset:*)",
12
- "Bash(and top-10 predictions\" to better reflect what users see in the tooltip.\nAlso updated color legend to match the swapped model positions.\n\nCo-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>\nEOF\n\\)\")"
 
 
13
  ]
14
  }
15
  }
 
9
  "Bash(git commit -m \"$\\(cat <<''EOF''\nFix Gradio compatibility for HuggingFace Spaces\n\n- Upgrade gradio to >=5.0.0 to fix API schema bug\n- Add server_name and server_port to demo.launch\\(\\)\n\nCo-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>\nEOF\n\\)\")",
10
  "Bash(git commit:*)",
11
  "Bash(git reset:*)",
12
+ "Bash(and top-10 predictions\" to better reflect what users see in the tooltip.\nAlso updated color legend to match the swapped model positions.\n\nCo-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>\nEOF\n\\)\")",
13
+ "Bash(git fetch:*)",
14
+ "Bash(git pull:*)"
15
  ]
16
  }
17
  }
README.md CHANGED
@@ -60,6 +60,27 @@ pip install -r requirements.txt
60
  python app.py
61
  ```
62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  ## Requirements
64
 
65
  - CUDA-capable GPU (16GB+ VRAM recommended)
 
60
  python app.py
61
  ```
62
 
63
+ ## Regression Checks (Recommended)
64
+
65
+ Run these after UI or rendering changes:
66
+
67
+ ```bash
68
+ # Generate baseline snapshots
69
+ conda run -n torch2 python tests/generate_snapshots.py --out tests/golden
70
+
71
+ # Generate candidate snapshots
72
+ conda run -n torch2 python tests/generate_snapshots.py --out tests/_out
73
+
74
+ # Compare render-model JSON
75
+ conda run -n torch2 python tests/compare_snapshots.py --baseline tests/golden/stress.render_model.json --candidate tests/_out/stress.render_model.json
76
+
77
+ # Compare HTML output
78
+ conda run -n torch2 python tests/compare_html.py --baseline tests/golden/stress.output.html --candidate tests/_out/stress.output.html
79
+
80
+ # Optional: visual smoke placeholder
81
+ conda run -n torch2 python tests/visual_smoke.py --html tests/_out/stress.output.html
82
+ ```
83
+
84
  ## Requirements
85
 
86
  - CUDA-capable GPU (16GB+ VRAM recommended)
app.py CHANGED
@@ -7,6 +7,7 @@ Compare byte-level prediction performance between Qwen3-1.7B-Base and RWKV7-G1C-
7
  import gc
8
  import os
9
  from pathlib import Path
 
10
 
11
  import gradio as gr
12
  import torch
@@ -27,7 +28,7 @@ MODELS_DIR = SCRIPT_DIR / "models"
27
  SUPPORT_DIR = SCRIPT_DIR / "support"
28
 
29
  # Text length limits
30
- MAX_TEXT_LENGTH = 8192
31
  MIN_TEXT_LENGTH = 1
32
 
33
  # Global model cache
@@ -120,7 +121,7 @@ def validate_input(text: str) -> tuple[bool, str]:
120
  if not text or not text.strip():
121
  return False, "Please enter some text to analyze."
122
 
123
- text = text.strip()
124
 
125
  if len(text) < MIN_TEXT_LENGTH:
126
  return False, f"Text is too short. Minimum {MIN_TEXT_LENGTH} characters required."
@@ -299,7 +300,15 @@ def get_default_example():
299
 
300
 
301
  # Build Gradio UI
302
- with gr.Blocks(title="Compression-Lens: RWKV-7 vs Qwen3", theme=gr.themes.Soft()) as demo:
 
 
 
 
 
 
 
 
303
  gr.HTML(
304
  """
305
  <div style="text-align: center; margin-bottom: 20px;">
@@ -324,6 +333,7 @@ with gr.Blocks(title="Compression-Lens: RWKV-7 vs Qwen3", theme=gr.themes.Soft()
324
  placeholder=f"Enter text to analyze (max {MAX_TEXT_LENGTH} characters)...",
325
  lines=10,
326
  max_lines=20,
 
327
  )
328
 
329
  with gr.Row():
 
7
  import gc
8
  import os
9
  from pathlib import Path
10
+ import unicodedata
11
 
12
  import gradio as gr
13
  import torch
 
28
  SUPPORT_DIR = SCRIPT_DIR / "support"
29
 
30
  # Text length limits
31
+ MAX_TEXT_LENGTH = 16384
32
  MIN_TEXT_LENGTH = 1
33
 
34
  # Global model cache
 
121
  if not text or not text.strip():
122
  return False, "Please enter some text to analyze."
123
 
124
+ text = unicodedata.normalize("NFC", text).strip()
125
 
126
  if len(text) < MIN_TEXT_LENGTH:
127
  return False, f"Text is too short. Minimum {MIN_TEXT_LENGTH} characters required."
 
300
 
301
 
302
  # Build Gradio UI
303
+ with gr.Blocks(
304
+ title="Compression-Lens: RWKV-7 vs Qwen3",
305
+ theme=gr.themes.Soft(),
306
+ css="""
307
+ #input-text textarea {
308
+ font-family: Consolas, 'Courier New', monospace;
309
+ }
310
+ """,
311
+ ) as demo:
312
  gr.HTML(
313
  """
314
  <div style="text-align: center; margin-bottom: 20px;">
 
333
  placeholder=f"Enter text to analyze (max {MAX_TEXT_LENGTH} characters)...",
334
  lines=10,
335
  max_lines=20,
336
+ elem_id="input-text",
337
  )
338
 
339
  with gr.Row():
core/escaping.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Unified escaping helpers for HTML rendering.
3
+
4
+ Keep all HTML/attribute/script escaping logic in one place to avoid divergence.
5
+ """
6
+
7
+ import json
8
+ from typing import Any
9
+
10
+
11
def escape_text_node(text: str) -> str:
    """Escape ``&``, ``<`` and ``>`` so *text* is safe as an HTML text node.

    ``None`` is treated as an empty string. Quotes are deliberately left
    alone: they are only dangerous inside attribute values (see
    ``escape_attr``).
    """
    if text is None:
        return ""
    # Single simultaneous pass; equivalent to chained str.replace with
    # "&" handled first, since translate never re-scans its own output.
    return text.translate({
        ord("&"): "&amp;",
        ord("<"): "&lt;",
        ord(">"): "&gt;",
    })
20
+
21
+
22
def escape_attr(text: str) -> str:
    """Escape *text* for safe placement in HTML attribute values.

    Covers both quote styles, angle brackets, ampersands, and the
    whitespace control characters (newline, carriage return, tab) that
    browsers would otherwise normalize away inside an attribute.
    ``None`` becomes the empty string.
    """
    if text is None:
        return ""
    # One simultaneous pass over the string; matches the chained
    # str.replace order where "&" is escaped before the entities
    # introduced by the other substitutions.
    table = {
        ord("&"): "&amp;",
        ord('"'): "&quot;",
        ord("'"): "&#39;",
        ord("<"): "&lt;",
        ord(">"): "&gt;",
        ord("\n"): "&#10;",
        ord("\r"): "&#13;",
        ord("\t"): "&#9;",
    }
    return text.translate(table)
36
+
37
+
38
def escape_json_for_script(value: Any) -> str:
    """Serialize *value* as JSON that is safe inside a ``<script>`` element.

    ``<``, ``>`` and ``&`` are rewritten as ``\\uXXXX`` escapes so a payload
    string such as ``"</script>"`` cannot terminate the script context and
    no HTML entities can be formed.
    """
    serialized = json.dumps(value, ensure_ascii=False)
    return serialized.translate({
        ord("<"): "\\u003c",
        ord(">"): "\\u003e",
        ord("&"): "\\u0026",
    })
core/render_model.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Render model definitions for visualization.
3
+
4
+ This module defines a stable, serializable intermediate representation
5
+ between model outputs and HTML rendering.
6
+ """
7
+
8
+ from dataclasses import dataclass, field, asdict
9
+ from typing import Any, Dict, List, Optional
10
+
11
+
12
+ def _has_control_chars(text: str) -> bool:
13
+ if not text:
14
+ return False
15
+ for ch in text:
16
+ code = ord(ch)
17
+ if code < 32 or code == 127:
18
+ return True
19
+ return False
20
+
21
+
22
@dataclass
class TokenDisplay:
    """How a token's text should be presented in the rendered HTML."""

    # Literal token text; may contain control characters for non-"normal" kinds.
    text: str
    kind: str  # "normal" | "control" | "raw"
26
+
27
+
28
@dataclass
class TokenInfo:
    """Per-token record of the render model.

    Addresses a span of the source text by byte offsets and carries the
    per-model statistics the tooltip renders.
    """

    byte_start: int  # start offset of the token's span — presumably UTF-8 bytes; confirm against builder
    byte_end: int  # end offset of the token's span
    display: TokenDisplay  # how the token text is shown
    is_word: bool  # True when the token participates in repeated-word linking
    word_id: Optional[int] = None  # ordinal of this word occurrence, when is_word
    word_key: Optional[str] = None  # grouping key shared by repeated words
    bytes_hex: str = ""  # hex dump of the token's raw bytes
    # Per-model compression strings — NOTE(review): keys look like model ids
    # ("rwkv"/"qwen" per main.js); verify against the builder.
    compression: Dict[str, str] = field(default_factory=dict)
    # Per-model token segmentations (rows of token data), same keying as above.
    model_tokens: Dict[str, List[List[Any]]] = field(default_factory=dict)
    loss: Dict[str, float] = field(default_factory=dict)  # per-model loss values
    topk: Dict[str, Any] = field(default_factory=dict)  # per-model top-k predictions
    tuned_delta: float = 0.0  # tuning delta exposed to the frontend; 0.0 when unused
42
+
43
+
44
@dataclass
class RenderModel:
    """Serializable intermediate representation consumed by the HTML renderer."""

    text: str  # the original input text
    tokens: List[TokenInfo]  # one entry per rendered token, in order
    meta: Dict[str, Any] = field(default_factory=dict)  # free-form metadata

    def to_dict(self) -> Dict[str, Any]:
        """Recursively convert to plain dicts/lists (JSON-serializable)."""
        return asdict(self)
52
+
53
+
54
def build_display(text: str, is_raw: bool = False) -> TokenDisplay:
    """Classify *text* and wrap it in a TokenDisplay.

    Raw tokens are tagged "raw" unconditionally; otherwise the kind is
    "control" when control characters are present, else "normal".
    """
    if is_raw:
        kind = "raw"
    elif _has_control_chars(text):
        kind = "control"
    else:
        kind = "normal"
    return TokenDisplay(text=text, kind=kind)
core/segmentation.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Fallback segmentation utilities.
3
+
4
+ Used for offline tests or snapshot generation when model tokenizers
5
+ are unavailable.
6
+ """
7
+
8
+ from typing import Dict, List
9
+
10
+
11
def fallback_token_info(text: str) -> Dict[str, List]:
    """Return minimal token info using UTF-8 codepoint boundaries.

    Produces cumulative byte offsets (starting at 0), one per codepoint
    of *text*; all model-specific fields are left empty.
    """
    offsets = [0]
    for length in (len(ch.encode("utf-8")) for ch in text):
        offsets.append(offsets[-1] + length)
    return {
        "common_boundaries": offsets,
        "qwen_tokens": [],
        "rwkv_tokens": [],
        "byte_to_qwen": {},
        "byte_to_rwkv": {},
    }
tests/_out/stress.output.html ADDED
The diff for this file is too large to render. See raw diff
 
tests/_out/stress.render_model.json ADDED
The diff for this file is too large to render. See raw diff
 
tests/compare_html.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Compare two HTML files with a unified diff.
3
+ """
4
+
5
+ import argparse
6
+ import difflib
7
+ from pathlib import Path
8
+
9
+
10
def normalize(text: str, ignore_whitespace: bool) -> str:
    """Normalize line endings to ``\\n``; optionally collapse whitespace runs."""
    unified = text.replace("\r\n", "\n").replace("\r", "\n")
    if not ignore_whitespace:
        return unified
    # Collapse every whitespace run (including newlines) to one space.
    return " ".join(unified.split())
15
+
16
+
17
def main() -> int:
    """CLI entry point: diff two HTML files.

    Returns 0 when the (optionally whitespace-normalized) files match,
    1 when differences are found; the diff is printed capped at
    ``--max-lines`` lines.
    """
    parser = argparse.ArgumentParser(description="Compare HTML files.")
    parser.add_argument("--baseline", type=Path, required=True, help="Baseline HTML path")
    parser.add_argument("--candidate", type=Path, required=True, help="Candidate HTML path")
    parser.add_argument("--ignore-whitespace", action="store_true", help="Normalize whitespace before diff")
    parser.add_argument("--max-lines", type=int, default=200, help="Max diff lines to print")
    args = parser.parse_args()

    base_text = normalize(args.baseline.read_text(encoding="utf-8"), args.ignore_whitespace)
    cand_text = normalize(args.candidate.read_text(encoding="utf-8"), args.ignore_whitespace)

    base_lines = base_text.splitlines(keepends=True)
    cand_lines = cand_text.splitlines(keepends=True)

    diff = list(difflib.unified_diff(base_lines, cand_lines, fromfile=str(args.baseline), tofile=str(args.candidate)))

    if not diff:
        print("No differences found.")
        return 0

    print("Differences found:")
    # keepends=True means diff lines carry their own newline, so print
    # with end="" to avoid doubling blank lines.
    for line in diff[: args.max_lines]:
        print(line, end="")
    if len(diff) > args.max_lines:
        print(f"\n... truncated ({len(diff)} total diff lines).")
    return 1
43
+
44
+
45
+ if __name__ == "__main__":
46
+ raise SystemExit(main())
tests/compare_snapshots.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Compare two render-model JSON snapshots and report differences.
3
+ """
4
+
5
+ import argparse
6
+ import json
7
+ import math
8
+ import sys
9
+ from pathlib import Path
10
+ from typing import Any, List
11
+
12
+
13
def load_json(path: Path) -> Any:
    """Read and parse a UTF-8 encoded JSON file."""
    return json.loads(path.read_text(encoding="utf-8"))
16
+
17
+
18
def is_number(value: Any) -> bool:
    """True for int/float values, excluding bool (a subclass of int)."""
    if isinstance(value, bool):
        return False
    return isinstance(value, (int, float))
20
+
21
+
22
def nearly_equal(a: float, b: float, tol: float) -> bool:
    """Return True when *a* and *b* differ by at most *tol*.

    Two NaNs are deliberately treated as equal so snapshot comparison
    does not flag stable NaN fields. Equal values short-circuit to True:
    without this, matching infinities would be reported as different,
    because ``inf - inf`` is NaN and ``abs(nan) <= tol`` is False.
    """
    if a == b:
        # Covers identical finite values and same-signed infinities,
        # where the subtraction below would misreport them.
        return True
    if math.isnan(a) and math.isnan(b):
        return True
    return abs(a - b) <= tol
26
+
27
+
28
def compare(a: Any, b: Any, path: str, diffs: List[str], tol: float, max_diffs: int) -> None:
    """Recursively diff *a* (baseline) against *b* (candidate).

    Human-readable difference descriptions are appended to *diffs*, each
    prefixed with the JSON-path-style location *path*. Recursion stops as
    soon as *max_diffs* entries have accumulated. When *tol* > 0, numeric
    leaves are compared with ``nearly_equal`` instead of exact equality.
    """
    # Global cap: stop descending once enough diffs were collected.
    if len(diffs) >= max_diffs:
        return

    # NOTE(review): strict type equality means 1 vs 1.0 is reported as a
    # type mismatch even when tol > 0 — confirm that is intended.
    if type(a) != type(b):
        diffs.append(f"{path}: type {type(a).__name__} != {type(b).__name__}")
        return

    if isinstance(a, dict):
        a_keys = set(a.keys())
        b_keys = set(b.keys())
        # Report keys present on one side only, re-checking the cap after
        # every append so output never exceeds max_diffs.
        for key in sorted(a_keys - b_keys):
            diffs.append(f"{path}.{key}: missing in candidate")
            if len(diffs) >= max_diffs:
                return
        for key in sorted(b_keys - a_keys):
            diffs.append(f"{path}.{key}: extra in candidate")
            if len(diffs) >= max_diffs:
                return
        # Shared keys are compared recursively in deterministic order.
        for key in sorted(a_keys & b_keys):
            compare(a[key], b[key], f"{path}.{key}", diffs, tol, max_diffs)
            if len(diffs) >= max_diffs:
                return
        return

    if isinstance(a, list):
        # A length mismatch is reported, then the shared prefix is still
        # compared element by element.
        if len(a) != len(b):
            diffs.append(f"{path}: list length {len(a)} != {len(b)}")
        min_len = min(len(a), len(b))
        for idx in range(min_len):
            compare(a[idx], b[idx], f"{path}[{idx}]", diffs, tol, max_diffs)
            if len(diffs) >= max_diffs:
                return
        return

    # Numeric leaves honor the tolerance (bool is excluded by is_number).
    if is_number(a) and is_number(b) and tol > 0:
        if not nearly_equal(float(a), float(b), tol):
            diffs.append(f"{path}: {a} != {b} (tol={tol})")
        return

    # Any other leaf value: exact equality.
    if a != b:
        diffs.append(f"{path}: {a!r} != {b!r}")
70
+
71
+
72
def main() -> int:
    """CLI entry point: compare two snapshot JSON files.

    Returns 0 when no differences are found, 1 otherwise (differences
    are printed, capped at ``--max-diffs``).
    """
    parser = argparse.ArgumentParser(description="Compare render-model JSON snapshots.")
    parser.add_argument("--baseline", type=Path, required=True, help="Baseline JSON path")
    parser.add_argument("--candidate", type=Path, required=True, help="Candidate JSON path")
    parser.add_argument("--float-tol", type=float, default=0.0, help="Float comparison tolerance")
    parser.add_argument("--max-diffs", type=int, default=200, help="Max diffs to display")
    args = parser.parse_args()

    baseline = load_json(args.baseline)
    candidate = load_json(args.candidate)

    diffs: List[str] = []
    # "$" is the conventional JSON-path root for reported locations.
    compare(baseline, candidate, "$", diffs, args.float_tol, args.max_diffs)

    if diffs:
        print(f"Differences found: {len(diffs)}")
        for line in diffs:
            print(line)
        return 1

    print("No differences found.")
    return 0
94
+
95
+
96
+ if __name__ == "__main__":
97
+ sys.exit(main())
tests/generate_snapshots.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Generate offline HTML + render-model snapshots for stress inputs.
3
+
4
+ Uses fallback segmentation (no model downloads).
5
+ """
6
+
7
+ import argparse
8
+ import json
9
+ import sys
10
+ from pathlib import Path
11
+
12
+ ROOT = Path(__file__).resolve().parents[1]
13
+ sys.path.insert(0, str(ROOT))
14
+
15
+ from core.segmentation import fallback_token_info
16
+ from visualization.html_generator import generate_comparison_html
17
+ SAMPLES_DIR = ROOT / "tests" / "samples"
18
+
19
+
20
def main() -> None:
    """Generate stress-input snapshots (HTML + render-model JSON) offline.

    Uses constant dummy per-byte losses and fallback segmentation so no
    model weights or tokenizers are required; writes the pair of snapshot
    files into ``--out``.
    """
    parser = argparse.ArgumentParser(description="Generate offline snapshot HTML/JSON.")
    parser.add_argument("--out", type=Path, default=ROOT / "tests" / "golden", help="Output directory")
    args = parser.parse_args()

    input_path = SAMPLES_DIR / "stress_inputs.txt"
    text = input_path.read_text(encoding="utf-8")

    # Dummy per-byte losses: one constant value per UTF-8 byte of the input,
    # distinct per model so the two colorings differ visibly.
    byte_len = len(text.encode("utf-8"))
    losses_a = [0.5] * byte_len
    losses_b = [0.6] * byte_len

    # NOTE(review): signature assumed from visualization.html_generator —
    # returns (html, render_model) when return_render_model=True; confirm.
    html, render_model = generate_comparison_html(
        text=text,
        byte_losses_a=losses_a,
        byte_losses_b=losses_b,
        model_a_name="RWKV7 (dummy)",
        model_b_name="Qwen3 (dummy)",
        topk_predictions_a=None,
        topk_predictions_b=None,
        tokenizer_a=None,
        tokenizer_b=None,
        model_type_a="rwkv7",
        model_type_b="hf",
        token_info_override=fallback_token_info(text),
        return_render_model=True,
    )

    out_dir = args.out
    out_dir.mkdir(parents=True, exist_ok=True)

    html_path = out_dir / "stress.output.html"
    json_path = out_dir / "stress.render_model.json"

    html_path.write_text(html, encoding="utf-8")
    with json_path.open("w", encoding="utf-8") as f:
        json.dump(render_model, f, ensure_ascii=False, indent=2)

    print(f"Wrote {html_path}")
    print(f"Wrote {json_path}")
60
+
61
+
62
+ if __name__ == "__main__":
63
+ main()
tests/golden/README.md ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Golden Snapshots
2
+
3
+ This folder stores render-model JSON and HTML snapshots for regression checks.
4
+
5
+ Suggested naming:
6
+ - sample_01.render_model.json
7
+ - sample_01.output.html
8
+
9
+ Keep these files in sync with `tests/samples/`.
10
+
11
+ Generate snapshots with:
12
+ `conda run -n torch2 python tests/generate_snapshots.py --out tests/golden`
13
+
14
+ Generate a candidate snapshot:
15
+ `conda run -n torch2 python tests/generate_snapshots.py --out tests/_out`
16
+
17
+ Compare snapshots:
18
+ `conda run -n torch2 python tests/compare_snapshots.py --baseline tests/golden/stress.render_model.json --candidate tests/_out/stress.render_model.json`
19
+
20
+ Compare HTML output:
21
+ `conda run -n torch2 python tests/compare_html.py --baseline tests/golden/stress.output.html --candidate tests/_out/stress.output.html`
tests/golden/stress.output.html ADDED
The diff for this file is too large to render. See raw diff
 
tests/golden/stress.render_model.json ADDED
The diff for this file is too large to render. See raw diff
 
tests/samples/stress_inputs.txt ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ BEGIN TEST
2
+ Leading spaces (2) + trailing spaces (2)··
3
+ TAB_LITERAL: [START] [END] (there is a real TAB between)
4
+
5
+ Raw escape-like text: \n \r \t \\n \\r \\t \\x00 \\x1f \\x7f \\xff \u0000 \u202E \u200F \u200E
6
+ Bytes-ish hex: e5 bd 93 e7 84 b6 | 00 1f 7f ff | 0x00 0x1F 0x7F 0xFF | b"\x00\x1f\x7f\xff"
7
+
8
+ HTML tags (should render as text, not tags):
9
+ <think></think> <think>inner</think> <script>alert('x')</script> <style>body{color:red}</style>
10
+ <div class="x" data-x="1 & 2">Hello</div> <span>Span</span> <a href="https://example.com?q=1&x=<tag>">link</a>
11
+ <img src=x onerror=alert(1)> <br> <hr> <p>para</p> <table><tr><td>cell</td></tr></table>
12
+ Nested-ish: </span><span data-x="</span>">confuse</span>
13
+
14
+ HTML entities:
15
+ &lt;think&gt; &lt;/think&gt; &amp; &quot; &#39; &nbsp; &#10; &#x3C; &#x3E; &#x26;
16
+
17
+ Markdown-ish:
18
+ # H1
19
+ ## H2
20
+ - list item 1
21
+ - list item 2
22
+ > blockquote
23
+ --- (three hyphens)
24
+
25
+ Languages:
26
+ 中文 简体/繁體 日本語 かな カタカナ 한국어 العربية עברית हिन्दी ไทย Русский Ελληνικά Español Français Português Türkçe Việt
27
+ RTL mix: العربية ABC עברית 123 (mixed direction)
tests/visual_smoke.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Placeholder for visual smoke checks.
3
+
4
+ This script is intentionally minimal; hook Playwright or similar later.
5
+ """
6
+
7
+ import argparse
8
+ from pathlib import Path
9
+
10
+
11
def main() -> None:
    """Minimal smoke check: verify the HTML artifact exists, print next steps."""
    parser = argparse.ArgumentParser(description="Visual smoke check placeholder.")
    parser.add_argument("--html", type=Path, required=True, help="HTML file to inspect")
    args = parser.parse_args()

    html_path = args.html
    # Fail fast with a non-zero exit when the artifact is missing.
    if not html_path.exists():
        raise SystemExit(f"Missing HTML file: {html_path}")

    for message in (
        "Visual smoke placeholder:",
        f"- Open in browser and visually verify: {html_path}",
        "- TODO: integrate Playwright for screenshot diffs.",
    ):
        print(message)
22
+
23
+
24
+ if __name__ == "__main__":
25
+ main()
visualization/assets/main.css ADDED
@@ -0,0 +1,250 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/* Stylesheet for the comparison view: page chrome, token/word highlighting,
   the SVG word-link overlay, and the hover tooltip with top-k predictions. */

/* ===== Page chrome ===== */
body {
    font-family: Consolas, 'Courier New', monospace;
    margin: 0;
    padding: 0;
    background-color: #f5f5f5;
}
/* Sticky dark header with title, metadata row and color legend. */
.header {
    background-color: #333;
    color: white;
    padding: 20px;
    position: sticky;
    top: 0;
    z-index: 100;
}
.header h1 {
    margin: 0 0 15px 0;
    font-size: 18px;
}
.meta {
    display: flex;
    flex-wrap: wrap;
    gap: 20px;
    font-size: 12px;
    color: #c8c8c8;
}
.legend {
    display: flex;
    gap: 15px;
    margin-top: 10px;
}
.legend-item {
    display: flex;
    align-items: center;
    gap: 5px;
}
.legend-box {
    width: 20px;
    height: 12px;
    border: 1px solid #666;
}
/* ===== Main text panel ===== */
/* pre-wrap preserves the analyzed text's whitespace; anywhere-wrapping
   keeps very long tokens from overflowing the panel. */
.content {
    background-color: white;
    margin: 10px;
    padding: 15px;
    border: 1px solid #ccc;
    font-size: 14px;
    line-height: 1.8;
    white-space: pre-wrap;
    overflow-wrap: anywhere;
    word-wrap: break-word;
    position: relative;
}
.content span {
    padding: 1px 0;
}
/* ===== Repeated-word linking ===== */
.word {
    cursor: pointer;
    position: relative;
}
.word:hover {
    outline: 2px solid #007bff;
    outline-offset: 1px;
}
.word.highlighted {
    outline: 2px solid #ff6b6b;
    outline-offset: 1px;
}
/* Full-viewport SVG layer for the curves linking repeated words;
   pointer-events none so it never blocks hovering the text. */
#svg-overlay {
    position: fixed;
    top: 0;
    left: 0;
    width: 100%;
    height: 100%;
    pointer-events: none;
    z-index: 1000;
}
.link-line {
    stroke: #007bff;
    stroke-width: 2;
    fill: none;
    opacity: 0.7;
}
.link-dot {
    fill: #007bff;
    opacity: 0.8;
}
/* ===== Tokens ===== */
.token {
    position: relative;
    cursor: help;
}
.token:hover {
    outline: 1px dashed #666;
}
/* Color-coding for token display kinds (see core/render_model.py). */
.token-kind-control {
    color: #f59e0b;
}
.token-kind-raw {
    color: #fb7185;
}
/* ===== Hover tooltip ===== */
#tooltip {
    position: fixed;
    background-color: rgba(0, 0, 0, 0.9);
    color: white;
    padding: 10px 14px;
    border-radius: 6px;
    font-size: 12px;
    max-width: none;
    width: max-content;
    z-index: 2000;
    pointer-events: none;
    display: none;
    line-height: 1.6;
    box-shadow: 0 2px 10px rgba(0,0,0,0.3);
}
#tooltip .label {
    color: #aaa;
    font-weight: bold;
}
#tooltip .bytes {
    color: #a5f3fc;
    font-family: monospace;
}
/* Per-model loss readouts (a = green, b = red). */
#tooltip .loss-a {
    color: #86efac;
    font-family: monospace;
}
#tooltip .loss-b {
    color: #fca5a5;
    font-family: monospace;
}
#tooltip .model-a {
    color: #fcd34d;
}
#tooltip .model-b {
    color: #7dd3fc;
}
/* Top-k prediction section: two side-by-side model columns. */
#tooltip .topk-section {
    margin-top: 8px;
    padding-top: 8px;
    border-top: 1px solid #555;
}
#tooltip .topk-container {
    display: flex;
    gap: 16px;
}
#tooltip .topk-column {
    flex: 1;
    min-width: 180px;
}
#tooltip .topk-title {
    color: #aaa;
    font-weight: bold;
    margin-bottom: 4px;
    font-size: 11px;
}
#tooltip .topk-title.model-a {
    color: #86efac;
}
#tooltip .topk-title.model-b {
    color: #fca5a5;
}
#tooltip .topk-list {
    font-size: 11px;
}
#tooltip .topk-item {
    display: flex;
    gap: 4px;
    padding: 1px 0;
    align-items: flex-start;
}
/* Horizontal strip of token "chips"; nowrap keeps a segmentation on one line. */
#tooltip .token-block {
    margin-top: 6px;
    display: flex;
    align-items: center;
    gap: 6px;
    white-space: nowrap;
    flex-wrap: nowrap;
    overflow-x: visible;
}
#tooltip .token-chips {
    display: flex;
    flex-wrap: nowrap;
    gap: 4px;
    align-items: center;
    flex: 0 0 auto;
}
#tooltip .token-chip-group {
    display: inline-flex;
    align-items: center;
    gap: 4px;
    flex: 0 0 auto;
    white-space: nowrap;
}
#tooltip .token-prob {
    color: #86efac;
    font-family: monospace;
    font-size: 11px;
    white-space: nowrap;
}
#tooltip .token-id {
    color: #888;
    font-family: monospace;
    white-space: nowrap;
}
#tooltip .token-chip {
    max-width: 100%;
}
/* Inside chip groups, token text must not wrap (overrides .topk-token below). */
#tooltip .token-chip-group .topk-token {
    white-space: pre;
    overflow-wrap: normal;
    word-break: normal;
}
#tooltip .topk-rank {
    color: #888;
    min-width: 18px;
}
/* Gold rank marker when the prediction matched the actual token. */
#tooltip .topk-rank.hit {
    color: #ffd700;
}
#tooltip .topk-token {
    color: #a5f3fc;
    white-space: pre-wrap;
    overflow-wrap: anywhere;
    word-break: break-word;
    font-family: monospace;
    background-color: rgba(255, 255, 255, 0.08);
    padding: 0 4px;
    border-radius: 3px;
    display: inline-block;
    max-width: 100%;
}
/* Escaped-character highlighting inside tooltip token text. */
#tooltip .esc-control {
    color: #fbbf24;
}
#tooltip .esc-raw {
    color: #fb7185;
}
#tooltip .topk-prob {
    color: #86efac;
    min-width: 45px;
    text-align: right;
}
#tooltip .topk-hit {
    color: #22c55e;
}
#tooltip .topk-miss {
    color: #ef4444;
    font-style: italic;
}
visualization/assets/main.js ADDED
@@ -0,0 +1,615 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
// Entry point for the rendered comparison page: grab the overlay/content
// nodes and hydrate the render model embedded as JSON in #render-model.
const svgOverlay = document.getElementById('svg-overlay');
const content = document.querySelector('.content');
const renderModelEl = document.getElementById('render-model');
let renderModel = {};
let renderTokens = [];
try {
    // Missing element or malformed JSON degrades to an empty model
    // rather than breaking the whole page.
    renderModel = JSON.parse(renderModelEl ? renderModelEl.textContent : '{}') || {};
    renderTokens = Array.isArray(renderModel.tokens) ? renderModel.tokens : [];
} catch (e) {
    console.warn('Failed to parse render model JSON:', e);
    renderModel = {};
    renderTokens = [];
}

// Render control characters as printable escapes (\n, \r, \t, \xNN) and
// double backslashes so literal backslashes stay distinguishable.
// Falsy input (null/undefined/'') is returned unchanged.
function escapeControlChars(text) {
    if (!text) return text;
    let out = '';
    for (let i = 0; i < text.length; i++) {
        const ch = text[i];
        const code = text.charCodeAt(i);
        if (ch === '\\') {
            out += '\\\\';
        } else if (ch === '\n') {
            out += '\\n';
        } else if (ch === '\r') {
            out += '\\r';
        } else if (ch === '\t') {
            out += '\\t';
        } else if (code < 32 || code === 127) {
            // Other C0 controls and DEL become two-digit hex escapes.
            out += '\\x' + code.toString(16).padStart(2, '0');
        } else {
            out += ch;
        }
    }
    return out;
}
37
+
38
// Rebuild the .content panel from the render model: one <span class="token">
// per token, tagged with dataset attributes the tooltip code reads later.
if (content) {
    // Drop any server-rendered children; the DOM is rebuilt client-side.
    while (content.firstChild) {
        content.removeChild(content.firstChild);
    }
    // Count occurrences per word_key so only repeated words become linkable.
    const wordCounts = {};
    renderTokens.forEach((token) => {
        if (token && token.is_word && token.word_key) {
            wordCounts[token.word_key] = (wordCounts[token.word_key] || 0) + 1;
        }
    });

    renderTokens.forEach((token, idx) => {
        const span = document.createElement('span');
        span.className = 'token';
        span.dataset.tokenIdx = String(idx);
        span.dataset.tunedDelta = (token && typeof token.tuned_delta === 'number') ? String(token.tuned_delta) : '0';
        // Defensive defaults: a malformed token renders as an empty "normal" span.
        const kind = (token && token.display && token.display.kind) ? token.display.kind : 'normal';
        const text = (token && token.display && typeof token.display.text === 'string') ? token.display.text : '';
        // Does the token contain at least one printable character?
        const hasVisible = (() => {
            if (!text) return false;
            for (let i = 0; i < text.length; i++) {
                const code = text.charCodeAt(i);
                if (code >= 32 && code !== 127) {
                    return true;
                }
            }
            return false;
        })();
        // A "control" token with visible characters is rendered as normal text;
        // only all-control tokens get the control styling.
        const mainKind = (kind === 'control' && hasVisible) ? 'normal' : kind;
        if (text.includes('\n') || text.includes('\r')) {
            span.dataset.hasLinebreak = '1';
        }
        if (mainKind === 'control') {
            span.classList.add('token-kind-control');
            span.textContent = text;
        } else if (mainKind === 'raw') {
            span.classList.add('token-kind-raw');
            span.textContent = text;
        } else {
            span.textContent = text;
        }

        // Only words occurring more than once participate in link drawing.
        if (token && token.is_word && token.word_key && wordCounts[token.word_key] > 1) {
            span.classList.add('word');
            span.dataset.word = token.word_key;
            if (token.word_id !== undefined && token.word_id !== null) {
                span.dataset.wordId = String(token.word_id);
            }
        }

        content.appendChild(span);
    });
}
91
+
92
// Group every linkable word span by its data-word key so hover handlers
// can find all occurrences of the same word quickly.
const words = document.querySelectorAll('.word');
const wordGroups = {};
words.forEach(word => {
    const wordText = word.getAttribute('data-word');
    if (!wordGroups[wordText]) {
        wordGroups[wordText] = [];
    }
    wordGroups[wordText].push(word);
});

// Remove all link curves/dots from the SVG overlay and clear highlights.
function clearLines() {
    while (svgOverlay.firstChild) {
        svgOverlay.removeChild(svgOverlay.firstChild);
    }
    words.forEach(w => w.classList.remove('highlighted'));
}
108
+
109
// From a list of client rects, pick the one whose vertical center is
// closest to targetY. Returns null for an empty/missing list.
function pickRectByY(rects, targetY) {
    if (!rects || rects.length === 0) return null;
    let best = rects[0];
    let bestDist = Infinity;
    rects.forEach(r => {
        const cy = r.top + r.height / 2;
        const dist = Math.abs(cy - targetY);
        if (dist < bestDist) {
            best = r;
            bestDist = dist;
        }
    });
    return best;
}

// Anchor rect for a span that may wrap across lines: with multiple client
// rects, use the fragment nearest targetY so the link attaches to the
// line fragment the user is actually pointing at.
function getAnchorRect(element, targetY) {
    const rects = Array.from(element.getClientRects());
    if (rects.length === 0) return element.getBoundingClientRect();
    if (rects.length === 1) return rects[0];
    const picked = pickRectByY(rects, targetY);
    return picked || rects[0];
}
131
+
132
// On hover, highlight all occurrences of the hovered word and draw a
// quadratic curve (plus endpoint dots) from each EARLIER occurrence to
// the hovered one on the SVG overlay.
function drawLines(hoveredWord, evt) {
    clearLines();

    const wordText = hoveredWord.getAttribute('data-word');
    const wordId = parseInt(hoveredWord.getAttribute('data-word-id'));
    const sameWords = wordGroups[wordText] || [];

    // Only link back to occurrences that appear before the hovered one.
    const previousWords = sameWords.filter(w => {
        const id = parseInt(w.getAttribute('data-word-id'));
        return id < wordId;
    });

    if (previousWords.length === 0) return;

    sameWords.forEach(w => w.classList.add('highlighted'));

    // Anchor near the cursor's Y when available, so multi-line spans
    // attach on the hovered line fragment.
    const targetY = evt ? evt.clientY : (hoveredWord.getBoundingClientRect().top + hoveredWord.getBoundingClientRect().height / 2);
    const hoveredRect = getAnchorRect(hoveredWord, targetY);
    const hoveredX = hoveredRect.left + hoveredRect.width / 2;
    const hoveredY = hoveredRect.top + hoveredRect.height / 2;

    previousWords.forEach(prevWord => {
        const prevRect = getAnchorRect(prevWord, hoveredY);
        const prevX = prevRect.left + prevRect.width / 2;
        const prevY = prevRect.top + prevRect.height / 2;

        // Control point: horizontal midpoint, arched 30px above the
        // higher endpoint.
        const midX = (hoveredX + prevX) / 2;
        const midY = Math.min(hoveredY, prevY) - 30;

        const path = document.createElementNS('http://www.w3.org/2000/svg', 'path');
        path.setAttribute('class', 'link-line');
        path.setAttribute('d', `M ${prevX} ${prevY} Q ${midX} ${midY} ${hoveredX} ${hoveredY}`);
        svgOverlay.appendChild(path);

        const dot1 = document.createElementNS('http://www.w3.org/2000/svg', 'circle');
        dot1.setAttribute('class', 'link-dot');
        dot1.setAttribute('cx', prevX);
        dot1.setAttribute('cy', prevY);
        dot1.setAttribute('r', 4);
        svgOverlay.appendChild(dot1);

        const dot2 = document.createElementNS('http://www.w3.org/2000/svg', 'circle');
        dot2.setAttribute('class', 'link-dot');
        dot2.setAttribute('cx', hoveredX);
        dot2.setAttribute('cy', hoveredY);
        dot2.setAttribute('r', 4);
        svgOverlay.appendChild(dot2);
    });
}
181
+
182
+ words.forEach(word => {
183
+ word.addEventListener('mouseenter', (e) => drawLines(word, e));
184
+ word.addEventListener('mouseleave', clearLines);
185
+ });
186
+
187
+ window.addEventListener('scroll', clearLines);
188
+
189
+ const tooltip = document.getElementById('tooltip');
190
+ const tokenSpans = document.querySelectorAll('.token');
191
+
192
+ tokenSpans.forEach(token => {
193
+ token.addEventListener('mouseenter', (e) => {
194
+ const tokenIdx = parseInt(token.dataset.tokenIdx);
195
+ const tokenInfo = (!isNaN(tokenIdx) && renderTokens[tokenIdx]) ? renderTokens[tokenIdx] : null;
196
+ const bytes = (tokenInfo && tokenInfo.bytes_hex) ? tokenInfo.bytes_hex : '';
197
+ const compressionA = (tokenInfo && tokenInfo.compression && tokenInfo.compression.rwkv) ? tokenInfo.compression.rwkv : '';
198
+ const compressionB = (tokenInfo && tokenInfo.compression && tokenInfo.compression.qwen) ? tokenInfo.compression.qwen : '';
199
+ const avgCompressionA = (tokenInfo && tokenInfo.loss && typeof tokenInfo.loss.rwkv === 'number') ? tokenInfo.loss.rwkv.toFixed(2) : '';
200
+ const avgCompressionB = (tokenInfo && tokenInfo.loss && typeof tokenInfo.loss.qwen === 'number') ? tokenInfo.loss.qwen.toFixed(2) : '';
201
+ const modelA = (tokenInfo && tokenInfo.model_tokens && tokenInfo.model_tokens.rwkv) ? tokenInfo.model_tokens.rwkv : null;
202
+ const modelB = (tokenInfo && tokenInfo.model_tokens && tokenInfo.model_tokens.qwen) ? tokenInfo.model_tokens.qwen : null;
203
+ const top5A = (tokenInfo && tokenInfo.topk && tokenInfo.topk.rwkv) ? tokenInfo.topk.rwkv : null;
204
+ const top5B = (tokenInfo && tokenInfo.topk && tokenInfo.topk.qwen) ? tokenInfo.topk.qwen : null;
205
+
206
+ function hasControlChars(text) {
207
+ if (!text) return false;
208
+ for (let i = 0; i < text.length; i++) {
209
+ const code = text.charCodeAt(i);
210
+ if (code < 32 || code === 127) {
211
+ return true;
212
+ }
213
+ }
214
+ return false;
215
+ }
216
+
217
+ function resolveKind(text, kindHint) {
218
+ if (kindHint === 'raw' || kindHint === 'control' || kindHint === 'normal') {
219
+ return kindHint;
220
+ }
221
+ if (kindHint === true) {
222
+ return 'raw';
223
+ }
224
+ if (hasControlChars(text)) {
225
+ return 'control';
226
+ }
227
+ return 'normal';
228
+ }
229
+
230
+ function appendEscapedWithControlColor(container, text) {
231
+ if (text === undefined || text === null) return;
232
+ let buffer = '';
233
+ const flush = () => {
234
+ if (buffer) {
235
+ container.appendChild(document.createTextNode(buffer));
236
+ buffer = '';
237
+ }
238
+ };
239
+ for (let i = 0; i < text.length; i++) {
240
+ const ch = text[i];
241
+ const code = text.charCodeAt(i);
242
+ if (ch === '\\') {
243
+ buffer += '\\\\';
244
+ continue;
245
+ }
246
+ if (ch === '\n' || ch === '\r' || ch === '\t' || code < 32 || code === 127) {
247
+ flush();
248
+ const span = document.createElement('span');
249
+ span.className = 'esc-control';
250
+ if (ch === '\n') {
251
+ span.textContent = '\\n';
252
+ } else if (ch === '\r') {
253
+ span.textContent = '\\r';
254
+ } else if (ch === '\t') {
255
+ span.textContent = '\\t';
256
+ } else {
257
+ span.textContent = '\\x' + code.toString(16).padStart(2, '0');
258
+ }
259
+ container.appendChild(span);
260
+ continue;
261
+ }
262
+ buffer += ch;
263
+ }
264
+ flush();
265
+ }
266
+
267
+ function appendTokenText(container, text, kindHint) {
268
+ const display = (text !== undefined && text !== null) ? text : '';
269
+ const kind = resolveKind(display, kindHint);
270
+ while (container.firstChild) {
271
+ container.removeChild(container.firstChild);
272
+ }
273
+ if (kind === 'raw') {
274
+ const span = document.createElement('span');
275
+ span.className = 'esc-raw';
276
+ span.textContent = display;
277
+ container.appendChild(span);
278
+ return;
279
+ }
280
+ if (kind === 'control') {
281
+ appendEscapedWithControlColor(container, display);
282
+ return;
283
+ }
284
+ container.textContent = display;
285
+ }
286
+
287
+ function formatTopkColumn(topkData, modelName, titleClass) {
288
+ const column = document.createElement('div');
289
+ column.className = 'topk-column';
290
+ const title = document.createElement('div');
291
+ title.className = 'topk-title ' + titleClass;
292
+ title.textContent = modelName;
293
+ column.appendChild(title);
294
+ const list = document.createElement('div');
295
+ list.className = 'topk-list';
296
+ column.appendChild(list);
297
+
298
+ if (!topkData) {
299
+ list.textContent = 'N/A';
300
+ return column;
301
+ }
302
+ try {
303
+ const data = topkData;
304
+ let actualId = null;
305
+ let rank = null;
306
+ let actualProb = null;
307
+ let topkList = [];
308
+ if (data.length >= 4) {
309
+ [actualId, rank, actualProb, topkList] = data;
310
+ } else {
311
+ [actualId, rank, topkList] = data;
312
+ }
313
+ topkList.forEach((item, idx) => {
314
+ const tokenId = item[0];
315
+ const prob = item[1];
316
+ const tokenText = item[2];
317
+ const isRaw = item.length > 3 ? item[3] : false;
318
+ const isHit = tokenId === actualId;
319
+ const rankClass = isHit ? 'topk-rank hit' : 'topk-rank';
320
+ const rawText = (tokenText !== undefined && tokenText !== null) ? tokenText : '';
321
+ const displayText = (rawText !== '') ? rawText : ('[' + tokenId + ']');
322
+
323
+ const row = document.createElement('div');
324
+ row.className = 'topk-item';
325
+
326
+ const rankSpan = document.createElement('span');
327
+ rankSpan.className = rankClass;
328
+ rankSpan.textContent = (idx + 1) + '.';
329
+ row.appendChild(rankSpan);
330
+
331
+ const tokenSpan = document.createElement('span');
332
+ tokenSpan.className = 'topk-token';
333
+ tokenSpan.title = 'ID: ' + tokenId;
334
+ appendTokenText(tokenSpan, displayText, isRaw);
335
+ row.appendChild(tokenSpan);
336
+
337
+ const probSpan = document.createElement('span');
338
+ probSpan.className = 'topk-prob';
339
+ probSpan.textContent = (prob * 100).toFixed(2) + '%';
340
+ row.appendChild(probSpan);
341
+
342
+ if (isHit) {
343
+ const hit = document.createElement('span');
344
+ hit.className = 'topk-hit';
345
+ hit.textContent = '✓';
346
+ row.appendChild(hit);
347
+ }
348
+
349
+ list.appendChild(row);
350
+ });
351
+
352
+ if (rank > 10) {
353
+ let probSuffix = '';
354
+ const probVal = parseFloat(actualProb);
355
+ if (!isNaN(probVal)) {
356
+ probSuffix = ' (' + (probVal * 100).toFixed(4) + '%)';
357
+ }
358
+ const miss = document.createElement('div');
359
+ miss.className = 'topk-item topk-miss';
360
+ miss.textContent = 'Actual rank: ' + rank + probSuffix;
361
+ list.appendChild(miss);
362
+ }
363
+ return column;
364
+ } catch (e) {
365
+ console.error('Error in formatTopkColumn for ' + modelName + ':', e);
366
+ console.error('topkData:', topkData);
367
+ list.textContent = 'Error: ' + e.message;
368
+ return column;
369
+ }
370
+ }
371
+
372
+ function formatTokenChips(modelData, label, labelClass) {
373
+ const block = document.createElement('div');
374
+ block.className = 'token-block';
375
+ const labelSpan = document.createElement('span');
376
+ labelSpan.className = 'label ' + labelClass;
377
+ labelSpan.textContent = label + ':';
378
+ block.appendChild(labelSpan);
379
+
380
+ const chips = document.createElement('div');
381
+ chips.className = 'token-chips';
382
+ block.appendChild(chips);
383
+
384
+ if (!modelData) {
385
+ const na = document.createElement('span');
386
+ na.className = 'topk-token token-chip';
387
+ na.textContent = 'N/A';
388
+ chips.appendChild(na);
389
+ return block;
390
+ }
391
+ try {
392
+ const tokenList = modelData;
393
+ tokenList.forEach((item) => {
394
+ const tokenId = item[0];
395
+ const tokenText = item[1];
396
+ const kindHint = item.length > 2 ? item[2] : false;
397
+ const probVal = item.length > 3 ? item[3] : null;
398
+ const displayText = (tokenText !== undefined && tokenText !== null) ? tokenText : '';
399
+
400
+ const group = document.createElement('span');
401
+ group.className = 'token-chip-group';
402
+ group.title = 'ID: ' + tokenId;
403
+
404
+ const idSpan = document.createElement('span');
405
+ idSpan.className = 'token-id';
406
+ idSpan.textContent = '[' + tokenId + ']';
407
+ group.appendChild(idSpan);
408
+
409
+ const chipSpan = document.createElement('span');
410
+ chipSpan.className = 'topk-token token-chip';
411
+ appendTokenText(chipSpan, displayText, kindHint);
412
+ group.appendChild(chipSpan);
413
+
414
+ if (probVal !== null && probVal !== undefined) {
415
+ const probSpan = document.createElement('span');
416
+ probSpan.className = 'token-prob';
417
+ const numericProb = typeof probVal === 'number' ? probVal : parseFloat(probVal);
418
+ if (!isNaN(numericProb)) {
419
+ probSpan.textContent = (numericProb * 100).toFixed(2) + '%';
420
+ } else {
421
+ probSpan.textContent = String(probVal);
422
+ }
423
+ group.appendChild(probSpan);
424
+ }
425
+
426
+ chips.appendChild(group);
427
+ });
428
+ return block;
429
+ } catch (e) {
430
+ console.error('Error in formatTokenChips for ' + label + ':', e);
431
+ console.error('modelData:', modelData);
432
+ const err = document.createElement('span');
433
+ err.className = 'topk-token token-chip';
434
+ err.textContent = 'Error: ' + e.message;
435
+ chips.appendChild(err);
436
+ return block;
437
+ }
438
+ }
439
+
440
+ tooltip.replaceChildren();
441
+
442
+ const bytesRow = document.createElement('div');
443
+ const bytesLabel = document.createElement('span');
444
+ bytesLabel.className = 'label';
445
+ bytesLabel.textContent = 'Bytes:';
446
+ const bytesValue = document.createElement('span');
447
+ bytesValue.className = 'bytes';
448
+ bytesValue.textContent = bytes || '(empty)';
449
+ bytesRow.appendChild(bytesLabel);
450
+ bytesRow.appendChild(document.createTextNode(' '));
451
+ bytesRow.appendChild(bytesValue);
452
+ tooltip.appendChild(bytesRow);
453
+
454
+ const rwkvRow = document.createElement('div');
455
+ const rwkvLabel = document.createElement('span');
456
+ rwkvLabel.className = 'label';
457
+ rwkvLabel.textContent = 'RWKV Compression Rate:';
458
+ const rwkvValue = document.createElement('span');
459
+ rwkvValue.className = 'loss-a';
460
+ rwkvValue.textContent = compressionA || '(empty)';
461
+ if (avgCompressionA) {
462
+ rwkvValue.textContent += ' (avg: ' + avgCompressionA + '%)';
463
+ }
464
+ rwkvRow.appendChild(rwkvLabel);
465
+ rwkvRow.appendChild(document.createTextNode(' '));
466
+ rwkvRow.appendChild(rwkvValue);
467
+ tooltip.appendChild(rwkvRow);
468
+
469
+ const qwenRow = document.createElement('div');
470
+ const qwenLabel = document.createElement('span');
471
+ qwenLabel.className = 'label';
472
+ qwenLabel.textContent = 'Qwen Compression Rate:';
473
+ const qwenValue = document.createElement('span');
474
+ qwenValue.className = 'loss-b';
475
+ qwenValue.textContent = compressionB || '(empty)';
476
+ if (avgCompressionB) {
477
+ qwenValue.textContent += ' (avg: ' + avgCompressionB + '%)';
478
+ }
479
+ qwenRow.appendChild(qwenLabel);
480
+ qwenRow.appendChild(document.createTextNode(' '));
481
+ qwenRow.appendChild(qwenValue);
482
+ tooltip.appendChild(qwenRow);
483
+
484
+ const hr = document.createElement('hr');
485
+ hr.style.borderColor = '#555';
486
+ hr.style.margin = '6px 0';
487
+ tooltip.appendChild(hr);
488
+
489
+ tooltip.appendChild(formatTokenChips(modelA, 'RWKV', 'model-a'));
490
+ tooltip.appendChild(formatTokenChips(modelB, 'Qwen', 'model-b'));
491
+
492
+ if (top5A || top5B) {
493
+ const topkSection = document.createElement('div');
494
+ topkSection.className = 'topk-section';
495
+ const topkContainer = document.createElement('div');
496
+ topkContainer.className = 'topk-container';
497
+ topkContainer.appendChild(formatTopkColumn(top5A, 'RWKV Top10', 'model-a'));
498
+ topkContainer.appendChild(formatTopkColumn(top5B, 'Qwen Top10', 'model-b'));
499
+ topkSection.appendChild(topkContainer);
500
+ tooltip.appendChild(topkSection);
501
+ }
502
+
503
+ tooltip.style.display = 'block';
504
+ });
505
+
506
+ token.addEventListener('mousemove', (e) => {
507
+ const tooltipRect = tooltip.getBoundingClientRect();
508
+ const viewportWidth = window.innerWidth;
509
+ const viewportHeight = window.innerHeight;
510
+
511
+ let x = e.clientX + 15;
512
+ let y = e.clientY + 15;
513
+
514
+ if (x + tooltipRect.width > viewportWidth - 10) {
515
+ x = e.clientX - tooltipRect.width - 15;
516
+ }
517
+ if (y + tooltipRect.height > viewportHeight - 10) {
518
+ y = e.clientY - tooltipRect.height - 15;
519
+ }
520
+ if (x < 10) x = 10;
521
+ if (y < 10) y = 10;
522
+
523
+ tooltip.style.left = x + 'px';
524
+ tooltip.style.top = y + 'px';
525
+ });
526
+
527
+ token.addEventListener('mouseleave', () => {
528
+ tooltip.style.display = 'none';
529
+ });
530
+ });
531
+
532
+ const slider = document.getElementById('color-range-slider');
533
+ const rangeValue = document.getElementById('color-range-value');
534
+
535
+ // Collect all tuned_delta values
536
+ const tokenData = [];
537
+ const linebreakTokens = [];
538
+ tokenSpans.forEach((token, idx) => {
539
+ if (token.dataset.hasLinebreak === '1') {
540
+ linebreakTokens.push(token);
541
+ return;
542
+ }
543
+ const tunedDelta = parseFloat(token.dataset.tunedDelta);
544
+ if (!isNaN(tunedDelta)) {
545
+ tokenData.push({ token, tunedDelta, absDelta: Math.abs(tunedDelta) });
546
+ }
547
+ });
548
+
549
+ // Calculate max_abs_tuned_delta for normalization
550
+ const maxAbsDelta = Math.max(...tokenData.map(d => d.absDelta), 1e-9);
551
+
552
+ // Sort by |tuned_delta| to get rankings
553
+ const sortedByAbs = [...tokenData].sort((a, b) => b.absDelta - a.absDelta);
554
+ sortedByAbs.forEach((item, rank) => {
555
+ item.rank = rank; // rank 0 = largest deviation
556
+ });
557
+
558
+ function tunedDeltaToColor(tunedDelta, maxAbsDelta, exponent) {
559
+ // Normalize to [-1, 1]
560
+ const normalized = Math.max(-1, Math.min(1, tunedDelta / maxAbsDelta));
561
+ let r, g, b;
562
+ if (normalized < 0) {
563
+ // Green (RWKV better)
564
+ const intensity = Math.pow(-normalized, exponent);
565
+ r = Math.round(255 * (1 - intensity * 0.85));
566
+ g = 255;
567
+ b = Math.round(255 * (1 - intensity * 0.85));
568
+ } else {
569
+ // Red (RWKV worse)
570
+ const intensity = Math.pow(normalized, exponent);
571
+ r = 255;
572
+ g = Math.round(255 * (1 - intensity * 0.85));
573
+ b = Math.round(255 * (1 - intensity * 0.85));
574
+ }
575
+ return `rgb(${r}, ${g}, ${b})`;
576
+ }
577
+
578
+ function updateColors(colorRangePercent) {
579
+ // colorRangePercent: 0-100, represents the proportion of tokens to color
580
+ const colorCount = Math.round(tokenData.length * colorRangePercent / 100);
581
+
582
+ // Calculate exponent: 100% -> 0.5, 0% -> 1.0
583
+ const exponent = 1 - (colorRangePercent / 100) * 0.5;
584
+
585
+ // Calculate max deviation within the colored range
586
+ let maxAbsDeltaInRange = 1e-9;
587
+ tokenData.forEach(item => {
588
+ if (item.rank < colorCount) {
589
+ maxAbsDeltaInRange = Math.max(maxAbsDeltaInRange, item.absDelta);
590
+ }
591
+ });
592
+
593
+ tokenData.forEach(item => {
594
+ if (item.rank < colorCount) {
595
+ // Use dynamic normalization based on colored range
596
+ item.token.style.backgroundColor = tunedDeltaToColor(item.tunedDelta, maxAbsDeltaInRange, exponent);
597
+ } else {
598
+ // Outside color range, white
599
+ item.token.style.backgroundColor = 'rgb(255, 255, 255)';
600
+ }
601
+ });
602
+ linebreakTokens.forEach(token => {
603
+ token.style.backgroundColor = 'rgb(255, 255, 255)';
604
+ });
605
+ }
606
+
607
+ slider.addEventListener('input', (e) => {
608
+ const val = parseFloat(e.target.value);
609
+ rangeValue.textContent = val.toFixed(1) + '%';
610
+ updateColors(val);
611
+ });
612
+
613
+ // Apply default color range on page load
614
+ updateColors(10);
615
+
visualization/html_generator.py CHANGED
@@ -4,16 +4,22 @@ HTML visualization generator for UncheatableEval.
4
  Generates interactive HTML visualizations comparing byte-level losses between two models.
5
  """
6
 
7
- import base64
8
  import json
9
  import math
10
  import re
 
11
  from typing import List, Tuple, Optional, Set
12
 
13
  import numpy as np
14
 
 
 
 
15
  from core.helpers import TokenizerBytesConverter
16
 
 
 
17
 
18
  # Compression rate conversion factor
19
  COMPRESSION_RATE_FACTOR = (1.0 / math.log(2.0)) * 0.125 * 100.0
@@ -113,10 +119,17 @@ def get_token_info_for_text(text: str) -> dict:
113
  qwen_boundaries = set([0] + [t[1] for t in qwen_tokens])
114
  rwkv_boundaries = set([0] + [t[1] for t in rwkv_tokens])
115
  utf8_boundaries = set([0])
 
 
116
  byte_pos = 0
117
  for ch in text:
118
- byte_pos += len(ch.encode("utf-8"))
 
119
  utf8_boundaries.add(byte_pos)
 
 
 
 
120
  common_boundaries = sorted(qwen_boundaries & rwkv_boundaries & utf8_boundaries)
121
  # Ensure we always include the end boundary
122
  text_end = len(text.encode("utf-8"))
@@ -124,6 +137,75 @@ def get_token_info_for_text(text: str) -> dict:
124
  common_boundaries.append(text_end)
125
  common_boundaries = sorted(common_boundaries)
126
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  return {
128
  "common_boundaries": common_boundaries,
129
  "qwen_tokens": qwen_tokens,
@@ -145,6 +227,8 @@ def generate_comparison_html(
145
  tokenizer_b=None,
146
  model_type_a: str = "hf",
147
  model_type_b: str = "rwkv7",
 
 
148
  ) -> str:
149
  """
150
  Generate an interactive HTML visualization comparing two models.
@@ -161,9 +245,11 @@ def generate_comparison_html(
161
  tokenizer_b: Tokenizer for model B
162
  model_type_a: Type of model A ("hf" or "rwkv7")
163
  model_type_b: Type of model B ("hf" or "rwkv7")
 
 
164
 
165
  Returns:
166
- HTML string with interactive visualization
167
  """
168
 
169
  def decode_token(token_id: int, tokenizer, model_type: str) -> Tuple[str, bool]:
@@ -197,7 +283,12 @@ def generate_comparison_html(
197
  try:
198
  if model_type in ["rwkv", "rwkv7"]:
199
  # RWKV tokenizer provides raw bytes
200
- token_bytes = tokenizer.decodeBytes([token_id])
 
 
 
 
 
201
  if token_bytes:
202
  try:
203
  decoded = token_bytes.decode("utf-8")
@@ -290,7 +381,7 @@ def generate_comparison_html(
290
 
291
  # Get token info
292
  text_bytes = text.encode("utf-8")
293
- token_info = get_token_info_for_text(text)
294
  common_boundaries = token_info["common_boundaries"]
295
  qwen_tokens = token_info["qwen_tokens"]
296
  rwkv_tokens = token_info["rwkv_tokens"]
@@ -301,14 +392,13 @@ def generate_comparison_html(
301
 
302
  def get_tokens_for_range(byte_start, byte_end, token_list):
303
  result = []
304
- for t_start, t_end, token_id, t_bytes in token_list:
305
  if t_start < byte_end and t_end > byte_start:
306
- result.append((token_id, t_bytes))
307
  return result
308
 
309
  # Build tokens based on common boundaries
310
  tokens = []
311
- token_count = 0
312
  for i in range(len(common_boundaries) - 1):
313
  start_byte = common_boundaries[i]
314
  end_byte = common_boundaries[i + 1]
@@ -361,22 +451,8 @@ def generate_comparison_html(
361
  token["word_id"] = word_id_counter
362
  word_id_counter += 1
363
 
364
- # Build HTML content
365
- html_content = []
366
-
367
- def escape_for_attr(s):
368
- # Escape all characters that could break HTML attributes
369
- # Order matters: & must be escaped first
370
- return (
371
- s.replace("&", "&amp;")
372
- .replace('"', "&quot;")
373
- .replace("'", "&#39;")
374
- .replace("<", "&lt;")
375
- .replace(">", "&gt;")
376
- .replace("\n", "&#10;")
377
- .replace("\r", "&#13;")
378
- .replace("\t", "&#9;")
379
- )
380
 
381
  for token in tokens:
382
  token_text = token["text"]
@@ -402,18 +478,6 @@ def generate_comparison_html(
402
  except UnicodeDecodeError:
403
  return "".join([f"\\x{b:02x}" for b in token_bytes]), True
404
 
405
- # Model A (RWKV7) - tokens overlapping this byte range
406
- model_a_info = ""
407
- if token["rwkv_tokens"]:
408
- model_a_list = [[tid, *token_bytes_to_display_text(tb)] for tid, tb in token["rwkv_tokens"]]
409
- model_a_info = base64.b64encode(json.dumps(model_a_list, ensure_ascii=False).encode("utf-8")).decode("ascii")
410
-
411
- # Model B (Qwen3) - tokens overlapping this byte range
412
- model_b_info = ""
413
- if token["qwen_tokens"]:
414
- model_b_list = [[tid, *token_bytes_to_display_text(tb)] for tid, tb in token["qwen_tokens"]]
415
- model_b_info = base64.b64encode(json.dumps(model_b_list, ensure_ascii=False).encode("utf-8")).decode("ascii")
416
-
417
  raw_bytes = list(text_bytes[byte_start:byte_end])
418
  losses_a = byte_losses_a[byte_start:byte_end]
419
  losses_b = byte_losses_b[byte_start:byte_end]
@@ -426,8 +490,8 @@ def generate_comparison_html(
426
  avg_compression_a_token = sum(losses_a) / len(losses_a) * COMPRESSION_RATE_FACTOR if losses_a else 0
427
  avg_compression_b_token = sum(losses_b) / len(losses_b) * COMPRESSION_RATE_FACTOR if losses_b else 0
428
 
429
- topk_a_json = ""
430
- topk_b_json = ""
431
  if topk_predictions_a is not None and model_a_token_ranges:
432
  model_a_token_idx = find_token_for_byte(byte_start, model_a_token_ranges)
433
  if model_a_token_idx is not None and model_a_token_idx < len(topk_predictions_a):
@@ -435,19 +499,18 @@ def generate_comparison_html(
435
  try:
436
  if len(pred) >= 4:
437
  actual_id, rank, actual_prob, topk_list = pred[0], pred[1], pred[2], pred[3]
438
- decoded_pred = [
439
  actual_id,
440
  rank,
441
  actual_prob,
442
  [[tid, prob, *decode_token(tid, tokenizer_a, model_type_a)] for tid, prob in topk_list],
443
  ]
444
  else:
445
- decoded_pred = [
446
  pred[0],
447
  pred[1],
448
  [[tid, prob, *decode_token(tid, tokenizer_a, model_type_a)] for tid, prob in pred[2]],
449
  ]
450
- topk_a_json = base64.b64encode(json.dumps(decoded_pred, ensure_ascii=False).encode("utf-8")).decode("ascii")
451
  except Exception as e:
452
  pass
453
  if topk_predictions_b is not None and model_b_token_ranges:
@@ -457,20 +520,17 @@ def generate_comparison_html(
457
  try:
458
  if len(pred) >= 4:
459
  actual_id, rank, actual_prob, topk_list = pred[0], pred[1], pred[2], pred[3]
460
- decoded_pred = [
461
  actual_id,
462
  rank,
463
  actual_prob,
464
  [[tid, prob, *decode_token(tid, tokenizer_b, model_type_b)] for tid, prob in topk_list],
465
  ]
466
  else:
467
- decoded_pred = [pred[0], pred[1], [[tid, prob, *decode_token(tid, tokenizer_b, model_type_b)] for tid, prob in pred[2]]]
468
- topk_b_json = base64.b64encode(json.dumps(decoded_pred, ensure_ascii=False).encode("utf-8")).decode("ascii")
469
  except Exception as e:
470
  pass
471
 
472
- token_count += 1
473
-
474
  token_deltas = deltas[byte_start:byte_end]
475
  avg_token_delta = sum(token_deltas) / len(token_deltas) if token_deltas else 0
476
  tuned_delta = avg_token_delta - avg_delta
@@ -478,695 +538,124 @@ def generate_comparison_html(
478
  # Initial rendering uses white color, JavaScript will apply colors based on slider
479
  r, g, b = 255, 255, 255
480
 
481
- token_html_parts = []
482
- for char in token_text:
483
- if char == "<":
484
- escaped_char = "&lt;"
485
- elif char == ">":
486
- escaped_char = "&gt;"
487
- elif char == "&":
488
- escaped_char = "&amp;"
489
- elif char == "\t":
490
- escaped_char = " "
491
- else:
492
- escaped_char = char
493
- token_html_parts.append(escaped_char)
494
-
495
- token_span_content = "".join(token_html_parts)
496
- data_attrs = (
497
- f'data-model-a="{escape_for_attr(model_a_info)}" '
498
- f'data-model-b="{escape_for_attr(model_b_info)}" '
499
- f'data-bytes="{escape_for_attr(bytes_str)}" '
500
- f'data-compression-a="{escape_for_attr(compression_a_str)}" '
501
- f'data-compression-b="{escape_for_attr(compression_b_str)}" '
502
- f'data-avg-compression-a="{avg_compression_a_token:.2f}" '
503
- f'data-avg-compression-b="{avg_compression_b_token:.2f}" '
504
- f'data-tuned-delta="{tuned_delta:.6f}" '
505
- f'data-topk-a="{escape_for_attr(topk_a_json)}" '
506
- f'data-topk-b="{escape_for_attr(topk_b_json)}"'
507
- )
508
- style_attr = f'style="background-color: rgb({r},{g},{b})"'
509
 
510
- if token["type"] == "word":
511
- word_lower = token["word_lower"]
512
- occurrences = word_occurrences[word_lower]
513
- if len(occurrences) > 1:
514
- word_id = token["word_id"]
515
- html_content.append(
516
- f'<span class="token word" {data_attrs} {style_attr} data-word="{word_lower}" data-word-id="{word_id}">'
517
- + token_span_content
518
- + "</span>"
519
- )
520
- else:
521
- html_content.append(f'<span class="token" {data_attrs} {style_attr}>{token_span_content}</span>')
522
- else:
523
- html_content.append(f'<span class="token" {data_attrs} {style_attr}>{token_span_content}</span>')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
524
 
525
  delta_color = "#64ff64" if avg_delta < 0 else "#ff6464"
526
 
527
- html = f"""<!DOCTYPE html>
528
- <html>
529
- <head>
530
- <meta charset="UTF-8">
531
- <title>Model Comparison</title>
532
- <style>
533
- body {{
534
- font-family: Consolas, 'Courier New', monospace;
535
- margin: 0;
536
- padding: 0;
537
- background-color: #f5f5f5;
538
- }}
539
- .header {{
540
- background-color: #333;
541
- color: white;
542
- padding: 20px;
543
- position: sticky;
544
- top: 0;
545
- z-index: 100;
546
- }}
547
- .header h1 {{
548
- margin: 0 0 15px 0;
549
- font-size: 18px;
550
- }}
551
- .meta {{
552
- display: flex;
553
- flex-wrap: wrap;
554
- gap: 20px;
555
- font-size: 12px;
556
- color: #c8c8c8;
557
- }}
558
- .legend {{
559
- display: flex;
560
- gap: 15px;
561
- margin-top: 10px;
562
- }}
563
- .legend-item {{
564
- display: flex;
565
- align-items: center;
566
- gap: 5px;
567
- }}
568
- .legend-box {{
569
- width: 20px;
570
- height: 12px;
571
- border: 1px solid #666;
572
- }}
573
- .content {{
574
- background-color: white;
575
- margin: 10px;
576
- padding: 15px;
577
- border: 1px solid #ccc;
578
- font-size: 14px;
579
- line-height: 1.8;
580
- white-space: pre-wrap;
581
- overflow-wrap: anywhere;
582
- word-wrap: break-word;
583
- position: relative;
584
- }}
585
- .content span {{
586
- padding: 1px 0;
587
- }}
588
- .word {{
589
- cursor: pointer;
590
- position: relative;
591
- }}
592
- .word:hover {{
593
- outline: 2px solid #007bff;
594
- outline-offset: 1px;
595
- }}
596
- .word.highlighted {{
597
- outline: 2px solid #ff6b6b;
598
- outline-offset: 1px;
599
- }}
600
- #svg-overlay {{
601
- position: fixed;
602
- top: 0;
603
- left: 0;
604
- width: 100%;
605
- height: 100%;
606
- pointer-events: none;
607
- z-index: 1000;
608
- }}
609
- .link-line {{
610
- stroke: #007bff;
611
- stroke-width: 2;
612
- fill: none;
613
- opacity: 0.7;
614
- }}
615
- .link-dot {{
616
- fill: #007bff;
617
- opacity: 0.8;
618
- }}
619
- .token {{
620
- position: relative;
621
- cursor: help;
622
- }}
623
- .token:hover {{
624
- outline: 1px dashed #666;
625
- }}
626
- #tooltip {{
627
- position: fixed;
628
- background-color: rgba(0, 0, 0, 0.9);
629
- color: white;
630
- padding: 10px 14px;
631
- border-radius: 6px;
632
- font-size: 12px;
633
- max-width: 500px;
634
- z-index: 2000;
635
- pointer-events: none;
636
- display: none;
637
- line-height: 1.6;
638
- box-shadow: 0 2px 10px rgba(0,0,0,0.3);
639
- }}
640
- #tooltip .label {{
641
- color: #aaa;
642
- font-weight: bold;
643
- }}
644
- #tooltip .bytes {{
645
- color: #a5f3fc;
646
- font-family: monospace;
647
- }}
648
- #tooltip .loss-a {{
649
- color: #86efac;
650
- font-family: monospace;
651
- }}
652
- #tooltip .loss-b {{
653
- color: #fca5a5;
654
- font-family: monospace;
655
- }}
656
- #tooltip .model-a {{
657
- color: #fcd34d;
658
- }}
659
- #tooltip .model-b {{
660
- color: #7dd3fc;
661
- }}
662
- #tooltip .topk-section {{
663
- margin-top: 8px;
664
- padding-top: 8px;
665
- border-top: 1px solid #555;
666
- }}
667
- #tooltip .topk-container {{
668
- display: flex;
669
- gap: 16px;
670
- }}
671
- #tooltip .topk-column {{
672
- flex: 1;
673
- min-width: 180px;
674
- }}
675
- #tooltip .topk-title {{
676
- color: #aaa;
677
- font-weight: bold;
678
- margin-bottom: 4px;
679
- font-size: 11px;
680
- }}
681
- #tooltip .topk-title.model-a {{
682
- color: #86efac;
683
- }}
684
- #tooltip .topk-title.model-b {{
685
- color: #fca5a5;
686
- }}
687
- #tooltip .topk-list {{
688
- font-size: 11px;
689
- }}
690
- #tooltip .topk-item {{
691
- display: flex;
692
- gap: 4px;
693
- padding: 1px 0;
694
- align-items: flex-start;
695
- }}
696
- #tooltip .token-block {{
697
- margin-top: 6px;
698
- display: flex;
699
- align-items: center;
700
- gap: 6px;
701
- white-space: nowrap;
702
- }}
703
- #tooltip .token-chips {{
704
- display: flex;
705
- flex-wrap: nowrap;
706
- gap: 4px;
707
- }}
708
- #tooltip .token-chip-group {{
709
- display: inline-flex;
710
- align-items: center;
711
- gap: 4px;
712
- }}
713
- #tooltip .token-id {{
714
- color: #888;
715
- font-family: monospace;
716
- }}
717
- #tooltip .token-chip {{
718
- max-width: 100%;
719
- }}
720
- #tooltip .topk-rank {{
721
- color: #888;
722
- min-width: 18px;
723
- }}
724
- #tooltip .topk-rank.hit {{
725
- color: #ffd700;
726
- }}
727
- #tooltip .topk-token {{
728
- color: #a5f3fc;
729
- white-space: pre-wrap;
730
- overflow-wrap: anywhere;
731
- word-break: break-word;
732
- font-family: monospace;
733
- background-color: rgba(255, 255, 255, 0.08);
734
- padding: 0 4px;
735
- border-radius: 3px;
736
- display: inline-block;
737
- max-width: 100%;
738
- }}
739
- #tooltip .esc-control {{
740
- color: #fbbf24;
741
- }}
742
- #tooltip .esc-raw {{
743
- color: #fb7185;
744
- }}
745
- #tooltip .topk-prob {{
746
- color: #86efac;
747
- min-width: 45px;
748
- text-align: right;
749
- }}
750
- #tooltip .topk-hit {{
751
- color: #22c55e;
752
- }}
753
- #tooltip .topk-miss {{
754
- color: #ef4444;
755
- font-style: italic;
756
- }}
757
- </style>
758
- </head>
759
- <body>
760
- <svg id="svg-overlay"></svg>
761
- <div id="tooltip"></div>
762
- <div class="header">
763
- <div class="meta">
764
- <div>Model A: {model_a_name}</div>
765
- <div>Model B: {model_b_name}</div>
766
- <div>RWKV Compression: {avg_compression_a:.2f}%</div>
767
- <div>Qwen Compression: {avg_compression_b:.2f}%</div>
768
- <div style="color: {delta_color}">Avg Delta: {avg_delta_compression:+.2f}%</div>
769
- </div>
770
- <div class="legend">
771
- <div class="legend-item">
772
- <div class="legend-box" style="background-color: rgb(77, 255, 77)"></div>
773
- <span>RWKV better than avg</span>
774
- </div>
775
- <div class="legend-item">
776
- <div class="legend-box" style="background-color: rgb(255, 255, 255)"></div>
777
- <span>Equal to avg</span>
778
  </div>
779
- <div class="legend-item">
780
- <div class="legend-box" style="background-color: rgb(255, 77, 77)"></div>
781
- <span>RWKV worse than avg</span>
782
- </div>
783
- <div class="legend-item" style="margin-left: 20px;">
784
- <span style="color: #aaa;">Color Range:</span>
785
- <input type="range" id="color-range-slider" min="0" max="100" value="10" step="0.1" style="width: 200px; vertical-align: middle;">
786
- <span id="color-range-value" style="color: #fff; min-width: 45px; display: inline-block;">10%</span>
 
 
 
 
 
 
 
 
 
 
787
  </div>
788
  </div>
789
- </div>
790
- <div class="content">{''.join(html_content)}</div>
791
- <script>
792
- const svgOverlay = document.getElementById('svg-overlay');
793
- const words = document.querySelectorAll('.word');
794
-
795
- const wordGroups = {{}};
796
- words.forEach(word => {{
797
- const wordText = word.getAttribute('data-word');
798
- if (!wordGroups[wordText]) {{
799
- wordGroups[wordText] = [];
800
- }}
801
- wordGroups[wordText].push(word);
802
- }});
803
-
804
- function clearLines() {{
805
- svgOverlay.innerHTML = '';
806
- words.forEach(w => w.classList.remove('highlighted'));
807
- }}
808
-
809
- function pickRectByY(rects, targetY) {{
810
- if (!rects || rects.length === 0) return null;
811
- let best = rects[0];
812
- let bestDist = Infinity;
813
- rects.forEach(r => {{
814
- const cy = r.top + r.height / 2;
815
- const dist = Math.abs(cy - targetY);
816
- if (dist < bestDist) {{
817
- best = r;
818
- bestDist = dist;
819
- }}
820
- }});
821
- return best;
822
- }}
823
-
824
- function getAnchorRect(element, targetY) {{
825
- const rects = Array.from(element.getClientRects());
826
- if (rects.length === 0) return element.getBoundingClientRect();
827
- if (rects.length === 1) return rects[0];
828
- const picked = pickRectByY(rects, targetY);
829
- return picked || rects[0];
830
- }}
831
-
832
- function drawLines(hoveredWord, evt) {{
833
- clearLines();
834
-
835
- const wordText = hoveredWord.getAttribute('data-word');
836
- const wordId = parseInt(hoveredWord.getAttribute('data-word-id'));
837
- const sameWords = wordGroups[wordText] || [];
838
-
839
- const previousWords = sameWords.filter(w => {{
840
- const id = parseInt(w.getAttribute('data-word-id'));
841
- return id < wordId;
842
- }});
843
-
844
- if (previousWords.length === 0) return;
845
-
846
- sameWords.forEach(w => w.classList.add('highlighted'));
847
-
848
- const targetY = evt ? evt.clientY : (hoveredWord.getBoundingClientRect().top + hoveredWord.getBoundingClientRect().height / 2);
849
- const hoveredRect = getAnchorRect(hoveredWord, targetY);
850
- const hoveredX = hoveredRect.left + hoveredRect.width / 2;
851
- const hoveredY = hoveredRect.top + hoveredRect.height / 2;
852
-
853
- previousWords.forEach(prevWord => {{
854
- const prevRect = getAnchorRect(prevWord, hoveredY);
855
- const prevX = prevRect.left + prevRect.width / 2;
856
- const prevY = prevRect.top + prevRect.height / 2;
857
-
858
- const midX = (hoveredX + prevX) / 2;
859
- const midY = Math.min(hoveredY, prevY) - 30;
860
-
861
- const path = document.createElementNS('http://www.w3.org/2000/svg', 'path');
862
- path.setAttribute('class', 'link-line');
863
- path.setAttribute('d', `M ${{prevX}} ${{prevY}} Q ${{midX}} ${{midY}} ${{hoveredX}} ${{hoveredY}}`);
864
- svgOverlay.appendChild(path);
865
-
866
- const dot1 = document.createElementNS('http://www.w3.org/2000/svg', 'circle');
867
- dot1.setAttribute('class', 'link-dot');
868
- dot1.setAttribute('cx', prevX);
869
- dot1.setAttribute('cy', prevY);
870
- dot1.setAttribute('r', 4);
871
- svgOverlay.appendChild(dot1);
872
-
873
- const dot2 = document.createElementNS('http://www.w3.org/2000/svg', 'circle');
874
- dot2.setAttribute('class', 'link-dot');
875
- dot2.setAttribute('cx', hoveredX);
876
- dot2.setAttribute('cy', hoveredY);
877
- dot2.setAttribute('r', 4);
878
- svgOverlay.appendChild(dot2);
879
- }});
880
- }}
881
-
882
- words.forEach(word => {{
883
- word.addEventListener('mouseenter', (e) => drawLines(word, e));
884
- word.addEventListener('mouseleave', clearLines);
885
- }});
886
-
887
- window.addEventListener('scroll', clearLines);
888
-
889
- const tooltip = document.getElementById('tooltip');
890
- const tokenSpans = document.querySelectorAll('.token');
891
-
892
- tokenSpans.forEach(token => {{
893
- token.addEventListener('mouseenter', (e) => {{
894
- const modelA = token.getAttribute('data-model-a') || '';
895
- const modelB = token.getAttribute('data-model-b') || '';
896
- const bytes = token.getAttribute('data-bytes') || '';
897
- const compressionA = token.getAttribute('data-compression-a') || '';
898
- const compressionB = token.getAttribute('data-compression-b') || '';
899
- const avgCompressionA = token.getAttribute('data-avg-compression-a') || '';
900
- const avgCompressionB = token.getAttribute('data-avg-compression-b') || '';
901
- const top5A = token.getAttribute('data-topk-a') || '';
902
- const top5B = token.getAttribute('data-topk-b') || '';
903
-
904
- function decodeBase64Json(base64Str) {{
905
- const binaryString = atob(base64Str);
906
- const bytes = new Uint8Array(binaryString.length);
907
- for (let i = 0; i < binaryString.length; i++) {{
908
- bytes[i] = binaryString.charCodeAt(i);
909
- }}
910
- const jsonStr = new TextDecoder('utf-8').decode(bytes);
911
- return JSON.parse(jsonStr);
912
- }}
913
-
914
- function escapeControlChars(text) {{
915
- if (!text) return text;
916
- let out = '';
917
- for (let i = 0; i < text.length; i++) {{
918
- const ch = text[i];
919
- const code = text.charCodeAt(i);
920
- if (ch === '\\\\') {{
921
- out += '\\\\\\\\';
922
- }} else if (ch === '\\n') {{
923
- out += '\\\\n';
924
- }} else if (ch === '\\r') {{
925
- out += '\\\\r';
926
- }} else if (ch === '\\t') {{
927
- out += '\\\\t';
928
- }} else if (code < 32 || code === 127) {{
929
- out += '\\\\x' + code.toString(16).padStart(2, '0');
930
- }} else {{
931
- out += ch;
932
- }}
933
- }}
934
- return out;
935
- }}
936
-
937
- function renderEscapedWithControlColor(text) {{
938
- const escaped = (text || '')
939
- .replace(/&/g, '&amp;')
940
- .replace(/</g, '&lt;')
941
- .replace(/>/g, '&gt;');
942
- return escaped.replace(/\\\\(x[0-9a-fA-F]{2}|[nrt])/g, '<span class="esc-control">\\\\$1</span>');
943
- }}
944
-
945
- function formatTopkColumn(topkBase64, modelName, titleClass) {{
946
- if (!topkBase64) return '<div class="topk-column"><div class="topk-title ' + titleClass + '">' + modelName + '</div><div class="topk-list">N/A</div></div>';
947
- try {{
948
- const data = decodeBase64Json(topkBase64);
949
- let actualId = null;
950
- let rank = null;
951
- let actualProb = null;
952
- let topkList = [];
953
- if (data.length >= 4) {{
954
- [actualId, rank, actualProb, topkList] = data;
955
- }} else {{
956
- [actualId, rank, topkList] = data;
957
- }}
958
- let html = '<div class="topk-column">';
959
- html += '<div class="topk-title ' + titleClass + '">' + modelName + '</div>';
960
- html += '<div class="topk-list">';
961
- topkList.forEach((item, idx) => {{
962
- const tokenId = item[0];
963
- const prob = item[1];
964
- const tokenText = item[2];
965
- const isRaw = item.length > 3 ? item[3] : false;
966
- const isHit = tokenId === actualId;
967
- const rankClass = isHit ? 'topk-rank hit' : 'topk-rank';
968
- const rawText = (tokenText !== undefined && tokenText !== null) ? tokenText : '';
969
- let displayText = '';
970
- let htmlText = '';
971
- if (isRaw) {{
972
- displayText = (rawText !== '') ? rawText : ('[' + tokenId + ']');
973
- const escapedText = displayText
974
- .replace(/&/g, '&amp;')
975
- .replace(/</g, '&lt;')
976
- .replace(/>/g, '&gt;');
977
- htmlText = '<span class="esc-raw">' + escapedText + '</span>';
978
- }} else {{
979
- const visibleText = escapeControlChars(rawText);
980
- displayText = (visibleText !== '') ? visibleText : ('[' + tokenId + ']');
981
- htmlText = renderEscapedWithControlColor(displayText);
982
- }}
983
- html += '<div class="topk-item">';
984
- html += '<span class="' + rankClass + '">' + (idx + 1) + '.</span>';
985
- html += '<span class="topk-token" title="ID: ' + tokenId + '">' + htmlText + '</span>';
986
- html += '<span class="topk-prob">' + (prob * 100).toFixed(1) + '%</span>';
987
- if (isHit) html += '<span class="topk-hit">✓</span>';
988
- html += '</div>';
989
- }});
990
- if (rank > 10) {{
991
- let probSuffix = '';
992
- const probVal = parseFloat(actualProb);
993
- if (!isNaN(probVal)) {{
994
- probSuffix = ' (' + (probVal * 100).toFixed(4) + '%)';
995
- }}
996
- html += '<div class="topk-item topk-miss">Actual rank: ' + rank + probSuffix + '</div>';
997
- }}
998
- html += '</div></div>';
999
- return html;
1000
- }} catch (e) {{
1001
- console.error('Error in formatTopkColumn for ' + modelName + ':', e);
1002
- console.error('topkBase64:', topkBase64);
1003
- return '<div class="topk-column"><div class="topk-title ' + titleClass + '">' + modelName + '</div><div class="topk-list">Error: ' + e.message + '</div></div>';
1004
- }}
1005
- }}
1006
-
1007
- function formatTokenChips(modelBase64, label, labelClass) {{
1008
- if (!modelBase64) {{
1009
- return '<div class="token-block"><span class="label ' + labelClass + '">' + label + ':</span> <span class="topk-token token-chip">N/A</span></div>';
1010
- }}
1011
- try {{
1012
- const tokenList = decodeBase64Json(modelBase64);
1013
- let html = '<div class="token-block">';
1014
- html += '<span class="label ' + labelClass + '">' + label + ':</span>';
1015
- html += '<div class="token-chips">';
1016
- tokenList.forEach((item) => {{
1017
- const tokenId = item[0];
1018
- const tokenText = item[1];
1019
- const isRaw = item.length > 2 ? item[2] : false;
1020
- let displayText = '';
1021
- let htmlText = '';
1022
- if (isRaw) {{
1023
- displayText = tokenText || '';
1024
- const escapedText = displayText
1025
- .replace(/&/g, '&amp;')
1026
- .replace(/</g, '&lt;')
1027
- .replace(/>/g, '&gt;');
1028
- htmlText = '<span class="esc-raw">' + escapedText + '</span>';
1029
- }} else {{
1030
- const visible = escapeControlChars(tokenText || '');
1031
- displayText = (visible !== '') ? visible : '';
1032
- htmlText = renderEscapedWithControlColor(displayText);
1033
- }}
1034
- html += '<span class="token-chip-group" title="ID: ' + tokenId + '">';
1035
- html += '<span class="token-id">[' + tokenId + ']</span>';
1036
- html += '<span class="topk-token token-chip">' + htmlText + '</span>';
1037
- html += '</span>';
1038
- }});
1039
- html += '</div></div>';
1040
- return html;
1041
- }} catch (e) {{
1042
- console.error('Error in formatTokenChips for ' + label + ':', e);
1043
- console.error('modelBase64:', modelBase64);
1044
- return '<div class="token-block"><span class="label ' + labelClass + '">' + label + ':</span> <span class="topk-token token-chip">Error: ' + e.message + '</span></div>';
1045
- }}
1046
- }}
1047
-
1048
- let tooltipHtml = `
1049
- <div><span class="label">Bytes:</span> <span class="bytes">${{bytes || '(empty)'}}</span></div>
1050
- <div><span class="label">RWKV Compression Rate:</span> <span class="loss-a">${{compressionA || '(empty)'}}${{avgCompressionA ? ' (avg: ' + avgCompressionA + '%)' : ''}}</span></div>
1051
- <div><span class="label">Qwen Compression Rate:</span> <span class="loss-b">${{compressionB || '(empty)'}}${{avgCompressionB ? ' (avg: ' + avgCompressionB + '%)' : ''}}</span></div>
1052
- <hr style="border-color: #555; margin: 6px 0;">
1053
- ${{formatTokenChips(modelA, 'RWKV', 'model-a')}}
1054
- ${{formatTokenChips(modelB, 'Qwen', 'model-b')}}
1055
- `;
1056
- if (top5A || top5B) {{
1057
- tooltipHtml += '<div class="topk-section"><div class="topk-container">';
1058
- tooltipHtml += formatTopkColumn(top5A, 'RWKV Top10', 'model-a');
1059
- tooltipHtml += formatTopkColumn(top5B, 'Qwen Top10', 'model-b');
1060
- tooltipHtml += '</div></div>';
1061
- }}
1062
- tooltip.innerHTML = tooltipHtml;
1063
- tooltip.style.display = 'block';
1064
- }});
1065
-
1066
- token.addEventListener('mousemove', (e) => {{
1067
- const tooltipRect = tooltip.getBoundingClientRect();
1068
- const viewportWidth = window.innerWidth;
1069
- const viewportHeight = window.innerHeight;
1070
-
1071
- let x = e.clientX + 15;
1072
- let y = e.clientY + 15;
1073
-
1074
- if (x + tooltipRect.width > viewportWidth - 10) {{
1075
- x = e.clientX - tooltipRect.width - 15;
1076
- }}
1077
- if (y + tooltipRect.height > viewportHeight - 10) {{
1078
- y = e.clientY - tooltipRect.height - 15;
1079
- }}
1080
- if (x < 10) x = 10;
1081
- if (y < 10) y = 10;
1082
-
1083
- tooltip.style.left = x + 'px';
1084
- tooltip.style.top = y + 'px';
1085
- }});
1086
-
1087
- token.addEventListener('mouseleave', () => {{
1088
- tooltip.style.display = 'none';
1089
- }});
1090
- }});
1091
-
1092
- const slider = document.getElementById('color-range-slider');
1093
- const rangeValue = document.getElementById('color-range-value');
1094
-
1095
- // Collect all tuned_delta values
1096
- const tokenData = [];
1097
- tokenSpans.forEach((token, idx) => {{
1098
- const tunedDelta = parseFloat(token.getAttribute('data-tuned-delta'));
1099
- if (!isNaN(tunedDelta)) {{
1100
- tokenData.push({{ token, tunedDelta, absDelta: Math.abs(tunedDelta) }});
1101
- }}
1102
- }});
1103
-
1104
- // Calculate max_abs_tuned_delta for normalization
1105
- const maxAbsDelta = Math.max(...tokenData.map(d => d.absDelta), 1e-9);
1106
-
1107
- // Sort by |tuned_delta| to get rankings
1108
- const sortedByAbs = [...tokenData].sort((a, b) => b.absDelta - a.absDelta);
1109
- sortedByAbs.forEach((item, rank) => {{
1110
- item.rank = rank; // rank 0 = largest deviation
1111
- }});
1112
-
1113
- function tunedDeltaToColor(tunedDelta, maxAbsDelta, exponent) {{
1114
- // Normalize to [-1, 1]
1115
- const normalized = Math.max(-1, Math.min(1, tunedDelta / maxAbsDelta));
1116
- let r, g, b;
1117
- if (normalized < 0) {{
1118
- // Green (RWKV better)
1119
- const intensity = Math.pow(-normalized, exponent);
1120
- r = Math.round(255 * (1 - intensity * 0.85));
1121
- g = 255;
1122
- b = Math.round(255 * (1 - intensity * 0.85));
1123
- }} else {{
1124
- // Red (RWKV worse)
1125
- const intensity = Math.pow(normalized, exponent);
1126
- r = 255;
1127
- g = Math.round(255 * (1 - intensity * 0.85));
1128
- b = Math.round(255 * (1 - intensity * 0.85));
1129
- }}
1130
- return `rgb(${{r}}, ${{g}}, ${{b}})`;
1131
- }}
1132
-
1133
- function updateColors(colorRangePercent) {{
1134
- // colorRangePercent: 0-100, represents the proportion of tokens to color
1135
- const colorCount = Math.round(tokenData.length * colorRangePercent / 100);
1136
-
1137
- // Calculate exponent: 100% -> 0.5, 0% -> 1.0
1138
- const exponent = 1 - (colorRangePercent / 100) * 0.5;
1139
-
1140
- // Calculate max deviation within the colored range
1141
- let maxAbsDeltaInRange = 1e-9;
1142
- tokenData.forEach(item => {{
1143
- if (item.rank < colorCount) {{
1144
- maxAbsDeltaInRange = Math.max(maxAbsDeltaInRange, item.absDelta);
1145
- }}
1146
- }});
1147
-
1148
- tokenData.forEach(item => {{
1149
- if (item.rank < colorCount) {{
1150
- // Use dynamic normalization based on colored range
1151
- item.token.style.backgroundColor = tunedDeltaToColor(item.tunedDelta, maxAbsDeltaInRange, exponent);
1152
- }} else {{
1153
- // Outside color range, white
1154
- item.token.style.backgroundColor = 'rgb(255, 255, 255)';
1155
- }}
1156
- }});
1157
- }}
1158
-
1159
- slider.addEventListener('input', (e) => {{
1160
- const val = parseFloat(e.target.value);
1161
- rangeValue.textContent = val.toFixed(1) + '%';
1162
- updateColors(val);
1163
- }});
1164
-
1165
- // Apply default color range on page load
1166
- updateColors(10);
1167
- </script>
1168
- </body>
1169
- </html>
1170
- """
1171
-
1172
- return html
 
4
  Generates interactive HTML visualizations comparing byte-level losses between two models.
5
  """
6
 
7
+ import bisect
8
  import json
9
  import math
10
  import re
11
+ from pathlib import Path
12
  from typing import List, Tuple, Optional, Set
13
 
14
  import numpy as np
15
 
16
+ from core.escaping import escape_json_for_script
17
+ from core.render_model import RenderModel, TokenInfo, build_display
18
+ from visualization.render import render_page
19
  from core.helpers import TokenizerBytesConverter
20
 
21
+ ASSETS_DIR = Path(__file__).resolve().parent / "assets"
22
+
23
 
24
  # Compression rate conversion factor
25
  COMPRESSION_RATE_FACTOR = (1.0 / math.log(2.0)) * 0.125 * 100.0
 
119
  qwen_boundaries = set([0] + [t[1] for t in qwen_tokens])
120
  rwkv_boundaries = set([0] + [t[1] for t in rwkv_tokens])
121
  utf8_boundaries = set([0])
122
+ whitespace_boundaries = set()
123
+ linebreak_boundaries = set()
124
  byte_pos = 0
125
  for ch in text:
126
+ ch_bytes = ch.encode("utf-8")
127
+ byte_pos += len(ch_bytes)
128
  utf8_boundaries.add(byte_pos)
129
+ if ch.isspace():
130
+ whitespace_boundaries.add(byte_pos)
131
+ if ch in ("\n", "\r"):
132
+ linebreak_boundaries.add(byte_pos)
133
  common_boundaries = sorted(qwen_boundaries & rwkv_boundaries & utf8_boundaries)
134
  # Ensure we always include the end boundary
135
  text_end = len(text.encode("utf-8"))
 
137
  common_boundaries.append(text_end)
138
  common_boundaries = sorted(common_boundaries)
139
 
140
+ # Refine overly large segments to avoid giant spans in the UI.
141
+ max_segment_bytes = 24
142
+ utf8_sorted = sorted(utf8_boundaries)
143
+ linebreak_sorted = sorted(linebreak_boundaries)
144
+
145
+ def split_by_max(start: int, end: int) -> List[int]:
146
+ if end - start <= max_segment_bytes:
147
+ return [end]
148
+ left = bisect.bisect_right(utf8_sorted, start)
149
+ right = bisect.bisect_left(utf8_sorted, end)
150
+ candidates = utf8_sorted[left:right]
151
+ if not candidates:
152
+ return [end]
153
+ out = []
154
+ pos = start
155
+ idx = 0
156
+ while pos < end:
157
+ limit = min(end, pos + max_segment_bytes)
158
+ j = bisect.bisect_right(candidates, limit) - 1
159
+ if j < idx:
160
+ out.append(end)
161
+ break
162
+ split_at = None
163
+ for k in range(j, idx - 1, -1):
164
+ if candidates[k] in whitespace_boundaries:
165
+ split_at = candidates[k]
166
+ j = k
167
+ break
168
+ if split_at is None:
169
+ split_at = candidates[j]
170
+ if split_at <= pos:
171
+ split_at = candidates[j]
172
+ out.append(split_at)
173
+ pos = split_at
174
+ idx = j + 1
175
+ if pos >= end:
176
+ break
177
+ if idx >= len(candidates):
178
+ out.append(end)
179
+ break
180
+ if not out:
181
+ out = [end]
182
+ elif out[-1] != end:
183
+ out.append(end)
184
+ return out
185
+
186
+ def split_segment(start: int, end: int) -> List[int]:
187
+ if start >= end:
188
+ return []
189
+ lb_left = bisect.bisect_right(linebreak_sorted, start)
190
+ lb_right = bisect.bisect_left(linebreak_sorted, end)
191
+ linebreaks = linebreak_sorted[lb_left:lb_right]
192
+ if not linebreaks:
193
+ return split_by_max(start, end)
194
+ out = []
195
+ seg_start = start
196
+ for lb in linebreaks:
197
+ out.extend(split_by_max(seg_start, lb))
198
+ seg_start = lb
199
+ out.extend(split_by_max(seg_start, end))
200
+ return out
201
+
202
+ refined_boundaries = [common_boundaries[0]] if common_boundaries else [0]
203
+ for i in range(len(common_boundaries) - 1):
204
+ start = common_boundaries[i]
205
+ end = common_boundaries[i + 1]
206
+ refined_boundaries.extend(split_segment(start, end))
207
+ common_boundaries = sorted(set(refined_boundaries))
208
+
209
  return {
210
  "common_boundaries": common_boundaries,
211
  "qwen_tokens": qwen_tokens,
 
227
  tokenizer_b=None,
228
  model_type_a: str = "hf",
229
  model_type_b: str = "rwkv7",
230
+ token_info_override: Optional[dict] = None,
231
+ return_render_model: bool = False,
232
  ) -> str:
233
  """
234
  Generate an interactive HTML visualization comparing two models.
 
245
  tokenizer_b: Tokenizer for model B
246
  model_type_a: Type of model A ("hf" or "rwkv7")
247
  model_type_b: Type of model B ("hf" or "rwkv7")
248
+ token_info_override: Optional precomputed token info (for offline tests).
249
+ return_render_model: If True, return (html, render_model_dict)
250
 
251
  Returns:
252
+ HTML string with interactive visualization, or (html, render_model_dict) if return_render_model=True
253
  """
254
 
255
  def decode_token(token_id: int, tokenizer, model_type: str) -> Tuple[str, bool]:
 
283
  try:
284
  if model_type in ["rwkv", "rwkv7"]:
285
  # RWKV tokenizer provides raw bytes
286
+ try:
287
+ token_bytes = tokenizer.decodeBytes([token_id])
288
+ except Exception as e:
289
+ if token_id == 0:
290
+ return f"[{token_id}]", False
291
+ raise e
292
  if token_bytes:
293
  try:
294
  decoded = token_bytes.decode("utf-8")
 
381
 
382
  # Get token info
383
  text_bytes = text.encode("utf-8")
384
+ token_info = token_info_override if token_info_override is not None else get_token_info_for_text(text)
385
  common_boundaries = token_info["common_boundaries"]
386
  qwen_tokens = token_info["qwen_tokens"]
387
  rwkv_tokens = token_info["rwkv_tokens"]
 
392
 
393
  def get_tokens_for_range(byte_start, byte_end, token_list):
394
  result = []
395
+ for idx, (t_start, t_end, token_id, t_bytes) in enumerate(token_list):
396
  if t_start < byte_end and t_end > byte_start:
397
+ result.append((idx, token_id, t_bytes))
398
  return result
399
 
400
  # Build tokens based on common boundaries
401
  tokens = []
 
402
  for i in range(len(common_boundaries) - 1):
403
  start_byte = common_boundaries[i]
404
  end_byte = common_boundaries[i + 1]
 
451
  token["word_id"] = word_id_counter
452
  word_id_counter += 1
453
 
454
+ # Build render model (HTML content built in JS)
455
+ render_tokens = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
456
 
457
  for token in tokens:
458
  token_text = token["text"]
 
478
  except UnicodeDecodeError:
479
  return "".join([f"\\x{b:02x}" for b in token_bytes]), True
480
 
 
 
 
 
 
 
 
 
 
 
 
 
481
  raw_bytes = list(text_bytes[byte_start:byte_end])
482
  losses_a = byte_losses_a[byte_start:byte_end]
483
  losses_b = byte_losses_b[byte_start:byte_end]
 
490
  avg_compression_a_token = sum(losses_a) / len(losses_a) * COMPRESSION_RATE_FACTOR if losses_a else 0
491
  avg_compression_b_token = sum(losses_b) / len(losses_b) * COMPRESSION_RATE_FACTOR if losses_b else 0
492
 
493
+ topk_a_data = None
494
+ topk_b_data = None
495
  if topk_predictions_a is not None and model_a_token_ranges:
496
  model_a_token_idx = find_token_for_byte(byte_start, model_a_token_ranges)
497
  if model_a_token_idx is not None and model_a_token_idx < len(topk_predictions_a):
 
499
  try:
500
  if len(pred) >= 4:
501
  actual_id, rank, actual_prob, topk_list = pred[0], pred[1], pred[2], pred[3]
502
+ topk_a_data = [
503
  actual_id,
504
  rank,
505
  actual_prob,
506
  [[tid, prob, *decode_token(tid, tokenizer_a, model_type_a)] for tid, prob in topk_list],
507
  ]
508
  else:
509
+ topk_a_data = [
510
  pred[0],
511
  pred[1],
512
  [[tid, prob, *decode_token(tid, tokenizer_a, model_type_a)] for tid, prob in pred[2]],
513
  ]
 
514
  except Exception as e:
515
  pass
516
  if topk_predictions_b is not None and model_b_token_ranges:
 
520
  try:
521
  if len(pred) >= 4:
522
  actual_id, rank, actual_prob, topk_list = pred[0], pred[1], pred[2], pred[3]
523
+ topk_b_data = [
524
  actual_id,
525
  rank,
526
  actual_prob,
527
  [[tid, prob, *decode_token(tid, tokenizer_b, model_type_b)] for tid, prob in topk_list],
528
  ]
529
  else:
530
+ topk_b_data = [pred[0], pred[1], [[tid, prob, *decode_token(tid, tokenizer_b, model_type_b)] for tid, prob in pred[2]]]
 
531
  except Exception as e:
532
  pass
533
 
 
 
534
  token_deltas = deltas[byte_start:byte_end]
535
  avg_token_delta = sum(token_deltas) / len(token_deltas) if token_deltas else 0
536
  tuned_delta = avg_token_delta - avg_delta
 
538
  # Initial rendering uses white color, JavaScript will apply colors based on slider
539
  r, g, b = 255, 255, 255
540
 
541
+ raw_display_text = token_text
542
+ display_text = token_text.replace("\t", " ")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
543
 
544
+ def classify_kind(text_value: str, is_raw_value: bool) -> str:
545
+ return build_display(text_value, is_raw=is_raw_value).kind
546
+
547
+ def get_actual_prob(topk_predictions, token_idx: Optional[int]):
548
+ if not topk_predictions or token_idx is None:
549
+ return None
550
+ if token_idx < 0 or token_idx >= len(topk_predictions):
551
+ return None
552
+ pred = topk_predictions[token_idx]
553
+ if isinstance(pred, (list, tuple)) and len(pred) >= 3:
554
+ return pred[2]
555
+ return None
556
+
557
+ model_tokens_render = {}
558
+ if token["rwkv_tokens"]:
559
+ rwkv_items = []
560
+ for tok_idx, tid, tb in token["rwkv_tokens"]:
561
+ txt, is_raw = token_bytes_to_display_text(tb)
562
+ rwkv_items.append([tid, txt, classify_kind(txt, is_raw), get_actual_prob(topk_predictions_a, tok_idx)])
563
+ model_tokens_render["rwkv"] = rwkv_items
564
+ if token["qwen_tokens"]:
565
+ qwen_items = []
566
+ for tok_idx, tid, tb in token["qwen_tokens"]:
567
+ txt, is_raw = token_bytes_to_display_text(tb)
568
+ qwen_items.append([tid, txt, classify_kind(txt, is_raw), get_actual_prob(topk_predictions_b, tok_idx)])
569
+ model_tokens_render["qwen"] = qwen_items
570
+
571
+ display_info = build_display(raw_display_text, is_raw=not decoded_ok)
572
+ if display_info.kind == "control":
573
+ display_text = raw_display_text
574
+ display_info.text = display_text
575
+ render_tokens.append(
576
+ TokenInfo(
577
+ byte_start=byte_start,
578
+ byte_end=byte_end,
579
+ display=display_info,
580
+ is_word=token["type"] == "word",
581
+ word_id=token.get("word_id"),
582
+ word_key=token.get("word_lower"),
583
+ bytes_hex=bytes_str,
584
+ compression={"rwkv": compression_a_str, "qwen": compression_b_str},
585
+ model_tokens=model_tokens_render,
586
+ loss={"rwkv": avg_compression_a_token, "qwen": avg_compression_b_token},
587
+ topk={
588
+ "rwkv": topk_a_data,
589
+ "qwen": topk_b_data,
590
+ },
591
+ tuned_delta=tuned_delta,
592
+ )
593
+ )
594
 
595
  delta_color = "#64ff64" if avg_delta < 0 else "#ff6464"
596
 
597
+ render_model = RenderModel(
598
+ text=text,
599
+ tokens=render_tokens,
600
+ meta={
601
+ "model_a": model_a_name,
602
+ "model_b": model_b_name,
603
+ "avg_compression": {
604
+ "rwkv": avg_compression_a,
605
+ "qwen": avg_compression_b,
606
+ },
607
+ "avg_delta": avg_delta,
608
+ "avg_delta_compression": avg_delta_compression,
609
+ },
610
+ )
611
+ render_model_json = escape_json_for_script(render_model.to_dict())
612
+
613
+ style_block = (ASSETS_DIR / "main.css").read_text(encoding="utf-8")
614
+
615
+ header_html = f"""
616
+ <div class="header">
617
+ <div class="meta">
618
+ <div>Model A: {model_a_name}</div>
619
+ <div>Model B: {model_b_name}</div>
620
+ <div>RWKV Compression: {avg_compression_a:.2f}%</div>
621
+ <div>Qwen Compression: {avg_compression_b:.2f}%</div>
622
+ <div style="color: {delta_color}">Avg Delta: {avg_delta_compression:+.2f}%</div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
623
  </div>
624
+ <div class="legend">
625
+ <div class="legend-item">
626
+ <div class="legend-box" style="background-color: rgb(77, 255, 77)"></div>
627
+ <span>RWKV better than avg</span>
628
+ </div>
629
+ <div class="legend-item">
630
+ <div class="legend-box" style="background-color: rgb(255, 255, 255)"></div>
631
+ <span>Equal to avg</span>
632
+ </div>
633
+ <div class="legend-item">
634
+ <div class="legend-box" style="background-color: rgb(255, 77, 77)"></div>
635
+ <span>RWKV worse than avg</span>
636
+ </div>
637
+ <div class="legend-item" style="margin-left: 20px;">
638
+ <span style="color: #aaa;">Color Range:</span>
639
+ <input type="range" id="color-range-slider" min="0" max="100" value="10" step="0.1" style="width: 200px; vertical-align: middle;">
640
+ <span id="color-range-value" style="color: #fff; min-width: 45px; display: inline-block;">10%</span>
641
+ </div>
642
  </div>
643
  </div>
644
+ """.strip("\n")
645
+
646
+ script_body = (ASSETS_DIR / "main.js").read_text(encoding="utf-8")
647
+
648
+ html_doc = render_page(
649
+ {
650
+ "page_title": "Model Comparison",
651
+ "style_block": style_block.strip("\n"),
652
+ "header_html": header_html,
653
+ "content_html": "",
654
+ "render_model_json": render_model_json,
655
+ "script_body": script_body.strip("\n"),
656
+ }
657
+ )
658
+
659
+ if return_render_model:
660
+ return html_doc, render_model.to_dict()
661
+ return html_doc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
visualization/render.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Page rendering using a lightweight template.
3
+ """
4
+
5
+ from pathlib import Path
6
+ from string import Template
7
+ from typing import Dict
8
+
9
+
10
+ _TEMPLATE_PATH = Path(__file__).resolve().parent / "templates" / "page.html.tmpl"
11
+
12
+
13
+ def render_page(context: Dict[str, str]) -> str:
14
+ template_text = _TEMPLATE_PATH.read_text(encoding="utf-8")
15
+ template = Template(template_text)
16
+ return template.safe_substitute(context)
visualization/templates/page.html.tmpl ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <title>$page_title</title>
6
+ <style>
7
+ $style_block
8
+ </style>
9
+ </head>
10
+ <body>
11
+ <svg id="svg-overlay"></svg>
12
+ <div id="tooltip"></div>
13
+ $header_html
14
+ <div class="content">$content_html</div>
15
+ <script id="render-model" type="application/json">$render_model_json</script>
16
+ <script>
17
+ $script_body
18
+ </script>
19
+ </body>
20
+ </html>