Jellyfish042 commited on
Commit
48754a8
·
1 Parent(s): 24da7ec

improvements

Browse files
app.py CHANGED
@@ -141,6 +141,7 @@ def load_precomputed_example():
141
 
142
  if html_path.exists() and metadata_path.exists():
143
  import json
 
144
  with open(html_path, "r", encoding="utf-8") as f:
145
  _precomputed_html = f.read()
146
  with open(metadata_path, "r", encoding="utf-8") as f:
@@ -176,6 +177,7 @@ def initialize_models():
176
 
177
  # Initialize stats manager
178
  from core.inference_stats import InferenceStatsManager
 
179
  _stats_manager = InferenceStatsManager()
180
 
181
  print("Models loaded successfully!")
@@ -186,11 +188,20 @@ def wrap_html_in_iframe(html: str) -> str:
186
  # For srcdoc attribute, we only need to escape quotes
187
 # The HTML entities inside (like &quot;, &#10;) should remain as-is
188
 escaped = html.replace('"', "&quot;")
 
 
 
 
 
 
 
 
189
  return f"""
190
- <div style="width:100%;height:700px;border:1px solid #ddd;border-radius:8px;overflow:hidden;">
191
  <iframe srcdoc="{escaped}"
192
- style="width:100%;height:100%;border:none;"
193
- sandbox="allow-scripts"></iframe>
 
194
  </div>
195
  """
196
 
@@ -307,6 +318,23 @@ with gr.Blocks(
307
  #input-text textarea {
308
  font-family: Consolas, 'Courier New', monospace;
309
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
310
  """,
311
  ) as demo:
312
  gr.HTML(
@@ -346,6 +374,29 @@ with gr.Blocks(
346
  with gr.Column():
347
  output_html = gr.HTML(label="Visualization")
348
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
349
  # Event handlers
350
  clear_btn.click(fn=clear_inputs, outputs=[text_input, output_html])
351
 
 
141
 
142
  if html_path.exists() and metadata_path.exists():
143
  import json
144
+
145
  with open(html_path, "r", encoding="utf-8") as f:
146
  _precomputed_html = f.read()
147
  with open(metadata_path, "r", encoding="utf-8") as f:
 
177
 
178
  # Initialize stats manager
179
  from core.inference_stats import InferenceStatsManager
180
+
181
  _stats_manager = InferenceStatsManager()
182
 
183
  print("Models loaded successfully!")
 
188
  # For srcdoc attribute, we only need to escape quotes
189
  # The HTML entities inside (like &quot;, &#10;) should remain as-is
190
  escaped = html.replace('"', "&quot;")
191
+ onload_js = (
192
+ "(function(f){"
193
+ "function r(){try{var d=f.contentWindow.document;"
194
+ "if(!d)return;var h=Math.max(d.body.scrollHeight,d.documentElement.scrollHeight);"
195
+ "f.style.height=(h+2)+'px';}catch(e){}}"
196
+ "r();setTimeout(r,50);setTimeout(r,200);"
197
+ "})(this)"
198
+ )
199
  return f"""
200
+ <div style="width:100%;border:1px solid #ddd;border-radius:8px;overflow:hidden;">
201
  <iframe srcdoc="{escaped}"
202
+ style="width:100%;border:none;height:400px;"
203
+ sandbox="allow-scripts allow-same-origin"
204
+ onload="{onload_js}"></iframe>
205
  </div>
206
  """
207
 
 
318
  #input-text textarea {
319
  font-family: Consolas, 'Courier New', monospace;
320
  }
321
+ .gr-accordion-content {
322
+ max-height: none !important;
323
+ height: auto !important;
324
+ overflow: visible !important;
325
+ }
326
+ .gr-accordion-content > div {
327
+ max-height: none !important;
328
+ height: auto !important;
329
+ overflow: visible !important;
330
+ }
331
+ .gr-accordion-content .prose,
332
+ .gr-accordion-content .markdown,
333
+ .gr-accordion-content .md {
334
+ max-height: none !important;
335
+ height: auto !important;
336
+ overflow: visible !important;
337
+ }
338
  """,
339
  ) as demo:
340
  gr.HTML(
 
374
  with gr.Column():
375
  output_html = gr.HTML(label="Visualization")
376
 
377
+ with gr.Accordion("How to calculate compression rate?", open=False):
378
+ gr.Markdown(
379
+ r"""
380
+ The compression rate $R(t)$ represents the ratio of the compressed bitstream length to the original data size. It is derived from the model's negative log-likelihood loss $\mathcal{L}_{\text{NLL}}(t) = -\ln P(t)$:
381
+
382
+ $$
383
+ R(t) = \frac{\mathcal{L}_{\text{NLL}}(t)}{\ln 2 \cdot 8 \cdot L(t)} \times 100\%
384
+ $$
385
+
386
+ where $L(t)$ is the token length in bytes, and the factor $(\ln 2 \cdot 8)^{-1}$ normalizes the loss from nats to percentage of the original data size.
387
+
388
+ **Example.** For a 1-byte token ($L=1$) with probability $P(t) = 0.5$:
389
+
390
+ $$
391
+ R(t) = \frac{-\ln(0.5)}{\ln 2 \cdot 8 \cdot 1} \times 100\% \approx 12.5\%
392
+ $$
393
+ """,
394
+ latex_delimiters=[
395
+ {"left": "$$", "right": "$$", "display": True},
396
+ {"left": "$", "right": "$", "display": False},
397
+ ],
398
+ )
399
+
400
  # Event handlers
401
  clear_btn.click(fn=clear_inputs, outputs=[text_input, output_html])
402
 
core/render_model.py CHANGED
@@ -38,6 +38,7 @@ class TokenInfo:
38
  model_tokens: Dict[str, List[List[Any]]] = field(default_factory=dict)
39
  loss: Dict[str, float] = field(default_factory=dict)
40
  topk: Dict[str, Any] = field(default_factory=dict)
 
41
  tuned_delta: float = 0.0
42
 
43
 
 
38
  model_tokens: Dict[str, List[List[Any]]] = field(default_factory=dict)
39
  loss: Dict[str, float] = field(default_factory=dict)
40
  topk: Dict[str, Any] = field(default_factory=dict)
41
+ raw_delta: float = 0.0
42
  tuned_delta: float = 0.0
43
 
44
 
precomputed/example_metadata.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "example_text": "The Bitter Lesson\nRich Sutton\nMarch 13, 2019\nThe biggest lesson that can be read from 70 years of AI research is that general methods that leverage computation are ultimately the most effective, and by a large margin. The ultimate reason for this is Moore's law, or rather its generalization of continued exponentially falling cost per unit of computation. Most AI research has been conducted as if the computation available to the agent were constant (in which case leveraging human knowledge would be one of the only ways to improve performance) but, over a slightly longer time than a typical research project, massively more computation inevitably becomes available. Seeking an improvement that makes a difference in the shorter term, researchers seek to leverage their human knowledge of the domain, but the only thing that matters in the long run is the leveraging of computation. These two need not run counter to each other, but in practice they tend to. Time spent on one is time not spent on the other. There are psychological commitments to investment in one approach or the other. And the human-knowledge approach tends to complicate methods in ways that make them less suited to taking advantage of general methods leveraging computation. There were many examples of AI researchers' belated learning of this bitter lesson, and it is instructive to review some of the most prominent.\n\nIn computer chess, the methods that defeated the world champion, Kasparov, in 1997, were based on massive, deep search. At the time, this was looked upon with dismay by the majority of computer-chess researchers who had pursued methods that leveraged human understanding of the special structure of chess. When a simpler, search-based approach with special hardware and software proved vastly more effective, these human-knowledge-based chess researchers were not good losers. 
They said that ``brute force\" search may have won this time, but it was not a general strategy, and anyway it was not how people played chess. These researchers wanted methods based on human input to win and were disappointed when they did not.\n\nA similar pattern of research progress was seen in computer Go, only delayed by a further 20 years. Enormous initial efforts went into avoiding search by taking advantage of human knowledge, or of the special features of the game, but all those efforts proved irrelevant, or worse, once search was applied effectively at scale. Also important was the use of learning by self play to learn a value function (as it was in many other games and even in chess, although learning did not play a big role in the 1997 program that first beat a world champion). Learning by self play, and learning in general, is like search in that it enables massive computation to be brought to bear. Search and learning are the two most important classes of techniques for utilizing massive amounts of computation in AI research. In computer Go, as in computer chess, researchers' initial effort was directed towards utilizing human understanding (so that less search was needed) and only much later was much greater success had by embracing search and learning.\n\nIn speech recognition, there was an early competition, sponsored by DARPA, in the 1970s. Entrants included a host of special methods that took advantage of human knowledge---knowledge of words, of phonemes, of the human vocal tract, etc. On the other side were newer methods that were more statistical in nature and did much more computation, based on hidden Markov models (HMMs). Again, the statistical methods won out over the human-knowledge-based methods. This led to a major change in all of natural language processing, gradually over decades, where statistics and computation came to dominate the field. 
The recent rise of deep learning in speech recognition is the most recent step in this consistent direction. Deep learning methods rely even less on human knowledge, and use even more computation, together with learning on huge training sets, to produce dramatically better speech recognition systems. As in the games, researchers always tried to make systems that worked the way the researchers thought their own minds worked---they tried to put that knowledge in their systems---but it proved ultimately counterproductive, and a colossal waste of researcher's time, when, through Moore's law, massive computation became available and a means was found to put it to good use.\n\nIn computer vision, there has been a similar pattern. Early methods conceived of vision as searching for edges, or generalized cylinders, or in terms of SIFT features. But today all this is discarded. Modern deep-learning neural networks use only the notions of convolution and certain kinds of invariances, and perform much better.\n\nThis is a big lesson. As a field, we still have not thoroughly learned it, as we are continuing to make the same kind of mistakes. To see this, and to effectively resist it, we have to understand the appeal of these mistakes. We have to learn the bitter lesson that building in how we think we think does not work in the long run. The bitter lesson is based on the historical observations that 1) AI researchers have often tried to build knowledge into their agents, 2) this always helps in the short term, and is personally satisfying to the researcher, but 3) in the long run it plateaus and even inhibits further progress, and 4) breakthrough progress eventually arrives by an opposing approach based on scaling computation by search and learning. 
The eventual success is tinged with bitterness, and often incompletely digested, because it is success over a favored, human-centric approach.\n\nOne thing that should be learned from the bitter lesson is the great power of general purpose methods, of methods that continue to scale with increased computation even as the available computation becomes very great. The two methods that seem to scale arbitrarily in this way are search and learning.\n\nThe second general point to be learned from the bitter lesson is that the actual contents of minds are tremendously, irredeemably complex; we should stop trying to find simple ways to think about the contents of minds, such as simple ways to think about space, objects, multiple agents, or symmetries. All these are part of the arbitrary, intrinsically-complex, outside world. They are not what should be built in, as their complexity is endless; instead we should build in only the meta-methods that can find and capture this arbitrary complexity. Essential to these methods is that they can find good approximations, but the search for them should be by our methods, not by us. We want AI agents that can discover like we can, not which contain what we have discovered. Building in our discoveries only makes it harder to see how the discovering process can be done.\n",
3
- "qwen_inference_time": 19.49976086616516,
4
- "rwkv_inference_time": 29.02472949028015,
5
  "qwen_compression_rate": 48.14428559434192,
6
  "rwkv_compression_rate": 47.62502588510778
7
  }
 
1
  {
2
  "example_text": "The Bitter Lesson\nRich Sutton\nMarch 13, 2019\nThe biggest lesson that can be read from 70 years of AI research is that general methods that leverage computation are ultimately the most effective, and by a large margin. The ultimate reason for this is Moore's law, or rather its generalization of continued exponentially falling cost per unit of computation. Most AI research has been conducted as if the computation available to the agent were constant (in which case leveraging human knowledge would be one of the only ways to improve performance) but, over a slightly longer time than a typical research project, massively more computation inevitably becomes available. Seeking an improvement that makes a difference in the shorter term, researchers seek to leverage their human knowledge of the domain, but the only thing that matters in the long run is the leveraging of computation. These two need not run counter to each other, but in practice they tend to. Time spent on one is time not spent on the other. There are psychological commitments to investment in one approach or the other. And the human-knowledge approach tends to complicate methods in ways that make them less suited to taking advantage of general methods leveraging computation. There were many examples of AI researchers' belated learning of this bitter lesson, and it is instructive to review some of the most prominent.\n\nIn computer chess, the methods that defeated the world champion, Kasparov, in 1997, were based on massive, deep search. At the time, this was looked upon with dismay by the majority of computer-chess researchers who had pursued methods that leveraged human understanding of the special structure of chess. When a simpler, search-based approach with special hardware and software proved vastly more effective, these human-knowledge-based chess researchers were not good losers. 
They said that ``brute force\" search may have won this time, but it was not a general strategy, and anyway it was not how people played chess. These researchers wanted methods based on human input to win and were disappointed when they did not.\n\nA similar pattern of research progress was seen in computer Go, only delayed by a further 20 years. Enormous initial efforts went into avoiding search by taking advantage of human knowledge, or of the special features of the game, but all those efforts proved irrelevant, or worse, once search was applied effectively at scale. Also important was the use of learning by self play to learn a value function (as it was in many other games and even in chess, although learning did not play a big role in the 1997 program that first beat a world champion). Learning by self play, and learning in general, is like search in that it enables massive computation to be brought to bear. Search and learning are the two most important classes of techniques for utilizing massive amounts of computation in AI research. In computer Go, as in computer chess, researchers' initial effort was directed towards utilizing human understanding (so that less search was needed) and only much later was much greater success had by embracing search and learning.\n\nIn speech recognition, there was an early competition, sponsored by DARPA, in the 1970s. Entrants included a host of special methods that took advantage of human knowledge---knowledge of words, of phonemes, of the human vocal tract, etc. On the other side were newer methods that were more statistical in nature and did much more computation, based on hidden Markov models (HMMs). Again, the statistical methods won out over the human-knowledge-based methods. This led to a major change in all of natural language processing, gradually over decades, where statistics and computation came to dominate the field. 
The recent rise of deep learning in speech recognition is the most recent step in this consistent direction. Deep learning methods rely even less on human knowledge, and use even more computation, together with learning on huge training sets, to produce dramatically better speech recognition systems. As in the games, researchers always tried to make systems that worked the way the researchers thought their own minds worked---they tried to put that knowledge in their systems---but it proved ultimately counterproductive, and a colossal waste of researcher's time, when, through Moore's law, massive computation became available and a means was found to put it to good use.\n\nIn computer vision, there has been a similar pattern. Early methods conceived of vision as searching for edges, or generalized cylinders, or in terms of SIFT features. But today all this is discarded. Modern deep-learning neural networks use only the notions of convolution and certain kinds of invariances, and perform much better.\n\nThis is a big lesson. As a field, we still have not thoroughly learned it, as we are continuing to make the same kind of mistakes. To see this, and to effectively resist it, we have to understand the appeal of these mistakes. We have to learn the bitter lesson that building in how we think we think does not work in the long run. The bitter lesson is based on the historical observations that 1) AI researchers have often tried to build knowledge into their agents, 2) this always helps in the short term, and is personally satisfying to the researcher, but 3) in the long run it plateaus and even inhibits further progress, and 4) breakthrough progress eventually arrives by an opposing approach based on scaling computation by search and learning. 
The eventual success is tinged with bitterness, and often incompletely digested, because it is success over a favored, human-centric approach.\n\nOne thing that should be learned from the bitter lesson is the great power of general purpose methods, of methods that continue to scale with increased computation even as the available computation becomes very great. The two methods that seem to scale arbitrarily in this way are search and learning.\n\nThe second general point to be learned from the bitter lesson is that the actual contents of minds are tremendously, irredeemably complex; we should stop trying to find simple ways to think about the contents of minds, such as simple ways to think about space, objects, multiple agents, or symmetries. All these are part of the arbitrary, intrinsically-complex, outside world. They are not what should be built in, as their complexity is endless; instead we should build in only the meta-methods that can find and capture this arbitrary complexity. Essential to these methods is that they can find good approximations, but the search for them should be by our methods, not by us. We want AI agents that can discover like we can, not which contain what we have discovered. Building in our discoveries only makes it harder to see how the discovering process can be done.\n",
3
+ "qwen_inference_time": 22.259804010391235,
4
+ "rwkv_inference_time": 30.444334983825684,
5
  "qwen_compression_rate": 48.14428559434192,
6
  "rwkv_compression_rate": 47.62502588510778
7
  }
precomputed/example_visualization.html CHANGED
The diff for this file is too large to render. See raw diff
 
visualization/assets/main.css CHANGED
@@ -25,14 +25,31 @@
25
  }
26
  .legend {
27
  display: flex;
28
- gap: 15px;
 
29
  margin-top: 10px;
30
  }
 
 
 
 
 
 
31
  .legend-item {
32
  display: flex;
33
  align-items: center;
34
  gap: 5px;
35
  }
 
 
 
 
 
 
 
 
 
 
36
  .legend-box {
37
  width: 20px;
38
  height: 12px;
 
25
  }
26
  .legend {
27
  display: flex;
28
+ flex-direction: column;
29
+ gap: 6px;
30
  margin-top: 10px;
31
  }
32
+ .legend-row {
33
+ display: flex;
34
+ gap: 15px;
35
+ align-items: center;
36
+ flex-wrap: wrap;
37
+ }
38
  .legend-item {
39
  display: flex;
40
  align-items: center;
41
  gap: 5px;
42
  }
43
+ .legend-toggle label {
44
+ display: inline-flex;
45
+ align-items: center;
46
+ gap: 4px;
47
+ color: #c8c8c8;
48
+ font-size: 12px;
49
+ }
50
+ .legend-toggle input[type="radio"] {
51
+ margin: 0 2px 0 6px;
52
+ }
53
  .legend-box {
54
  width: 20px;
55
  height: 12px;
visualization/assets/main.js CHANGED
@@ -51,6 +51,7 @@
51
  span.className = 'token';
52
  span.dataset.tokenIdx = String(idx);
53
  span.dataset.tunedDelta = (token && typeof token.tuned_delta === 'number') ? String(token.tuned_delta) : '0';
 
54
  const kind = (token && token.display && token.display.kind) ? token.display.kind : 'normal';
55
  const text = (token && token.display && typeof token.display.text === 'string') ? token.display.text : '';
56
  const hasVisible = (() => {
@@ -531,6 +532,11 @@
531
 
532
  const slider = document.getElementById('color-range-slider');
533
  const rangeValue = document.getElementById('color-range-value');
 
 
 
 
 
534
 
535
  // Collect all tuned_delta values
536
  const tokenData = [];
@@ -541,19 +547,17 @@
541
  return;
542
  }
543
  const tunedDelta = parseFloat(token.dataset.tunedDelta);
 
 
 
 
544
  if (!isNaN(tunedDelta)) {
545
- tokenData.push({ token, tunedDelta, absDelta: Math.abs(tunedDelta) });
546
  }
547
  });
548
-
549
- // Calculate max_abs_tuned_delta for normalization
550
- const maxAbsDelta = Math.max(...tokenData.map(d => d.absDelta), 1e-9);
551
-
552
- // Sort by |tuned_delta| to get rankings
553
- const sortedByAbs = [...tokenData].sort((a, b) => b.absDelta - a.absDelta);
554
- sortedByAbs.forEach((item, rank) => {
555
- item.rank = rank; // rank 0 = largest deviation
556
- });
557
 
558
  function tunedDeltaToColor(tunedDelta, maxAbsDelta, exponent) {
559
  // Normalize to [-1, 1]
@@ -577,6 +581,13 @@
577
 
578
  function updateColors(colorRangePercent) {
579
  // colorRangePercent: 0-100, represents the proportion of tokens to color
 
 
 
 
 
 
 
580
  const colorCount = Math.round(tokenData.length * colorRangePercent / 100);
581
 
582
  // Calculate exponent: 100% -> 0.5, 0% -> 1.0
@@ -586,14 +597,15 @@
586
  let maxAbsDeltaInRange = 1e-9;
587
  tokenData.forEach(item => {
588
  if (item.rank < colorCount) {
589
- maxAbsDeltaInRange = Math.max(maxAbsDeltaInRange, item.absDelta);
590
  }
591
  });
592
 
593
  tokenData.forEach(item => {
594
  if (item.rank < colorCount) {
595
  // Use dynamic normalization based on colored range
596
- item.token.style.backgroundColor = tunedDeltaToColor(item.tunedDelta, maxAbsDeltaInRange, exponent);
 
597
  } else {
598
  // Outside color range, white
599
  item.token.style.backgroundColor = 'rgb(255, 255, 255)';
@@ -604,12 +616,37 @@
604
  });
605
  }
606
 
 
 
 
 
 
 
 
 
 
 
 
 
 
607
  slider.addEventListener('input', (e) => {
608
  const val = parseFloat(e.target.value);
609
  rangeValue.textContent = val.toFixed(1) + '%';
610
  updateColors(val);
611
  });
612
 
 
 
 
 
 
 
 
 
 
 
 
 
613
  // Apply default color range on page load
614
  updateColors(10);
615
 
 
51
  span.className = 'token';
52
  span.dataset.tokenIdx = String(idx);
53
  span.dataset.tunedDelta = (token && typeof token.tuned_delta === 'number') ? String(token.tuned_delta) : '0';
54
+ span.dataset.rawDelta = (token && typeof token.raw_delta === 'number') ? String(token.raw_delta) : '';
55
  const kind = (token && token.display && token.display.kind) ? token.display.kind : 'normal';
56
  const text = (token && token.display && typeof token.display.text === 'string') ? token.display.text : '';
57
  const hasVisible = (() => {
 
532
 
533
  const slider = document.getElementById('color-range-slider');
534
  const rangeValue = document.getElementById('color-range-value');
535
+ const deltaModeInputs = document.querySelectorAll('input[name="delta-mode"]');
536
+ const legendBetter = document.getElementById('legend-better');
537
+ const legendEqual = document.getElementById('legend-equal');
538
+ const legendWorse = document.getElementById('legend-worse');
539
+ let deltaMode = 'relative';
540
 
541
  // Collect all tuned_delta values
542
  const tokenData = [];
 
547
  return;
548
  }
549
  const tunedDelta = parseFloat(token.dataset.tunedDelta);
550
+ let rawDelta = parseFloat(token.dataset.rawDelta);
551
+ if (isNaN(rawDelta)) {
552
+ rawDelta = tunedDelta;
553
+ }
554
  if (!isNaN(tunedDelta)) {
555
+ tokenData.push({ token, tunedDelta, rawDelta });
556
  }
557
  });
558
+ function getDeltaValue(item) {
559
+ return deltaMode === 'absolute' ? item.rawDelta : item.tunedDelta;
560
+ }
 
 
 
 
 
 
561
 
562
  function tunedDeltaToColor(tunedDelta, maxAbsDelta, exponent) {
563
  // Normalize to [-1, 1]
 
581
 
582
  function updateColors(colorRangePercent) {
583
  // colorRangePercent: 0-100, represents the proportion of tokens to color
584
+ const absValues = tokenData.map(item => Math.abs(getDeltaValue(item)));
585
+ const maxAbsDelta = Math.max(...absValues, 1e-9);
586
+
587
+ const sortedByAbs = [...tokenData].sort((a, b) => Math.abs(getDeltaValue(b)) - Math.abs(getDeltaValue(a)));
588
+ sortedByAbs.forEach((item, rank) => {
589
+ item.rank = rank;
590
+ });
591
  const colorCount = Math.round(tokenData.length * colorRangePercent / 100);
592
 
593
  // Calculate exponent: 100% -> 0.5, 0% -> 1.0
 
597
  let maxAbsDeltaInRange = 1e-9;
598
  tokenData.forEach(item => {
599
  if (item.rank < colorCount) {
600
+ maxAbsDeltaInRange = Math.max(maxAbsDeltaInRange, Math.abs(getDeltaValue(item)));
601
  }
602
  });
603
 
604
  tokenData.forEach(item => {
605
  if (item.rank < colorCount) {
606
  // Use dynamic normalization based on colored range
607
+ const deltaValue = getDeltaValue(item);
608
+ item.token.style.backgroundColor = tunedDeltaToColor(deltaValue, maxAbsDeltaInRange, exponent);
609
  } else {
610
  // Outside color range, white
611
  item.token.style.backgroundColor = 'rgb(255, 255, 255)';
 
616
  });
617
  }
618
 
619
+ function updateLegendText() {
620
+ if (!legendBetter || !legendEqual || !legendWorse) return;
621
+ if (deltaMode === 'absolute') {
622
+ legendBetter.textContent = 'RWKV better';
623
+ legendEqual.textContent = 'Equal';
624
+ legendWorse.textContent = 'RWKV worse';
625
+ } else {
626
+ legendBetter.textContent = 'RWKV better than avg delta';
627
+ legendEqual.textContent = 'Equal to avg delta';
628
+ legendWorse.textContent = 'RWKV worse than avg delta';
629
+ }
630
+ }
631
+
632
  slider.addEventListener('input', (e) => {
633
  const val = parseFloat(e.target.value);
634
  rangeValue.textContent = val.toFixed(1) + '%';
635
  updateColors(val);
636
  });
637
 
638
+ deltaModeInputs.forEach(input => {
639
+ input.addEventListener('change', (e) => {
640
+ const target = e.target;
641
+ if (target && target.checked) {
642
+ deltaMode = target.value === 'absolute' ? 'absolute' : 'relative';
643
+ updateLegendText();
644
+ updateColors(parseFloat(slider.value));
645
+ }
646
+ });
647
+ });
648
+
649
+ updateLegendText();
650
  // Apply default color range on page load
651
  updateColors(10);
652
 
visualization/html_generator.py CHANGED
@@ -534,6 +534,7 @@ def generate_comparison_html(
534
  token_deltas = deltas[byte_start:byte_end]
535
  avg_token_delta = sum(token_deltas) / len(token_deltas) if token_deltas else 0
536
  tuned_delta = avg_token_delta - avg_delta
 
537
 
538
  # Initial rendering uses white color, JavaScript will apply colors based on slider
539
  r, g, b = 255, 255, 255
@@ -588,6 +589,7 @@ def generate_comparison_html(
588
  "rwkv": topk_a_data,
589
  "qwen": topk_b_data,
590
  },
 
591
  tuned_delta=tuned_delta,
592
  )
593
  )
@@ -622,22 +624,31 @@ def generate_comparison_html(
622
  <div style="color: {delta_color}">Avg Delta: {avg_delta_compression:+.2f}%</div>
623
  </div>
624
  <div class="legend">
625
- <div class="legend-item">
626
- <div class="legend-box" style="background-color: rgb(77, 255, 77)"></div>
627
- <span>RWKV better than avg</span>
 
 
 
 
 
 
 
 
628
  </div>
629
- <div class="legend-item">
630
- <div class="legend-box" style="background-color: rgb(255, 255, 255)"></div>
631
- <span>Equal to avg</span>
632
- </div>
633
- <div class="legend-item">
634
- <div class="legend-box" style="background-color: rgb(255, 77, 77)"></div>
635
- <span>RWKV worse than avg</span>
636
- </div>
637
- <div class="legend-item" style="margin-left: 20px;">
638
- <span style="color: #aaa;">Color Range:</span>
639
- <input type="range" id="color-range-slider" min="0" max="100" value="10" step="0.1" style="width: 200px; vertical-align: middle;">
640
- <span id="color-range-value" style="color: #fff; min-width: 45px; display: inline-block;">10%</span>
 
641
  </div>
642
  </div>
643
  </div>
 
534
  token_deltas = deltas[byte_start:byte_end]
535
  avg_token_delta = sum(token_deltas) / len(token_deltas) if token_deltas else 0
536
  tuned_delta = avg_token_delta - avg_delta
537
+ raw_delta = avg_token_delta
538
 
539
  # Initial rendering uses white color, JavaScript will apply colors based on slider
540
  r, g, b = 255, 255, 255
 
589
  "rwkv": topk_a_data,
590
  "qwen": topk_b_data,
591
  },
592
+ raw_delta=raw_delta,
593
  tuned_delta=tuned_delta,
594
  )
595
  )
 
624
  <div style="color: {delta_color}">Avg Delta: {avg_delta_compression:+.2f}%</div>
625
  </div>
626
  <div class="legend">
627
+ <div class="legend-row">
628
+ <div class="legend-item legend-toggle">
629
+ <span style="color: #aaa;">Coloring Mode:</span>
630
+ <label><input type="radio" name="delta-mode" value="relative" checked> vs Avg Delta</label>
631
+ <label><input type="radio" name="delta-mode" value="absolute"> Absolute</label>
632
+ </div>
633
+ <div class="legend-item">
634
+ <span style="color: #aaa;">Color Range:</span>
635
+ <input type="range" id="color-range-slider" min="0" max="100" value="10" step="0.1" style="width: 200px; vertical-align: middle;">
636
+ <span id="color-range-value" style="color: #fff; min-width: 45px; display: inline-block;">10%</span>
637
+ </div>
638
  </div>
639
+ <div class="legend-row">
640
+ <div class="legend-item">
641
+ <div class="legend-box" style="background-color: rgb(77, 255, 77)"></div>
642
+ <span id="legend-better">RWKV better than avg delta</span>
643
+ </div>
644
+ <div class="legend-item">
645
+ <div class="legend-box" style="background-color: rgb(255, 255, 255)"></div>
646
+ <span id="legend-equal">Equal to avg delta</span>
647
+ </div>
648
+ <div class="legend-item">
649
+ <div class="legend-box" style="background-color: rgb(255, 77, 77)"></div>
650
+ <span id="legend-worse">RWKV worse than avg delta</span>
651
+ </div>
652
  </div>
653
  </div>
654
  </div>