dqy08 committed on
Commit
5e8149f
·
1 Parent(s): 33372ae

统计图背景改为透明;取消散点聚合,改为截断10000 token以内;移动平均窗口改为32

Browse files
client/src/css/start.scss CHANGED
@@ -18,7 +18,6 @@ $breakpoint-tablet: 768px;
18
  --text-color: #333;
19
  --text-muted: #777;
20
  --panel-bg: #fafafa;
21
- --hist-bg: #e2e2e2;
22
  --border-color: #ddd;
23
  --button-bg: #f8f8f8;
24
  --button-hover-bg: #f0f0f0;
@@ -74,7 +73,6 @@ html {
74
  --text-color: var(--text-color-light);
75
  --text-muted: #888;
76
  --panel-bg: #2d2d2d;
77
- --hist-bg: #282828;
78
  --border-color: #444;
79
  --button-bg: #353535;
80
  --button-hover-bg: #454545;
 
18
  --text-color: #333;
19
  --text-muted: #777;
20
  --panel-bg: #fafafa;
 
21
  --border-color: #ddd;
22
  --button-bg: #f8f8f8;
23
  --button-hover-bg: #f0f0f0;
 
73
  --text-color: var(--text-color-light);
74
  --text-muted: #888;
75
  --panel-bg: #2d2d2d;
 
76
  --border-color: #444;
77
  --button-bg: #353535;
78
  --button-hover-bg: #454545;
client/src/css/violin.scss CHANGED
@@ -44,6 +44,6 @@
44
  }
45
 
46
  .HistogramX {
47
- background-color: var(--hist-bg);
48
  border-radius: 6px;
49
  }
 
44
  }
45
 
46
  .HistogramX {
47
+ background-color: transparent;
48
  border-radius: 6px;
49
  }
client/src/ts/compare.ts CHANGED
@@ -396,8 +396,6 @@ window.onload = () => {
396
  if (stats.tokenSurprisals && stats.tokenSurprisals.length > 0) {
397
  columnData.histograms.stats_surprisal_progress.update({
398
  data: stats.tokenSurprisals,
399
- maxPoints: 1000,
400
- movingAverageWindow: 20,
401
  xLabel: 'token index',
402
  yLabel: 'surprisal (bits)'
403
  });
 
396
  if (stats.tokenSurprisals && stats.tokenSurprisals.length > 0) {
397
  columnData.histograms.stats_surprisal_progress.update({
398
  data: stats.tokenSurprisals,
 
 
399
  xLabel: 'token index',
400
  yLabel: 'surprisal (bits)'
401
  });
client/src/ts/utils/visualizationUpdater.ts CHANGED
@@ -133,8 +133,6 @@ export class VisualizationUpdater {
133
  if (currentSurprisals && currentSurprisals.length > 0) {
134
  this.deps.stats_surprisal_progress.update({
135
  data: currentSurprisals,
136
- maxPoints: 1000,
137
- movingAverageWindow: 20, // 基于token的窗口大小
138
  xLabel: 'token index',
139
  yLabel: 'surprisal (bits)'
140
  });
 
133
  if (currentSurprisals && currentSurprisals.length > 0) {
134
  this.deps.stats_surprisal_progress.update({
135
  data: currentSurprisals,
 
 
136
  xLabel: 'token index',
137
  yLabel: 'surprisal (bits)'
138
  });
client/src/ts/vis/Histogram.ts CHANGED
@@ -70,7 +70,7 @@ export class Histogram extends VComponent<HistogramData> {
70
  .attr('height', op.height)
71
  .attr('rx', 6)
72
  .attr('ry', 6)
73
- .style('fill', 'var(--hist-bg)');
74
 
75
  this.layers.bg.append('g')
76
  .attr('class', 'x-axis')
 
70
  .attr('height', op.height)
71
  .attr('rx', 6)
72
  .attr('ry', 6)
73
+ .style('fill', 'transparent');
74
 
75
  this.layers.bg.append('g')
76
  .attr('class', 'x-axis')
client/src/ts/vis/ScatterPlot.ts CHANGED
@@ -8,8 +8,6 @@ export type ScatterPlotData = {
8
  xLabel?: string,
9
  yLabel?: string,
10
  extent?: { x?: [number, number], y?: [number, number] },
11
- maxPoints?: number, // 最高绘制点数阈值(默认1000)
12
- movingAverageWindow?: number, // 移动平均窗口(基于token,默认10)
13
  scatterColor?: string, // 散点颜色(默认主题色)
14
  lineColor?: string // 移动平均线颜色(默认不同颜色)
15
  }
@@ -67,7 +65,7 @@ export class ScatterPlot extends VComponent<ScatterPlotData> {
67
  .attr('height', op.height)
68
  .attr('rx', 6)
69
  .attr('ry', 6)
70
- .style('fill', 'var(--hist-bg)');
71
 
72
  // Y轴
73
  this.layers.bg.append('g')
@@ -97,30 +95,25 @@ export class ScatterPlot extends VComponent<ScatterPlotData> {
97
  };
98
  }
99
 
100
- const maxPoints = data.maxPoints ?? 1000;
 
 
 
101
 
102
- // 计算采样粒度
103
- const N = Math.max(1, Math.ceil(T / maxPoints));
104
- const M = Math.ceil(T / N);
105
-
106
- // 聚合数据:每N个token的平均值,X坐标为真实token索引
107
- const aggregatedPoints: Point[] = [];
108
- for (let i = 0; i < M; i++) {
109
- const startTokenIdx = i * N; // 真实token索引
110
- const endTokenIdx = Math.min(startTokenIdx + N, T);
111
- const group = rawSurprisals.slice(startTokenIdx, endTokenIdx);
112
- const avgSurprisal = group.reduce((sum, val) => sum + val, 0) / group.length;
113
- aggregatedPoints.push({
114
- x: startTokenIdx, // 真实token索引
115
- y: avgSurprisal
116
  });
117
  }
118
 
119
- // 计算移动平均(基于原始token数据,窗口单位为token)
120
- const window = data.movingAverageWindow ?? 10;
121
  const movingAverage: Point[] = [];
122
  for (let i = 0; i < T; i++) {
123
- const halfWindow = Math.floor(window / 2);
124
  const startTokenIdx = Math.max(0, i - halfWindow);
125
  const endTokenIdx = Math.min(T, i + halfWindow + 1);
126
  const windowTokens = rawSurprisals.slice(startTokenIdx, endTokenIdx);
@@ -132,7 +125,7 @@ export class ScatterPlot extends VComponent<ScatterPlotData> {
132
  }
133
 
134
  return {
135
- scatterPoints: aggregatedPoints,
136
  movingAverageLine: movingAverage,
137
  extent: data.extent,
138
  xLabel: data.xLabel,
@@ -188,9 +181,9 @@ export class ScatterPlot extends VComponent<ScatterPlotData> {
188
  .attr('d', line)
189
  .style('fill', 'none')
190
  .style('stroke', rd.lineColor || '#ff6b6b')
191
- .style('stroke-width', 2);
192
 
193
- // 渲染散点
194
  this.layers.main.selectAll('.scatter-point')
195
  .data(rd.scatterPoints)
196
  .join('circle')
@@ -199,7 +192,7 @@ export class ScatterPlot extends VComponent<ScatterPlotData> {
199
  .attr('cy', d => yScale(d.y))
200
  .attr('r', 1.5)
201
  .style('fill', rd.scatterColor || '#70b0ff')
202
- .style('opacity', 0.6);
203
 
204
  // 渲染X轴
205
  const xAxis = d3.axisBottom(xScale)
 
8
  xLabel?: string,
9
  yLabel?: string,
10
  extent?: { x?: [number, number], y?: [number, number] },
 
 
11
  scatterColor?: string, // 散点颜色(默认主题色)
12
  lineColor?: string // 移动平均线颜色(默认不同颜色)
13
  }
 
65
  .attr('height', op.height)
66
  .attr('rx', 6)
67
  .attr('ry', 6)
68
+ .style('fill', 'transparent');
69
 
70
  // Y轴
71
  this.layers.bg.append('g')
 
95
  };
96
  }
97
 
98
+ const maxPoints = 10000;
99
+ // 移动平均窗口大小,反映了语义趋势粒度。
100
+ // 32个token的粒度大致相当于句子到段落之间。
101
+ const movingAverageWindow = 32;
102
 
103
+ // 直接取前maxPoints个点,超出部分丢掉
104
+ const scatterPointsCount = Math.min(T, maxPoints);
105
+ const scatterPoints: Point[] = [];
106
+ for (let i = 0; i < scatterPointsCount; i++) {
107
+ scatterPoints.push({
108
+ x: i, // token索引
109
+ y: rawSurprisals[i]
 
 
 
 
 
 
 
110
  });
111
  }
112
 
113
+ // 计算移动平均
 
114
  const movingAverage: Point[] = [];
115
  for (let i = 0; i < T; i++) {
116
+ const halfWindow = Math.floor(movingAverageWindow / 2);
117
  const startTokenIdx = Math.max(0, i - halfWindow);
118
  const endTokenIdx = Math.min(T, i + halfWindow + 1);
119
  const windowTokens = rawSurprisals.slice(startTokenIdx, endTokenIdx);
 
125
  }
126
 
127
  return {
128
+ scatterPoints: scatterPoints,
129
  movingAverageLine: movingAverage,
130
  extent: data.extent,
131
  xLabel: data.xLabel,
 
181
  .attr('d', line)
182
  .style('fill', 'none')
183
  .style('stroke', rd.lineColor || '#ff6b6b')
184
+ .style('stroke-width', 2)
185
 
186
+ // 渲染散点(后渲染,在线上方)
187
  this.layers.main.selectAll('.scatter-point')
188
  .data(rd.scatterPoints)
189
  .join('circle')
 
192
  .attr('cy', d => yScale(d.y))
193
  .attr('r', 1.5)
194
  .style('fill', rd.scatterColor || '#70b0ff')
195
+ .style('opacity', 0.5)
196
 
197
  // 渲染X轴
198
  const xAxis = d3.axisBottom(xScale)