统计图背景改为透明;取消散点聚合,改为截断10000 token以内;移动平均窗口改为32
Browse files
client/src/css/start.scss
CHANGED
|
@@ -18,7 +18,6 @@ $breakpoint-tablet: 768px;
|
|
| 18 |
--text-color: #333;
|
| 19 |
--text-muted: #777;
|
| 20 |
--panel-bg: #fafafa;
|
| 21 |
-
--hist-bg: #e2e2e2;
|
| 22 |
--border-color: #ddd;
|
| 23 |
--button-bg: #f8f8f8;
|
| 24 |
--button-hover-bg: #f0f0f0;
|
|
@@ -74,7 +73,6 @@ html {
|
|
| 74 |
--text-color: var(--text-color-light);
|
| 75 |
--text-muted: #888;
|
| 76 |
--panel-bg: #2d2d2d;
|
| 77 |
-
--hist-bg: #282828;
|
| 78 |
--border-color: #444;
|
| 79 |
--button-bg: #353535;
|
| 80 |
--button-hover-bg: #454545;
|
|
|
|
| 18 |
--text-color: #333;
|
| 19 |
--text-muted: #777;
|
| 20 |
--panel-bg: #fafafa;
|
|
|
|
| 21 |
--border-color: #ddd;
|
| 22 |
--button-bg: #f8f8f8;
|
| 23 |
--button-hover-bg: #f0f0f0;
|
|
|
|
| 73 |
--text-color: var(--text-color-light);
|
| 74 |
--text-muted: #888;
|
| 75 |
--panel-bg: #2d2d2d;
|
|
|
|
| 76 |
--border-color: #444;
|
| 77 |
--button-bg: #353535;
|
| 78 |
--button-hover-bg: #454545;
|
client/src/css/violin.scss
CHANGED
|
@@ -44,6 +44,6 @@
|
|
| 44 |
}
|
| 45 |
|
| 46 |
.HistogramX {
|
| 47 |
-
background-color: var(--hist-bg);
|
| 48 |
border-radius: 6px;
|
| 49 |
}
|
|
|
|
| 44 |
}
|
| 45 |
|
| 46 |
.HistogramX {
|
| 47 |
+
background-color: transparent;
|
| 48 |
border-radius: 6px;
|
| 49 |
}
|
client/src/ts/compare.ts
CHANGED
|
@@ -396,8 +396,6 @@ window.onload = () => {
|
|
| 396 |
if (stats.tokenSurprisals && stats.tokenSurprisals.length > 0) {
|
| 397 |
columnData.histograms.stats_surprisal_progress.update({
|
| 398 |
data: stats.tokenSurprisals,
|
| 399 |
-
maxPoints: 1000,
|
| 400 |
-
movingAverageWindow: 20,
|
| 401 |
xLabel: 'token index',
|
| 402 |
yLabel: 'surprisal (bits)'
|
| 403 |
});
|
|
|
|
| 396 |
if (stats.tokenSurprisals && stats.tokenSurprisals.length > 0) {
|
| 397 |
columnData.histograms.stats_surprisal_progress.update({
|
| 398 |
data: stats.tokenSurprisals,
|
|
|
|
|
|
|
| 399 |
xLabel: 'token index',
|
| 400 |
yLabel: 'surprisal (bits)'
|
| 401 |
});
|
client/src/ts/utils/visualizationUpdater.ts
CHANGED
|
@@ -133,8 +133,6 @@ export class VisualizationUpdater {
|
|
| 133 |
if (currentSurprisals && currentSurprisals.length > 0) {
|
| 134 |
this.deps.stats_surprisal_progress.update({
|
| 135 |
data: currentSurprisals,
|
| 136 |
-
maxPoints: 1000,
|
| 137 |
-
movingAverageWindow: 20, // 基于token的窗口大小
|
| 138 |
xLabel: 'token index',
|
| 139 |
yLabel: 'surprisal (bits)'
|
| 140 |
});
|
|
|
|
| 133 |
if (currentSurprisals && currentSurprisals.length > 0) {
|
| 134 |
this.deps.stats_surprisal_progress.update({
|
| 135 |
data: currentSurprisals,
|
|
|
|
|
|
|
| 136 |
xLabel: 'token index',
|
| 137 |
yLabel: 'surprisal (bits)'
|
| 138 |
});
|
client/src/ts/vis/Histogram.ts
CHANGED
|
@@ -70,7 +70,7 @@ export class Histogram extends VComponent<HistogramData> {
|
|
| 70 |
.attr('height', op.height)
|
| 71 |
.attr('rx', 6)
|
| 72 |
.attr('ry', 6)
|
| 73 |
-
.style('fill', 'var(--hist-bg)');
|
| 74 |
|
| 75 |
this.layers.bg.append('g')
|
| 76 |
.attr('class', 'x-axis')
|
|
|
|
| 70 |
.attr('height', op.height)
|
| 71 |
.attr('rx', 6)
|
| 72 |
.attr('ry', 6)
|
| 73 |
+
.style('fill', 'transparent');
|
| 74 |
|
| 75 |
this.layers.bg.append('g')
|
| 76 |
.attr('class', 'x-axis')
|
client/src/ts/vis/ScatterPlot.ts
CHANGED
|
@@ -8,8 +8,6 @@ export type ScatterPlotData = {
|
|
| 8 |
xLabel?: string,
|
| 9 |
yLabel?: string,
|
| 10 |
extent?: { x?: [number, number], y?: [number, number] },
|
| 11 |
-
maxPoints?: number, // 最高绘制点数阈值(默认1000)
|
| 12 |
-
movingAverageWindow?: number, // 移动平均窗口(基于token,默认10)
|
| 13 |
scatterColor?: string, // 散点颜色(默认主题色)
|
| 14 |
lineColor?: string // 移动平均线颜色(默认不同颜色)
|
| 15 |
}
|
|
@@ -67,7 +65,7 @@ export class ScatterPlot extends VComponent<ScatterPlotData> {
|
|
| 67 |
.attr('height', op.height)
|
| 68 |
.attr('rx', 6)
|
| 69 |
.attr('ry', 6)
|
| 70 |
-
.style('fill', 'var(--hist-bg)');
|
| 71 |
|
| 72 |
// Y轴
|
| 73 |
this.layers.bg.append('g')
|
|
@@ -97,30 +95,25 @@ export class ScatterPlot extends VComponent<ScatterPlotData> {
|
|
| 97 |
};
|
| 98 |
}
|
| 99 |
|
| 100 |
-
const maxPoints = data.maxPoints ?? 1000;
|
|
|
|
|
|
|
|
|
|
| 101 |
|
| 102 |
-
//
|
| 103 |
-
const
|
| 104 |
-
const
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
const startTokenIdx = i * N; // 真实token索引
|
| 110 |
-
const endTokenIdx = Math.min(startTokenIdx + N, T);
|
| 111 |
-
const group = rawSurprisals.slice(startTokenIdx, endTokenIdx);
|
| 112 |
-
const avgSurprisal = group.reduce((sum, val) => sum + val, 0) / group.length;
|
| 113 |
-
aggregatedPoints.push({
|
| 114 |
-
x: startTokenIdx, // 真实token索引
|
| 115 |
-
y: avgSurprisal
|
| 116 |
});
|
| 117 |
}
|
| 118 |
|
| 119 |
-
// 计算移动平均
|
| 120 |
-
const window = data.movingAverageWindow ?? 10;
|
| 121 |
const movingAverage: Point[] = [];
|
| 122 |
for (let i = 0; i < T; i++) {
|
| 123 |
-
const halfWindow = Math.floor(window / 2);
|
| 124 |
const startTokenIdx = Math.max(0, i - halfWindow);
|
| 125 |
const endTokenIdx = Math.min(T, i + halfWindow + 1);
|
| 126 |
const windowTokens = rawSurprisals.slice(startTokenIdx, endTokenIdx);
|
|
@@ -132,7 +125,7 @@ export class ScatterPlot extends VComponent<ScatterPlotData> {
|
|
| 132 |
}
|
| 133 |
|
| 134 |
return {
|
| 135 |
-
scatterPoints: aggregatedPoints,
|
| 136 |
movingAverageLine: movingAverage,
|
| 137 |
extent: data.extent,
|
| 138 |
xLabel: data.xLabel,
|
|
@@ -188,9 +181,9 @@ export class ScatterPlot extends VComponent<ScatterPlotData> {
|
|
| 188 |
.attr('d', line)
|
| 189 |
.style('fill', 'none')
|
| 190 |
.style('stroke', rd.lineColor || '#ff6b6b')
|
| 191 |
-
.style('stroke-width', 2)
|
| 192 |
|
| 193 |
-
// 渲染散点
|
| 194 |
this.layers.main.selectAll('.scatter-point')
|
| 195 |
.data(rd.scatterPoints)
|
| 196 |
.join('circle')
|
|
@@ -199,7 +192,7 @@ export class ScatterPlot extends VComponent<ScatterPlotData> {
|
|
| 199 |
.attr('cy', d => yScale(d.y))
|
| 200 |
.attr('r', 1.5)
|
| 201 |
.style('fill', rd.scatterColor || '#70b0ff')
|
| 202 |
-
.style('opacity', 0.
|
| 203 |
|
| 204 |
// 渲染X轴
|
| 205 |
const xAxis = d3.axisBottom(xScale)
|
|
|
|
| 8 |
xLabel?: string,
|
| 9 |
yLabel?: string,
|
| 10 |
extent?: { x?: [number, number], y?: [number, number] },
|
|
|
|
|
|
|
| 11 |
scatterColor?: string, // 散点颜色(默认主题色)
|
| 12 |
lineColor?: string // 移动平均线颜色(默认不同颜色)
|
| 13 |
}
|
|
|
|
| 65 |
.attr('height', op.height)
|
| 66 |
.attr('rx', 6)
|
| 67 |
.attr('ry', 6)
|
| 68 |
+
.style('fill', 'transparent');
|
| 69 |
|
| 70 |
// Y轴
|
| 71 |
this.layers.bg.append('g')
|
|
|
|
| 95 |
};
|
| 96 |
}
|
| 97 |
|
| 98 |
+
const maxPoints = 10000;
|
| 99 |
+
// 移动平均窗口大小,反映了语义趋势粒度。
|
| 100 |
+
// 32个token的粒度大致相当于句子到段落之间。
|
| 101 |
+
const movingAverageWindow = 32;
|
| 102 |
|
| 103 |
+
// 直接取前maxPoints个点,超出部分丢掉
|
| 104 |
+
const scatterPointsCount = Math.min(T, maxPoints);
|
| 105 |
+
const scatterPoints: Point[] = [];
|
| 106 |
+
for (let i = 0; i < scatterPointsCount; i++) {
|
| 107 |
+
scatterPoints.push({
|
| 108 |
+
x: i, // token索引
|
| 109 |
+
y: rawSurprisals[i]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
});
|
| 111 |
}
|
| 112 |
|
| 113 |
+
// 计算移动平均
|
|
|
|
| 114 |
const movingAverage: Point[] = [];
|
| 115 |
for (let i = 0; i < T; i++) {
|
| 116 |
+
const halfWindow = Math.floor(movingAverageWindow / 2);
|
| 117 |
const startTokenIdx = Math.max(0, i - halfWindow);
|
| 118 |
const endTokenIdx = Math.min(T, i + halfWindow + 1);
|
| 119 |
const windowTokens = rawSurprisals.slice(startTokenIdx, endTokenIdx);
|
|
|
|
| 125 |
}
|
| 126 |
|
| 127 |
return {
|
| 128 |
+
scatterPoints: scatterPoints,
|
| 129 |
movingAverageLine: movingAverage,
|
| 130 |
extent: data.extent,
|
| 131 |
xLabel: data.xLabel,
|
|
|
|
| 181 |
.attr('d', line)
|
| 182 |
.style('fill', 'none')
|
| 183 |
.style('stroke', rd.lineColor || '#ff6b6b')
|
| 184 |
+
.style('stroke-width', 2)
|
| 185 |
|
| 186 |
+
// 渲染散点(后渲染,在线上方)
|
| 187 |
this.layers.main.selectAll('.scatter-point')
|
| 188 |
.data(rd.scatterPoints)
|
| 189 |
.join('circle')
|
|
|
|
| 192 |
.attr('cy', d => yScale(d.y))
|
| 193 |
.attr('r', 1.5)
|
| 194 |
.style('fill', rd.scatterColor || '#70b0ff')
|
| 195 |
+
.style('opacity', 0.5)
|
| 196 |
|
| 197 |
// 渲染X轴
|
| 198 |
const xAxis = d3.axisBottom(xScale)
|