InfoLens / client /src /features /analysis /visualizationUpdater.ts
dqy08's picture
重构仓库目录;增加Propagated attribution动画;UI改进
17037b0
Raw
History Blame Contribute Delete
57.7 kB
/**
* 可视化更新模块
* 负责处理分析结果的可视化更新逻辑
*/
import * as d3 from 'd3';
import type { AnalyzeResponse, FrontendAnalyzeResult, FrontendToken } from '../../shared/api/GLTR_API';
import type { GLTR_Text_Box } from '../../shared/vis/GLTR_Text_Box';
import type { HighlightController } from '../../shared/controllers/highlightController';
import type { TextInputController } from '../../shared/controllers/textInputController';
import type { Histogram } from '../../shared/vis/Histogram';
import type { ScatterPlot } from '../../shared/vis/ScatterPlot';
import type { AppStateManager } from './appStateManager';
import {
cloneFrontendToken,
mergeTokensForRendering,
createRawSnapshot
} from '../../shared/cross/tokenUtils';
import { getAttentionRawScore, mergeAttentionTokensFullyForRendering, normalizeTokenScores } from '../../shared/cross/semanticUtils';
import {
validateTokenConsistency,
validateTokenProbabilities,
validateTokenPredictions
} from '../../shared/cross/dataValidation';
import {
calculateTextStats,
calculateMergedTokenSurprisals,
computeAverage,
computeP90,
type TextStats
} from '../../shared/cross/textStatistics';
import {
getTokenSurprisalHistogramConfig,
getSurprisalProgressConfig,
getMatchScoreProgressConfig,
getRawScoreNormedHistogramConfig
} from "./visualizationConfigs";
import { getSemanticSimilarityColor, HISTOGRAM_MIN_ALPHA } from '../../shared/cross/SurprisalColorConfig';
import { showAlertDialog } from '../../shared/ui/dialog';
import { tr } from '../../shared/lang/i18n-lite';
import { computeExpectedCounts } from './lognormalFit';
import { findSignalThresholdWithLog, type signalFitResult, type SignalThresholdBin } from './signalThresholdDetector';
import { getSemanticAnalysisEnabled } from '../../shared/cross/semanticAnalysisManager';
import { getDigitsMergeEnabled } from '../../shared/cross/digitsMergeManager';
import { getSemanticMatchThreshold } from '../../shared/cross/semanticThresholdManager';
import { applySemanticDebugInfoPanel } from '../../shared/prediction_attribution/core/semanticDebugInfo';
/**
 * Raised when the semantic analysis and the info-density analysis disagree on
 * token boundaries. Throwing it interrupts the joint (combined) display.
 */
export class TokenBoundaryInconsistentError extends Error {
  constructor() {
    const message = 'Tokenizer results inconsistent: semantic and info-density token boundaries differ.';
    super(message);
    // Give the error a stable name for logging and name-based checks.
    this.name = 'TokenBoundaryInconsistentError';
  }
}
/**
 * Estimates P(signal | raw_score_normed = s) for every score by reusing the
 * bins produced by the signal-threshold detector.
 *
 * Each score is located in its bin via the bins' left edges; the probability is
 * (obsInBin - expInBin) / obsInBin clamped to [0, 1]. Scores that fall outside
 * the half-open range [tauLeft, tauRight) of the selected bin, and bins without
 * observations, yield 0.
 *
 * @param scores Scores to evaluate.
 * @param bins Bins to look the scores up in (searched by `tauLeft` with `d3.bisectRight`).
 * @returns One probability per score, or an empty array when either input is empty.
 */
function signalProbFromBins(scores: number[], bins: SignalThresholdBin[]): number[] {
  if (scores.length === 0 || bins.length === 0) return [];
  const leftEdges = bins.map((bin) => bin.tauLeft);
  const lastBinIndex = bins.length - 1;

  const probabilityFor = (score: number): number => {
    const located = d3.bisectRight(leftEdges, score) - 1;
    const bin = bins[Math.max(0, Math.min(lastBinIndex, located))]!;
    const outsideBin = score < bin.tauLeft || score >= bin.tauRight;
    if (outsideBin) return 0;
    if (!(bin.obsInBin > 0)) return 0;
    const excessRatio = (bin.obsInBin - bin.expInBin) / bin.obsInBin;
    return Math.max(0, Math.min(1, excessRatio));
  };

  return scores.map((score) => probabilityFor(score));
}
/**
* Dependencies injected into the visualization updater.
*/
export interface VisualizationDependencies {
// Text box renderer; receives the display result via update() and owns the loading/highlight state.
lmf: GLTR_Text_Box;
// Controller that stores the currently displayed data and manages highlights.
highlightController: HighlightController;
// Controller used to push text metrics (and the model name) to the input area.
textInputController: TextInputController;
// Histogram fed with the token surprisal values.
stats_frac: Histogram;
// Histogram fed with the normalized semantic raw scores.
stats_raw_score_normed: Histogram;
// Scatter plot fed with the surprisal values along the text.
stats_surprisal_progress: ScatterPlot;
// Scatter plot used for the per-chunk semantic match lines.
stats_match_score_progress: ScatterPlot;
// Application state manager (loading flags, hasValidData, button states).
appStateManager: AppStateManager;
// Color scale passed to the token surprisal histogram.
surprisalColorScale: d3.ScaleSequential<string>;
}
/** Raw semantic-analysis data (stored independently of the info-density data). */
export interface SemanticData {
// Text the semantic analysis was run on; compared against the info-density text before joining.
text: string;
model?: string;
/** Whole-text mode: copy of the token_attention returned by the API, used to recompute when digit merge is toggled (not stored in chunked mode). */
semanticTokenAttentionFromApi?: Array<{
offset: [number, number];
raw: string;
score: number;
rawScore?: number;
}>;
// Token attention currently used for display (replaced with the re-merged, normalized list when digit merge changes).
token_attention: Array<{
offset: [number, number];
raw: string;
score: number;
rawScore?: number;
}>;
/** Fit result, computed by the data layer after normalization and passed in; used in whole-text mode. */
signalFitResult?: signalFitResult | null;
/** Chunk boundaries; used in chunked mode. Each entry may carry the thresholdResult fitted independently for that chunk. */
chunkInfos?: Array<{ startOffset: number; endOffset: number; chunkIndex: number; chunkMatchDegree: number; thresholdResult?: signalFitResult }>;
/** Whole-text match degree; used in non-chunked mode as the match-degree multiplier of pw_score. */
full_match_degree?: number;
}
/**
 * Tells whether semantic-analysis data is present: a non-empty `token_attention`
 * or a non-empty `chunkInfos` is enough.
 */
function hasSemanticData(data: { token_attention?: unknown[]; chunkInfos?: unknown[] } | null | undefined): boolean {
  if (data == null) return false;
  if ((data.token_attention?.length ?? 0) > 0) return true;
  return (data.chunkInfos?.length ?? 0) > 0;
}
/**
* Current data state.
* Info-density and semantic-analysis data are stored independently; whether they
* are shown separately or jointly is decided at display time based on their consistency.
*/
export interface CurrentDataState {
/** Info-density analysis result (independent). */
infoDensityData: AnalyzeResponse | null;
/** Semantic analysis result (independent). */
semanticData: SemanticData | null;
// Snapshot of the API response taken (createRawSnapshot) before the result is enhanced.
rawApiResponse: AnalyzeResponse | null;
// Surprisal values of the merged tokens, as fed to the histogram / progress chart.
currentSurprisals: number[] | null;
// Average of currentSurprisals.
currentTokenAvg: number | null;
// P90 of currentSurprisals.
currentTokenP90: number | null;
// Total surprisal taken from the text statistics.
currentTotalSurprisal: number | null;
}
/**
* 可视化更新管理器
*/
export class VisualizationUpdater {
private deps: VisualizationDependencies;
private currentState: CurrentDataState;
/**
 * Stores the injected dependencies and starts from an empty data state.
 */
constructor(deps: VisualizationDependencies) {
  const emptyState: CurrentDataState = {
    infoDensityData: null,
    semanticData: null,
    rawApiResponse: null,
    currentSurprisals: null,
    currentTokenAvg: null,
    currentTokenP90: null,
    currentTotalSurprisal: null,
  };
  this.deps = deps;
  this.currentState = emptyState;
}
/**
 * Returns a shallow copy of the current data state.
 */
getCurrentState(): Readonly<CurrentDataState> {
  const snapshot: CurrentDataState = { ...this.currentState };
  return snapshot;
}
/**
 * Returns the stored raw API response, or null when none is stored.
 */
getRawApiResponse(): AnalyzeResponse | null {
  const { rawApiResponse } = this.currentState;
  return rawApiResponse;
}
/**
 * Returns the data currently meant for display, computed from infoDensityData
 * and semanticData by the display logic, wrapped as an AnalyzeResponse.
 * Returns null when there is nothing to display.
 */
getCurrentData(): AnalyzeResponse | null {
  const displayResult = this.computeDisplayResult();
  if (displayResult) {
    const request = { text: displayResult.originalText };
    return { request, result: displayResult };
  }
  return null;
}
/**
 * Returns the stored surprisal values, or null when none are stored.
 */
getCurrentSurprisals(): number[] | null {
  const { currentSurprisals } = this.currentState;
  return currentSurprisals;
}
/**
 * Forwards the text metrics (and the model name to display) to the text input controller.
 */
private updateTextMetrics(stats: TextStats | null, modelName?: string | null | undefined): void {
  const { textInputController } = this.deps;
  textInputController.updateTextMetrics(stats, modelName);
}
/**
 * Clears all highlights through the highlight controller.
 */
private clearHighlights(): void {
  const { highlightController } = this.deps;
  highlightController.clearHighlights();
}
/**
 * Computes what should be displayed: info density only, semantic only, or the
 * joint view (when both analyses refer to the same text).
 *
 * Side effects on a token-boundary mismatch in joint mode: logs a warning,
 * shows an alert dialog, drops the stored semantic data and throws.
 *
 * @returns The display result, or null when no data is stored.
 * @throws TokenBoundaryInconsistentError when semantic and info-density token boundaries differ.
 */
private computeDisplayResult(): (FrontendAnalyzeResult & {
  rawScoresNormed?: number[];
  attentionRawScores?: number[];
  chunkInfos?: SemanticData['chunkInfos'];
}) | null {
  const { infoDensityData, semanticData } = this.currentState;
  const infoResult = infoDensityData?.result as FrontendAnalyzeResult | undefined;
  const infoText = infoDensityData?.request?.text ?? infoResult?.originalText ?? '';
  const semanticText = semanticData?.text ?? '';

  if (infoResult && semanticData && infoText === semanticText && hasSemanticData(semanticData)) {
    const mergedInfoTokens = infoResult.bpeBpeMergedTokens ?? infoResult.bpe_strings;
    if (mergedInfoTokens?.length) {
      const attention = semanticData.token_attention ?? [];

      // Boundaries are only checked when token_attention exists; with chunkInfos
      // alone there is no semantic coloring, so the check is skipped.
      if (attention.length) {
        const mismatch = this.checkSemanticAlignsWithInfo(attention, mergedInfoTokens, semanticText);
        if (mismatch) {
          console.warn(
            '[联合模式] 两种分析的分词token边界不一致:\n' +
            ' 语义分析:', mismatch.aSample, '\n' +
            ' 信息密度:', mismatch.bSample, '\n' +
            ' 语义后一个:', mismatch.aNext, '\n' +
            ' 信息后一个:', mismatch.bNext, '\n' +
            ' 位置附近原文:', JSON.stringify(mismatch.textBefore), '|', JSON.stringify(mismatch.textAt), '|', JSON.stringify(mismatch.textAfter)
          );
          showAlertDialog(tr('Error'), tr('Tokenizer results inconsistent: semantic and info-density token boundaries differ.'));
          this.currentState.semanticData = null;
          throw new TokenBoundaryInconsistentError();
        }
      }

      // Joint mode: merged info tokens and the semantic tokens beyond them are
      // combined into a union so the rendered range matches the truncation boundary.
      const unionWithoutAttention = () => {
        const mapped = this.mapTokenAttentionToMerged(mergedInfoTokens, []);
        return {
          unionTokens: mergedInfoTokens,
          scoresForUnion: mapped.scores,
          rawScoresForUnion: mapped.rawScores,
        };
      };
      const union = attention.length
        ? this.mergeBpeWithSemanticBeyond(mergedInfoTokens, attention)
        : unionWithoutAttention();

      return {
        ...infoResult,
        bpeBpeMergedTokens: union.unionTokens,
        bpe_strings: union.unionTokens,
        rawScoresNormed: union.scoresForUnion,
        attentionRawScores: union.rawScoresForUnion,
        chunkInfos: semanticData.chunkInfos,
      };
    }
  }

  // Semantic data present (token_attention or chunkInfos): build the semantic-only result.
  if (semanticData && hasSemanticData(semanticData)) {
    return this.buildSemanticOnlyResult(
      { model: semanticData.model },
      semanticData.token_attention,
      semanticData.text,
      semanticData.chunkInfos
    );
  }

  if (!infoResult) return null;
  return { ...infoResult, chunkInfos: semanticData?.chunkInfos ?? undefined };
}
/**
 * Before an analysis starts, shows/hides the statistic charts based on the data
 * already stored plus the data that is about to arrive, and renders empty
 * placeholder charts for the pending analysis.
 *
 * @param mode Kind of analysis about to run.
 * @param text Text about to be analyzed (compared with the stored data's text).
 * @param willBeChunked Semantic analysis only: true when it will run in chunked mode,
 *   in which case the raw-score histogram is hidden and the per-chunk progress chart is shown.
 */
public updateHistogramVisibilityForPending(mode: 'infoDensity' | 'semantic', text: string, willBeChunked?: boolean): void {
  const setItemVisible = (elementId: string, visible: boolean): void => {
    const item = document.getElementById(elementId);
    if (item) item.style.display = visible ? '' : 'none';
  };

  const { infoDensityData, semanticData } = this.currentState;
  const storedInfoText = infoDensityData?.request?.text ?? '';
  const storedSemanticText = semanticData?.text ?? '';
  const semanticQueryOn = getSemanticAnalysisEnabled();
  const pendingIsInfoDensity = mode === 'infoDensity';

  // With Semantic Query checked, the info-density charts never get a placeholder.
  const showInfoDensity = pendingIsInfoDensity
    ? !semanticQueryOn
    : !semanticQueryOn && !!(infoDensityData && storedInfoText === text);
  const showSemantic = pendingIsInfoDensity
    ? semanticQueryOn && hasSemanticData(semanticData) && storedSemanticText === text
    : true;
  // The raw-score histogram belongs to whole-text mode only.
  const showRawScoreHistogram = showSemantic && !willBeChunked;

  setItemVisible('token_histogram_item', showInfoDensity);
  setItemVisible('surprisal_progress_item', showInfoDensity);
  setItemVisible('raw_score_normed_histogram_item', showRawScoreHistogram);
  // The per-chunk match progress chart belongs to chunked mode only.
  setItemVisible('match_score_progress_item', showSemantic && !!willBeChunked);

  // While pending, render empty charts (axes with no bars/points) instead of blank space.
  if (showInfoDensity && mode === 'infoDensity') {
    const histogramConfig = getTokenSurprisalHistogramConfig();
    this.deps.stats_frac.update({ ...histogramConfig, data: [], colorScale: () => 'transparent' });
    const histogramTitle = document.getElementById('token_histogram_title');
    if (histogramTitle) histogramTitle.textContent = histogramConfig.label;

    const surprisalConfig = getSurprisalProgressConfig();
    this.deps.stats_surprisal_progress.update({ ...surprisalConfig, data: [] });
    const surprisalTitle = document.getElementById('surprisal_progress_title');
    if (surprisalTitle && surprisalConfig.label) surprisalTitle.textContent = surprisalConfig.label;
  }

  if (showRawScoreHistogram && mode === 'semantic') {
    const rawScoreConfig = getRawScoreNormedHistogramConfig();
    this.deps.stats_raw_score_normed.update({ ...rawScoreConfig, data: [], colorScale: () => 'transparent' });
    const rawScoreTitle = document.getElementById('raw_score_normed_histogram_title');
    if (rawScoreTitle) rawScoreTitle.textContent = rawScoreConfig.label;
  }

  if (showSemantic && mode === 'semantic' && willBeChunked) {
    const matchConfig = getMatchScoreProgressConfig();
    const textLength = text.length;
    this.deps.stats_match_score_progress.update({
      ...matchConfig,
      data: [],
      showMovingAverage: false,
      chunkLines: [],
      thresholdLine: getSemanticMatchThreshold(),
      extent: { x: textLength > 0 ? [0, textLength] : undefined, y: [0, 1] }
    });
    const matchTitle = document.getElementById('match_score_progress_title');
    if (matchTitle && matchConfig.label) matchTitle.textContent = matchConfig.label;
  }
}
/**
* Re-renders the statistic charts and (optionally) the text box (internal method).
* Semantic Query checked: only the semantic charts are shown; unchecked: the token
* histogram and surprisal progress chart are shown when info-density data exists.
* @param skipLmfUpdate When true, lmf.update is skipped (on theme change the redraw is
* done once by rerenderOnThemeChange, which avoids a race).
*/
private updateVisualizationInternal(skipLmfUpdate = false): void {
const hasInfoDensity = !!this.currentState.infoDensityData;
const displayResult = this.computeDisplayResult();
const sem = this.currentState.semanticData;
const showInfoDensityCharts = hasInfoDensity && !getSemanticAnalysisEnabled();
const tokenHistogramItem = document.getElementById('token_histogram_item');
const surprisalProgressItem = document.getElementById('surprisal_progress_item');
const rawScoreNormedItem = document.getElementById('raw_score_normed_histogram_item');
// --- Info-density charts: token surprisal histogram + surprisal progress ---
if (showInfoDensityCharts) {
const currentSurprisals = this.currentState.currentSurprisals;
const currentTokenAvg = this.currentState.currentTokenAvg;
const currentTokenP90 = this.currentState.currentTokenP90;
if (currentSurprisals) {
const tokenHistogramConfig = getTokenSurprisalHistogramConfig();
this.deps.stats_frac.update({
...tokenHistogramConfig,
data: currentSurprisals,
colorScale: this.deps.surprisalColorScale,
averageValue: currentTokenAvg ?? undefined,
p90Value: currentTokenP90 ?? undefined,
// NOTE(review): p90Label is fed from averageLabel — confirm this is intentional and not a copy-paste slip.
p90Label: tokenHistogramConfig.averageLabel,
});
const titleElement = document.getElementById('token_histogram_title');
if (titleElement) titleElement.textContent = tokenHistogramConfig.label;
}
if (currentSurprisals && currentSurprisals.length > 0) {
const surprisalProgressConfig = getSurprisalProgressConfig();
this.deps.stats_surprisal_progress.update({
...surprisalProgressConfig,
data: currentSurprisals,
});
const surprisalProgressTitleElement = document.getElementById('surprisal_progress_title');
if (surprisalProgressTitleElement && surprisalProgressConfig.label) {
surprisalProgressTitleElement.textContent = surprisalProgressConfig.label;
}
}
if (tokenHistogramItem) tokenHistogramItem.style.display = '';
if (surprisalProgressItem) surprisalProgressItem.style.display = '';
} else {
if (tokenHistogramItem) tokenHistogramItem.style.display = 'none';
if (surprisalProgressItem) surprisalProgressItem.style.display = 'none';
}
// --- Semantic part: keep only finite numeric scores for the histogram/fit ---
const rawScoresNormed = displayResult?.rawScoresNormed;
const validRawScoresNormed = rawScoresNormed?.filter((s) => typeof s === 'number' && isFinite(s));
const signalFitResult = sem?.signalFitResult ?? null;
const chunkInfos = sem?.chunkInfos;
const isChunkMode = (chunkInfos?.length ?? 0) > 0;
// Chunks that carry their own fitted threshold, indexed by chunkIndex.
const chunksWithThreshold = chunkInfos?.filter((c) => c.thresholdResult != null) ?? [];
const usePerChunkThreshold = chunksWithThreshold.length > 0;
const thresholdByChunk = usePerChunkThreshold
? new Map(chunksWithThreshold.map((c) => [c.chunkIndex, c.thresholdResult!]))
: null;
if (validRawScoresNormed && validRawScoresNormed.length > 0) {
const rawScoreNormedConfig = getRawScoreNormedHistogramConfig();
const colorScale = (v: number) => getSemanticSimilarityColor(v, HISTOGRAM_MIN_ALPHA);
// With per-chunk thresholds, the first chunk's fit is the one used for the histogram.
const thresholdForHistogram = usePerChunkThreshold && chunksWithThreshold.length > 0
? chunksWithThreshold[0]!.thresholdResult!
: signalFitResult;
// confidence > 0: findSignalThreshold succeeded (≥ MIN_ACCEPTABLE); confidence === 0 is the P90 fallback, in which case the truncated log-normal expected curve is not drawn
const fitResult = validRawScoresNormed.length >= 2 && thresholdForHistogram != null && thresholdForHistogram.confidence > 0
? {
mu: thresholdForHistogram.mu,
sigma: thresholdForHistogram.sigma,
expectedCounts: computeExpectedCounts(
thresholdForHistogram.mu,
thresholdForHistogram.sigma,
rawScoreNormedConfig.extent as [number, number],
rawScoreNormedConfig.no_bins,
validRawScoresNormed.length
),
}
: null;
// NOTE(review): signalProbs is indexed against the filtered array, while pPwValues/pwScores below are
// indexed against the full array — indices diverge if any score was filtered out; confirm consumers.
const signalProbs = thresholdForHistogram != null
? signalProbFromBins(validRawScoresNormed, thresholdForHistogram.bins)
: [];
/**
* P_pw: simplified mapping of the posterior signal probability — 0 when x <= threshold, 1 when x > threshold
* pw_score = score × P_pw × matchDegree
* Chunked mode: each token uses the threshold and chunkMatchDegree of the chunk it belongs to
* Non-chunked mode: the whole-text match degree full_match_degree is used
*/
const rawScoresNormedFull = displayResult!.rawScoresNormed ?? [];
const bpeBpeMergedTokens = displayResult?.bpeBpeMergedTokens ?? [];
// Finds the chunk whose [startOffset, endOffset) contains the token's start offset (chunked mode only).
const getChunkForToken = (tokenIndex: number) => {
const token = bpeBpeMergedTokens[tokenIndex];
if (!token || !isChunkMode) return null;
const offset = token.offset[0];
return chunkInfos!.find((c) => c.startOffset <= offset && offset < c.endOffset) ?? null;
};
const getThresholdForToken = (i: number): number => {
const chunk = getChunkForToken(i);
if (chunk && thresholdByChunk != null) {
// NOTE(review): this local `tr` shadows the imported i18n `tr` inside this scope.
const tr = thresholdByChunk.get(chunk.chunkIndex);
if (tr) return tr.threshold;
}
return signalFitResult?.threshold ?? 0;
};
const getMatchDegreeForToken = (i: number): number => {
const chunk = getChunkForToken(i);
if (chunk) return chunk.chunkMatchDegree;
return sem?.full_match_degree ?? 1;
};
const hasThreshold = signalFitResult != null || thresholdByChunk != null;
const pPwValues = hasThreshold
? rawScoresNormedFull.map((s, i) => {
const threshold = getThresholdForToken(i);
const isAboveThreshold = typeof s === 'number' && isFinite(s) && s > threshold;
return isAboveThreshold ? 1 : 0;
})
: [];
const pwScores = hasThreshold
? rawScoresNormedFull.map((s, i) => {
const threshold = getThresholdForToken(i);
const isAboveThreshold = typeof s === 'number' && isFinite(s) && s > threshold;
const baseScore = isAboveThreshold ? s : 0;
const matchDegree = getMatchDegreeForToken(i);
return baseScore * matchDegree;
})
: [];
// The color source is read from the select element; 'pw_score' is the default when it is missing.
const colorSourceEl = document.getElementById('semantic_color_source_select') as HTMLSelectElement | null;
const colorSource = colorSourceEl?.value ?? 'pw_score';
const scoresForColor = colorSource === 'signal_probability' ? pPwValues
: colorSource === 'pw_score' ? pwScores
: (displayResult!.rawScoresNormed ?? []);
// In joint mode the tooltip needs pPwValues/pwScores to show semantic match info, so they are passed even when fitResult is null
const resultWithExt = hasThreshold
? { ...displayResult, signalProbs, pPwValues, pwScores }
: displayResult!;
// NOTE(review): the top-level signalProbs/pPwValues/pwScores are only handed to the highlight controller
// when fitResult != null; updateSemanticColorSource reads them from the current data — confirm the
// fallback case (hasThreshold but no fitResult) is covered.
if (fitResult != null) {
this.deps.highlightController.updateCurrentData({ result: resultWithExt, signalProbs, pPwValues, pwScores });
if (!skipLmfUpdate) {
this.deps.lmf.update({ ...resultWithExt, pwScores, colorScores: scoresForColor } as FrontendAnalyzeResult & { pPwValues?: number[]; pwScores?: number[]; colorScores?: number[] });
}
} else {
this.deps.highlightController.updateCurrentData({ result: resultWithExt });
if (!skipLmfUpdate) {
this.deps.lmf.update({ ...resultWithExt, colorScores: scoresForColor } as FrontendAnalyzeResult & { pPwValues?: number[]; pwScores?: number[]; colorScores?: number[] });
}
}
/** The histogram is shown in whole-text mode only; in chunk mode it is neither computed nor shown */
if (!isChunkMode) {
// Probability curve: (score, signal probability) pairs sorted by score.
const probCurveData = signalProbs.length > 0
? (() => {
const pairs = validRawScoresNormed.map((x, i) => ({ x, y: signalProbs[i]! })).sort((a, b) => a.x - b.x);
return { x: pairs.map(p => p.x), y: pairs.map(p => p.y) };
})()
: undefined;
// Percentage (rounded) of valid scores strictly below the threshold.
const signalThresholdPercentile = thresholdForHistogram != null && validRawScoresNormed.length > 0
? Math.round((validRawScoresNormed.filter((s) => s < thresholdForHistogram.threshold).length / validRawScoresNormed.length) * 100)
: undefined;
this.deps.stats_raw_score_normed.update({
...rawScoreNormedConfig,
data: validRawScoresNormed,
colorScale,
fitExpectedCounts: fitResult?.expectedCounts,
showProbCurve: true,
probCurveData: probCurveData?.x.length ? probCurveData : undefined,
signalThreshold: thresholdForHistogram?.threshold ?? undefined,
signalThresholdPercentile: signalThresholdPercentile ?? undefined,
});
const titleEl = document.getElementById('raw_score_normed_histogram_title');
if (titleEl) titleEl.textContent = rawScoreNormedConfig.label;
if (rawScoreNormedItem) rawScoreNormedItem.style.display = '';
} else {
if (rawScoreNormedItem) rawScoreNormedItem.style.display = 'none';
}
/** Semantic match per chunk progress: chunk mode only; draws only the chunk match lines, no points */
if (isChunkMode) {
const matchScoreProgressConfig = getMatchScoreProgressConfig();
const docLen = (displayResult?.originalText ?? '').length;
const chunkLines = chunkInfos?.length
? chunkInfos.map((c) => ({ x0: c.startOffset, x1: c.endOffset, y: c.chunkMatchDegree }))
: [];
const thresholdLine = getSemanticMatchThreshold();
this.deps.stats_match_score_progress.update({
...matchScoreProgressConfig,
data: [],
showMovingAverage: false,
chunkLines,
thresholdLine,
chunkInteraction: true,
extent: { x: docLen > 0 ? [0, docLen] : undefined, y: [0, 1] }
});
const matchScoreTitleEl = document.getElementById('match_score_progress_title');
if (matchScoreTitleEl && matchScoreProgressConfig.label) matchScoreTitleEl.textContent = matchScoreProgressConfig.label;
const matchScoreProgressItem = document.getElementById('match_score_progress_item');
if (matchScoreProgressItem) matchScoreProgressItem.style.display = '';
} else {
const matchScoreProgressItem = document.getElementById('match_score_progress_item');
if (matchScoreProgressItem) matchScoreProgressItem.style.display = 'none';
}
} else {
// No valid semantic scores: publish/render the plain display result (if any) and show placeholders.
const needLmfUpdate = !!displayResult && (hasInfoDensity || !!validRawScoresNormed?.length || hasSemanticData(sem));
if (displayResult) this.deps.highlightController.updateCurrentData({ result: displayResult });
if (needLmfUpdate && !skipLmfUpdate) {
this.deps.lmf.update(displayResult!);
}
/** The histogram is not shown in chunk mode; in whole-text mode without data an empty placeholder is shown */
if (getSemanticAnalysisEnabled() && !isChunkMode) {
const rawScoreNormedConfig = getRawScoreNormedHistogramConfig();
this.deps.stats_raw_score_normed.update({ ...rawScoreNormedConfig, data: [], colorScale: () => 'transparent' });
const titleEl = document.getElementById('raw_score_normed_histogram_title');
if (titleEl) titleEl.textContent = rawScoreNormedConfig.label;
if (rawScoreNormedItem) rawScoreNormedItem.style.display = '';
} else {
if (rawScoreNormedItem) rawScoreNormedItem.style.display = 'none';
}
/** Semantic match per chunk progress: placeholder without points (chunk mode only) */
if (getSemanticAnalysisEnabled() && isChunkMode) {
const matchScoreProgressConfig = getMatchScoreProgressConfig();
const docLen = (displayResult?.originalText ?? '').length;
const chunkLines = chunkInfos?.length
? chunkInfos.map((c) => ({ x0: c.startOffset, x1: c.endOffset, y: c.chunkMatchDegree }))
: [];
const thresholdLine = getSemanticMatchThreshold();
this.deps.stats_match_score_progress.update({
...matchScoreProgressConfig,
data: [],
showMovingAverage: false,
chunkLines,
thresholdLine,
chunkInteraction: true,
extent: { x: docLen > 0 ? [0, docLen] : undefined, y: [0, 1] }
});
const matchScoreTitleEl = document.getElementById('match_score_progress_title');
if (matchScoreTitleEl && matchScoreProgressConfig.label) matchScoreTitleEl.textContent = matchScoreProgressConfig.label;
const matchScoreProgressItem = document.getElementById('match_score_progress_item');
if (matchScoreProgressItem) matchScoreProgressItem.style.display = '';
} else {
const matchScoreProgressItem = document.getElementById('match_score_progress_item');
if (matchScoreProgressItem) matchScoreProgressItem.style.display = 'none';
}
}
}
/** Re-renders the histograms (entry point for external callers); the text box is updated as well. */
public rerenderHistograms(): void {
  const skipLmfUpdate = false;
  this.updateVisualizationInternal(skipLmfUpdate);
}
/**
 * Re-colors the text from the currently selected semantic color source
 * (called when the color-source select changes; nothing is re-fitted).
 *
 * pPwValues / pwScores are taken from the highlight controller's current data
 * and, when absent there, from the stored result itself. The fallback matters
 * because updateVisualizationInternal only sets the top-level copies when a fit
 * curve exists, while the result still carries them whenever a threshold exists;
 * without it, switching the color source would color from an empty array and
 * overwrite the result's own values with undefined.
 *
 * Does nothing when there is no current result with normalized raw scores.
 */
public updateSemanticColorSource(): void {
  type SemanticColorResult = FrontendAnalyzeResult & {
    rawScoresNormed?: number[];
    pPwValues?: number[];
    pwScores?: number[];
    colorScores?: number[];
  };

  const current = this.deps.highlightController.getCurrentData();
  const result = current?.result as SemanticColorResult | undefined;
  if (!current || !result?.rawScoresNormed?.length) return;

  // Prefer the top-level copies; fall back to the ones embedded in the result.
  const pPwValues: number[] | undefined = current.pPwValues ?? result.pPwValues;
  const pwScores: number[] | undefined = current.pwScores ?? result.pwScores;

  const sourceSelect = document.getElementById('semantic_color_source_select') as HTMLSelectElement | null;
  const colorSource = sourceSelect?.value ?? 'pw_score';
  const colorScores = colorSource === 'signal_probability' ? (pPwValues ?? [])
    : colorSource === 'pw_score' ? (pwScores ?? [])
    : result.rawScoresNormed;

  this.deps.lmf.update({ ...result, pPwValues, pwScores, colorScores } as SemanticColorResult);
}
/**
 * Called on theme change: redraws the histograms and the text once the new
 * styles are in effect (rgba colors let the background show through, so the
 * redraw is deferred by two animation frames).
 */
public rerenderOnThemeChange(): void {
  const redraw = (): void => {
    // The text box is redrawn separately below, so skip lmf.update here.
    this.updateVisualizationInternal(true);
    this.deps.lmf.reRenderCurrent();
  };
  requestAnimationFrame(() => {
    requestAnimationFrame(redraw);
  });
}
/**
 * Clears all independently stored data when the text is edited, so the
 * display cannot disagree with the input. Also hides the result area.
 */
public clearDataOnTextChange(): void {
  const clearedState: CurrentDataState = {
    infoDensityData: null,
    semanticData: null,
    rawApiResponse: null,
    currentSurprisals: null,
    currentTokenAvg: null,
    currentTokenP90: null,
    currentTotalSurprisal: null,
  };
  // Reset in place so the state object itself is kept.
  Object.assign(this.currentState, clearedState);

  this.deps.highlightController.updateCurrentData(null);
  d3.select('#all_result').style('opacity', 0);
  this.updateSemanticDebugInfo();
}
/**
 * Clears everything tied to semantic analysis (semanticData, the two semantic
 * chart items, the debug info); used for initialization when the mode is opened.
 */
public clearSemanticState(): void {
  this.currentState.semanticData = null;

  const semanticChartItemIds = ['raw_score_normed_histogram_item', 'match_score_progress_item'];
  for (const itemId of semanticChartItemIds) {
    const item = document.getElementById(itemId);
    if (item) item.style.display = 'none';
  }

  this.updateSemanticDebugInfo();
}
/**
 * Applies a change of the digit-merge preference: info-density data and
 * whole-text semantic data are refreshed from their recomputable sources;
 * chunked semantic data has no stored copy, so its display is left unchanged.
 */
public applyDigitsMergeSetting(): void {
  const digitMerge = getDigitsMergeEnabled();
  const { infoDensityData, semanticData } = this.currentState;
  const infoResult = infoDensityData?.result as FrontendAnalyzeResult | undefined;
  const infoText = infoDensityData?.request?.text ?? infoResult?.originalText ?? '';

  // Info density: re-merge the original tokens under the new setting.
  if (infoResult?.originalTokens?.length && infoText) {
    const remerged = mergeTokensForRendering(infoResult.originalTokens, infoText, { digitMerge });
    infoResult.bpeBpeMergedTokens = remerged;
    infoResult.bpe_strings = remerged;
  }

  // Whole-text semantic data: re-merge the API copy, normalize and re-fit.
  if (
    semanticData &&
    !semanticData.chunkInfos?.length &&
    semanticData.semanticTokenAttentionFromApi?.length &&
    semanticData.text
  ) {
    const remergedAttention = mergeAttentionTokensFullyForRendering(
      semanticData.semanticTokenAttentionFromApi,
      semanticData.text,
      { digitMerge }
    );
    const normalized = normalizeTokenScores(remergedAttention);
    const refit = findSignalThresholdWithLog(normalized);
    semanticData.token_attention = normalized;
    semanticData.signalFitResult = refit ?? undefined;
  }

  // Surprisal statistics follow the (possibly re-merged) tokens.
  if (infoResult?.bpeBpeMergedTokens?.length && infoText) {
    const surprisals = calculateMergedTokenSurprisals(infoResult.bpeBpeMergedTokens);
    this.currentState.currentSurprisals = surprisals;
    this.currentState.currentTokenAvg = computeAverage(surprisals);
    this.currentState.currentTokenP90 = computeP90(surprisals);
  }

  let displayResult: ReturnType<VisualizationUpdater['computeDisplayResult']>;
  try {
    displayResult = this.computeDisplayResult();
  } catch (failure) {
    if (!(failure instanceof TokenBoundaryInconsistentError)) {
      console.error(failure);
      return;
    }
    // The failed attempt dropped the semantic data, so compute once more.
    displayResult = this.computeDisplayResult();
  }

  const currentData = displayResult ? { result: displayResult } : null;
  this.deps.highlightController.updateCurrentData(currentData);
  this.deps.lmf.clearHighlight();
  if (displayResult) this.deps.lmf.update(displayResult);
  this.updateVisualizationInternal();
  this.deps.appStateManager.updateButtonStates();
}
/**
 * Synchronizes the UI with the semantic-analysis setting (query section,
 * text rendering mode, charts, buttons). The UI is driven by the setting
 * alone, not by whether data happens to be present.
 */
public syncSemanticUiFromConfig(): void {
  const semanticEnabled = getSemanticAnalysisEnabled();

  const section = document.getElementById('semantic_analysis_section');
  if (section) section.style.display = semanticEnabled ? '' : 'none';
  this.deps.lmf.updateOptions({ semanticAnalysisMode: semanticEnabled }, false);

  if (!semanticEnabled) {
    // Turning the mode off drops the semantic data, hides its chart items and
    // refreshes the debug info; the charts are refreshed further below.
    this.clearSemanticState();

    const displayResult = this.computeDisplayResult();
    const currentData = displayResult ? { result: displayResult } : null;
    this.deps.highlightController.updateCurrentData(currentData);
    if (!displayResult) {
      d3.select('#all_result').style('opacity', 0);
      this.deps.appStateManager.updateState({ hasValidData: false });
    }
  }

  // Refresh chart visibility right after the setting is toggled either way.
  this.updateVisualizationInternal(false);
  // The setting affects the Upload/Save readiness condition, so button states are always updated.
  this.deps.appStateManager.updateButtonStates();
}
/**
 * Core update: validates an info-density analysis response, stores it,
 * renders the text box, text metrics and charts, and marks the data as valid.
 *
 * Validation failures and unexpected errors are reported through an alert
 * dialog; in both cases hasValidData is set to false and the method returns
 * without marking the data valid.
 *
 * @param data Analysis response; its `result` is replaced in place by the enhanced result.
 * @param disableAnimation When false, render animation is switched on for this update and off again after a delay.
 * @param options Options object (`enableSave`).
 */
updateFromRequest(
  data: AnalyzeResponse,
  disableAnimation: boolean = false,
  options: { enableSave?: boolean } = {}
): void {
  // Not read anywhere in this method.
  const { enableSave = true } = options;

  const rejectResponse = (reason: string): void => {
    console.error(reason);
    showAlertDialog(tr('Error'), reason);
    this.deps.appStateManager.updateState({ hasValidData: false });
    this.syncSemanticUiFromConfig();
  };

  try {
    // Animation is only switched on for Analyze-triggered updates.
    if (!disableAnimation) {
      this.deps.lmf.updateOptions({ enableRenderAnimation: true }, false);
    }
    // The semantic analysis mode is dictated by the setting.
    this.deps.lmf.updateOptions({
      semanticAnalysisMode: getSemanticAnalysisEnabled(),
    }, false);

    d3.select('#all_result').style('opacity', 1).style('display', null);
    this.deps.appStateManager.setIsAnalyzing(false);
    this.deps.appStateManager.setGlobalLoading(false);
    // lmf.update hides the loading state too; hiding it early improves the experience.
    this.deps.lmf.hideLoading();

    // --- Validate the response structure ---
    if (!data || !data.result) {
      console.error('Invalid data structure:', data);
      throw new Error('Invalid API response structure');
    }
    const result = data.result;
    if (!Array.isArray(result.bpe_strings) || result.bpe_strings.length === 0) {
      rejectResponse(tr('Returned JSON missing valid bpe_strings array, processing cancelled.'));
      return;
    }

    // Validators run in order; the first one reporting an error wins.
    const tokenError =
      validateTokenPredictions(result.bpe_strings as Array<{ pred_topk?: [string, number][] }>) ||
      validateTokenProbabilities(result.bpe_strings as Array<{ real_topk?: [number, number] }>) ||
      validateTokenConsistency(result.bpe_strings, data.request.text, { allowOverlap: true });
    if (tokenError) {
      rejectResponse(tokenError);
      return;
    }
    const safeText = data.request.text;

    // --- Build the enhanced result (snapshot first, before `data` is mutated) ---
    const rawSnapshot = createRawSnapshot(data);
    const originalTokens = result.bpe_strings.map((token) => cloneFrontendToken(token as FrontendToken));
    const bpeBpeMergedTokens = mergeTokensForRendering(originalTokens, safeText, {
      digitMerge: getDigitsMergeEnabled(),
    });
    const mergedTokenError = validateTokenConsistency(bpeBpeMergedTokens, safeText);
    if (mergedTokenError) {
      rejectResponse(mergedTokenError);
      return;
    }
    const enhancedResult: FrontendAnalyzeResult = {
      ...result,
      originalTokens,
      bpeBpeMergedTokens,
      bpe_strings: bpeBpeMergedTokens,
      originalText: safeText,
    };
    data.result = enhancedResult;

    // Store the info-density data independently, then refresh the semantic debug info.
    this.currentState.infoDensityData = data;
    this.currentState.rawApiResponse = rawSnapshot;
    this.updateSemanticDebugInfo();

    let displayResult: ReturnType<VisualizationUpdater['computeDisplayResult']>;
    try {
      displayResult = this.computeDisplayResult();
    } catch (boundaryFailure) {
      if (!(boundaryFailure instanceof TokenBoundaryInconsistentError)) throw boundaryFailure;
      // The failed attempt dropped the semantic data, so compute once more.
      displayResult = this.computeDisplayResult();
    }
    const currentData = displayResult ? { result: displayResult } : null;
    this.deps.highlightController.updateCurrentData(currentData);
    this.deps.lmf.clearHighlight();
    if (displayResult) this.deps.lmf.update(displayResult);

    // Histogram / progress use the merged tokens; text metrics use textStats.
    const textStats = calculateTextStats(enhancedResult, safeText);
    const mergedSurprisals = calculateMergedTokenSurprisals(enhancedResult.bpeBpeMergedTokens);
    this.currentState.currentSurprisals = mergedSurprisals;
    this.currentState.currentTokenAvg = computeAverage(mergedSurprisals);
    this.currentState.currentTokenP90 = computeP90(mergedSurprisals);
    this.currentState.currentTotalSurprisal = textStats.totalSurprisal;

    // Text metrics and model display use the model reported in the result.
    this.updateTextMetrics(textStats, data.result.model);

    // Switch the animation off again afterwards so later re-renders (e.g. drag) do not replay it.
    if (!disableAnimation) {
      // Estimate: 100 ms initial delay + 100 ms per batch of 50 tokens; wait at least 2 s.
      const batchCount = Math.ceil(enhancedResult.bpe_strings.length / 50);
      const estimatedAnimationMs = 100 + batchCount * 100;
      const disableAfterMs = Math.max(2000, estimatedAnimationMs + 500);
      setTimeout(() => {
        this.deps.lmf.updateOptions({ enableRenderAnimation: false }, false);
      }, disableAfterMs);
    }
  } catch (error) {
    console.error('Error updating visualization:', error);
    this.deps.appStateManager.setIsAnalyzing(false);
    this.deps.appStateManager.setGlobalLoading(false);
    this.deps.appStateManager.updateState({ hasValidData: false });
    this.syncSemanticUiFromConfig();
    showAlertDialog(tr('Error'), 'Error rendering visualization. Check console for details.');
    return;
  }

  // Drop any previous selection, then redraw the charts.
  this.clearHighlights();
  this.updateVisualizationInternal();
  // Data was processed successfully: mark it valid and sync the semantic UI.
  this.deps.appStateManager.updateState({ hasValidData: true });
  this.syncSemanticUiFromConfig();
}
/**
 * Handles a semantic-analysis response: stores it in `currentState.semanticData`
 * (separately from the info-density data), then recomputes the display result and re-renders.
 *
 * @param res Semantic response; it is treated as chunked when `chunkInfos` is non-empty.
 * @param text Text the response belongs to. A missing/empty text makes the call fail,
 *   unless the response carries no semantic data at all (that case is handled first).
 * @param signalFitResult Optional precomputed signal fit; preferred over the one computed here.
 * @returns true on success; false when validation fails or computing the display result throws —
 *   the caller should stop any follow-up analysis.
 */
public handleSemanticResponse(
res: {
model?: string;
token_attention?: Array<{
offset: [number, number];
raw: string;
score: number;
rawScore?: number;
}>;
debug_info?: { abbrev?: string; topk_tokens?: string[]; topk_probs?: number[] };
chunkInfos?: Array<{ startOffset: number; endOffset: number; chunkIndex: number; chunkMatchDegree: number; thresholdResult?: signalFitResult }>;
full_match_degree?: number;
},
text?: string,
signalFitResult?: signalFitResult | null
): boolean {
const chunkInfos = res?.chunkInfos;
const tokenAttention = res?.token_attention;
const currentText = text ?? '';
// No semantic data in the response: clear semantic state, redraw histograms, hide loading, report success.
if (!hasSemanticData(res)) {
this.clearSemanticState();
this.rerenderHistograms();
this.deps.lmf.hideLoading();
return true;
}
// Semantic data without any text cannot be processed.
if (!currentText) return false;
// Whole-text mode (no chunkInfos): token boundaries must be validated against the text.
if (tokenAttention?.length && !chunkInfos?.length) {
const err = validateTokenConsistency(tokenAttention!, currentText, { allowOverlap: true });
if (err) {
showAlertDialog(tr('Error'), err);
return false;
}
}
/** Chunked mode: the assembling side has already applied overlap + digit merge + normalization per chunk, so no whole-text re-merge / re-normalization is done here (avoids merging digits across chunks and scaling across chunks). */
const isChunkedSemantic = Boolean(chunkInfos?.length);
// Whole-text mode only: keep the API tokens as received (shallow copies with fresh offset tuples), before merging.
const semanticTokenAttentionFromApi =
!isChunkedSemantic && tokenAttention && tokenAttention.length > 0
? tokenAttention.map((t) => ({
...t,
offset: [t.offset[0], t.offset[1]] as [number, number],
}))
: undefined;
// Chunked: use the tokens unchanged. Whole-text: merge for rendering (digit merge follows the user setting).
const mergedAttention = isChunkedSemantic
? (tokenAttention ?? [])
: mergeAttentionTokensFullyForRendering(tokenAttention ?? [], currentText, {
digitMerge: getDigitsMergeEnabled(),
});
const normalizedAttention = isChunkedSemantic ? mergedAttention : normalizeTokenScores(mergedAttention);
// A global signal threshold is only computed in whole-text mode.
const computedSignalFit = isChunkedSemantic
? undefined
: findSignalThresholdWithLog(normalizedAttention);
// Chunked mode: compute a threshold per chunk from the tokens overlapping the chunk's
// [startOffset, endOffset) range; a non-null result replaces any thresholdResult already on the chunk info.
const chunkInfosResolved =
chunkInfos?.length
? chunkInfos.map((info) => {
const slice = normalizedAttention.filter(
(t) => t.offset[0] < info.endOffset && t.offset[1] > info.startOffset
);
const thresholdResult =
slice.length > 0 ? findSignalThresholdWithLog(slice) : null;
return { ...info, ...(thresholdResult ? { thresholdResult } : {}) };
})
: chunkInfos;
this.currentState.semanticData = {
text: currentText,
model: res.model,
semanticTokenAttentionFromApi,
token_attention: normalizedAttention,
signalFitResult: signalFitResult ?? computedSignalFit ?? undefined,
chunkInfos: chunkInfosResolved,
full_match_degree: res.full_match_degree,
};
let displayResult: ReturnType<VisualizationUpdater['computeDisplayResult']>;
try {
displayResult = this.computeDisplayResult();
} catch (e) {
// Roll back the semantic data stored above when the display result cannot be computed.
this.currentState.semanticData = null;
if (e instanceof TokenBoundaryInconsistentError) {
// Boundary mismatch: no alert dialog is raised in this branch; hide loading and redraw histograms.
this.deps.lmf.hideLoading();
this.rerenderHistograms();
return false;
}
showAlertDialog(tr('Error'), e instanceof Error ? e.message : String(e));
return false;
}
// Make the result container visible (opacity 1, inline `display` style removed).
d3.select('#all_result').style('opacity', 1).style('display', null);
this.deps.lmf.hideLoading();
this.deps.highlightController.updateCurrentData({ result: displayResult });
this.deps.lmf.clearHighlight();
this.clearHighlights();
this.updateVisualizationInternal();
this.updateSemanticDebugInfo(res.debug_info);
return true;
}
/**
 * Refreshes the debug info shown below the text rendering area (abbrev + top-k list).
 *
 * @param di Debug info from the semantic response; when omitted, `debugInfo: undefined`
 *   is forwarded to the panel helper.
 */
private updateSemanticDebugInfo(di?: { abbrev?: string; topk_tokens?: string[]; topk_probs?: number[] }): void {
  const panelOptions = { debugInfo: di };
  applySemanticDebugInfoPanel('results', 'semantic_debug_info', panelOptions);
}
/**
 * Builds an analyze-result-shaped object from semantic attention tokens only.
 *
 * Each attention token becomes a token row carrying its `offset`, `raw` and an empty `pred_topk`;
 * `bpe_strings`, `originalTokens` and `bpeBpeMergedTokens` each receive their own shallow copies
 * of those rows.
 *
 * @param res Source of the `model` name.
 * @param tokenAttention Attention tokens; their `score` values become `rawScoresNormed`.
 * @param text Original text; an empty text yields `null`.
 * @param chunkInfos Passed through unchanged.
 * @returns The assembled result, or `null` when `text` is empty.
 */
private buildSemanticOnlyResult(
  res: { model?: string },
  tokenAttention: Array<{
    offset: [number, number];
    raw: string;
    score: number;
    rawScore?: number;
  }>,
  text: string,
  chunkInfos?: SemanticData['chunkInfos']
): (FrontendAnalyzeResult & {
  rawScoresNormed: number[];
  attentionRawScores: number[];
  chunkInfos?: SemanticData['chunkInfos'];
}) | null {
  const safeText = text ?? '';
  if (!safeText) {
    return null;
  }

  // `semanticData.token_attention` already went through overlap + digit merge + normalization
  // in handleSemanticResponse, so the tokens are used as-is here.
  const baseTokens = tokenAttention.map(({ offset, raw }) => ({
    offset,
    raw,
    pred_topk: []
  })) as FrontendToken[];
  const rawScoresNormed = tokenAttention.map((entry) => entry.score);
  const attentionRawScores = tokenAttention.map((entry) => getAttentionRawScore(entry));

  // Every token list in the result gets independent (shallow) row copies.
  const copyRows = (): FrontendToken[] => baseTokens.map((row) => ({ ...row }));

  return {
    model: res.model,
    bpe_strings: copyRows(),
    originalTokens: copyRows(),
    bpeBpeMergedTokens: copyRows(),
    originalText: safeText,
    rawScoresNormed,
    attentionRawScores,
    chunkInfos
  };
}
/**
 * Checks whether the boundaries of the semantic `token_attention` spans agree with the
 * info-density tokens. Sparse coverage is allowed: the semantic tokens need not cover the whole text.
 *
 * The accepted boundaries are offset 0 plus the end offset of every info token. Attention tokens
 * with out-of-range or empty offsets are skipped here (left to validateTokenConsistency), and
 * tokens ending past the last info token are outside the shared range and ignored.
 *
 * @param tokenAttention Semantic tokens to check.
 * @param infoMerged Info-density tokens providing the reference boundaries.
 * @param text Full text, used for samples and surrounding context.
 * @returns `null` when consistent; otherwise a description of the first misaligned token
 *   (samples of both tokenizations, the following tokens, and text before/at/after the span).
 */
private checkSemanticAlignsWithInfo(
  tokenAttention: Array<{ offset: [number, number]; raw?: string }>,
  infoMerged: Array<{ offset: [number, number] }>,
  text: string
): { firstBadIdx: number; aSample: string; bSample: string; aNext: string; bNext: string; textBefore: string; textAt: string; textAfter: string } | null {
  const boundaries = new Set<number>([0]);
  for (const t of infoMerged) boundaries.add(t.offset[1]);
  const infoEnd = infoMerged.length > 0 ? infoMerged[infoMerged.length - 1]!.offset[1] : 0;
  const totalChars = text.length;
  const ctx = 30;
  const maxSampleChars = 20;
  // JSON-escape control characters / quotes, without the surrounding quotes.
  const esc = (s: string) => JSON.stringify(s).slice(1, -1);
  // Shorten a sample and append an ellipsis only when something was actually cut off
  // (a sample of exactly maxSampleChars characters is shown in full, without "…").
  const clip = (s: string) => (s.length > maxSampleChars ? `${s.slice(0, maxSampleChars)}…` : s);
  const fmt = (t: { offset: [number, number]; raw?: string }, idx: number) => {
    const raw = t.raw ?? text.slice(t.offset[0], t.offset[1]);
    return `第${idx}个token分词 [字符${t.offset[0]}-${t.offset[1]}] "${esc(clip(raw))}"`;
  };
  for (let i = 0; i < tokenAttention.length; i++) {
    const [attnStart, attnEnd] = tokenAttention[i].offset;
    if (attnStart < 0 || attnEnd > totalChars || attnEnd <= attnStart) continue; // handled by validateTokenConsistency
    if (attnEnd > infoEnd) continue; // beyond the range shared by both tokenizations; not checked
    if (boundaries.has(attnStart) && boundaries.has(attnEnd)) continue;

    // First misaligned token found: collect diagnostics and stop.
    const raw = tokenAttention[i].raw ?? '';
    const infoIdx = infoMerged.findIndex((t) => t.offset[0] <= attnStart && attnStart < t.offset[1]);
    const infoAt = infoIdx >= 0 ? infoMerged[infoIdx]! : null;
    const semSample = clip(raw || text.slice(attnStart, attnEnd));
    const infoSample = infoAt ? clip(text.slice(infoAt.offset[0], infoAt.offset[1])) : '';
    const nextSem = tokenAttention[i + 1];
    const nextInfo = infoIdx >= 0 && infoIdx + 1 < infoMerged.length ? infoMerged[infoIdx + 1]! : null;
    return {
      firstBadIdx: i,
      aSample: `第${i}个token分词 [字符${attnStart}-${attnEnd}] "${esc(semSample)}"`,
      bSample: infoAt ? `同一位置token分词 [字符${infoAt.offset[0]}-${infoAt.offset[1]}] "${esc(infoSample)}"` : '无对应',
      aNext: nextSem ? fmt(nextSem, i + 1) : '无',
      bNext: nextInfo ? fmt(nextInfo, infoIdx + 1) : '无',
      textBefore: text.slice(Math.max(0, attnStart - ctx), attnStart),
      textAt: text.slice(attnStart, attnEnd),
      textAfter: text.slice(attnEnd, Math.min(totalChars, attnEnd + ctx)),
    };
  }
  return null;
}
/**
 * Combined mode: builds the union of the merged info-density tokens and the semantic tokens that
 * start at or after the end of the info-density range, so that rects / render range and the
 * truncation boundary stay consistent.
 *
 * @param bpeMerged Merged info-density tokens.
 * @param tokenAttention Semantic attention tokens.
 * @returns `unionTokens` plus `scoresForUnion` and `rawScoresForUnion`, both index-aligned with
 *   `unionTokens` (`undefined` where no score is available).
 */
private mergeBpeWithSemanticBeyond(
  bpeMerged: FrontendToken[],
  tokenAttention: Array<{
    offset: [number, number];
    raw: string;
    score: number;
    rawScore?: number;
  }>
): {
  unionTokens: FrontendToken[];
  scoresForUnion: (number | undefined)[];
  rawScoresForUnion: (number | undefined)[];
} {
  const infoEnd = bpeMerged.length === 0 ? 0 : bpeMerged[bpeMerged.length - 1]!.offset[1];
  const beyond = tokenAttention.filter((t) => t.offset[0] >= infoEnd);

  // Scores for the info-density part are needed whether or not anything lies beyond it.
  const mappedOntoInfo = this.mapTokenAttentionToMerged(bpeMerged, tokenAttention);

  if (beyond.length === 0) {
    return {
      unionTokens: bpeMerged,
      scoresForUnion: mappedOntoInfo.scores,
      rawScoresForUnion: mappedOntoInfo.rawScores,
    };
  }

  /** `beyond` was already overlap + digit merged in handleSemanticResponse; re-normalize within this segment from the raw gradient scores. */
  const beyondRenormed = normalizeTokenScores(
    beyond.map((t) => ({ ...t, score: getAttentionRawScore(t) }))
  );

  const finiteOrUndefined = (value: number | undefined): number | undefined =>
    Number.isFinite(value) ? value : undefined;

  const unionTokens: FrontendToken[] = [...bpeMerged];
  const scoresForUnion: (number | undefined)[] = [...mappedOntoInfo.scores];
  const rawScoresForUnion: (number | undefined)[] = [...mappedOntoInfo.rawScores];

  for (const t of beyondRenormed) {
    unionTokens.push({
      offset: [t.offset[0], t.offset[1]],
      raw: t.raw,
      real_topk: [0, 1] as [number, number],
      pred_topk: [],
    });
    scoresForUnion.push(finiteOrUndefined(t.score));
    rawScoresForUnion.push(finiteOrUndefined(getAttentionRawScore(t)));
  }

  return { unionTokens, scoresForUnion, rawScoresForUnion };
}
/**
 * Maps `token_attention` (offsets are character offsets into the original text) onto the merged
 * tokens with a two-pointer sweep, O(N + M).
 *
 * Each merged token receives the overlap-weighted mean of the scores (and raw scores) of the
 * attention tokens that overlap it; merged tokens without any overlap get `undefined`.
 *
 * Precondition: both arrays are sorted by offset in ascending order.
 */
private mapTokenAttentionToMerged(
  bpeBpeMergedTokens: Array<{ offset: [number, number] }>,
  tokenAttention: Array<{ offset: [number, number]; score: number; rawScore?: number }>
): {
  scores: (number | undefined)[];
  rawScores: (number | undefined)[];
} {
  const mergedCount = bpeBpeMergedTokens.length;
  const zeros = (): number[] => Array.from({ length: mergedCount }, () => 0);
  const scoreSums = zeros();
  const rawScoreSums = zeros();
  const overlapTotals = zeros();

  // Index of the first merged token that does not end before the current attention token starts.
  let firstCandidate = 0;
  for (const attn of tokenAttention) {
    const [attnStart, attnEnd] = attn.offset;
    const rawValue = getAttentionRawScore(attn);

    while (firstCandidate < mergedCount && bpeBpeMergedTokens[firstCandidate].offset[1] <= attnStart) {
      firstCandidate += 1;
    }

    let idx = firstCandidate;
    while (idx < mergedCount && bpeBpeMergedTokens[idx].offset[0] < attnEnd) {
      const [mergedStart, mergedEnd] = bpeBpeMergedTokens[idx].offset;
      // The loop conditions ensure mergedEnd > attnStart and mergedStart < attnEnd (given sorted input).
      const shared = Math.min(mergedEnd, attnEnd) - Math.max(mergedStart, attnStart);
      scoreSums[idx] += attn.score * shared;
      rawScoreSums[idx] += rawValue * shared;
      overlapTotals[idx] += shared;
      idx += 1;
    }
  }

  const toWeightedMeans = (sums: number[]) =>
    sums.map((sum, i) => (overlapTotals[i] > 0 ? sum / overlapTotals[i] : undefined));

  return {
    scores: toWeightedMeans(scoreSums),
    rawScores: toWeightedMeans(rawScoreSums),
  };
}
}