Spaces:

Girlz
/

TokenTrace

Running

TokenTrace / client /src /features /analysis /visualizationUpdater.ts

cccmmd

init: TokenTrace - LLM interpretability toolbox

76b5743 17 days ago

57.7 kB

	/**
	* 可视化更新模块
	* 负责处理分析结果的可视化更新逻辑
	*/

	import * as d3 from 'd3';
	import type { AnalyzeResponse, FrontendAnalyzeResult, FrontendToken } from '../../shared/api/GLTR_API';
	import type { GLTR_Text_Box } from '../../shared/vis/GLTR_Text_Box';
	import type { HighlightController } from '../../shared/controllers/highlightController';
	import type { TextInputController } from '../../shared/controllers/textInputController';
	import type { Histogram } from '../../shared/vis/Histogram';
	import type { ScatterPlot } from '../../shared/vis/ScatterPlot';
	import type { AppStateManager } from './appStateManager';
	import {
	cloneFrontendToken,
	mergeTokensForRendering,
	createRawSnapshot
	} from '../../shared/cross/tokenUtils';
	import { getAttentionRawScore, mergeAttentionTokensFullyForRendering, normalizeTokenScores } from '../../shared/cross/semanticUtils';
	import {
	validateTokenConsistency,
	validateTokenProbabilities,
	validateTokenPredictions
	} from '../../shared/cross/dataValidation';
	import {
	calculateTextStats,
	calculateMergedTokenSurprisals,
	computeAverage,
	computeP90,
	type TextStats
	} from '../../shared/cross/textStatistics';
	import {
	getTokenSurprisalHistogramConfig,
	getSurprisalProgressConfig,
	getMatchScoreProgressConfig,
	getRawScoreNormedHistogramConfig
	} from "./visualizationConfigs";
	import { getSemanticSimilarityColor, HISTOGRAM_MIN_ALPHA } from '../../shared/cross/SurprisalColorConfig';
	import { showAlertDialog } from '../../shared/ui/dialog';
	import { tr } from '../../shared/lang/i18n-lite';
	import { computeExpectedCounts } from './lognormalFit';
	import { findSignalThresholdWithLog, type signalFitResult, type SignalThresholdBin } from './signalThresholdDetector';
	import { getSemanticAnalysisEnabled } from '../../shared/cross/semanticAnalysisManager';
	import { getDigitsMergeEnabled } from '../../shared/cross/digitsMergeManager';
	import { getSemanticMatchThreshold } from '../../shared/cross/semanticThresholdManager';
	import { applySemanticDebugInfoPanel } from '../../shared/prediction_attribution/core/semanticDebugInfo';

	/**
	 * Thrown when the semantic-analysis tokens and the info-density tokens do not
	 * share the same boundaries; used to abort the combined display.
	 */
	export class TokenBoundaryInconsistentError extends Error {
		constructor() {
			super('Tokenizer results inconsistent: semantic and info-density token boundaries differ.');
			// Subclasses of built-ins lose their prototype when compiled to ES5;
			// restoring it keeps `instanceof TokenBoundaryInconsistentError` reliable.
			Object.setPrototypeOf(this, new.target.prototype);
			this.name = 'TokenBoundaryInconsistentError';
		}
	}

	/**
	 * P(signal | raw_score_normed = s), reusing the bins produced by the signal-threshold fit.
	 *
	 * Each score is assigned to the bin whose [tauLeft, tauRight) interval contains it, and
	 * P(signal) = (obsInBin - expInBin) / obsInBin, clamped to [0, 1].
	 *
	 * @param scores Normalised raw scores, one per sample.
	 * @param bins Bins from the threshold fit; the bisection assumes they are sorted
	 *   ascending by `tauLeft` (TODO confirm against the producer).
	 * @returns One probability per score (same order). A score that is non-finite, lies
	 *   outside its candidate bin, or lands in a bin with no observations yields 0.
	 *   Returns an empty array when either input is empty.
	 */
	function signalProbFromBins(scores: number[], bins: SignalThresholdBin[]): number[] {
		if (scores.length === 0 || bins.length === 0) return [];
		const tauLefts = bins.map((b) => b.tauLeft);
		const lastIndex = bins.length - 1;
		return scores.map((s) => {
			// NaN would pass both range comparisons below (they are all false for NaN)
			// and silently inherit some bin's probability, so reject it up front.
			if (!Number.isFinite(s)) return 0;
			const i = Math.max(0, Math.min(lastIndex, d3.bisectRight(tauLefts, s) - 1));
			const b = bins[i]!;
			// The index was clamped, so the score may still lie outside the bin.
			if (s < b.tauLeft || s >= b.tauRight) return 0;
			return b.obsInBin > 0 ? Math.max(0, Math.min(1, (b.obsInBin - b.expInBin) / b.obsInBin)) : 0;
		});
	}

	/**
	 * Collaborators the visualization updater needs: the token text view,
	 * the UI controllers, the four statistics charts and the app state manager.
	 */
	export interface VisualizationDependencies {
		// --- text view and controllers ---
		/** Token text view that receives the computed display result. */
		lmf: GLTR_Text_Box;
		/** Holds the data currently shown and clears token highlights. */
		highlightController: HighlightController;
		/** Receives the text metrics and the model name for display. */
		textInputController: TextInputController;
		/** Application-level state (analysing/loading flags, valid-data flag, button states). */
		appStateManager: AppStateManager;

		// --- statistics charts ---
		/** Token surprisal histogram. */
		stats_frac: Histogram;
		/** Histogram of normalised raw semantic scores. */
		stats_raw_score_normed: Histogram;
		/** Surprisal progress scatter plot. */
		stats_surprisal_progress: ScatterPlot;
		/** Semantic match-score progress plot. */
		stats_match_score_progress: ScatterPlot;

		// --- colouring ---
		/** Colour scale handed to the token surprisal histogram. */
		surprisalColorScale: d3.ScaleSequential<string>;
	}

	/** Raw semantic-analysis data (stored independently of the info-density data). */
	export interface SemanticData {
		/** Text this semantic result belongs to; compared with the info-density text before combining. */
		text: string;
		model?: string;
		/**
		 * Whole-text mode: copy of the token_attention returned by the API, kept so the
		 * merged tokens can be recomputed when digit merge is toggled (not stored in chunk mode).
		 */
		semanticTokenAttentionFromApi?: SemanticData['token_attention'];
		/** Per-token attention entries: character offsets, raw token text and scores. */
		token_attention: Array<{
			offset: [number, number];
			raw: string;
			score: number;
			rawScore?: number;
		}>;
		/** Fit result computed by the data layer after normalisation; used in whole-text mode. */
		signalFitResult?: signalFitResult | null;
		/** Chunk boundaries; used in chunk mode. Each entry may carry the thresholdResult fitted for that chunk alone. */
		chunkInfos?: Array<{ startOffset: number; endOffset: number; chunkIndex: number; chunkMatchDegree: number; thresholdResult?: signalFitResult }>;
		/** Whole-text match degree; used outside chunk mode as the match-degree factor of pw_score. */
		full_match_degree?: number;
	}

	/**
	 * Whether semantic-analysis data is present.
	 *
	 * @param data Object that may carry `token_attention` and/or `chunkInfos`, or null/undefined.
	 * @returns true when either array is non-empty; false otherwise (including null/undefined input).
	 */
	function hasSemanticData(data: { token_attention?: unknown[]; chunkInfos?: unknown[] } | null | undefined): boolean {
		const tokenCount = data?.token_attention?.length ?? 0;
		const chunkCount = data?.chunkInfos?.length ?? 0;
		return tokenCount > 0 || chunkCount > 0;
	}

	/**
	 * Current data state.
	 * Info-density and semantic results are stored independently; at display time
	 * their consistency decides whether they are shown separately or combined.
	 */
	export interface CurrentDataState {
		/** Info-density analysis result (independent). */
		infoDensityData: AnalyzeResponse | null;
		/** Semantic analysis result (independent). */
		semanticData: SemanticData | null;
		/** Raw snapshot of the last info-density API response. */
		rawApiResponse: AnalyzeResponse | null;
		/** Surprisals of the merged tokens, as fed to the histogram and progress chart. */
		currentSurprisals: number[] | null;
		/** Average of `currentSurprisals`. */
		currentTokenAvg: number | null;
		/** P90 of `currentSurprisals`. */
		currentTokenP90: number | null;
		/** Total surprisal taken from the text statistics. */
		currentTotalSurprisal: number | null;
	}

	/**
	* 可视化更新管理器
	*/
	export class VisualizationUpdater {
	private deps: VisualizationDependencies;
	private currentState: CurrentDataState;

	/**
	 * @param deps Views, controllers and charts this updater drives.
	 */
	constructor(deps: VisualizationDependencies) {
		// Nothing has been analysed yet: every slot starts out empty.
		const emptyState: CurrentDataState = {
			infoDensityData: null,
			semanticData: null,
			rawApiResponse: null,
			currentSurprisals: null,
			currentTokenAvg: null,
			currentTokenP90: null,
			currentTotalSurprisal: null
		};
		this.deps = deps;
		this.currentState = emptyState;
	}

	/**
	 * Returns a shallow copy of the current data state, so callers cannot
	 * reassign the updater's own state slots (nested objects are still shared).
	 */
	getCurrentState(): Readonly<CurrentDataState> {
		const snapshot = { ...this.currentState };
		return snapshot;
	}

	/**
	 * Returns the stored raw API response, or null when none is stored.
	 */
	getRawApiResponse(): AnalyzeResponse | null {
		const { rawApiResponse } = this.currentState;
		return rawApiResponse;
	}

	/**
	 * Returns the data currently displayed, computed from infoDensityData and
	 * semanticData by the display logic, wrapped as an AnalyzeResponse.
	 *
	 * @returns The display result with its original text as request text, or null when there is nothing to show.
	 * @throws TokenBoundaryInconsistentError propagated from `computeDisplayResult`.
	 */
	getCurrentData(): AnalyzeResponse | null {
		const display = this.computeDisplayResult();
		return display
			? { request: { text: display.originalText }, result: display }
			: null;
	}

	/**
	 * Returns the current surprisal values, or null when none are stored.
	 */
	getCurrentSurprisals(): number[] | null {
		const { currentSurprisals } = this.currentState;
		return currentSurprisals;
	}

	/**
	 * Forwards the text metrics (and the model name to display) to the text input controller.
	 *
	 * @param stats Text statistics, or null.
	 * @param modelName Name of the model to display; may be omitted or null.
	 */
	private updateTextMetrics(stats: TextStats | null, modelName?: string | null | undefined): void {
		const { textInputController } = this.deps;
		textInputController.updateTextMetrics(stats, modelName);
	}

	/**
	 * Clears the highlights via the highlight controller.
	 */
	private clearHighlights(): void {
		const { highlightController } = this.deps;
		highlightController.clearHighlights();
	}

	/**
	 * Computes what should be displayed: info density only, semantics only, or
	 * both combined (when the two analyses refer to the same text).
	 *
	 * Side effects on a token-boundary mismatch: logs a warning, shows an alert
	 * dialog, drops `semanticData` from the state and throws. A second call right
	 * after the throw therefore falls back to the info-density-only result.
	 *
	 * @returns The display result, or null when neither analysis has data.
	 * @throws TokenBoundaryInconsistentError when semantic and info-density token boundaries differ.
	 */
	private computeDisplayResult(): (FrontendAnalyzeResult & {
		rawScoresNormed?: number[];
		attentionRawScores?: number[];
		chunkInfos?: SemanticData['chunkInfos'];
	}) | null {
		const info = this.currentState.infoDensityData;
		const sem = this.currentState.semanticData;
		const infoResult = info?.result as FrontendAnalyzeResult | undefined;
		const infoText = info?.request?.text ?? infoResult?.originalText ?? '';
		const semText = sem?.text ?? '';

		if (infoResult && sem && infoText === semText && hasSemanticData(sem)) {
			const infoMerged = infoResult.bpeBpeMergedTokens ?? infoResult.bpe_strings;
			if (infoMerged?.length) {
				// Boundaries are validated only when token_attention exists; with chunkInfos
				// alone there is no semantic colouring, so the check is skipped.
				if (sem.token_attention?.length) {
					const boundaryError = this.checkSemanticAlignsWithInfo(sem.token_attention, infoMerged, semText);
					if (boundaryError) {
						const { aSample, bSample, aNext, bNext, textBefore, textAt, textAfter } = boundaryError;
						console.warn(
							'[联合模式] 两种分析的分词token边界不一致：\n' +
							' 语义分析：', aSample, '\n' +
							' 信息密度：', bSample, '\n' +
							' 语义后一个：', aNext, '\n' +
							' 信息后一个：', bNext, '\n' +
							' 位置附近原文：', JSON.stringify(textBefore), '|', JSON.stringify(textAt), '|', JSON.stringify(textAfter)
						);
						showAlertDialog(tr('Error'), tr('Tokenizer results inconsistent: semantic and info-density token boundaries differ.'));
						this.currentState.semanticData = null;
						throw new TokenBoundaryInconsistentError();
					}
				}
				// Combined mode: merged info tokens plus the semantic tokens that reach beyond
				// them form a union, so the rendered range matches the truncation boundary.
				const tokenAttention = sem.token_attention ?? [];
				// Fallback for "chunkInfos only": keep the info tokens and map an empty attention list.
				const unionWithoutAttention = () => {
					const mapped = this.mapTokenAttentionToMerged(infoMerged, []);
					return {
						unionTokens: infoMerged,
						scoresForUnion: mapped.scores,
						rawScoresForUnion: mapped.rawScores,
					};
				};
				const { unionTokens, scoresForUnion, rawScoresForUnion } = tokenAttention.length
					? this.mergeBpeWithSemanticBeyond(infoMerged, tokenAttention)
					: unionWithoutAttention();
				return {
					...infoResult,
					bpeBpeMergedTokens: unionTokens,
					bpe_strings: unionTokens,
					rawScoresNormed: scoresForUnion,
					attentionRawScores: rawScoresForUnion,
					chunkInfos: sem.chunkInfos,
				};
			}
		}
		// Semantic data present (token_attention or chunkInfos): build the semantic-only result.
		if (sem && hasSemanticData(sem)) {
			return this.buildSemanticOnlyResult({ model: sem.model }, sem.token_attention, sem.text, sem.chunkInfos);
		}
		if (infoResult) return { ...infoResult, chunkInfos: sem?.chunkInfos ?? undefined };
		return null;
	}

	/**
	 * Before an analysis starts, shows or hides each statistics chart depending on
	 * whether it will be meaningful given the data already held plus the data about
	 * to arrive, and renders empty charts for the pending ones to avoid blank space.
	 *
	 * @param mode Kind of analysis about to run.
	 * @param text Text about to be analysed (decides whether existing data matches and can be combined).
	 * @param willBeChunked Semantic analysis only: true when chunk mode will be used, in which case
	 *   the raw-score histogram is hidden and the match-score progress chart is shown instead.
	 */
	public updateHistogramVisibilityForPending(mode: 'infoDensity' | 'semantic', text: string, willBeChunked?: boolean): void {
		const setItemVisible = (elementId: string, visible: boolean): void => {
			const item = document.getElementById(elementId);
			if (item) item.style.display = visible ? '' : 'none';
		};

		const infoText = this.currentState.infoDensityData?.request?.text ?? '';
		const semText = this.currentState.semanticData?.text ?? '';
		const semanticQueryOn = getSemanticAnalysisEnabled();

		let showInfoDensity = false;
		let showSemantic = false;

		if (mode === 'infoDensity') {
			// With Semantic Query ticked, no info-density placeholders appear in the statistics area.
			showInfoDensity = !semanticQueryOn;
			showSemantic =
				semanticQueryOn &&
				hasSemanticData(this.currentState.semanticData) &&
				semText === text;
		} else {
			showSemantic = true;
			showInfoDensity =
				!semanticQueryOn &&
				!!(this.currentState.infoDensityData && infoText === text);
		}

		setItemVisible('token_histogram_item', showInfoDensity);
		setItemVisible('surprisal_progress_item', showInfoDensity);
		// The raw-score histogram is shown in whole-text mode only, never in chunk mode.
		const showRawScoreHistogram = showSemantic && !willBeChunked;
		setItemVisible('raw_score_normed_histogram_item', showRawScoreHistogram);
		// The semantic match progress chart is shown in chunk mode only.
		setItemVisible('match_score_progress_item', showSemantic && !!willBeChunked);

		// While pending, render empty charts (axes with no bars/points) instead of blank areas.
		if (showInfoDensity && mode === 'infoDensity') {
			const tokenConfig = getTokenSurprisalHistogramConfig();
			this.deps.stats_frac.update({ ...tokenConfig, data: [], colorScale: () => 'transparent' });
			const tokenTitle = document.getElementById('token_histogram_title');
			if (tokenTitle) tokenTitle.textContent = tokenConfig.label;
			const progressConfig = getSurprisalProgressConfig();
			this.deps.stats_surprisal_progress.update({ ...progressConfig, data: [] });
			const progressTitle = document.getElementById('surprisal_progress_title');
			if (progressTitle && progressConfig.label) progressTitle.textContent = progressConfig.label;
		}
		if (showRawScoreHistogram && mode === 'semantic') {
			const rawScoreNormedConfig = getRawScoreNormedHistogramConfig();
			this.deps.stats_raw_score_normed.update({ ...rawScoreNormedConfig, data: [], colorScale: () => 'transparent' });
			const titleEl = document.getElementById('raw_score_normed_histogram_title');
			if (titleEl) titleEl.textContent = rawScoreNormedConfig.label;
		}
		if (showSemantic && mode === 'semantic' && willBeChunked) {
			const matchScoreProgressConfig = getMatchScoreProgressConfig();
			const docLen = text.length;
			this.deps.stats_match_score_progress.update({
				...matchScoreProgressConfig,
				data: [],
				showMovingAverage: false,
				chunkLines: [],
				thresholdLine: getSemanticMatchThreshold(),
				extent: { x: docLen > 0 ? [0, docLen] : undefined, y: [0, 1] }
			});
			const matchScoreTitleEl = document.getElementById('match_score_progress_title');
			if (matchScoreTitleEl && matchScoreProgressConfig.label) matchScoreTitleEl.textContent = matchScoreProgressConfig.label;
		}
	}

	/**
	* Re-renders the statistics charts and, unless skipped, the token text view from the current state.
	*
	* With Semantic Query ticked only the semantic charts are shown; otherwise the token
	* histogram and the surprisal progress chart are shown whenever info-density data exists.
	*
	* The method has two main branches: when finite normalised raw scores exist it derives
	* per-token threshold flags and pw_score values and pushes them to the highlight controller
	* and text view; otherwise it pushes the plain display result and renders placeholders.
	*
	* @param skipLmfUpdate When true, lmf.update is skipped (on theme switches
	* rerenderOnThemeChange redraws the text itself, which avoids a race).
	*/
	private updateVisualizationInternal(skipLmfUpdate = false): void {
	const hasInfoDensity = !!this.currentState.infoDensityData;
	// NOTE: computeDisplayResult can throw TokenBoundaryInconsistentError; it is not caught here.
	const displayResult = this.computeDisplayResult();
	const sem = this.currentState.semanticData;
	const showInfoDensityCharts = hasInfoDensity && !getSemanticAnalysisEnabled();

	const tokenHistogramItem = document.getElementById('token_histogram_item');
	const surprisalProgressItem = document.getElementById('surprisal_progress_item');
	const rawScoreNormedItem = document.getElementById('raw_score_normed_histogram_item');

	// --- Info-density charts: token surprisal histogram + surprisal progress ---
	if (showInfoDensityCharts) {
	const currentSurprisals = this.currentState.currentSurprisals;
	const currentTokenAvg = this.currentState.currentTokenAvg;
	const currentTokenP90 = this.currentState.currentTokenP90;
	if (currentSurprisals) {
	const tokenHistogramConfig = getTokenSurprisalHistogramConfig();
	this.deps.stats_frac.update({
	...tokenHistogramConfig,
	data: currentSurprisals,
	colorScale: this.deps.surprisalColorScale,
	averageValue: currentTokenAvg ?? undefined,
	p90Value: currentTokenP90 ?? undefined,
	// NOTE(review): p90Label is filled from averageLabel — confirm this is intended and not a copy-paste slip.
	p90Label: tokenHistogramConfig.averageLabel,
	});
	const titleElement = document.getElementById('token_histogram_title');
	if (titleElement) titleElement.textContent = tokenHistogramConfig.label;
	}
	if (currentSurprisals && currentSurprisals.length > 0) {
	const surprisalProgressConfig = getSurprisalProgressConfig();
	this.deps.stats_surprisal_progress.update({
	...surprisalProgressConfig,
	data: currentSurprisals,
	});
	const surprisalProgressTitleElement = document.getElementById('surprisal_progress_title');
	if (surprisalProgressTitleElement && surprisalProgressConfig.label) {
	surprisalProgressTitleElement.textContent = surprisalProgressConfig.label;
	}
	}
	if (tokenHistogramItem) tokenHistogramItem.style.display = '';
	if (surprisalProgressItem) surprisalProgressItem.style.display = '';
	} else {
	if (tokenHistogramItem) tokenHistogramItem.style.display = 'none';
	if (surprisalProgressItem) surprisalProgressItem.style.display = 'none';
	}

	// --- Semantic inputs: finite scores, whole-text fit, optional per-chunk fits ---
	const rawScoresNormed = displayResult?.rawScoresNormed;
	const validRawScoresNormed = rawScoresNormed?.filter((s) => typeof s === 'number' && isFinite(s));
	const signalFitResult = sem?.signalFitResult ?? null;
	const chunkInfos = sem?.chunkInfos;
	const isChunkMode = (chunkInfos?.length ?? 0) > 0;
	const chunksWithThreshold = chunkInfos?.filter((c) => c.thresholdResult != null) ?? [];
	const usePerChunkThreshold = chunksWithThreshold.length > 0;
	// chunkIndex -> that chunk's own fit result; null when no chunk carries one.
	const thresholdByChunk = usePerChunkThreshold
	? new Map(chunksWithThreshold.map((c) => [c.chunkIndex, c.thresholdResult!]))
	: null;
	if (validRawScoresNormed && validRawScoresNormed.length > 0) {
	const rawScoreNormedConfig = getRawScoreNormedHistogramConfig();
	const colorScale = (v: number) => getSemanticSimilarityColor(v, HISTOGRAM_MIN_ALPHA);
	// With per-chunk fits, the first chunk's fit stands in for the histogram-level values.
	const thresholdForHistogram = usePerChunkThreshold && chunksWithThreshold.length > 0
	? chunksWithThreshold[0]!.thresholdResult!
	: signalFitResult;
	// confidence > 0: findSignalThreshold succeeded (>= MIN_ACCEPTABLE); confidence === 0 is the P90 fallback, for which the truncated log-normal expected curve is not drawn
	const fitResult = validRawScoresNormed.length >= 2 && thresholdForHistogram != null && thresholdForHistogram.confidence > 0
	? {
	mu: thresholdForHistogram.mu,
	sigma: thresholdForHistogram.sigma,
	expectedCounts: computeExpectedCounts(
	thresholdForHistogram.mu,
	thresholdForHistogram.sigma,
	rawScoreNormedConfig.extent as [number, number],
	rawScoreNormedConfig.no_bins,
	validRawScoresNormed.length
	),
	}
	: null;
	// NOTE(review): signalProbs is indexed like the FILTERED score list, while pPwValues/pwScores
	// below are indexed per token; the two only line up when no score was filtered out — confirm
	// how the highlight controller indexes signalProbs.
	const signalProbs = thresholdForHistogram != null
	? signalProbFromBins(validRawScoresNormed, thresholdForHistogram.bins)
	: [];
	/**
	* P_pw: simplified mapping of the posterior signal probability — 0 when x <= threshold, 1 when x > threshold
	* pw_score = score × P_pw × matchDegree
	* Chunk mode: every token uses the threshold and chunkMatchDegree of the chunk it belongs to
	* Whole-text mode: the whole-text match degree full_match_degree is used
	*/
	const rawScoresNormedFull = displayResult!.rawScoresNormed ?? [];
	const bpeBpeMergedTokens = displayResult?.bpeBpeMergedTokens ?? [];

	// Finds the chunk whose [startOffset, endOffset) contains the token's start offset (chunk mode only).
	const getChunkForToken = (tokenIndex: number) => {
	const token = bpeBpeMergedTokens[tokenIndex];
	if (!token \|\| !isChunkMode) return null;
	const offset = token.offset[0];
	return chunkInfos!.find((c) => c.startOffset <= offset && offset < c.endOffset) ?? null;
	};

	// Threshold for a token: its chunk's own fit when available, else the whole-text fit, else 0.
	const getThresholdForToken = (i: number): number => {
	const chunk = getChunkForToken(i);
	if (chunk && thresholdByChunk != null) {
	// NOTE: this local `tr` shadows the imported i18n `tr` helper inside this block.
	const tr = thresholdByChunk.get(chunk.chunkIndex);
	if (tr) return tr.threshold;
	}
	return signalFitResult?.threshold ?? 0;
	};

	// Match degree for a token: its chunk's chunkMatchDegree, else full_match_degree, else 1.
	const getMatchDegreeForToken = (i: number): number => {
	const chunk = getChunkForToken(i);
	if (chunk) return chunk.chunkMatchDegree;
	return sem?.full_match_degree ?? 1;
	};

	const hasThreshold = signalFitResult != null \|\| thresholdByChunk != null;
	// Per-token 0/1 flag: 1 when the score is finite and strictly above the token's threshold.
	const pPwValues = hasThreshold
	? rawScoresNormedFull.map((s, i) => {
	const threshold = getThresholdForToken(i);
	const isAboveThreshold = typeof s === 'number' && isFinite(s) && s > threshold;
	return isAboveThreshold ? 1 : 0;
	})
	: [];
	// Per-token pw_score: the score (0 when not above threshold) times the match degree.
	const pwScores = hasThreshold
	? rawScoresNormedFull.map((s, i) => {
	const threshold = getThresholdForToken(i);
	const isAboveThreshold = typeof s === 'number' && isFinite(s) && s > threshold;
	const baseScore = isAboveThreshold ? s : 0;
	const matchDegree = getMatchDegreeForToken(i);
	return baseScore * matchDegree;
	})
	: [];

	// The colour source comes from the select element; it defaults to 'pw_score' when the element is absent.
	const colorSourceEl = document.getElementById('semantic_color_source_select') as HTMLSelectElement \| null;
	const colorSource = colorSourceEl?.value ?? 'pw_score';
	const scoresForColor = colorSource === 'signal_probability' ? pPwValues
	: colorSource === 'pw_score' ? pwScores
	: (displayResult!.rawScoresNormed ?? []);

	// In combined mode the tooltip needs pPwValues/pwScores to show semantic match info, so they are passed even when fitResult is null
	const resultWithExt = hasThreshold
	? { ...displayResult, signalProbs, pPwValues, pwScores }
	: displayResult!;
	if (fitResult != null) {
	this.deps.highlightController.updateCurrentData({ result: resultWithExt, signalProbs, pPwValues, pwScores });
	if (!skipLmfUpdate) {
	this.deps.lmf.update({ ...resultWithExt, pwScores, colorScores: scoresForColor } as FrontendAnalyzeResult & { pPwValues?: number[]; pwScores?: number[]; colorScores?: number[] });
	}
	} else {
	this.deps.highlightController.updateCurrentData({ result: resultWithExt });
	if (!skipLmfUpdate) {
	this.deps.lmf.update({ ...resultWithExt, colorScores: scoresForColor } as FrontendAnalyzeResult & { pPwValues?: number[]; pwScores?: number[]; colorScores?: number[] });
	}
	}

	/** The histogram is shown in whole-text mode only; in chunk mode it is neither computed nor shown */
	if (!isChunkMode) {
	// Probability curve: (score, P(signal)) pairs sorted by score.
	const probCurveData = signalProbs.length > 0
	? (() => {
	const pairs = validRawScoresNormed.map((x, i) => ({ x, y: signalProbs[i]! })).sort((a, b) => a.x - b.x);
	return { x: pairs.map(p => p.x), y: pairs.map(p => p.y) };
	})()
	: undefined;
	// Share of scores strictly below the threshold, as a rounded percentage.
	const signalThresholdPercentile = thresholdForHistogram != null && validRawScoresNormed.length > 0
	? Math.round((validRawScoresNormed.filter((s) => s < thresholdForHistogram.threshold).length / validRawScoresNormed.length) * 100)
	: undefined;
	this.deps.stats_raw_score_normed.update({
	...rawScoreNormedConfig,
	data: validRawScoresNormed,
	colorScale,
	fitExpectedCounts: fitResult?.expectedCounts,
	showProbCurve: true,
	probCurveData: probCurveData?.x.length ? probCurveData : undefined,
	signalThreshold: thresholdForHistogram?.threshold ?? undefined,
	signalThresholdPercentile: signalThresholdPercentile ?? undefined,
	});
	const titleEl = document.getElementById('raw_score_normed_histogram_title');
	if (titleEl) titleEl.textContent = rawScoreNormedConfig.label;
	if (rawScoreNormedItem) rawScoreNormedItem.style.display = '';
	} else {
	if (rawScoreNormedItem) rawScoreNormedItem.style.display = 'none';
	}
	/** Semantic match progress: chunk mode only; only the chunk match lines are drawn, no points */
	if (isChunkMode) {
	const matchScoreProgressConfig = getMatchScoreProgressConfig();
	const docLen = (displayResult?.originalText ?? '').length;
	const chunkLines = chunkInfos?.length
	? chunkInfos.map((c) => ({ x0: c.startOffset, x1: c.endOffset, y: c.chunkMatchDegree }))
	: [];
	const thresholdLine = getSemanticMatchThreshold();
	this.deps.stats_match_score_progress.update({
	...matchScoreProgressConfig,
	data: [],
	showMovingAverage: false,
	chunkLines,
	thresholdLine,
	chunkInteraction: true,
	extent: { x: docLen > 0 ? [0, docLen] : undefined, y: [0, 1] }
	});
	const matchScoreTitleEl = document.getElementById('match_score_progress_title');
	if (matchScoreTitleEl && matchScoreProgressConfig.label) matchScoreTitleEl.textContent = matchScoreProgressConfig.label;
	const matchScoreProgressItem = document.getElementById('match_score_progress_item');
	if (matchScoreProgressItem) matchScoreProgressItem.style.display = '';
	} else {
	const matchScoreProgressItem = document.getElementById('match_score_progress_item');
	if (matchScoreProgressItem) matchScoreProgressItem.style.display = 'none';
	}
	} else {
	// --- No finite semantic scores: push the plain display result and render placeholders ---
	const needLmfUpdate = !!displayResult && (hasInfoDensity \|\| !!validRawScoresNormed?.length \|\| hasSemanticData(sem));
	if (displayResult) this.deps.highlightController.updateCurrentData({ result: displayResult });
	if (needLmfUpdate && !skipLmfUpdate) {
	this.deps.lmf.update(displayResult!);
	}
	/** The histogram is hidden in chunk mode; in whole-text mode without data an empty placeholder is shown */
	if (getSemanticAnalysisEnabled() && !isChunkMode) {
	const rawScoreNormedConfig = getRawScoreNormedHistogramConfig();
	this.deps.stats_raw_score_normed.update({ ...rawScoreNormedConfig, data: [], colorScale: () => 'transparent' });
	const titleEl = document.getElementById('raw_score_normed_histogram_title');
	if (titleEl) titleEl.textContent = rawScoreNormedConfig.label;
	if (rawScoreNormedItem) rawScoreNormedItem.style.display = '';
	} else {
	if (rawScoreNormedItem) rawScoreNormedItem.style.display = 'none';
	}
	/** Semantic match progress: shown without points when there are no scores (chunk mode only) */
	if (getSemanticAnalysisEnabled() && isChunkMode) {
	const matchScoreProgressConfig = getMatchScoreProgressConfig();
	const docLen = (displayResult?.originalText ?? '').length;
	const chunkLines = chunkInfos?.length
	? chunkInfos.map((c) => ({ x0: c.startOffset, x1: c.endOffset, y: c.chunkMatchDegree }))
	: [];
	const thresholdLine = getSemanticMatchThreshold();
	this.deps.stats_match_score_progress.update({
	...matchScoreProgressConfig,
	data: [],
	showMovingAverage: false,
	chunkLines,
	thresholdLine,
	chunkInteraction: true,
	extent: { x: docLen > 0 ? [0, docLen] : undefined, y: [0, 1] }
	});
	const matchScoreTitleEl = document.getElementById('match_score_progress_title');
	if (matchScoreTitleEl && matchScoreProgressConfig.label) matchScoreTitleEl.textContent = matchScoreProgressConfig.label;
	const matchScoreProgressItem = document.getElementById('match_score_progress_item');
	if (matchScoreProgressItem) matchScoreProgressItem.style.display = '';
	} else {
	const matchScoreProgressItem = document.getElementById('match_score_progress_item');
	if (matchScoreProgressItem) matchScoreProgressItem.style.display = 'none';
	}
	}
	}

	/** Re-renders the charts (and the text view); entry point for external callers. */
	public rerenderHistograms(): void {
		const skipLmfUpdate = false;
		this.updateVisualizationInternal(skipLmfUpdate);
	}

	/**
	 * Re-colours the text view after the colour-source select changed, reusing the
	 * values already held by the highlight controller (no re-fitting happens here).
	 * Does nothing when the current data has no normalised raw scores.
	 */
	public updateSemanticColorSource(): void {
		const cd = this.deps.highlightController.getCurrentData();
		const r = cd?.result as (FrontendAnalyzeResult & { rawScoresNormed?: number[] }) | undefined;
		// One explicit guard replaces the non-null assertions on `cd` further down.
		if (!cd || !r?.rawScoresNormed?.length) return;

		const el = document.getElementById('semantic_color_source_select') as HTMLSelectElement | null;
		const v = el?.value ?? 'pw_score';
		const scoresForColor = v === 'signal_probability' ? (cd.pPwValues ?? [])
			: v === 'pw_score' ? (cd.pwScores ?? [])
			: r.rawScoresNormed;
		this.deps.lmf.update({ ...r, pPwValues: cd.pPwValues, pwScores: cd.pwScores, colorScores: scoresForColor } as FrontendAnalyzeResult & { pPwValues?: number[]; pwScores?: number[]; colorScores?: number[] });
	}

	/**
	 * Called on theme switches: redraws the charts and the text two animation frames
	 * later, once the new styles are in effect (rgba colours let the background show through).
	 */
	public rerenderOnThemeChange(): void {
		const redraw = (): void => {
			// Charts first with the text update skipped, then one text re-render.
			this.updateVisualizationInternal(true);
			this.deps.lmf.reRenderCurrent();
		};
		requestAnimationFrame(() => {
			requestAnimationFrame(redraw);
		});
	}

	/**
	 * Drops all independently stored data when the text is edited, so that what is
	 * displayed can never disagree with the input; also hides the result area.
	 */
	public clearDataOnTextChange(): void {
		Object.assign(this.currentState, {
			infoDensityData: null,
			semanticData: null,
			rawApiResponse: null,
			currentSurprisals: null,
			currentTokenAvg: null,
			currentTokenP90: null,
			currentTotalSurprisal: null,
		});
		const { highlightController } = this.deps;
		highlightController.updateCurrentData(null);
		d3.select('#all_result').style('opacity', 0);
		this.updateSemanticDebugInfo();
	}

	/**
	 * Clears everything tied to semantic analysis (stored data, both semantic
	 * charts, debug info); used to initialise the UI when the mode is opened.
	 */
	public clearSemanticState(): void {
		this.currentState.semanticData = null;
		const semanticChartIds = ['raw_score_normed_histogram_item', 'match_score_progress_item'];
		for (const chartId of semanticChartIds) {
			const chartItem = document.getElementById(chartId);
			if (chartItem) {
				chartItem.style.display = 'none';
			}
		}
		this.updateSemanticDebugInfo();
	}

	/**
	 * Applies a change of the digit-merge preference: info-density tokens and
	 * whole-text semantic tokens are recomputed from their re-mergeable sources,
	 * then everything is re-rendered. Chunked semantic data keeps no such copy and
	 * is left as currently displayed.
	 */
	public applyDigitsMergeSetting(): void {
		const digitMerge = getDigitsMergeEnabled();

		// 1) Info density: re-merge the original tokens and store the result in place.
		const info = this.currentState.infoDensityData;
		if (info?.result) {
			const fr = info.result as FrontendAnalyzeResult;
			const text = info.request?.text ?? fr.originalText ?? '';
			if (fr.originalTokens?.length && text) {
				const newMerged = mergeTokensForRendering(fr.originalTokens, text, { digitMerge });
				fr.bpeBpeMergedTokens = newMerged;
				fr.bpe_strings = newMerged;
			}
		}

		// 2) Whole-text semantics: re-merge the API copy, normalise and refit the threshold.
		const sem = this.currentState.semanticData;
		if (sem && !sem.chunkInfos?.length && sem.semanticTokenAttentionFromApi?.length && sem.text) {
			const mergedAttention = mergeAttentionTokensFullyForRendering(
				sem.semanticTokenAttentionFromApi,
				sem.text,
				{ digitMerge }
			);
			const normalizedAttention = normalizeTokenScores(mergedAttention);
			const computedSignalFit = findSignalThresholdWithLog(normalizedAttention);
			sem.token_attention = normalizedAttention;
			sem.signalFitResult = computedSignalFit ?? undefined;
		}

		// 3) Surprisal statistics follow the (possibly new) merged tokens.
		const infoResult = this.currentState.infoDensityData?.result as FrontendAnalyzeResult | undefined;
		const safeText = this.currentState.infoDensityData?.request?.text ?? infoResult?.originalText ?? '';
		if (infoResult?.bpeBpeMergedTokens?.length && safeText) {
			const mergedSurprisals = calculateMergedTokenSurprisals(infoResult.bpeBpeMergedTokens);
			this.currentState.currentSurprisals = mergedSurprisals;
			this.currentState.currentTokenAvg = computeAverage(mergedSurprisals);
			this.currentState.currentTokenP90 = computeP90(mergedSurprisals);
		}

		// 4) Recompute the display result and re-render.
		let displayResult: ReturnType<VisualizationUpdater['computeDisplayResult']>;
		try {
			displayResult = this.computeDisplayResult();
		} catch (e) {
			if (e instanceof TokenBoundaryInconsistentError) {
				// The failed attempt dropped semanticData, so this retry takes the non-combined path.
				displayResult = this.computeDisplayResult();
			} else {
				console.error(e);
				return;
			}
		}
		this.deps.highlightController.updateCurrentData(displayResult ? { result: displayResult } : null);
		this.deps.lmf.clearHighlight();
		if (displayResult) this.deps.lmf.update(displayResult);
		this.updateVisualizationInternal();
		this.deps.appStateManager.updateButtonStates();
	}

	/**
	 * Brings the UI in line with the semantic-analysis setting (query section,
	 * text rendering mode, charts, button states). The UI is driven by the
	 * setting alone, not by whether data happens to exist.
	 */
	public syncSemanticUiFromConfig(): void {
		const semanticOn = getSemanticAnalysisEnabled();

		const section = document.getElementById('semantic_analysis_section');
		if (section) {
			section.style.display = semanticOn ? '' : 'none';
		}
		this.deps.lmf.updateOptions({ semanticAnalysisMode: semanticOn }, false);

		if (!semanticOn) {
			// Switched off: drop the semantic data; the charts are refreshed by the
			// updateVisualizationInternal call further down.
			this.currentState.semanticData = null;
			for (const chartId of ['raw_score_normed_histogram_item', 'match_score_progress_item']) {
				const chartItem = document.getElementById(chartId);
				if (chartItem) chartItem.style.display = 'none';
			}
			this.updateSemanticDebugInfo();

			const displayResult = this.computeDisplayResult();
			this.deps.highlightController.updateCurrentData(displayResult ? { result: displayResult } : null);
			if (!displayResult) {
				// Nothing left to show: hide the result area and mark the data invalid.
				d3.select('#all_result').style('opacity', 0);
				this.deps.appStateManager.updateState({ hasValidData: false });
			}
		}

		// Refresh chart visibility right after the setting is toggled either way.
		this.updateVisualizationInternal(false);
		// The setting affects the Upload/Save readiness condition, so button states are always refreshed.
		this.deps.appStateManager.updateButtonStates();
	}

	/**
	* Core entry point: validates an info-density analysis response, stores it, and renders
	* the text view, text metrics and charts from it.
	*
	* Validation failures show an alert, mark the data invalid and return early; any exception
	* inside the main section is logged and reported through a generic alert.
	*
	* @param data Analysis response; its `result` is replaced in place by the enhanced result.
	* @param disableAnimation When false (the default), render animation is switched on for this
	* update and switched off again by a timer.
	* @param options Options object; `enableSave` is read but not used anywhere in this method
	* (NOTE(review): confirm whether it is still needed).
	*/
	updateFromRequest(
	data: AnalyzeResponse,
	disableAnimation: boolean = false,
	options: { enableSave?: boolean } = {}
	): void {
	const { enableSave = true } = options;

	// Shared exit for validation failures: log, alert, mark invalid, resync the semantic UI.
	const abortDueToInvalidResponse = (message: string) => {
	console.error(message);
	showAlertDialog(tr('Error'), message);
	this.deps.appStateManager.updateState({ hasValidData: false });
	this.syncSemanticUiFromConfig();
	};

	try {
	// Animation is enabled only when triggered by Analyze; otherwise it stays off (off by default)
	if (!disableAnimation) {
	this.deps.lmf.updateOptions({ enableRenderAnimation: true }, false);
	}
	// Semantic analysis mode is decided by the setting
	this.deps.lmf.updateOptions({
	semanticAnalysisMode: getSemanticAnalysisEnabled(),
	}, false);

	d3.select('#all_result').style('opacity', 1).style('display', null);
	this.deps.appStateManager.setIsAnalyzing(false);
	this.deps.appStateManager.setGlobalLoading(false);

	// Hide the text area's loading state (lmf.update would hide it too; doing it early feels snappier)
	this.deps.lmf.hideLoading();

	// Validate the data structure
	if (!data \|\| !data.result) {
	console.error('Invalid data structure:', data);
	throw new Error('Invalid API response structure');
	}

	const result = data.result;

	// Make sure every required field exists and has the right type
	if (!Array.isArray(result.bpe_strings) \|\| result.bpe_strings.length === 0) {
	abortDueToInvalidResponse(tr('Returned JSON missing valid bpe_strings array, processing cancelled.'));
	return;
	}
	const predTopkError = validateTokenPredictions(result.bpe_strings as Array<{ pred_topk?: [string, number][] }>);
	if (predTopkError) {
	abortDueToInvalidResponse(predTopkError);
	return;
	}
	const probabilityError = validateTokenProbabilities(result.bpe_strings as Array<{ real_topk?: [number, number] }>);
	if (probabilityError) {
	abortDueToInvalidResponse(probabilityError);
	return;
	}

	const safeText = data.request.text;
	// Raw tokens are validated with allowOverlap; the merged tokens below are validated without it.
	const validationError = validateTokenConsistency(result.bpe_strings, safeText, { allowOverlap: true });
	if (validationError) {
	abortDueToInvalidResponse(validationError);
	return;
	}

	// Snapshot the response before `data.result` is replaced, then build the merged tokens from clones.
	const rawSnapshot = createRawSnapshot(data);
	const originalTokens = result.bpe_strings.map((token) => cloneFrontendToken(token as FrontendToken));
	const bpeBpeMergedTokens = mergeTokensForRendering(originalTokens, safeText, {
	digitMerge: getDigitsMergeEnabled(),
	});
	const mergedValidationError = validateTokenConsistency(bpeBpeMergedTokens, safeText);
	if (mergedValidationError) {
	abortDueToInvalidResponse(mergedValidationError);
	return;
	}

	const enhancedResult: FrontendAnalyzeResult = {
	...result,
	originalTokens,
	bpeBpeMergedTokens,
	bpe_strings: bpeBpeMergedTokens,
	originalText: safeText,
	};
	data.result = enhancedResult;

	// Store the info-density data independently (info density has no debug info, so the semantic debug panel is hidden)
	this.currentState.infoDensityData = data;
	this.currentState.rawApiResponse = rawSnapshot;
	this.updateSemanticDebugInfo();
	let displayResult: ReturnType<VisualizationUpdater['computeDisplayResult']>;
	try {
	displayResult = this.computeDisplayResult();
	} catch (e) {
	if (e instanceof TokenBoundaryInconsistentError) {
	// The failed attempt dropped semanticData, so this retry takes the non-combined path.
	displayResult = this.computeDisplayResult();
	} else {
	throw e;
	}
	}
	this.deps.highlightController.updateCurrentData(displayResult ? { result: displayResult } : null);

	this.deps.lmf.clearHighlight();
	if (displayResult) this.deps.lmf.update(displayResult);

	const textStats = calculateTextStats(enhancedResult, safeText);

	const mergedSurprisals = calculateMergedTokenSurprisals(enhancedResult.bpeBpeMergedTokens);
	// Histogram / progress use the merged tokens; the text metrics still use textStats (original tokens)
	this.currentState.currentSurprisals = mergedSurprisals;
	this.currentState.currentTokenAvg = computeAverage(mergedSurprisals);
	this.currentState.currentTokenP90 = computeP90(mergedSurprisals);
	this.currentState.currentTotalSurprisal = textStats.totalSurprisal;

	// Update the text metrics and the model display (the model actually used comes from the analysis result)
	const resultModel = data.result.model;
	this.updateTextMetrics(textStats, resultModel);

	// Switch animation off once the Analyze render is done, so later re-renders (dragging etc.) do not replay it
	if (!disableAnimation) {
	// Delay the switch-off so the animation has enough time to finish
	// Estimated duration: 100ms initial delay + batch time (depends on the token count)
	const tokenCount = enhancedResult.bpe_strings.length;
	const estimatedAnimationTime = 100 + Math.ceil(tokenCount / 50) * 100;
	const delayTime = Math.max(2000, estimatedAnimationTime + 500);

	// NOTE(review): the timer is never cancelled, so a timer from an earlier call can
	// switch animation off while a later call is still animating.
	setTimeout(() => {
	this.deps.lmf.updateOptions({ enableRenderAnimation: false }, false);
	}, delayTime);
	}
	} catch (error) {
	console.error('Error updating visualization:', error);
	this.deps.appStateManager.setIsAnalyzing(false);
	this.deps.appStateManager.setGlobalLoading(false);
	this.deps.appStateManager.updateState({ hasValidData: false });
	this.syncSemanticUiFromConfig();
	showAlertDialog(tr('Error'), 'Error rendering visualization. Check console for details.');
	return;
	}

	// Clear the previous selection
	this.clearHighlights();

	// Re-render the charts
	this.updateVisualizationInternal();

	// Data processed successfully: mark it valid (TextMetrics shown, Analyze greyed out)
	this.deps.appStateManager.updateState({ hasValidData: true });

	this.syncSemanticUiFromConfig();
	}

	/**
	 * Handles a semantic-analysis response: stores it as `semanticData`
	 * (independently of the info-density data), recomputes the display result
	 * and re-renders the visualizations.
	 *
	 * Whole-text responses (no `chunkInfos`) are boundary-validated, merged,
	 * normalized and fitted for a signal threshold here. Chunked responses are
	 * used as received and only get a per-chunk threshold fit.
	 *
	 * @param res Semantic response payload.
	 * @param text Text that the offsets in `res` refer to.
	 * @param signalFitResult Optional precomputed fit; when present it takes
	 *   precedence over the fit computed in this method.
	 * @returns `true` on success (including the "no semantic data" case, which
	 *   clears the semantic state); `false` when validation or computing the
	 *   display result fails — the caller should stop any follow-up analysis.
	 */
	public handleSemanticResponse(
		res: {
			model?: string;
			token_attention?: Array<{
				offset: [number, number];
				raw: string;
				score: number;
				rawScore?: number;
			}>;
			debug_info?: { abbrev?: string; topk_tokens?: string[]; topk_probs?: number[] };
			chunkInfos?: Array<{ startOffset: number; endOffset: number; chunkIndex: number; chunkMatchDegree: number; thresholdResult?: signalFitResult }>;
			full_match_degree?: number;
		},
		text?: string,
		signalFitResult?: signalFitResult | null
	): boolean {
		const sourceText = text ?? '';
		const attentionFromApi = res?.token_attention;
		const chunkList = res?.chunkInfos;

		// Nothing semantic in the response: reset semantic state and refresh the charts.
		if (!hasSemanticData(res)) {
			this.clearSemanticState();
			this.rerenderHistograms();
			this.deps.lmf.hideLoading();
			return true;
		}
		if (!sourceText) {
			return false;
		}

		/**
		 * Chunked mode: per the original note, the assembling side has already done
		 * overlap + digit merge + normalization per chunk, so the full text must not
		 * be merged or normalized again (avoids merging digits / scaling across chunks).
		 */
		const isChunkedSemantic = Boolean(chunkList?.length);

		// Whole-text mode (no chunkInfos) must validate token boundaries.
		if (!isChunkedSemantic && attentionFromApi && attentionFromApi.length > 0) {
			const boundaryError = validateTokenConsistency(attentionFromApi, sourceText, { allowOverlap: true });
			if (boundaryError) {
				showAlertDialog(tr('Error'), boundaryError);
				return false;
			}
		}

		// Copy of the API tokens with fresh offset tuples; only kept in whole-text mode.
		const semanticTokenAttentionFromApi =
			isChunkedSemantic || !attentionFromApi || attentionFromApi.length === 0
				? undefined
				: attentionFromApi.map((entry) => ({
					...entry,
					offset: [entry.offset[0], entry.offset[1]] as [number, number],
				}));

		const mergedAttention = !isChunkedSemantic
			? mergeAttentionTokensFullyForRendering(attentionFromApi ?? [], sourceText, {
				digitMerge: getDigitsMergeEnabled(),
			})
			: (attentionFromApi ?? []);
		const normalizedAttention = isChunkedSemantic ? mergedAttention : normalizeTokenScores(mergedAttention);

		// A whole-text fit only makes sense when the scores share one scale.
		const computedSignalFit = isChunkedSemantic
			? undefined
			: findSignalThresholdWithLog(normalizedAttention);

		// Each chunk gets its own threshold fit over the tokens that intersect it.
		const chunkInfosResolved =
			chunkList?.length
				? chunkList.map((chunk) => {
					const tokensInChunk = normalizedAttention.filter(
						(tok) => tok.offset[1] > chunk.startOffset && tok.offset[0] < chunk.endOffset
					);
					const thresholdResult =
						tokensInChunk.length > 0 ? findSignalThresholdWithLog(tokensInChunk) : null;
					const fitPatch = thresholdResult ? { thresholdResult } : {};
					return { ...chunk, ...fitPatch };
				})
				: chunkList;

		this.currentState.semanticData = {
			text: sourceText,
			model: res.model,
			semanticTokenAttentionFromApi,
			token_attention: normalizedAttention,
			signalFitResult: signalFitResult ?? computedSignalFit ?? undefined,
			chunkInfos: chunkInfosResolved,
			full_match_degree: res.full_match_degree,
		};

		let displayResult: ReturnType<VisualizationUpdater['computeDisplayResult']>;
		try {
			displayResult = this.computeDisplayResult();
		} catch (err) {
			// Roll back the semantic state that was just stored.
			this.currentState.semanticData = null;
			if (!(err instanceof TokenBoundaryInconsistentError)) {
				showAlertDialog(tr('Error'), err instanceof Error ? err.message : String(err));
				return false;
			}
			// Boundary mismatch: no dialog here; hide the loader and redraw the histograms.
			this.deps.lmf.hideLoading();
			this.rerenderHistograms();
			return false;
		}

		d3.select('#all_result').style('opacity', 1).style('display', null);
		this.deps.lmf.hideLoading();
		this.deps.highlightController.updateCurrentData({ result: displayResult });
		this.deps.lmf.clearHighlight();
		this.clearHighlights();
		this.updateVisualizationInternal();

		this.updateSemanticDebugInfo(res.debug_info);
		return true;
	}

	/**
	 * Updates the debug information shown below the text rendering area
	 * (abbrev + top 10, per the original note) by forwarding `di` to
	 * `applySemanticDebugInfoPanel` for the `'results'` / `'semantic_debug_info'` ids.
	 *
	 * @param di Debug info from the semantic response; may be omitted, in which
	 *   case `debugInfo` is forwarded as `undefined`.
	 */
	private updateSemanticDebugInfo(di?: { abbrev?: string; topk_tokens?: string[]; topk_probs?: number[] }): void {
		const panelOptions = { debugInfo: di };
		applySemanticDebugInfoPanel('results', 'semantic_debug_info', panelOptions);
	}

	/**
	 * Builds a display result from semantic data alone.
	 *
	 * Every attention token becomes a token row carrying only `offset`, `raw`
	 * and an empty `pred_topk`; the normalized scores and the raw attention
	 * scores are returned as parallel arrays in the same order.
	 *
	 * @param res Source of the `model` field.
	 * @param tokenAttention Attention tokens. Per the original note,
	 *   `semanticData.token_attention` has already gone through overlap + digit
	 *   merge + normalization in `handleSemanticResponse`.
	 * @param text Original text; an empty text yields `null`.
	 * @param chunkInfos Optional chunk metadata, passed through unchanged.
	 * @returns The assembled result, or `null` when `text` is empty.
	 */
	private buildSemanticOnlyResult(
		res: { model?: string },
		tokenAttention: Array<{
			offset: [number, number];
			raw: string;
			score: number;
			rawScore?: number;
		}>,
		text: string,
		chunkInfos?: SemanticData['chunkInfos']
	): (FrontendAnalyzeResult & {
		rawScoresNormed: number[];
		attentionRawScores: number[];
		chunkInfos?: SemanticData['chunkInfos'];
	}) | null {
		const originalText = text ?? '';
		if (!originalText) {
			return null;
		}

		const baseTokens: FrontendToken[] = [];
		const rawScoresNormed: number[] = [];
		const attentionRawScores: number[] = [];
		for (const entry of tokenAttention) {
			baseTokens.push({
				offset: entry.offset,
				raw: entry.raw,
				pred_topk: []
			} as FrontendToken);
			rawScoresNormed.push(entry.score);
			attentionRawScores.push(getAttentionRawScore(entry));
		}

		// Each token list gets its own row objects (shallow copies of the same base rows).
		const shallowCopies = (): FrontendToken[] => baseTokens.map((row) => ({ ...row }));

		return {
			model: res.model,
			bpe_strings: shallowCopies(),
			originalTokens: shallowCopies(),
			bpeBpeMergedTokens: shallowCopies(),
			originalText,
			rawScoresNormed,
			attentionRawScores,
			chunkInfos
		};
	}

	/**
	 * Checks that the boundaries of the semantic `token_attention` spans agree
	 * with the info tokens. Sparse coverage is allowed: the semantic tokens do
	 * not have to cover the whole text.
	 *
	 * A semantic token passes when both its start and its end are in the set
	 * `{0} ∪ {end offset of every info token}`. Tokens with out-of-range or
	 * empty spans are skipped (left to `validateTokenConsistency`), as are
	 * tokens ending past the last info token.
	 * NOTE(review): info-token start offsets are not part of the boundary set,
	 * so this presumably relies on info tokens being contiguous from 0 — confirm.
	 *
	 * @param tokenAttention Semantic tokens (`raw` optional).
	 * @param infoMerged Info tokens to align against.
	 * @param text Full text; used for range checks and for the context samples.
	 * @returns `null` when everything aligns; otherwise a description of the
	 *   first misaligned token, including the surrounding text.
	 */
	private checkSemanticAlignsWithInfo(
		tokenAttention: Array<{ offset: [number, number]; raw?: string }>,
		infoMerged: Array<{ offset: [number, number] }>,
		text: string
	): { firstBadIdx: number; aSample: string; bSample: string; aNext: string; bNext: string; textBefore: string; textAt: string; textAfter: string } | null {
		const sampleLimit = 20; // max characters of a token shown in a sample
		const ctx = 30; // characters of surrounding text reported on each side
		const totalChars = text.length;

		const boundaries = new Set<number>([0]);
		for (const t of infoMerged) boundaries.add(t.offset[1]);
		const infoEnd = infoMerged.length > 0 ? infoMerged[infoMerged.length - 1]!.offset[1] : 0;

		// JSON-escape without the surrounding quotes so control characters stay visible.
		const esc = (s: string) => JSON.stringify(s).slice(1, -1);
		// Quoted, truncated sample. The ellipsis is decided on the FULL length, so a
		// token of exactly `sampleLimit` characters is not marked as truncated.
		const quote = (full: string) =>
			`"${esc(full.slice(0, sampleLimit))}${full.length > sampleLimit ? '…' : ''}"`;
		const span = (offset: [number, number]) => `[字符${offset[0]}-${offset[1]}]`;
		const fmt = (t: { offset: [number, number]; raw?: string }, idx: number) =>
			`第${idx}个token分词 ${span(t.offset)} ${quote(t.raw ?? text.slice(t.offset[0], t.offset[1]))}`;

		for (let i = 0; i < tokenAttention.length; i++) {
			const current = tokenAttention[i]!;
			const [semStart, semEnd] = current.offset;
			// Invalid spans are handled by validateTokenConsistency.
			if (semStart < 0 || semEnd > totalChars || semEnd <= semStart) continue;
			// Beyond the range covered by both sides: not part of this check.
			if (semEnd > infoEnd) continue;
			if (boundaries.has(semStart) && boundaries.has(semEnd)) continue;

			// First misaligned token: collect the info token at the same position and both successors.
			const infoIdx = infoMerged.findIndex((t) => t.offset[0] <= semStart && semStart < t.offset[1]);
			const infoAt = infoIdx >= 0 ? infoMerged[infoIdx]! : null;
			const nextSem = tokenAttention[i + 1];
			const nextInfo = infoIdx >= 0 && infoIdx + 1 < infoMerged.length ? infoMerged[infoIdx + 1]! : null;
			// An empty or missing `raw` falls back to the text slice.
			const semRaw = current.raw || text.slice(semStart, semEnd);

			return {
				firstBadIdx: i,
				aSample: `第${i}个token分词 ${span(current.offset)} ${quote(semRaw)}`,
				bSample: infoAt
					? `同一位置token分词 ${span(infoAt.offset)} ${quote(text.slice(infoAt.offset[0], infoAt.offset[1]))}`
					: '无对应',
				aNext: nextSem ? fmt(nextSem, i + 1) : '无',
				bNext: nextInfo ? fmt(nextInfo, infoIdx + 1) : '无',
				textBefore: text.slice(Math.max(0, semStart - ctx), semStart),
				textAt: text.slice(semStart, semEnd),
				textAfter: text.slice(semEnd, Math.min(totalChars, semEnd + ctx)),
			};
		}
		return null;
	}

	/**
	 * Joint mode: builds the union of `bpeMerged` and the semantic tokens that
	 * start at or after the end of the info-density range, so that the
	 * rect/render range and the truncation boundary stay consistent.
	 *
	 * Scores for the info tokens come from `mapTokenAttentionToMerged`. The
	 * tokens beyond the info range are re-normalized among themselves from
	 * their raw scores and appended as placeholder token rows
	 * (`real_topk: [0, 1]`, empty `pred_topk`).
	 *
	 * @param bpeMerged Merged info tokens.
	 * @param tokenAttention Semantic attention tokens.
	 * @returns `unionTokens` plus `scoresForUnion` / `rawScoresForUnion`,
	 *   index-aligned with `unionTokens`; `undefined` marks "no score".
	 */
	private mergeBpeWithSemanticBeyond(
		bpeMerged: FrontendToken[],
		tokenAttention: Array<{
			offset: [number, number];
			raw: string;
			score: number;
			rawScore?: number;
		}>
	): {
		unionTokens: FrontendToken[];
		scoresForUnion: (number | undefined)[];
		rawScoresForUnion: (number | undefined)[];
	} {
		const infoEnd = bpeMerged.length === 0 ? 0 : bpeMerged[bpeMerged.length - 1]!.offset[1];
		// Needed on both paths, so it is computed once up front.
		const mappedToInfo = this.mapTokenAttentionToMerged(bpeMerged, tokenAttention);

		const beyond = tokenAttention.filter((t) => t.offset[0] >= infoEnd);
		if (beyond.length === 0) {
			return {
				unionTokens: bpeMerged,
				scoresForUnion: mappedToInfo.scores,
				rawScoresForUnion: mappedToInfo.rawScores,
			};
		}

		/**
		 * Per the original note, `beyond` has already been overlap + digit merged in
		 * handleSemanticResponse; within this segment the scores are re-normalized
		 * from the raw values.
		 */
		const beyondRenormed = normalizeTokenScores(
			beyond.map((t) => ({ ...t, score: getAttentionRawScore(t) }))
		);

		const semanticAsFrontend: FrontendToken[] = [];
		const beyondScores: (number | undefined)[] = [];
		const beyondRawScores: (number | undefined)[] = [];
		for (const t of beyondRenormed) {
			semanticAsFrontend.push({
				offset: [t.offset[0], t.offset[1]],
				raw: t.raw,
				real_topk: [0, 1] as [number, number],
				pred_topk: [],
			});
			beyondScores.push(Number.isFinite(t.score) ? t.score : undefined);
			const rawValue = getAttentionRawScore(t);
			beyondRawScores.push(Number.isFinite(rawValue) ? rawValue : undefined);
		}

		return {
			unionTokens: bpeMerged.concat(semanticAsFrontend),
			scoresForUnion: mappedToInfo.scores.concat(beyondScores),
			rawScoresForUnion: mappedToInfo.rawScores.concat(beyondRawScores),
		};
	}

	/**
	 * Maps `token_attention` (offsets are character offsets into the original
	 * text) onto the merged tokens with a two-pointer sweep.
	 *
	 * Each merged token receives the overlap-weighted average of the scores
	 * (and raw scores) of the attention tokens that intersect it.
	 *
	 * Precondition: both arrays are sorted by offset in ascending order.
	 *
	 * Robustness: attention tokens with an empty or inverted span are ignored,
	 * and a non-finite score / raw score does not contribute to its average,
	 * so one bad value cannot turn a merged token's result into NaN.
	 *
	 * @param bpeBpeMergedTokens Merged tokens to map onto.
	 * @param tokenAttention Attention tokens with `score` and optional `rawScore`.
	 * @returns `scores` and `rawScores`, index-aligned with `bpeBpeMergedTokens`;
	 *   `undefined` for merged tokens without any usable overlapping attention.
	 */
	private mapTokenAttentionToMerged(
		bpeBpeMergedTokens: Array<{ offset: [number, number] }>,
		tokenAttention: Array<{ offset: [number, number]; score: number; rawScore?: number }>
	): {
		scores: (number | undefined)[];
		rawScores: (number | undefined)[];
	} {
		const n = bpeBpeMergedTokens.length;
		const scoreSums: number[] = new Array<number>(n).fill(0);
		const scoreWeights: number[] = new Array<number>(n).fill(0);
		const rawSums: number[] = new Array<number>(n).fill(0);
		const rawWeights: number[] = new Array<number>(n).fill(0);

		let first = 0; // index of the first merged token that ends after the current attention start
		for (const attn of tokenAttention) {
			const [attnStart, attnEnd] = attn.offset;
			// Empty or inverted spans cannot overlap anything; skip them before moving `first`.
			if (attnEnd <= attnStart) continue;

			const rawPart = getAttentionRawScore(attn);
			const scoreUsable = Number.isFinite(attn.score);
			const rawUsable = Number.isFinite(rawPart);

			while (first < n && bpeBpeMergedTokens[first]!.offset[1] <= attnStart) first++;
			for (let k = first; k < n && bpeBpeMergedTokens[k]!.offset[0] < attnEnd; k++) {
				const [s, e] = bpeBpeMergedTokens[k]!.offset;
				const overlap = Math.min(e, attnEnd) - Math.max(s, attnStart);
				// Defensive: zero-length merged tokens or unsorted input must not add non-positive weight.
				if (overlap <= 0) continue;
				if (scoreUsable) {
					scoreSums[k]! += attn.score * overlap;
					scoreWeights[k]! += overlap;
				}
				if (rawUsable) {
					rawSums[k]! += rawPart * overlap;
					rawWeights[k]! += overlap;
				}
			}
		}

		const weightedAverage = (sums: number[], weights: number[]): (number | undefined)[] =>
			sums.map((sum, i) => {
				const weight = weights[i]!;
				return weight > 0 ? sum / weight : undefined;
			});
		return {
			scores: weightedAverage(scoreSums, scoreWeights),
			rawScores: weightedAverage(rawSums, rawWeights),
		};
	}
	}