Spaces:

dqy08
/

InfoRadar

Running

File size: 57,253 Bytes

/**
 * 可视化更新模块
 * 负责处理分析结果的可视化更新逻辑
 */

import * as d3 from 'd3';
import type { AnalyzeResponse, FrontendAnalyzeResult, FrontendToken } from '../api/GLTR_API';
import type { GLTR_Text_Box } from '../vis/GLTR_Text_Box';
import type { HighlightController } from '../controllers/highlightController';
import type { TextInputController } from '../controllers/textInputController';
import type { Histogram } from '../vis/Histogram';
import type { ScatterPlot } from '../vis/ScatterPlot';
import type { AppStateManager } from './appStateManager';
import {
    cloneFrontendToken,
    mergeTokensForRendering,
    createRawSnapshot
} from './tokenUtils';
import { getAttentionRawScore, mergeAttentionTokensFullyForRendering, normalizeTokenScores } from './semanticUtils';
import {
    validateTokenConsistency,
    validateTokenProbabilities,
    validateTokenPredictions
} from './dataValidation';
import {
    calculateTextStats,
    calculateMergedTokenSurprisals,
    computeAverage,
    computeP90,
    type TextStats
} from './textStatistics';
import {
    getTokenSurprisalHistogramConfig,
    getSurprisalProgressConfig,
    getMatchScoreProgressConfig,
    getRawScoreNormedHistogramConfig
} from "./visualizationConfigs";
import { getSemanticSimilarityColor, HISTOGRAM_MIN_ALPHA } from './SurprisalColorConfig';
import { showAlertDialog } from '../ui/dialog';
import { tr } from '../lang/i18n-lite';
import { computeExpectedCounts } from './lognormalFit';
import { findSignalThresholdWithLog, type signalFitResult, type SignalThresholdBin } from './signalThresholdDetector';
import { getSemanticAnalysisEnabled } from './semanticAnalysisManager';
import { getDigitsMergeEnabled } from './digitsMergeManager';
import { getSemanticMatchThreshold } from './semanticThresholdManager';
import { applySemanticDebugInfoPanel } from '../attribution/semanticDebugInfo';

/**
 * Thrown when the semantic-analysis tokens and the info-density tokens do not share the
 * same token boundaries; used to abort the joint (combined) display.
 */
export class TokenBoundaryInconsistentError extends Error {
    constructor() {
        super('Tokenizer results inconsistent: semantic and info-density token boundaries differ.');
        // Restore the prototype chain explicitly: when the code is compiled down to ES5,
        // subclasses of built-ins such as Error otherwise end up with Error.prototype and
        // `instanceof TokenBoundaryInconsistentError` checks fail. No-op on ES2015+ targets.
        Object.setPrototypeOf(this, new.target.prototype);
        this.name = 'TokenBoundaryInconsistentError';
    }
}

/**
 * Estimates P(signal | raw_score_normed = s) for each score by reusing the bins produced by
 * the signal-threshold detection.
 *
 * Each score is assigned to the bin whose [tauLeft, tauRight) interval contains it, and the
 * probability is (obsInBin - expInBin) / obsInBin clamped to [0, 1]. Scores that fall outside
 * the located bin, and bins without observations, yield 0.
 *
 * @param scores Scores to evaluate.
 * @param bins Bins to look the scores up in; the lookup bisects on `tauLeft`, so the bins are
 *             presumably sorted ascending by `tauLeft` (TODO confirm against the detector).
 * @returns One probability per score, in the same order; empty when either input is empty.
 */
function signalProbFromBins(scores: number[], bins: SignalThresholdBin[]): number[] {
    if (scores.length === 0 || bins.length === 0) return [];

    const leftEdges = bins.map((bin) => bin.tauLeft);
    const lastBinIndex = bins.length - 1;

    return scores.map((score) => {
        // Index of the last bin whose left edge is <= score, clamped into the valid range.
        const located = d3.bisectRight(leftEdges, score) - 1;
        const binIndex = Math.max(0, Math.min(lastBinIndex, located));
        const bin = bins[binIndex]!;

        const outsideBin = score < bin.tauLeft || score >= bin.tauRight;
        if (outsideBin) return 0;
        if (!(bin.obsInBin > 0)) return 0;

        const excessFraction = (bin.obsInBin - bin.expInBin) / bin.obsInBin;
        return Math.max(0, Math.min(1, excessFraction));
    });
}

/**
 * Collaborators the visualization updater needs: the text view, the controllers and the
 * statistic charts it redraws.
 */
export interface VisualizationDependencies {
    /** Text view that receives the display result through `update()`. */
    lmf: GLTR_Text_Box;
    /** Holds the data currently shown (`updateCurrentData` / `getCurrentData`) and clears highlights. */
    highlightController: HighlightController;
    /** Receives the text metrics (statistics and model name). */
    textInputController: TextInputController;
    /** Histogram drawn with the token surprisal histogram config. */
    stats_frac: Histogram;
    /** Histogram drawn with the normalized raw score histogram config. */
    stats_raw_score_normed: Histogram;
    /** Scatter plot drawn with the surprisal progress config. */
    stats_surprisal_progress: ScatterPlot;
    /** Scatter plot drawn with the match score progress config (chunk match lines). */
    stats_match_score_progress: ScatterPlot;
    /** Application state manager; its button states are refreshed after a digit-merge change. */
    appStateManager: AppStateManager;
    /** Color scale passed to the token surprisal histogram. */
    surprisalColorScale: d3.ScaleSequential<string>;
}

/** Raw semantic analysis data (stored independently of the info-density data). */
export interface SemanticData {
    /** Text the semantic analysis was run on. */
    text: string;
    /** Model name, when known. */
    model?: string;
    /** Whole-text mode: copy of the token_attention returned by the API, used to recompute when digit merge is toggled (not stored in chunked mode). */
    semanticTokenAttentionFromApi?: Array<{ offset: [number, number]; raw: string; score: number; rawScore?: number }>;
    /** Per-token attention entries used for display. */
    token_attention: Array<{ offset: [number, number]; raw: string; score: number; rawScore?: number }>;
    /** Fit result, computed by the data layer after normalization and passed in; used in whole-text mode. */
    signalFitResult?: signalFitResult | null;
    /** Chunk boundaries; used in chunked mode. Each entry may carry the thresholdResult fitted independently for that chunk. */
    chunkInfos?: Array<{ startOffset: number; endOffset: number; chunkIndex: number; chunkMatchDegree: number; thresholdResult?: signalFitResult }>;
    /** Match degree of the full text; used in non-chunked mode as the multiplicative match-degree factor of pw_score. */
    full_match_degree?: number;
}

/**
 * Tells whether semantic analysis data is present: a non-empty `token_attention` or a
 * non-empty `chunkInfos` is enough. `null` / `undefined` input counts as "no data".
 */
function hasSemanticData(data: { token_attention?: unknown[]; chunkInfos?: unknown[] } | null | undefined): boolean {
    if (data == null) return false;
    const attentionCount = data.token_attention?.length ?? 0;
    const chunkCount = data.chunkInfos?.length ?? 0;
    return attentionCount > 0 || chunkCount > 0;
}

/**
 * Current data state.
 * Info-density and semantic analysis results are stored independently; at display time their
 * consistency decides whether they are shown separately or jointly.
 */
export interface CurrentDataState {
    /** Info-density analysis result (independent). */
    infoDensityData: AnalyzeResponse | null;
    /** Semantic analysis result (independent). */
    semanticData: SemanticData | null;
    /** Raw API response, as exposed by `getRawApiResponse()`. */
    rawApiResponse: AnalyzeResponse | null;
    /** Surprisal values plotted in the token histogram and the surprisal progress chart. */
    currentSurprisals: number[] | null;
    /** Average passed to the token histogram as `averageValue`. */
    currentTokenAvg: number | null;
    /** P90 passed to the token histogram as `p90Value`. */
    currentTokenP90: number | null;
    /** Total surprisal; only reset within the visible part of this file. */
    currentTotalSurprisal: number | null;
}

/**
 * 可视化更新管理器
 */
export class VisualizationUpdater {
    private deps: VisualizationDependencies;
    private currentState: CurrentDataState;

    /**
     * Stores the injected dependencies and starts from an empty data state
     * (every field of the state is `null`).
     */
    constructor(deps: VisualizationDependencies) {
        const emptyState: CurrentDataState = {
            infoDensityData: null,
            semanticData: null,
            rawApiResponse: null,
            currentSurprisals: null,
            currentTokenAvg: null,
            currentTokenP90: null,
            currentTotalSurprisal: null
        };
        this.deps = deps;
        this.currentState = emptyState;
    }

    /**
     * Returns a shallow copy of the current data state; the nested objects are shared with
     * the internal state, only the top-level container is new.
     */
    getCurrentState(): Readonly<CurrentDataState> {
        const snapshot: CurrentDataState = { ...this.currentState };
        return snapshot;
    }

    /**
     * Returns the raw API response currently stored, or `null` when there is none.
     */
    getRawApiResponse(): AnalyzeResponse | null {
        const { rawApiResponse } = this.currentState;
        return rawApiResponse;
    }

    /**
     * Returns the data currently displayed, derived from the stored info-density and semantic
     * data by `computeDisplayResult()`, wrapped as an `AnalyzeResponse`.
     *
     * @returns The display data, or `null` when there is nothing to display.
     * @throws TokenBoundaryInconsistentError propagated from `computeDisplayResult()`.
     */
    getCurrentData(): AnalyzeResponse | null {
        const displayed = this.computeDisplayResult();
        return displayed
            ? { request: { text: displayed.originalText }, result: displayed }
            : null;
    }

    /**
     * Returns the surprisal values currently stored, or `null` when there are none.
     */
    getCurrentSurprisals(): number[] | null {
        const { currentSurprisals } = this.currentState;
        return currentSurprisals;
    }

    /**
     * Forwards the text metrics (statistics plus the model name to display) to the text
     * input controller.
     */
    private updateTextMetrics(stats: TextStats | null, modelName?: string | null | undefined): void {
        const { textInputController } = this.deps;
        textInputController.updateTextMetrics(stats, modelName);
    }

    /**
     * Asks the highlight controller to clear all highlights.
     */
    private clearHighlights(): void {
        const { highlightController } = this.deps;
        highlightController.clearHighlights();
    }

    /**
     * Computes the result to display from the independently stored info-density and semantic
     * data: info-density only, semantic only, or joint (when both refer to the same text).
     *
     * Joint mode requires an info-density result with merged tokens and semantic data for the
     * identical text. When `token_attention` is present its token boundaries are validated
     * against the info-density tokens first.
     *
     * Side effects on a boundary mismatch: a warning is logged, an alert dialog is shown, the
     * stored semantic data is dropped, the semantic debug panel and the digit-merge toggle are
     * refreshed (as every other place that clears the semantic data does), and the error is
     * thrown.
     *
     * @returns The display result, or `null` when neither kind of data is available.
     * @throws TokenBoundaryInconsistentError when the semantic and info-density token
     *         boundaries differ.
     */
    private computeDisplayResult(): (FrontendAnalyzeResult & { rawScoresNormed?: number[]; attentionRawScores?: number[]; chunkInfos?: SemanticData['chunkInfos'] }) | null {
        const info = this.currentState.infoDensityData;
        const sem = this.currentState.semanticData;
        const infoResult = info?.result as FrontendAnalyzeResult | undefined;
        const infoText = info?.request?.text ?? infoResult?.originalText ?? '';
        const semText = sem?.text ?? '';

        if (infoResult && sem && infoText === semText && hasSemanticData(sem)) {
            const infoMerged = infoResult.bpeBpeMergedTokens ?? infoResult.bpe_strings;
            if (infoMerged?.length) {
                // Validate boundaries only when token_attention exists; with chunkInfos alone
                // there is no semantic coloring, so the check is skipped.
                if (sem.token_attention?.length) {
                    const boundaryError = this.checkSemanticAlignsWithInfo(sem.token_attention, infoMerged, semText);
                    if (boundaryError) {
                        const { aSample, bSample, aNext, bNext, textBefore, textAt, textAfter } = boundaryError;
                        console.warn(
                            '[联合模式] 两种分析的分词token边界不一致：\n' +
                            '  语义分析：', aSample, '\n' +
                            '  信息密度：', bSample, '\n' +
                            '  语义后一个：', aNext, '\n' +
                            '  信息后一个：', bNext, '\n' +
                            '  位置附近原文：', JSON.stringify(textBefore), '|', JSON.stringify(textAt), '|', JSON.stringify(textAfter)
                        );
                        showAlertDialog(tr('Error'), tr('Tokenizer results inconsistent: semantic and info-density token boundaries differ.'));
                        this.currentState.semanticData = null;
                        // Keep the UI consistent with the dropped semantic data, as the other
                        // code paths that clear it do.
                        this.updateSemanticDebugInfo();
                        this.syncDigitsMergeUi();
                        throw new TokenBoundaryInconsistentError();
                    }
                }
                // Joint mode: merge the info-density tokens with the semantic tokens that extend
                // beyond them into a union, so the rendered range matches the truncation boundary.
                const tokenAttention = sem.token_attention ?? [];
                const infoTokensOnly = () => {
                    const mapped = this.mapTokenAttentionToMerged(infoMerged, []);
                    return { unionTokens: infoMerged, scoresForUnion: mapped.scores, rawScoresForUnion: mapped.rawScores };
                };
                const { unionTokens, scoresForUnion, rawScoresForUnion } = tokenAttention.length
                    ? this.mergeBpeWithSemanticBeyond(infoMerged, tokenAttention)
                    : infoTokensOnly();
                return {
                    ...infoResult,
                    bpeBpeMergedTokens: unionTokens,
                    bpe_strings: unionTokens,
                    rawScoresNormed: scoresForUnion,
                    attentionRawScores: rawScoresForUnion,
                    chunkInfos: sem.chunkInfos,
                };
            }
        }
        // Semantic data present (token_attention or chunkInfos): build a semantic-only result.
        if (sem && hasSemanticData(sem)) {
            return this.buildSemanticOnlyResult({ model: sem.model }, sem.token_attention, sem.text, sem.chunkInfos);
        }
        if (infoResult) return { ...infoResult, chunkInfos: sem?.chunkInfos ?? undefined };
        return null;
    }

    /**
     * Before an analysis starts, shows or hides each statistic chart depending on whether it
     * will be meaningful given the data already stored plus the data about to arrive, and
     * draws empty charts (axes without bars/points) for the pending analysis to avoid blanks.
     *
     * @param mode Kind of analysis about to run.
     * @param text Text about to be analyzed; compared with the stored data's text to decide
     *             whether the existing data can be shown alongside.
     * @param willBeChunked For semantic analysis: `true` means chunked mode, in which the raw
     *                      score histogram is hidden and the per-chunk match progress is shown.
     */
    public updateHistogramVisibilityForPending(mode: 'infoDensity' | 'semantic', text: string, willBeChunked?: boolean): void {
        const { infoDensityData, semanticData } = this.currentState;
        const storedInfoText = infoDensityData?.request?.text ?? '';
        const storedSemText = semanticData?.text ?? '';
        const pendingInfoDensity = mode === 'infoDensity';
        const pendingSemantic = mode === 'semantic';

        // The pending kind is always shown; the other kind only when data for the same text exists.
        const showInfoDensity = pendingInfoDensity || (!!infoDensityData && storedInfoText === text);
        const showSemantic = !pendingInfoDensity || (hasSemanticData(semanticData) && storedSemText === text);
        // The histogram is shown in whole-text mode only; the per-chunk progress in chunked mode only.
        const showRawScoreHistogram = showSemantic && !willBeChunked;
        const showMatchProgress = showSemantic && !!willBeChunked;

        const setItemVisible = (elementId: string, visible: boolean): void => {
            const item = document.getElementById(elementId);
            if (item) item.style.display = visible ? '' : 'none';
        };
        setItemVisible('token_histogram_item', showInfoDensity);
        setItemVisible('surprisal_progress_item', showInfoDensity);
        setItemVisible('raw_score_normed_histogram_item', showRawScoreHistogram);
        setItemVisible('match_score_progress_item', showMatchProgress);

        const transparent = () => 'transparent';

        if (showInfoDensity && pendingInfoDensity) {
            const histogramConfig = getTokenSurprisalHistogramConfig();
            this.deps.stats_frac.update({ ...histogramConfig, data: [], colorScale: transparent });
            const histogramTitle = document.getElementById('token_histogram_title');
            if (histogramTitle) histogramTitle.textContent = histogramConfig.label;

            const surprisalConfig = getSurprisalProgressConfig();
            this.deps.stats_surprisal_progress.update({ ...surprisalConfig, data: [] });
            const surprisalTitle = document.getElementById('surprisal_progress_title');
            if (surprisalTitle && surprisalConfig.label) surprisalTitle.textContent = surprisalConfig.label;
        }

        if (showRawScoreHistogram && pendingSemantic) {
            const rawScoreConfig = getRawScoreNormedHistogramConfig();
            this.deps.stats_raw_score_normed.update({ ...rawScoreConfig, data: [], colorScale: transparent });
            const rawScoreTitle = document.getElementById('raw_score_normed_histogram_title');
            if (rawScoreTitle) rawScoreTitle.textContent = rawScoreConfig.label;
        }

        if (showSemantic && pendingSemantic && willBeChunked) {
            const matchConfig = getMatchScoreProgressConfig();
            const textLength = text.length;
            this.deps.stats_match_score_progress.update({
                ...matchConfig,
                data: [],
                showMovingAverage: false,
                chunkLines: [],
                thresholdLine: getSemanticMatchThreshold(),
                extent: { x: textLength > 0 ? [0, textLength] : undefined, y: [0, 1] }
            });
            const matchTitle = document.getElementById('match_score_progress_title');
            if (matchTitle && matchConfig.label) matchTitle.textContent = matchConfig.label;
        }
    }

    /**
     * Re-renders the statistic charts and, unless skipped, the text view from the current state.
     *
     * Info-density only: token histogram and surprisal progress are shown. Semantic only: the
     * normalized raw score histogram (whole-text mode) or the per-chunk match progress (chunked
     * mode) is shown. Joint: both groups are shown.
     *
     * @param skipLmfUpdate When `true`, `lmf.update` is skipped (on theme change the redraw is
     *                      done once by `rerenderOnThemeChange`, which avoids a race).
     * @throws TokenBoundaryInconsistentError propagated from `computeDisplayResult()`.
     */
    private updateVisualizationInternal(skipLmfUpdate = false): void {
        const hasInfoDensity = !!this.currentState.infoDensityData;
        const displayResult = this.computeDisplayResult();

        const tokenHistogramItem = document.getElementById('token_histogram_item');
        const surprisalProgressItem = document.getElementById('surprisal_progress_item');
        const rawScoreNormedItem = document.getElementById('raw_score_normed_histogram_item');

        if (hasInfoDensity) {
            const { currentSurprisals, currentTokenAvg, currentTokenP90 } = this.currentState;
            if (currentSurprisals) {
                const tokenHistogramConfig = getTokenSurprisalHistogramConfig();
                this.deps.stats_frac.update({
                    ...tokenHistogramConfig,
                    data: currentSurprisals,
                    colorScale: this.deps.surprisalColorScale,
                    averageValue: currentTokenAvg ?? undefined,
                    p90Value: currentTokenP90 ?? undefined,
                    // NOTE(review): the P90 label is taken from `averageLabel`; this looks like
                    // a copy-paste slip — confirm whether the config offers a dedicated P90 label.
                    p90Label: tokenHistogramConfig.averageLabel,
                });
                const titleElement = document.getElementById('token_histogram_title');
                if (titleElement) titleElement.textContent = tokenHistogramConfig.label;
            }
            if (currentSurprisals && currentSurprisals.length > 0) {
                const surprisalProgressConfig = getSurprisalProgressConfig();
                this.deps.stats_surprisal_progress.update({
                    ...surprisalProgressConfig,
                    data: currentSurprisals,
                });
                const surprisalProgressTitleElement = document.getElementById('surprisal_progress_title');
                if (surprisalProgressTitleElement && surprisalProgressConfig.label) {
                    surprisalProgressTitleElement.textContent = surprisalProgressConfig.label;
                }
            }
            if (tokenHistogramItem) tokenHistogramItem.style.display = '';
            if (surprisalProgressItem) surprisalProgressItem.style.display = '';
        } else {
            if (tokenHistogramItem) tokenHistogramItem.style.display = 'none';
            if (surprisalProgressItem) surprisalProgressItem.style.display = 'none';
        }

        const validRawScoresNormed = displayResult?.rawScoresNormed?.filter((s) => typeof s === 'number' && isFinite(s));
        const sem = this.currentState.semanticData;
        const signalFitResult = sem?.signalFitResult ?? null;
        const chunkInfos = sem?.chunkInfos;
        const isChunkMode = (chunkInfos?.length ?? 0) > 0;
        const chunksWithThreshold = chunkInfos?.filter((c) => c.thresholdResult != null) ?? [];
        // Per-chunk thresholds are used as soon as at least one chunk carries its own fit.
        const thresholdByChunk = chunksWithThreshold.length > 0
            ? new Map(chunksWithThreshold.map((c) => [c.chunkIndex, c.thresholdResult!]))
            : null;
        const docLen = (displayResult?.originalText ?? '').length;

        if (displayResult && validRawScoresNormed && validRawScoresNormed.length > 0) {
            const rawScoreNormedConfig = getRawScoreNormedHistogramConfig();
            const colorScale = (v: number) => getSemanticSimilarityColor(v, HISTOGRAM_MIN_ALPHA);
            const thresholdForHistogram = thresholdByChunk != null
                ? chunksWithThreshold[0]!.thresholdResult!
                : signalFitResult;
            // confidence > 0: findSignalThreshold succeeded (≥ MIN_ACCEPTABLE); confidence === 0 is
            // the P90 fallback, for which the truncated log-normal expected curve is not drawn.
            const fitResult = validRawScoresNormed.length >= 2 && thresholdForHistogram != null && thresholdForHistogram.confidence > 0
                ? {
                    mu: thresholdForHistogram.mu,
                    sigma: thresholdForHistogram.sigma,
                    expectedCounts: computeExpectedCounts(
                        thresholdForHistogram.mu,
                        thresholdForHistogram.sigma,
                        rawScoreNormedConfig.extent as [number, number],
                        rawScoreNormedConfig.no_bins,
                        validRawScoresNormed.length
                    ),
                }
                : null;
            // NOTE(review): signalProbs is indexed like the *filtered* scores, whereas pPwValues and
            // pwScores below are indexed per token; if non-finite scores can occur, consumers that
            // index signalProbs by token would be misaligned — confirm against the consumers.
            const signalProbs = thresholdForHistogram != null
                ? signalProbFromBins(validRawScoresNormed, thresholdForHistogram.bins)
                : [];

            /**
             * P_pw: simplified mapping of the posterior signal probability, 0 when x <= threshold
             * and 1 when x > threshold.
             * pw_score = score × P_pw × matchDegree
             * Chunked mode: every token uses the threshold and chunkMatchDegree of its own chunk.
             * Non-chunked mode: the full-text match degree full_match_degree is used.
             */
            const rawScoresNormedFull = displayResult.rawScoresNormed ?? [];
            const mergedTokens = displayResult.bpeBpeMergedTokens ?? [];

            const getChunkForToken = (tokenIndex: number) => {
                const token = mergedTokens[tokenIndex];
                if (!token || !isChunkMode) return null;
                const offset = token.offset[0];
                return chunkInfos!.find((c) => c.startOffset <= offset && offset < c.endOffset) ?? null;
            };

            const getThresholdForToken = (i: number): number => {
                const chunk = getChunkForToken(i);
                if (chunk && thresholdByChunk != null) {
                    // Named to avoid shadowing the imported i18n helper `tr`.
                    const chunkThreshold = thresholdByChunk.get(chunk.chunkIndex);
                    if (chunkThreshold) return chunkThreshold.threshold;
                }
                return signalFitResult?.threshold ?? 0;
            };

            const getMatchDegreeForToken = (i: number): number => {
                const chunk = getChunkForToken(i);
                if (chunk) return chunk.chunkMatchDegree;
                return sem?.full_match_degree ?? 1;
            };

            const hasThreshold = signalFitResult != null || thresholdByChunk != null;
            // Evaluate the above-threshold test once per token and derive both series from it.
            const aboveThreshold: boolean[] = hasThreshold
                ? rawScoresNormedFull.map((s, i) => typeof s === 'number' && isFinite(s) && s > getThresholdForToken(i))
                : [];
            const pPwValues: number[] = aboveThreshold.map((above) => (above ? 1 : 0));
            const pwScores: number[] = aboveThreshold.map(
                (above, i) => (above ? rawScoresNormedFull[i]! : 0) * getMatchDegreeForToken(i)
            );

            const colorSourceEl = document.getElementById('semantic_color_source_select') as HTMLSelectElement | null;
            const colorSource = colorSourceEl?.value ?? 'pw_score';
            const scoresForColor = colorSource === 'signal_probability' ? pPwValues
                : colorSource === 'pw_score' ? pwScores
                : rawScoresNormedFull;

            // In joint mode the tooltip needs pPwValues/pwScores to show the semantic match
            // information, so they are attached to the result even when fitResult is null.
            const resultWithExt = hasThreshold
                ? { ...displayResult, signalProbs, pPwValues, pwScores }
                : displayResult;
            if (fitResult != null) {
                this.deps.highlightController.updateCurrentData({ result: resultWithExt, signalProbs, pPwValues, pwScores });
                if (!skipLmfUpdate) {
                    this.deps.lmf.update({ ...resultWithExt, pwScores, colorScores: scoresForColor } as FrontendAnalyzeResult & { pPwValues?: number[]; pwScores?: number[]; colorScores?: number[] });
                }
            } else {
                this.deps.highlightController.updateCurrentData({ result: resultWithExt });
                if (!skipLmfUpdate) {
                    this.deps.lmf.update({ ...resultWithExt, colorScores: scoresForColor } as FrontendAnalyzeResult & { pPwValues?: number[]; pwScores?: number[]; colorScores?: number[] });
                }
            }

            // The histogram is shown in whole-text mode only; in chunked mode it is neither
            // computed nor displayed.
            if (!isChunkMode) {
                const probCurveData = signalProbs.length > 0
                    ? (() => {
                        const pairs = validRawScoresNormed.map((x, i) => ({ x, y: signalProbs[i]! })).sort((a, b) => a.x - b.x);
                        return { x: pairs.map(p => p.x), y: pairs.map(p => p.y) };
                    })()
                    : undefined;
                // Share of scores strictly below the threshold, as a rounded percentage.
                const signalThresholdPercentile = thresholdForHistogram != null
                    ? Math.round((validRawScoresNormed.filter((s) => s < thresholdForHistogram.threshold).length / validRawScoresNormed.length) * 100)
                    : undefined;
                this.deps.stats_raw_score_normed.update({
                    ...rawScoreNormedConfig,
                    data: validRawScoresNormed,
                    colorScale,
                    fitExpectedCounts: fitResult?.expectedCounts,
                    showProbCurve: true,
                    probCurveData: probCurveData?.x.length ? probCurveData : undefined,
                    signalThreshold: thresholdForHistogram?.threshold ?? undefined,
                    signalThresholdPercentile: signalThresholdPercentile ?? undefined,
                });
                const titleEl = document.getElementById('raw_score_normed_histogram_title');
                if (titleEl) titleEl.textContent = rawScoreNormedConfig.label;
                if (rawScoreNormedItem) rawScoreNormedItem.style.display = '';
            } else {
                if (rawScoreNormedItem) rawScoreNormedItem.style.display = 'none';
            }
            // Per-chunk semantic match progress: chunked mode only.
            this.updateChunkMatchProgressPanel(isChunkMode, chunkInfos, docLen);
        } else {
            // No valid semantic scores: still publish the result and redraw the text when there is
            // info-density or semantic data behind it.
            if (displayResult) {
                this.deps.highlightController.updateCurrentData({ result: displayResult });
                if (!skipLmfUpdate && (hasInfoDensity || hasSemanticData(sem))) {
                    this.deps.lmf.update(displayResult);
                }
            }
            const semanticEnabled = getSemanticAnalysisEnabled();
            // Chunked mode never shows the histogram; whole-text mode without data shows an
            // empty placeholder.
            if (semanticEnabled && !isChunkMode) {
                const rawScoreNormedConfig = getRawScoreNormedHistogramConfig();
                this.deps.stats_raw_score_normed.update({ ...rawScoreNormedConfig, data: [], colorScale: () => 'transparent' });
                const titleEl = document.getElementById('raw_score_normed_histogram_title');
                if (titleEl) titleEl.textContent = rawScoreNormedConfig.label;
                if (rawScoreNormedItem) rawScoreNormedItem.style.display = '';
            } else {
                if (rawScoreNormedItem) rawScoreNormedItem.style.display = 'none';
            }
            // Per-chunk semantic match progress placeholder (chunked mode only).
            this.updateChunkMatchProgressPanel(semanticEnabled && isChunkMode, chunkInfos, docLen);
        }
    }

    /**
     * Shows and redraws, or hides, the per-chunk semantic match progress panel. Only the chunk
     * match lines and the match threshold line are drawn; no data points are passed.
     *
     * @param show Whether the panel should be visible; when `false` it is only hidden.
     * @param chunkInfos Chunk boundaries and match degrees to draw as horizontal segments.
     * @param docLen Length of the displayed text; used as the x extent when greater than 0.
     */
    private updateChunkMatchProgressPanel(show: boolean, chunkInfos: SemanticData['chunkInfos'], docLen: number): void {
        const panel = document.getElementById('match_score_progress_item');
        if (!show) {
            if (panel) panel.style.display = 'none';
            return;
        }
        const matchScoreProgressConfig = getMatchScoreProgressConfig();
        const chunkLines = (chunkInfos ?? []).map((c) => ({ x0: c.startOffset, x1: c.endOffset, y: c.chunkMatchDegree }));
        const thresholdLine = getSemanticMatchThreshold();
        this.deps.stats_match_score_progress.update({
            ...matchScoreProgressConfig,
            data: [],
            showMovingAverage: false,
            chunkLines,
            thresholdLine,
            chunkInteraction: true,
            extent: { x: docLen > 0 ? [0, docLen] : undefined, y: [0, 1] }
        });
        const titleEl = document.getElementById('match_score_progress_title');
        if (titleEl && matchScoreProgressConfig.label) titleEl.textContent = matchScoreProgressConfig.label;
        if (panel) panel.style.display = '';
    }

    /** Re-renders the charts and the text view (public entry point for external callers). */
    public rerenderHistograms(): void {
        const skipLmfUpdate = false;
        this.updateVisualizationInternal(skipLmfUpdate);
    }

    /**
     * Re-colors the text view after the semantic color source selection changed, without
     * refitting anything.
     *
     * The score series are read from the data held by the highlight controller. They are taken
     * from its top-level `pPwValues` / `pwScores` when present and otherwise from the copies
     * embedded in the stored result: when the last render had a threshold but no fit curve,
     * the series are only attached to the result, and reading the top level alone would blank
     * the coloring and drop the series from the text view.
     *
     * Does nothing when there is no current result with normalized raw scores.
     */
    public updateSemanticColorSource(): void {
        const current = this.deps.highlightController.getCurrentData();
        const result = current?.result as
            | (FrontendAnalyzeResult & { rawScoresNormed?: number[]; pPwValues?: number[]; pwScores?: number[] })
            | undefined;
        if (!current || !result?.rawScoresNormed?.length) return;

        const pPwValues = current.pPwValues ?? result.pPwValues;
        const pwScores = current.pwScores ?? result.pwScores;

        const selectEl = document.getElementById('semantic_color_source_select') as HTMLSelectElement | null;
        const colorSource = selectEl?.value ?? 'pw_score';
        const scoresForColor = colorSource === 'signal_probability' ? (pPwValues ?? [])
            : colorSource === 'pw_score' ? (pwScores ?? [])
            : result.rawScoresNormed;

        this.deps.lmf.update({ ...result, pPwValues, pwScores, colorScores: scoresForColor } as FrontendAnalyzeResult & { pPwValues?: number[]; pwScores?: number[]; colorScores?: number[] });
    }

    /**
     * Called on theme change: redraws the charts and the text once the new styles are in
     * effect (the rgba colors let the background show through, so the redraw has to wait for
     * the new theme). The redraw is deferred by two animation frames.
     */
    public rerenderOnThemeChange(): void {
        const redraw = (): void => {
            this.updateVisualizationInternal(true);
            this.deps.lmf.reRenderCurrent();
        };
        requestAnimationFrame(() => {
            requestAnimationFrame(redraw);
        });
    }

    /**
     * Clears all independently stored data when the text is edited, so that what is displayed
     * cannot disagree with the input. Also resets the highlight controller's data, fades out
     * `#all_result`, and refreshes the semantic debug panel and the digit-merge toggle.
     */
    public clearDataOnTextChange(): void {
        const cleared: CurrentDataState = {
            infoDensityData: null,
            semanticData: null,
            rawApiResponse: null,
            currentSurprisals: null,
            currentTokenAvg: null,
            currentTokenP90: null,
            currentTotalSurprisal: null
        };
        // Mutate the existing state object in place rather than replacing it.
        Object.assign(this.currentState, cleared);

        this.deps.highlightController.updateCurrentData(null);
        d3.select('#all_result').style('opacity', 0);
        this.updateSemanticDebugInfo();
        this.syncDigitsMergeUi();
    }

    /**
     * Clears everything related to semantic analysis (stored semantic data, the semantic
     * charts, the debug panel); used to initialise when the mode is opened.
     */
    public clearSemanticState(): void {
        this.currentState.semanticData = null;

        const semanticPanelIds = ['raw_score_normed_histogram_item', 'match_score_progress_item'];
        for (const panelId of semanticPanelIds) {
            const panel = document.getElementById(panelId);
            if (panel) panel.style.display = 'none';
        }

        this.updateSemanticDebugInfo();
        this.syncDigitsMergeUi();
    }

    /**
     * Chunked semantic results cannot have their digit-merge setting switched on the client,
     * so the toggle is disabled while chunk data is stored, which avoids boundaries that
     * disagree with the info-density tokens. The tooltip explains why the toggle is locked.
     */
    private syncDigitsMergeUi(): void {
        const toggle = document.getElementById('enable_digits_merge_toggle') as HTMLInputElement | null;
        if (toggle === null) return;

        const chunkCount = this.currentState.semanticData?.chunkInfos?.length ?? 0;
        const locked = chunkCount > 0;
        toggle.disabled = locked;
        if (locked) {
            toggle.title = 'Chunked semantic analysis locks digit merge; clear semantic data or use non-chunked mode to toggle.';
        } else {
            toggle.title = '';
        }
    }

    /**
     * Applies a change of the digit-merge switch: re-merges the info-density tokens from
     * `originalTokens`, re-merges and re-normalizes the semantic attention from the API copy
     * (whole-text mode only) and refits its signal threshold, recomputes the surprisal
     * statistics, then refreshes the text view, the charts and the button states.
     *
     * If the recomputed token boundaries no longer agree, `computeDisplayResult()` drops the
     * semantic data and throws; the display result is then computed a second time. Any other
     * error is logged and the refresh is abandoned.
     */
    public applyDigitsMergeSetting(): void {
        const digitMerge = getDigitsMergeEnabled();
        const state = this.currentState;

        // 1. Info-density tokens: re-merge from the original tokens (mutates the stored result).
        const infoResult = state.infoDensityData?.result as FrontendAnalyzeResult | undefined;
        const infoText = state.infoDensityData?.request?.text ?? infoResult?.originalText ?? '';
        if (infoResult && infoResult.originalTokens?.length && infoText) {
            const remerged = mergeTokensForRendering(infoResult.originalTokens, infoText, { digitMerge });
            infoResult.bpeBpeMergedTokens = remerged;
            infoResult.bpe_strings = remerged;
        }

        // 2. Semantic attention: only whole-text data keeps the API copy needed to recompute.
        const sem = state.semanticData;
        const isChunked = !!sem?.chunkInfos?.length;
        if (sem && !isChunked && sem.semanticTokenAttentionFromApi?.length && sem.text) {
            const remergedAttention = mergeAttentionTokensFullyForRendering(
                sem.semanticTokenAttentionFromApi,
                sem.text,
                { digitMerge }
            );
            const normalized = normalizeTokenScores(remergedAttention);
            const refit = findSignalThresholdWithLog(normalized);
            sem.token_attention = normalized;
            sem.signalFitResult = refit ?? undefined;
        }

        // 3. Surprisal statistics over the (possibly re-merged) info-density tokens.
        if (infoResult?.bpeBpeMergedTokens?.length && infoText) {
            const surprisals = calculateMergedTokenSurprisals(infoResult.bpeBpeMergedTokens);
            state.currentSurprisals = surprisals;
            state.currentTokenAvg = computeAverage(surprisals);
            state.currentTokenP90 = computeP90(surprisals);
        }

        this.syncDigitsMergeUi();

        // 4. Recompute the display result, tolerating one boundary-mismatch failure.
        let displayResult: ReturnType<VisualizationUpdater['computeDisplayResult']>;
        try {
            displayResult = this.computeDisplayResult();
        } catch (e) {
            if (!(e instanceof TokenBoundaryInconsistentError)) {
                console.error(e);
                return;
            }
            displayResult = this.computeDisplayResult();
        }

        // 5. Refresh the views.
        const { highlightController, lmf, appStateManager } = this.deps;
        highlightController.updateCurrentData(displayResult ? { result: displayResult } : null);
        lmf.clearHighlight();
        if (displayResult) lmf.update(displayResult);
        this.updateVisualizationInternal();
        appStateManager.updateButtonStates();
    }

    /**
     * Brings the semantic-analysis UI in line with the current configuration.
     *
     * Section visibility and the text renderer's `semanticAnalysisMode` follow the
     * configuration flag only; they do not depend on whether data is present.
     * When the feature is off, stored semantic data, the two semantic chart items and
     * the debug panel are cleared, and the view is redrawn from the remaining data so
     * no semantic colouring is left behind.
     * The digit-merge toggle and the button states are refreshed in every case, because
     * the semantic setting takes part in the Upload/Save readiness condition.
     */
    public syncSemanticUiFromConfig(): void {
        const semanticOn = getSemanticAnalysisEnabled();

        const section = document.getElementById('semantic_analysis_section');
        if (section) {
            section.style.display = semanticOn ? '' : 'none';
        }
        this.deps.lmf.updateOptions({ semanticAnalysisMode: semanticOn }, false);

        if (!semanticOn) {
            // Drop semantic data and hide everything that visualises it.
            this.currentState.semanticData = null;
            for (const itemId of ['raw_score_normed_histogram_item', 'match_score_progress_item']) {
                const chartItem = document.getElementById(itemId);
                if (chartItem) {
                    chartItem.style.display = 'none';
                }
            }
            this.updateSemanticDebugInfo();

            const displayResult = this.computeDisplayResult();
            const highlightPayload = displayResult ? { result: displayResult } : null;
            this.deps.highlightController.updateCurrentData(highlightPayload);
            if (!displayResult) {
                // Nothing left to show: fade the result area out and mark data invalid.
                d3.select('#all_result').style('opacity', 0);
                this.deps.appStateManager.updateState({ hasValidData: false });
            }
            // Redraw immediately from the current data.
            this.updateVisualizationInternal(false);
        }

        this.syncDigitsMergeUi();
        this.deps.appStateManager.updateButtonStates();
    }

    /**
     * Core update path: validates an analyze response, stores it as the info-density
     * data and renders text, metrics and charts from it.
     *
     * On a validation failure or an exception the user is alerted, `hasValidData` is
     * set to false, the render animation flag is switched back off and the semantic UI
     * is re-synced; nothing is thrown to the caller.
     *
     * @param data Analyze response. `data.result` is replaced in place by the enriched
     *   frontend result (original tokens, merged tokens, original text).
     * @param disableAnimation When false (default) the render animation is enabled for
     *   this update and switched off again by a timer after rendering.
     * @param options `enableSave` is accepted but not read by this method.
     */
    updateFromRequest(
        data: AnalyzeResponse,
        disableAnimation: boolean = false,
        options: { enableSave?: boolean } = {}
    ): void {
        // Destructured for its default only; not used further in this method.
        const { enableSave = true } = options;

        // The animation flag is enabled at the top of the try block, but only the
        // success path schedules turning it off again. Failure paths call this so later
        // re-renders of previously shown data do not replay the animation.
        const resetRenderAnimation = (): void => {
            if (!disableAnimation) {
                this.deps.lmf.updateOptions({ enableRenderAnimation: false }, false);
            }
        };

        const abortDueToInvalidResponse = (message: string) => {
            console.error(message);
            showAlertDialog(tr('Error'), message);
            this.deps.appStateManager.updateState({ hasValidData: false });
            resetRenderAnimation();
            this.syncSemanticUiFromConfig();
        };

        try {
            // Animation is enabled only for Analyze-triggered updates; it stays off otherwise.
            if (!disableAnimation) {
                this.deps.lmf.updateOptions({ enableRenderAnimation: true }, false);
            }
            // Semantic analysis mode is decided by configuration.
            this.deps.lmf.updateOptions({
                semanticAnalysisMode: getSemanticAnalysisEnabled(),
            }, false);

            d3.select('#all_result').style('opacity', 1).style('display', null);
            this.deps.appStateManager.setIsAnalyzing(false);
            this.deps.appStateManager.setGlobalLoading(false);

            // Hide the text-area loading state early rather than waiting for the renderer update.
            this.deps.lmf.hideLoading();

            // Validate the overall response shape.
            if (!data || !data.result) {
                console.error('Invalid data structure:', data);
                throw new Error('Invalid API response structure');
            }

            const result = data.result;

            // Required fields must exist and have the expected types.
            if (!Array.isArray(result.bpe_strings) || result.bpe_strings.length === 0) {
                abortDueToInvalidResponse(tr('Returned JSON missing valid bpe_strings array, processing cancelled.'));
                return;
            }
            const predTopkError = validateTokenPredictions(result.bpe_strings as Array<{ pred_topk?: [string, number][] }>);
            if (predTopkError) {
                abortDueToInvalidResponse(predTopkError);
                return;
            }
            const probabilityError = validateTokenProbabilities(result.bpe_strings as Array<{ real_topk?: [number, number] }>);
            if (probabilityError) {
                abortDueToInvalidResponse(probabilityError);
                return;
            }

            const safeText = data.request.text;
            const validationError = validateTokenConsistency(result.bpe_strings, safeText, { allowOverlap: true });
            if (validationError) {
                abortDueToInvalidResponse(validationError);
                return;
            }

            // Snapshot the raw response before it is enriched, then derive merged tokens.
            const rawSnapshot = createRawSnapshot(data);
            const originalTokens = result.bpe_strings.map((token) => cloneFrontendToken(token as FrontendToken));
            const bpeBpeMergedTokens = mergeTokensForRendering(originalTokens, safeText, {
                digitMerge: getDigitsMergeEnabled(),
            });
            const mergedValidationError = validateTokenConsistency(bpeBpeMergedTokens, safeText);
            if (mergedValidationError) {
                abortDueToInvalidResponse(mergedValidationError);
                return;
            }

            const enhancedResult: FrontendAnalyzeResult = {
                ...result,
                originalTokens,
                bpeBpeMergedTokens,
                bpe_strings: bpeBpeMergedTokens,
                originalText: safeText,
            };
            data.result = enhancedResult;

            // Info-density data is stored on its own; it carries no debug info, so the
            // semantic debug panel is updated without any.
            this.currentState.infoDensityData = data;
            this.currentState.rawApiResponse = rawSnapshot;
            this.updateSemanticDebugInfo();
            let displayResult: ReturnType<VisualizationUpdater['computeDisplayResult']>;
            try {
                displayResult = this.computeDisplayResult();
            } catch (e) {
                // A boundary mismatch gets one retry; a second failure reaches the outer catch.
                if (e instanceof TokenBoundaryInconsistentError) {
                    displayResult = this.computeDisplayResult();
                } else {
                    throw e;
                }
            }
            this.deps.highlightController.updateCurrentData(displayResult ? { result: displayResult } : null);

            this.deps.lmf.clearHighlight();
            if (displayResult) this.deps.lmf.update(displayResult);

            const textStats = calculateTextStats(enhancedResult, safeText);

            const mergedSurprisals = calculateMergedTokenSurprisals(enhancedResult.bpeBpeMergedTokens);
            // Histogram / progress use merged tokens; text metrics keep using textStats.
            this.currentState.currentSurprisals = mergedSurprisals;
            this.currentState.currentTokenAvg = computeAverage(mergedSurprisals);
            this.currentState.currentTokenP90 = computeP90(mergedSurprisals);
            this.currentState.currentTotalSurprisal = textStats.totalSurprisal;

            // Text metrics and model label use the model reported in the result.
            const resultModel = data.result.model;
            this.updateTextMetrics(textStats, resultModel);

            // Switch the animation off after the Analyze render so later re-renders
            // (dragging etc.) do not replay it.
            if (!disableAnimation) {
                // Estimated duration: 100 ms initial delay plus 100 ms per batch of 50
                // tokens; the flag is cleared no earlier than 2 s after rendering.
                const tokenCount = enhancedResult.bpe_strings.length;
                const estimatedAnimationTime = 100 + Math.ceil(tokenCount / 50) * 100;
                const delayTime = Math.max(2000, estimatedAnimationTime + 500);

                setTimeout(() => {
                    this.deps.lmf.updateOptions({ enableRenderAnimation: false }, false);
                }, delayTime);
            }
        } catch (error) {
            console.error('Error updating visualization:', error);
            this.deps.appStateManager.setIsAnalyzing(false);
            this.deps.appStateManager.setGlobalLoading(false);
            this.deps.appStateManager.updateState({ hasValidData: false });
            resetRenderAnimation();
            this.syncSemanticUiFromConfig();
            showAlertDialog(tr('Error'), 'Error rendering visualization. Check console for details.');
            return;
        }

        // Clear any previous selection.
        this.clearHighlights();

        // Re-render the charts.
        this.updateVisualizationInternal();

        // Data was processed successfully: mark it valid.
        this.deps.appStateManager.updateState({ hasValidData: true });

        this.syncSemanticUiFromConfig();
        this.syncDigitsMergeUi();
    }

    /**
     * Handles a semantic-analysis response: stores it as `semanticData`, computes the
     * display result and renders it.
     *
     * Whole-text responses (no `chunkInfos`) are boundary-validated, merged with the
     * current digit-merge setting, normalized and threshold-fitted here. Chunked
     * responses are taken as delivered, without whole-text merging or normalization,
     * so digits are never merged and scores never rescaled across chunk borders; only
     * a per-chunk threshold is fitted from the tokens overlapping each chunk.
     *
     * @param res Semantic response payload.
     * @param text Text the response refers to; an empty or missing text is a failure.
     * @param signalFitResult Optional precomputed fit; when given it takes precedence
     *   over the fit computed here.
     * @returns true on success (including "no semantic data", which just clears the
     *   semantic state); false when validation or display computation failed and the
     *   caller should stop further analysis.
     */
    public handleSemanticResponse(
        res: {
            model?: string;
            token_attention?: Array<{ offset: [number, number]; raw: string; score: number; rawScore?: number }>;
            debug_info?: { abbrev?: string; topk_tokens?: string[]; topk_probs?: number[] };
            chunkInfos?: Array<{ startOffset: number; endOffset: number; chunkIndex: number; chunkMatchDegree: number; thresholdResult?: signalFitResult }>;
            full_match_degree?: number;
        },
        text?: string,
        signalFitResult?: signalFitResult | null
    ): boolean {
        // Nothing semantic in the response: reset semantic state and redraw the charts.
        if (!hasSemanticData(res)) {
            this.clearSemanticState();
            this.rerenderHistograms();
            this.deps.lmf.hideLoading();
            return true;
        }

        const currentText = text ?? '';
        // NOTE(review): this and the validation failure below return without hiding the
        // loading state - presumably the caller does that; confirm.
        if (!currentText) {
            return false;
        }

        const chunkInfos = res?.chunkInfos;
        const tokenAttention = res?.token_attention;
        const chunked = Boolean(chunkInfos?.length);

        // Non-empty attention of a whole-text response; undefined in chunked mode.
        const wholeTextAttention =
            !chunked && tokenAttention && tokenAttention.length > 0 ? tokenAttention : undefined;

        // Whole-text mode must pass the token boundary check (overlaps allowed).
        if (wholeTextAttention) {
            const boundaryError = validateTokenConsistency(wholeTextAttention, currentText, { allowOverlap: true });
            if (boundaryError) {
                showAlertDialog(tr('Error'), boundaryError);
                return false;
            }
        }

        // Keep a copy of the unmerged API attention (fresh offset tuples) alongside the merged data.
        const apiAttentionSnapshot = wholeTextAttention?.map((tok) => ({
            ...tok,
            offset: [tok.offset[0], tok.offset[1]] as [number, number],
        }));

        const mergedAttention = chunked
            ? (tokenAttention ?? [])
            : mergeAttentionTokensFullyForRendering(tokenAttention ?? [], currentText, {
                  digitMerge: getDigitsMergeEnabled(),
              });
        const normalizedAttention = chunked ? mergedAttention : normalizeTokenScores(mergedAttention);
        const wholeTextFit = chunked ? undefined : findSignalThresholdWithLog(normalizedAttention);

        // Per chunk: fit a threshold on the tokens overlapping the chunk. When no fit is
        // found, whatever thresholdResult the chunk info already carried is kept.
        const resolvedChunkInfos =
            chunkInfos && chunkInfos.length > 0
                ? chunkInfos.map((chunk) => {
                      const tokensInChunk = normalizedAttention.filter(
                          (tok) => tok.offset[1] > chunk.startOffset && tok.offset[0] < chunk.endOffset
                      );
                      const thresholdResult =
                          tokensInChunk.length > 0 ? findSignalThresholdWithLog(tokensInChunk) : null;
                      return { ...chunk, ...(thresholdResult ? { thresholdResult } : {}) };
                  })
                : chunkInfos;

        this.currentState.semanticData = {
            text: currentText,
            model: res.model,
            semanticTokenAttentionFromApi: apiAttentionSnapshot,
            token_attention: normalizedAttention,
            signalFitResult: signalFitResult ?? wholeTextFit ?? undefined,
            chunkInfos: resolvedChunkInfos,
            full_match_degree: res.full_match_degree,
        };

        let displayResult: ReturnType<VisualizationUpdater['computeDisplayResult']>;
        try {
            displayResult = this.computeDisplayResult();
        } catch (err) {
            // Any failure discards the semantic data that was just stored.
            this.currentState.semanticData = null;
            if (!(err instanceof TokenBoundaryInconsistentError)) {
                showAlertDialog(tr('Error'), err instanceof Error ? err.message : String(err));
                return false;
            }
            this.deps.lmf.hideLoading();
            this.rerenderHistograms();
            return false;
        }

        d3.select('#all_result').style('opacity', 1).style('display', null);
        this.deps.lmf.hideLoading();
        this.deps.highlightController.updateCurrentData({ result: displayResult });
        this.deps.lmf.clearHighlight();
        this.clearHighlights();
        this.updateVisualizationInternal();

        this.updateSemanticDebugInfo(res.debug_info);
        this.syncDigitsMergeUi();
        return true;
    }

    /**
     * Updates the debug info shown below the text rendering area (abbrev + top-k).
     *
     * @param di Debug info from the semantic response; when omitted the panel is
     *   updated with `debugInfo: undefined`.
     */
    private updateSemanticDebugInfo(di?: { abbrev?: string; topk_tokens?: string[]; topk_probs?: number[] }): void {
        const panelPayload = { debugInfo: di };
        applySemanticDebugInfoPanel('results', 'semantic_debug_info', panelPayload);
    }

    /**
     * Builds a display result from semantic attention alone (no info-density data).
     *
     * The attention tokens are used as they are: one frontend token per attention token
     * with an empty `pred_topk`. The same token list is exposed, as three separate
     * shallow copies, under `bpe_strings`, `originalTokens` and `bpeBpeMergedTokens`.
     * The copies are shallow, so the `offset` tuples are shared with the input tokens.
     *
     * @param res Source of the `model` field.
     * @param tokenAttention Attention tokens; `score` feeds `rawScoresNormed`, and
     *   `getAttentionRawScore` feeds `attentionRawScores`.
     * @param text Text the tokens refer to.
     * @param chunkInfos Passed through unchanged.
     * @returns The assembled result, or null when `text` is empty.
     */
    private buildSemanticOnlyResult(
        res: { model?: string },
        tokenAttention: Array<{ offset: [number, number]; raw: string; score: number; rawScore?: number }>,
        text: string,
        chunkInfos?: SemanticData['chunkInfos']
    ): (FrontendAnalyzeResult & { rawScoresNormed: number[]; attentionRawScores: number[]; chunkInfos?: SemanticData['chunkInfos'] }) | null {
        const safeText = text ?? '';
        if (safeText.length === 0) {
            return null;
        }

        // One pass builds the token list and both score arrays in step.
        const baseTokens: FrontendToken[] = [];
        const rawScoresNormed: number[] = [];
        const attentionRawScores: number[] = [];
        for (const entry of tokenAttention) {
            baseTokens.push({ offset: entry.offset, raw: entry.raw, pred_topk: [] } as FrontendToken);
            rawScoresNormed.push(entry.score);
            attentionRawScores.push(getAttentionRawScore(entry));
        }

        // Each result field gets its own array of shallow token copies.
        const freshCopy = (): FrontendToken[] => baseTokens.map((tok) => ({ ...tok }));

        return {
            model: res.model,
            bpe_strings: freshCopy(),
            originalTokens: freshCopy(),
            bpeBpeMergedTokens: freshCopy(),
            originalText: safeText,
            rawScoresNormed,
            attentionRawScores,
            chunkInfos
        };
    }

    /**
     * Checks that semantic attention tokens start and end on info-density token
     * boundaries. Sparse coverage is allowed: semantic tokens need not cover the text.
     *
     * Boundaries are offset 0 plus the END offset of every info token.
     * NOTE(review): this presumes info tokens tile the text without gaps from 0;
     * a token start after a gap would not be registered as a boundary - confirm.
     *
     * Skipped without complaint: tokens with an invalid range (negative start, end
     * beyond the text, or empty/reversed), which the original comment leaves to
     * validateTokenConsistency, and tokens ending beyond the last info token.
     *
     * Text previews in the returned descriptions are cut to 20 characters; the
     * ellipsis is appended only when something was actually cut off.
     *
     * @param tokenAttention Semantic tokens to check.
     * @param infoMerged Merged info-density tokens that define the boundaries.
     * @param text Full text, used for previews and surrounding context.
     * @returns null when every checked token aligns; otherwise a description of the
     *   first misaligned token, the info token at the same position, the token after
     *   each of them, and up to 30 characters of text before and after.
     */
    private checkSemanticAlignsWithInfo(
        tokenAttention: Array<{ offset: [number, number]; raw?: string }>,
        infoMerged: Array<{ offset: [number, number] }>,
        text: string
    ): { firstBadIdx: number; aSample: string; bSample: string; aNext: string; bNext: string; textBefore: string; textAt: string; textAfter: string } | null {
        const SAMPLE_CHARS = 20;
        const CONTEXT_CHARS = 30;

        const boundaries = new Set<number>([0]);
        for (const t of infoMerged) boundaries.add(t.offset[1]);
        const infoEnd = infoMerged.length > 0 ? infoMerged[infoMerged.length - 1]!.offset[1] : 0;
        const totalChars = text.length;

        // Escape control characters and quotes the way JSON does, without the outer quotes.
        const esc = (s: string): string => JSON.stringify(s).slice(1, -1);
        // Single place that decides truncation, based on the FULL length, so a string of
        // exactly SAMPLE_CHARS characters is not marked as truncated.
        const preview = (s: string): string =>
            esc(s.slice(0, SAMPLE_CHARS)) + (s.length > SAMPLE_CHARS ? '…' : '');
        const describe = (label: string, offset: [number, number], content: string): string =>
            `${label} [字符${offset[0]}-${offset[1]}] "${preview(content)}"`;
        const fmt = (t: { offset: [number, number]; raw?: string }, idx: number): string =>
            describe(`第${idx}个token分词`, t.offset, t.raw ?? text.slice(t.offset[0], t.offset[1]));

        for (let i = 0; i < tokenAttention.length; i++) {
            const tok = tokenAttention[i]!;
            const [start, end] = tok.offset;
            if (start < 0 || end > totalChars || end <= start) continue; // invalid range: not checked here
            if (end > infoEnd) continue; // beyond the range both sides cover
            if (boundaries.has(start) && boundaries.has(end)) continue;

            // First misaligned token found: collect diagnostics and stop.
            const infoIdx = infoMerged.findIndex((t) => t.offset[0] <= start && start < t.offset[1]);
            const infoAt = infoIdx >= 0 ? infoMerged[infoIdx]! : null;
            const nextSem = tokenAttention[i + 1];
            const nextInfo = infoIdx >= 0 && infoIdx + 1 < infoMerged.length ? infoMerged[infoIdx + 1]! : null;
            return {
                firstBadIdx: i,
                aSample: describe(`第${i}个token分词`, [start, end], tok.raw || text.slice(start, end)),
                bSample: infoAt
                    ? describe('同一位置token分词', infoAt.offset, text.slice(infoAt.offset[0], infoAt.offset[1]))
                    : '无对应',
                aNext: nextSem ? fmt(nextSem, i + 1) : '无',
                bNext: nextInfo ? fmt(nextInfo, infoIdx + 1) : '无',
                textBefore: text.slice(Math.max(0, start - CONTEXT_CHARS), start),
                textAt: text.slice(start, end),
                textAfter: text.slice(end, Math.min(totalChars, end + CONTEXT_CHARS)),
            };
        }
        return null;
    }

    /**
     * Combined mode: builds the union of the merged info-density tokens and the
     * semantic tokens that start at or after the end of the info-density range, so the
     * rendered range covers both.
     *
     * Scores for the info-density part come from `mapTokenAttentionToMerged`. The tail
     * tokens are re-normalized among themselves, using each token's
     * `getAttentionRawScore` value as the input score. Non-finite tail scores become
     * `undefined`.
     *
     * @param bpeMerged Merged info-density tokens.
     * @param tokenAttention Semantic attention tokens.
     * @returns `unionTokens` plus two score arrays aligned with it by index. When no
     *   semantic token lies beyond the info range, `unionTokens` is `bpeMerged` itself.
     */
    private mergeBpeWithSemanticBeyond(
        bpeMerged: FrontendToken[],
        tokenAttention: Array<{ offset: [number, number]; raw: string; score: number; rawScore?: number }>
    ): { unionTokens: FrontendToken[]; scoresForUnion: (number | undefined)[]; rawScoresForUnion: (number | undefined)[] } {
        const finiteOrUndefined = (value: number): number | undefined =>
            Number.isFinite(value) ? value : undefined;

        const infoEnd = bpeMerged.length > 0 ? bpeMerged[bpeMerged.length - 1]!.offset[1] : 0;
        const tail = tokenAttention.filter((tok) => tok.offset[0] >= infoEnd);

        if (tail.length === 0) {
            const mapped = this.mapTokenAttentionToMerged(bpeMerged, tokenAttention);
            return {
                unionTokens: bpeMerged,
                scoresForUnion: mapped.scores,
                rawScoresForUnion: mapped.rawScores,
            };
        }

        // Re-normalize within the tail only, starting from the raw scores.
        const tailWithRawAsScore = tail.map((tok) => ({ ...tok, score: getAttentionRawScore(tok) }));
        const tailRenormed = normalizeTokenScores(tailWithRawAsScore);

        // Tail tokens carry placeholder probability fields; they have no info-density data.
        const tailTokens: FrontendToken[] = tailRenormed.map((tok) => ({
            offset: [tok.offset[0], tok.offset[1]],
            raw: tok.raw,
            real_topk: [0, 1] as [number, number],
            pred_topk: [],
        }));

        const mapped = this.mapTokenAttentionToMerged(bpeMerged, tokenAttention);
        const tailScores: (number | undefined)[] = tailRenormed.map((tok) => finiteOrUndefined(tok.score));
        const tailRawScores: (number | undefined)[] = tailRenormed.map((tok) =>
            finiteOrUndefined(getAttentionRawScore(tok))
        );

        return {
            unionTokens: bpeMerged.concat(tailTokens),
            scoresForUnion: mapped.scores.concat(tailScores),
            rawScoresForUnion: mapped.rawScores.concat(tailRawScores),
        };
    }

    /**
     * Maps attention tokens (offsets are character offsets into the original text)
     * onto merged tokens with a two-pointer sweep.
     *
     * Each merged token receives the overlap-weighted average of the scores, and of
     * the `getAttentionRawScore` values, of the attention tokens it overlaps. Merged
     * tokens without any overlap get `undefined` in both arrays.
     *
     * Precondition: both arrays are sorted by offset in ascending order.
     *
     * @param bpeBpeMergedTokens Merged tokens to map onto.
     * @param tokenAttention Attention tokens to distribute.
     * @returns `scores` and `rawScores`, each aligned with `bpeBpeMergedTokens` by index.
     */
    private mapTokenAttentionToMerged(
        bpeBpeMergedTokens: Array<{ offset: [number, number] }>,
        tokenAttention: Array<{ offset: [number, number]; score: number; rawScore?: number }>
    ): { scores: (number | undefined)[]; rawScores: (number | undefined)[] } {
        const mergedCount = bpeBpeMergedTokens.length;
        const scoreSums: number[] = new Array(mergedCount).fill(0);
        const rawScoreSums: number[] = new Array(mergedCount).fill(0);
        const overlapTotals: number[] = new Array(mergedCount).fill(0);

        // Index of the first merged token that does not end at or before the current attention token's start.
        let firstCandidate = 0;
        tokenAttention.forEach((attn) => {
            const [attnStart, attnEnd] = attn.offset;
            const rawPart = getAttentionRawScore(attn);

            while (firstCandidate < mergedCount && bpeBpeMergedTokens[firstCandidate].offset[1] <= attnStart) {
                firstCandidate++;
            }

            for (let k = firstCandidate; k < mergedCount; k++) {
                const [mergedStart, mergedEnd] = bpeBpeMergedTokens[k].offset;
                if (mergedStart >= attnEnd) {
                    break;
                }
                // Length of the intersection of the two character ranges.
                const overlap = Math.min(mergedEnd, attnEnd) - Math.max(mergedStart, attnStart);
                scoreSums[k] += attn.score * overlap;
                rawScoreSums[k] += rawPart * overlap;
                overlapTotals[k] += overlap;
            }
        });

        const weightedAverage = (sums: number[]) =>
            sums.map((sum, idx) => (overlapTotals[idx] > 0 ? sum / overlapTotals[idx] : undefined));

        return { scores: weightedAverage(scoreSums), rawScores: weightedAverage(rawScoreSums) };
    }
}