/** * 可视化更新模块 * 负责处理分析结果的可视化更新逻辑 */ import * as d3 from 'd3'; import type { AnalyzeResponse, FrontendAnalyzeResult, FrontendToken } from '../../shared/api/GLTR_API'; import type { GLTR_Text_Box } from '../../shared/vis/GLTR_Text_Box'; import type { HighlightController } from '../../shared/controllers/highlightController'; import type { TextInputController } from '../../shared/controllers/textInputController'; import type { Histogram } from '../../shared/vis/Histogram'; import type { ScatterPlot } from '../../shared/vis/ScatterPlot'; import type { AppStateManager } from './appStateManager'; import { cloneFrontendToken, mergeTokensForRendering, createRawSnapshot } from '../../shared/cross/tokenUtils'; import { getAttentionRawScore, mergeAttentionTokensFullyForRendering, normalizeTokenScores } from '../../shared/cross/semanticUtils'; import { validateTokenConsistency, validateTokenProbabilities, validateTokenPredictions } from '../../shared/cross/dataValidation'; import { calculateTextStats, calculateMergedTokenSurprisals, computeAverage, computeP90, type TextStats } from '../../shared/cross/textStatistics'; import { getTokenSurprisalHistogramConfig, getSurprisalProgressConfig, getMatchScoreProgressConfig, getRawScoreNormedHistogramConfig } from "./visualizationConfigs"; import { getSemanticSimilarityColor, HISTOGRAM_MIN_ALPHA } from '../../shared/cross/SurprisalColorConfig'; import { showAlertDialog } from '../../shared/ui/dialog'; import { tr } from '../../shared/lang/i18n-lite'; import { computeExpectedCounts } from './lognormalFit'; import { findSignalThresholdWithLog, type signalFitResult, type SignalThresholdBin } from './signalThresholdDetector'; import { getSemanticAnalysisEnabled } from '../../shared/cross/semanticAnalysisManager'; import { getDigitsMergeEnabled } from '../../shared/cross/digitsMergeManager'; import { getSemanticMatchThreshold } from '../../shared/cross/semanticThresholdManager'; import { applySemanticDebugInfoPanel } from 
'../../shared/prediction_attribution/core/semanticDebugInfo';

/**
 * Thrown when the semantic and info-density token boundaries disagree.
 * Used to abort the joint (semantic + info-density) display.
 */
export class TokenBoundaryInconsistentError extends Error {
  constructor() {
    super('Tokenizer results inconsistent: semantic and info-density token boundaries differ.');
    this.name = 'TokenBoundaryInconsistentError';
  }
}

/**
 * P(signal | raw_score_normed = s), reusing the bins produced by findSignalThreshold.
 *
 * Each sample `s` is looked up in the bin whose `[tauLeft, tauRight)` interval contains it and
 * P(signal) = (obsInBin - expInBin) / obsInBin, clamped to [0, 1].
 *
 * @param scores Scores to evaluate.
 * @param bins Bins to look up. NOTE(review): the bisect lookup assumes `bins` is sorted ascending
 *   by `tauLeft` — confirm against the producer.
 * @returns One probability per score (same order). 0 when the score falls outside the selected
 *   bin or the bin has no observations. An empty array when either input is empty.
 */
function signalProbFromBins(scores: number[], bins: SignalThresholdBin[]): number[] {
  if (scores.length === 0 || bins.length === 0) return [];
  const tauLefts = bins.map((b) => b.tauLeft);
  return scores.map((s) => {
    // Last bin whose left edge is <= s, clamped into the valid index range.
    const i = Math.max(0, Math.min(bins.length - 1, d3.bisectRight(tauLefts, s) - 1));
    const b = bins[i]!;
    // The clamp may pick a bin that does not actually contain s.
    if (s < b.tauLeft || s >= b.tauRight) return 0;
    return b.obsInBin > 0 ? Math.max(0, Math.min(1, (b.obsInBin - b.expInBin) / b.obsInBin)) : 0;
  });
}

/**
 * Dependencies required by the visualization updater.
 */
export interface VisualizationDependencies {
  lmf: GLTR_Text_Box;
  highlightController: HighlightController;
  textInputController: TextInputController;
  stats_frac: Histogram;
  stats_raw_score_normed: Histogram;
  stats_surprisal_progress: ScatterPlot;
  stats_match_score_progress: ScatterPlot;
  appStateManager: AppStateManager;
  /**
   * Passed as `colorScale` to the token-surprisal histogram.
   * `ScaleSequential` is generic and needs an output type argument.
   * NOTE(review): `string` (a CSS color) is assumed here — confirm against where the scale is built.
   */
  surprisalColorScale: d3.ScaleSequential<string>;
}

/** Raw semantic-analysis data (stored independently of info-density data). */
export interface SemanticData {
  text: string;
  model?: string;
  /**
   * Whole-text mode: a copy of the token_attention returned by the API, used to recompute when
   * the digit-merge setting is toggled (not stored in chunked mode).
   */
  semanticTokenAttentionFromApi?: Array<{
    offset: [number, number];
    raw: string;
    score: number;
    rawScore?: number;
  }>;
  token_attention: Array<{
    offset: [number, number];
    raw: string;
    score: number;
    rawScore?: number;
  }>;
  /** Fit result, computed after normalization and passed in; used in whole-text mode. */
  signalFitResult?: signalFitResult | null;
  /** Chunk boundaries; used in chunked mode. Each entry may carry that chunk's own thresholdResult. */
  chunkInfos?: Array<{
    startOffset: number;
    endOffset: number;
    chunkIndex: number;
    chunkMatchDegree: number;
    thresholdResult?: signalFitResult;
  }>;
  /** Whole-text match degree; used in non-chunked mode as the match-degree factor of pw_score. */
  full_match_degree?:
number;
}

/**
 * Whether semantic-analysis data is present: true when either `token_attention` or
 * `chunkInfos` is non-empty. `null` / `undefined` input yields false.
 */
function hasSemanticData(data: { token_attention?: unknown[]; chunkInfos?: unknown[] } | null | undefined): boolean {
  return (data?.token_attention?.length ?? 0) > 0 || (data?.chunkInfos?.length ?? 0) > 0;
}

/**
 * Current data state.
 * Info-density and semantic results are stored independently; whether they are shown
 * separately or jointly is decided at display time.
 */
export interface CurrentDataState {
  /** Info-density analysis result (independent). */
  infoDensityData: AnalyzeResponse | null;
  /** Semantic analysis result (independent). */
  semanticData: SemanticData | null;
  rawApiResponse: AnalyzeResponse | null;
  currentSurprisals: number[] | null;
  currentTokenAvg: number | null;
  currentTokenP90: number | null;
  currentTotalSurprisal: number | null;
}

/**
 * Visualization update manager.
 */
export class VisualizationUpdater {
  private deps: VisualizationDependencies;
  private currentState: CurrentDataState;

  constructor(deps: VisualizationDependencies) {
    this.deps = deps;
    this.currentState = {
      infoDensityData: null,
      semanticData: null,
      rawApiResponse: null,
      currentSurprisals: null,
      currentTokenAvg: null,
      currentTokenP90: null,
      currentTotalSurprisal: null
    };
  }

  /**
   * Get the current data state.
   * @returns A shallow copy of the state; nested objects are shared with the live state.
   */
  getCurrentState(): Readonly<CurrentDataState> {
    return { ...this.currentState };
  }

  /**
   * Get the current raw API response.
   */
  getRawApiResponse(): AnalyzeResponse | null {
    return this.currentState.rawApiResponse;
  }

  /**
   * Get the data currently displayed, computed from infoDensityData and semanticData by
   * the display logic.
   * @returns `null` when there is nothing to display.
   * @throws TokenBoundaryInconsistentError propagated from computeDisplayResult.
   */
  getCurrentData(): AnalyzeResponse | null {
    const display = this.computeDisplayResult();
    if (!display) return null;
    return { request: { text: display.originalText }, result: display };
  }

  /**
   * Get the current surprisal data.
   */
  getCurrentSurprisals(): number[] | null {
    return this.currentState.currentSurprisals;
  }

  /**
   * Update the text metrics (including the displayed model name).
   */
  private updateTextMetrics(stats: TextStats | null, modelName?: string | null | undefined): void {
    this.deps.textInputController.updateTextMetrics(stats, modelName);
  }

  /**
   * Clear highlights.
   */
  private clearHighlights(): void {
    this.deps.highlightController.clearHighlights();
  }

  /**
   * Compute the display result: info-density only / semantic only / joint (when both agree).
   */
  private computeDisplayResult(): (FrontendAnalyzeResult & {
rawScoresNormed?: number[];
    attentionRawScores?: number[];
    chunkInfos?: SemanticData['chunkInfos'];
  }) | null {
    const info = this.currentState.infoDensityData;
    const sem = this.currentState.semanticData;
    const infoResult = info?.result as FrontendAnalyzeResult | undefined;
    const infoText = info?.request?.text ?? infoResult?.originalText ?? '';
    const semText = sem?.text ?? '';

    // Joint mode: both results exist, refer to the same text, and semantic data is non-empty.
    if (infoResult && sem && infoText === semText && hasSemanticData(sem)) {
      const infoMerged = infoResult.bpeBpeMergedTokens ?? infoResult.bpe_strings;
      if (infoMerged?.length) {
        // Validate boundaries when token_attention is present; skipped when only chunkInfos
        // exists (no semantic coloring in that case).
        if (sem.token_attention?.length) {
          const boundaryError = this.checkSemanticAlignsWithInfo(sem.token_attention, infoMerged, semText);
          if (boundaryError) {
            const { aSample, bSample, aNext, bNext, textBefore, textAt, textAfter } = boundaryError;
            console.warn(
              '[联合模式] 两种分析的分词token边界不一致:\n' +
                ' 语义分析:', aSample, '\n' +
                ' 信息密度:', bSample, '\n' +
                ' 语义后一个:', aNext, '\n' +
                ' 信息后一个:', bNext, '\n' +
                ' 位置附近原文:', JSON.stringify(textBefore), '|', JSON.stringify(textAt), '|', JSON.stringify(textAfter)
            );
            showAlertDialog(tr('Error'), tr('Tokenizer results inconsistent: semantic and info-density token boundaries differ.'));
            // Drop the semantic data before throwing, so a caller that retries after catching
            // the error gets a result without the joint branch.
            this.currentState.semanticData = null;
            throw new TokenBoundaryInconsistentError();
          }
        }
        // Joint mode: merge bpeMerged with the semantic tokens that extend beyond it into a
        // union, so the rect / render range matches the truncation boundary.
        const tokenAttention = sem.token_attention ?? [];
        const { unionTokens, scoresForUnion, rawScoresForUnion } = tokenAttention.length
          ? this.mergeBpeWithSemanticBeyond(infoMerged, tokenAttention)
          : (() => {
              const m = this.mapTokenAttentionToMerged(infoMerged, []);
              return {
                unionTokens: infoMerged,
                scoresForUnion: m.scores,
                rawScoresForUnion: m.rawScores,
              };
            })();
        return {
          ...infoResult,
          bpeBpeMergedTokens: unionTokens,
          bpe_strings: unionTokens,
          rawScoresNormed: scoresForUnion,
          attentionRawScores: rawScoresForUnion,
          chunkInfos: sem.chunkInfos,
        };
      }
    }

    // With semantic data (token_attention or chunkInfos), use buildSemanticOnlyResult.
    if (sem && hasSemanticData(sem)) {
      return this.buildSemanticOnlyResult({ model: sem.model }, sem.token_attention, sem.text, sem.chunkInfos);
    }
    if (infoResult) return { ...infoResult, chunkInfos: sem?.chunkInfos ?? undefined };
    return null;
  }

  /**
   * Before an analysis starts, update chart visibility based on "existing data + data about
   * to arrive", deciding which statistic charts are meaningful.
   * @param mode The kind of analysis about to run.
   * @param text The text about to be analyzed (used to decide whether it matches existing data
   *   and can be shown jointly).
   * @param willBeChunked For semantic analysis: true means chunked mode will be used, in which
   *   case the histogram is not shown.
   */
  public updateHistogramVisibilityForPending(mode: 'infoDensity' | 'semantic', text: string, willBeChunked?: boolean): void {
    const tokenHistogramItem = document.getElementById('token_histogram_item');
    const surprisalProgressItem = document.getElementById('surprisal_progress_item');
    const rawScoreNormedItem = document.getElementById('raw_score_normed_histogram_item');
    const matchScoreProgressItem = document.getElementById('match_score_progress_item');
    const infoText = this.currentState.infoDensityData?.request?.text ?? '';
    const semText = this.currentState.semanticData?.text ??
'';
    const semanticQueryOn = getSemanticAnalysisEnabled();
    let showInfoDensity = false;
    let showSemantic = false;
    if (mode === 'infoDensity') {
      /** When Semantic Query is checked, no info-density placeholder appears in the stats area. */
      showInfoDensity = !semanticQueryOn;
      showSemantic = semanticQueryOn && hasSemanticData(this.currentState.semanticData) && semText === text;
    } else {
      showSemantic = true;
      showInfoDensity = !semanticQueryOn && !!(this.currentState.infoDensityData && infoText === text);
    }
    if (tokenHistogramItem) tokenHistogramItem.style.display = showInfoDensity ? '' : 'none';
    if (surprisalProgressItem) surprisalProgressItem.style.display = showInfoDensity ? '' : 'none';
    /** The histogram is shown only in whole-text mode, never in chunk mode. */
    const showRawScoreHistogram = showSemantic && !willBeChunked;
    if (rawScoreNormedItem) rawScoreNormedItem.style.display = showRawScoreHistogram ? '' : 'none';
    /** The semantic match progress chart is shown only in chunk mode. */
    if (matchScoreProgressItem) matchScoreProgressItem.style.display = showSemantic && !!willBeChunked ? '' : 'none';

    // While pending, render empty charts (axes + empty bars/points) to avoid a blank area.
    if (showInfoDensity && mode === 'infoDensity') {
      const tokenConfig = getTokenSurprisalHistogramConfig();
      this.deps.stats_frac.update({ ...tokenConfig, data: [], colorScale: () => 'transparent' });
      const tokenTitle = document.getElementById('token_histogram_title');
      if (tokenTitle) tokenTitle.textContent = tokenConfig.label;
      const progressConfig = getSurprisalProgressConfig();
      this.deps.stats_surprisal_progress.update({ ...progressConfig, data: [] });
      const progressTitle = document.getElementById('surprisal_progress_title');
      if (progressTitle && progressConfig.label) progressTitle.textContent = progressConfig.label;
    }
    if (showRawScoreHistogram && mode === 'semantic') {
      const rawScoreNormedConfig = getRawScoreNormedHistogramConfig();
      this.deps.stats_raw_score_normed.update({ ...rawScoreNormedConfig, data: [], colorScale: () => 'transparent' });
      const titleEl = document.getElementById('raw_score_normed_histogram_title');
      if (titleEl) titleEl.textContent = rawScoreNormedConfig.label;
    }
    if (showSemantic && mode === 'semantic' && willBeChunked) {
      const matchScoreProgressConfig = getMatchScoreProgressConfig();
      const docLen = text.length;
      this.deps.stats_match_score_progress.update({
        ...matchScoreProgressConfig,
        data: [],
        showMovingAverage: false,
        chunkLines: [],
        thresholdLine: getSemanticMatchThreshold(),
        extent: { x: docLen > 0 ? [0, docLen] : undefined, y: [0, 1] }
      });
      const matchScoreTitleEl = document.getElementById('match_score_progress_title');
      if (matchScoreTitleEl && matchScoreProgressConfig.label) matchScoreTitleEl.textContent = matchScoreProgressConfig.label;
    }
  }

  /**
   * Re-render the statistic charts (internal).
   * Semantic Query checked: only semantic charts; unchecked: token + surprisal charts are shown
   * when info-density data exists.
   * @param skipLmfUpdate When true, skip lmf.update (on theme change rerenderOnThemeChange
   *   repaints everything in one place, avoiding a race).
   * @throws TokenBoundaryInconsistentError propagated from computeDisplayResult.
   */
  private updateVisualizationInternal(skipLmfUpdate = false): void {
    const hasInfoDensity = !!this.currentState.infoDensityData;
    const displayResult = this.computeDisplayResult();
    const sem = this.currentState.semanticData;
    const showInfoDensityCharts = hasInfoDensity && !getSemanticAnalysisEnabled();
    const tokenHistogramItem = document.getElementById('token_histogram_item');
    const surprisalProgressItem = document.getElementById('surprisal_progress_item');
    const rawScoreNormedItem = document.getElementById('raw_score_normed_histogram_item');

    if (showInfoDensityCharts) {
      const currentSurprisals = this.currentState.currentSurprisals;
      const currentTokenAvg = this.currentState.currentTokenAvg;
      const currentTokenP90 = this.currentState.currentTokenP90;
      if (currentSurprisals) {
        const tokenHistogramConfig = getTokenSurprisalHistogramConfig();
        this.deps.stats_frac.update({
          ...tokenHistogramConfig,
          data: currentSurprisals,
          colorScale: this.deps.surprisalColorScale,
          averageValue: currentTokenAvg ?? undefined,
          p90Value: currentTokenP90 ??
undefined,
          // NOTE(review): p90Label is fed from `averageLabel` — confirm this is intentional.
          p90Label: tokenHistogramConfig.averageLabel,
        });
        const titleElement = document.getElementById('token_histogram_title');
        if (titleElement) titleElement.textContent = tokenHistogramConfig.label;
      }
      if (currentSurprisals && currentSurprisals.length > 0) {
        const surprisalProgressConfig = getSurprisalProgressConfig();
        this.deps.stats_surprisal_progress.update({
          ...surprisalProgressConfig,
          data: currentSurprisals,
        });
        const surprisalProgressTitleElement = document.getElementById('surprisal_progress_title');
        if (surprisalProgressTitleElement && surprisalProgressConfig.label) {
          surprisalProgressTitleElement.textContent = surprisalProgressConfig.label;
        }
      }
      if (tokenHistogramItem) tokenHistogramItem.style.display = '';
      if (surprisalProgressItem) surprisalProgressItem.style.display = '';
    } else {
      if (tokenHistogramItem) tokenHistogramItem.style.display = 'none';
      if (surprisalProgressItem) surprisalProgressItem.style.display = 'none';
    }

    const rawScoresNormed = displayResult?.rawScoresNormed;
    const validRawScoresNormed = rawScoresNormed?.filter((s) => typeof s === 'number' && isFinite(s));
    const signalFitResult = sem?.signalFitResult ?? null;
    const chunkInfos = sem?.chunkInfos;
    const isChunkMode = (chunkInfos?.length ?? 0) > 0;
    const chunksWithThreshold = chunkInfos?.filter((c) => c.thresholdResult != null) ?? [];
    const usePerChunkThreshold = chunksWithThreshold.length > 0;
    const thresholdByChunk = usePerChunkThreshold
      ? new Map(chunksWithThreshold.map((c) => [c.chunkIndex, c.thresholdResult!]))
      : null;

    if (validRawScoresNormed && validRawScoresNormed.length > 0) {
      const rawScoreNormedConfig = getRawScoreNormedHistogramConfig();
      const colorScale = (v: number) => getSemanticSimilarityColor(v, HISTOGRAM_MIN_ALPHA);
      const thresholdForHistogram = usePerChunkThreshold && chunksWithThreshold.length > 0
        ? chunksWithThreshold[0]!.thresholdResult!
        : signalFitResult;
      // confidence > 0: findSignalThreshold succeeded (>= MIN_ACCEPTABLE); confidence === 0 is
      // the P90 fallback, for which the truncated log-normal expectation curve is not drawn.
      const fitResult = validRawScoresNormed.length >= 2 && thresholdForHistogram != null && thresholdForHistogram.confidence > 0
        ? {
            mu: thresholdForHistogram.mu,
            sigma: thresholdForHistogram.sigma,
            expectedCounts: computeExpectedCounts(
              thresholdForHistogram.mu,
              thresholdForHistogram.sigma,
              rawScoreNormedConfig.extent as [number, number],
              rawScoreNormedConfig.no_bins,
              validRawScoresNormed.length
            ),
          }
        : null;
      // NOTE(review): signalProbs is indexed like validRawScoresNormed (non-finite entries
      // filtered out), while pPwValues / pwScores below are indexed like the full token list;
      // the indices diverge if any score was filtered. Confirm consumers expect this.
      const signalProbs = thresholdForHistogram != null
        ? signalProbFromBins(validRawScoresNormed, thresholdForHistogram.bins)
        : [];

      /**
       * P_pw: simplified mapping of the posterior signal probability: 0 when x <= threshold,
       * 1 when x > threshold.
       * pw_score = score × P_pw × matchDegree
       * Chunked mode: each token uses the threshold and chunkMatchDegree of its own chunk.
       * Non-chunked mode: uses the whole-text match degree full_match_degree.
       */
      const rawScoresNormedFull = displayResult!.rawScoresNormed ?? [];
      const bpeBpeMergedTokens = displayResult?.bpeBpeMergedTokens ?? [];
      // Chunk whose [startOffset, endOffset) contains the token's start offset, or null.
      const getChunkForToken = (tokenIndex: number) => {
        const token = bpeBpeMergedTokens[tokenIndex];
        if (!token || !isChunkMode) return null;
        const offset = token.offset[0];
        return chunkInfos!.find((c) => c.startOffset <= offset && offset < c.endOffset) ?? null;
      };
      const getThresholdForToken = (i: number): number => {
        const chunk = getChunkForToken(i);
        if (chunk && thresholdByChunk != null) {
          // Named `chunkFit` so it does not shadow the imported i18n helper `tr`.
          const chunkFit = thresholdByChunk.get(chunk.chunkIndex);
          if (chunkFit) return chunkFit.threshold;
        }
        return signalFitResult?.threshold ?? 0;
      };
      const getMatchDegreeForToken = (i: number): number => {
        const chunk = getChunkForToken(i);
        if (chunk) return chunk.chunkMatchDegree;
        return sem?.full_match_degree ?? 1;
      };
      const hasThreshold = signalFitResult != null || thresholdByChunk != null;
      const pPwValues = hasThreshold
        ? rawScoresNormedFull.map((s, i) => {
            const threshold = getThresholdForToken(i);
            const isAboveThreshold = typeof s === 'number' && isFinite(s) && s > threshold;
            return isAboveThreshold ? 1 : 0;
          })
        : [];
      const pwScores = hasThreshold
        ? rawScoresNormedFull.map((s, i) => {
            const threshold = getThresholdForToken(i);
            const isAboveThreshold = typeof s === 'number' && isFinite(s) && s > threshold;
            const baseScore = isAboveThreshold ? s : 0;
            const matchDegree = getMatchDegreeForToken(i);
            return baseScore * matchDegree;
          })
        : [];

      const colorSourceEl = document.getElementById('semantic_color_source_select') as HTMLSelectElement | null;
      const colorSource = colorSourceEl?.value ?? 'pw_score';
      const scoresForColor = colorSource === 'signal_probability'
        ? pPwValues
        : colorSource === 'pw_score'
          ? pwScores
          : (displayResult!.rawScoresNormed ?? []);

      // In joint mode the tooltip needs pPwValues / pwScores to show semantic match info, so
      // they are passed along even when fitResult is null.
      const resultWithExt = hasThreshold ? { ...displayResult, signalProbs, pPwValues, pwScores } : displayResult!;
      if (fitResult != null) {
        this.deps.highlightController.updateCurrentData({ result: resultWithExt, signalProbs, pPwValues, pwScores });
        if (!skipLmfUpdate) {
          this.deps.lmf.update({ ...resultWithExt, pwScores, colorScores: scoresForColor } as FrontendAnalyzeResult & { pPwValues?: number[]; pwScores?: number[]; colorScores?: number[] });
        }
      } else {
        // NOTE(review): here pPwValues / pwScores are only nested inside `result`, whereas
        // updateSemanticColorSource reads them from the top level of the current data — verify
        // the color-source switch behaves as intended for the fallback fit.
        this.deps.highlightController.updateCurrentData({ result: resultWithExt });
        if (!skipLmfUpdate) {
          this.deps.lmf.update({ ...resultWithExt, colorScores: scoresForColor } as FrontendAnalyzeResult & { pPwValues?: number[]; pwScores?: number[]; colorScores?: number[] });
        }
      }

      /** The histogram is shown only in whole-text mode; chunk mode neither computes nor shows it. */
      if (!isChunkMode) {
        const probCurveData = signalProbs.length > 0
          ? (() => {
              const pairs = validRawScoresNormed.map((x, i) => ({ x, y: signalProbs[i]! })).sort((a, b) => a.x - b.x);
              return { x: pairs.map(p => p.x), y: pairs.map(p => p.y) };
            })()
          : undefined;
        // Share of valid scores strictly below the threshold, as a rounded percentage.
        const signalThresholdPercentile = thresholdForHistogram != null && validRawScoresNormed.length > 0
          ? Math.round((validRawScoresNormed.filter((s) => s < thresholdForHistogram.threshold).length / validRawScoresNormed.length) * 100)
          : undefined;
        this.deps.stats_raw_score_normed.update({
          ...rawScoreNormedConfig,
          data: validRawScoresNormed,
          colorScale,
          fitExpectedCounts: fitResult?.expectedCounts,
          showProbCurve: true,
          probCurveData: probCurveData?.x.length ? probCurveData : undefined,
          signalThreshold: thresholdForHistogram?.threshold ?? undefined,
          signalThresholdPercentile: signalThresholdPercentile ?? undefined,
        });
        const titleEl = document.getElementById('raw_score_normed_histogram_title');
        if (titleEl) titleEl.textContent = rawScoreNormedConfig.label;
        if (rawScoreNormedItem) rawScoreNormedItem.style.display = '';
      } else {
        if (rawScoreNormedItem) rawScoreNormedItem.style.display = 'none';
      }

      /** Semantic match progress: chunk mode only; draws only the chunk match lines, no points. */
      if (isChunkMode) {
        const matchScoreProgressConfig = getMatchScoreProgressConfig();
        const docLen = (displayResult?.originalText ?? '').length;
        const chunkLines = chunkInfos?.length
          ? chunkInfos.map((c) => ({ x0: c.startOffset, x1: c.endOffset, y: c.chunkMatchDegree }))
          : [];
        const thresholdLine = getSemanticMatchThreshold();
        this.deps.stats_match_score_progress.update({
          ...matchScoreProgressConfig,
          data: [],
          showMovingAverage: false,
          chunkLines,
          thresholdLine,
          chunkInteraction: true,
          extent: { x: docLen > 0 ? [0, docLen] : undefined, y: [0, 1] }
        });
        const matchScoreTitleEl = document.getElementById('match_score_progress_title');
        if (matchScoreTitleEl && matchScoreProgressConfig.label) matchScoreTitleEl.textContent = matchScoreProgressConfig.label;
        const matchScoreProgressItem = document.getElementById('match_score_progress_item');
        if (matchScoreProgressItem) matchScoreProgressItem.style.display = '';
      } else {
        const matchScoreProgressItem = document.getElementById('match_score_progress_item');
        if (matchScoreProgressItem) matchScoreProgressItem.style.display = 'none';
      }
    } else {
      const needLmfUpdate = !!displayResult && (hasInfoDensity || !!validRawScoresNormed?.length || hasSemanticData(sem));
      if (displayResult) this.deps.highlightController.updateCurrentData({ result: displayResult });
      if (needLmfUpdate && !skipLmfUpdate) {
        this.deps.lmf.update(displayResult!);
      }
      /** No histogram in chunk mode; in whole-text mode without data show an empty placeholder. */
      if (getSemanticAnalysisEnabled() && !isChunkMode) {
        const rawScoreNormedConfig = getRawScoreNormedHistogramConfig();
        this.deps.stats_raw_score_normed.update({ ...rawScoreNormedConfig, data: [], colorScale: () => 'transparent' });
        const titleEl = document.getElementById('raw_score_normed_histogram_title');
        if (titleEl) titleEl.textContent = rawScoreNormedConfig.label;
        if (rawScoreNormedItem) rawScoreNormedItem.style.display = '';
      } else {
        if (rawScoreNormedItem) rawScoreNormedItem.style.display = 'none';
      }
      /** Semantic match progress: empty placeholder when there is no data (chunk mode only). */
      if (getSemanticAnalysisEnabled() && isChunkMode) {
        const matchScoreProgressConfig = getMatchScoreProgressConfig();
        const docLen = (displayResult?.originalText ?? '').length;
        const chunkLines = chunkInfos?.length
          ? chunkInfos.map((c) => ({ x0: c.startOffset, x1: c.endOffset, y: c.chunkMatchDegree }))
          : [];
        const thresholdLine = getSemanticMatchThreshold();
        this.deps.stats_match_score_progress.update({
          ...matchScoreProgressConfig,
          data: [],
          showMovingAverage: false,
          chunkLines,
          thresholdLine,
          chunkInteraction: true,
          extent: { x: docLen > 0 ? [0, docLen] : undefined, y: [0, 1] }
        });
        const matchScoreTitleEl = document.getElementById('match_score_progress_title');
        if (matchScoreTitleEl && matchScoreProgressConfig.label) matchScoreTitleEl.textContent = matchScoreProgressConfig.label;
        const matchScoreProgressItem = document.getElementById('match_score_progress_item');
        if (matchScoreProgressItem) matchScoreProgressItem.style.display = '';
      } else {
        const matchScoreProgressItem = document.getElementById('match_score_progress_item');
        if (matchScoreProgressItem) matchScoreProgressItem.style.display = 'none';
      }
    }
  }

  /** Re-render the statistic charts (for external callers). */
  public rerenderHistograms(): void {
    this.updateVisualizationInternal(false);
  }

  /** Update only the semantic coloring source (called on color-source switch; no re-fitting). */
  public updateSemanticColorSource(): void {
    const cd = this.deps.highlightController.getCurrentData();
    const r = cd?.result as (FrontendAnalyzeResult & { rawScoresNormed?: number[] }) | undefined;
    // Nothing to recolor without normalized semantic scores.
    if (!r?.rawScoresNormed?.length) return;
    const el = document.getElementById('semantic_color_source_select') as HTMLSelectElement | null;
    const v = el?.value ?? 'pw_score';
    const scoresForColor = v === 'signal_probability' ? (cd!.pPwValues ?? []) : v === 'pw_score' ? (cd!.pwScores ??
[]) : r.rawScoresNormed;
    this.deps.lmf.update({ ...r, pPwValues: cd!.pPwValues, pwScores: cd!.pwScores, colorScores: scoresForColor } as FrontendAnalyzeResult & { pPwValues?: number[]; pwScores?: number[]; colorScores?: number[] });
  }

  /**
   * Called on theme switch: repaint charts and text in one place once the new styles have taken
   * effect (rgba colors let the background show through, so the new theme must be applied first).
   */
  public rerenderOnThemeChange(): void {
    // Runs on the second animation frame after the call.
    const repaint = (): void => {
      this.updateVisualizationInternal(true);
      this.deps.lmf.reRenderCurrent();
    };
    requestAnimationFrame(() => requestAnimationFrame(repaint));
  }

  /**
   * Drop all independently stored data when the text is edited, so the display can never
   * disagree with the input.
   */
  public clearDataOnTextChange(): void {
    const cleared: CurrentDataState = {
      infoDensityData: null,
      semanticData: null,
      rawApiResponse: null,
      currentSurprisals: null,
      currentTokenAvg: null,
      currentTokenP90: null,
      currentTotalSurprisal: null,
    };
    // Reset in place so the existing state object keeps its identity.
    Object.assign(this.currentState, cleared);
    this.deps.highlightController.updateCurrentData(null);
    d3.select('#all_result').style('opacity', 0);
    this.updateSemanticDebugInfo();
  }

  /**
   * Reset everything semantic-related (charts, debug panel, semanticData); used to initialize
   * when the mode is opened.
   */
  public clearSemanticState(): void {
    this.currentState.semanticData = null;
    for (const itemId of ['raw_score_normed_histogram_item', 'match_score_progress_item']) {
      const item = document.getElementById(itemId);
      if (item) item.style.display = 'none';
    }
    this.updateSemanticDebugInfo();
  }

  /**
   * When the digit-merge user preference changes: refresh info-density and whole-text semantic
   * data from their recomputable sources; chunked semantic data has no stored copy, so its
   * current display is left unchanged.
   */
  public applyDigitsMergeSetting(): void {
    const digitMerge = getDigitsMergeEnabled();
    const info = this.currentState.infoDensityData;
    if (info?.result) {
      const fr = info.result as FrontendAnalyzeResult;
      const text = info.request?.text ?? fr.originalText ??
'';
      // Re-merge the stored original tokens under the new digit-merge setting (mutates the
      // stored result in place).
      if (fr.originalTokens?.length && text) {
        const newMerged = mergeTokensForRendering(fr.originalTokens, text, { digitMerge });
        fr.bpeBpeMergedTokens = newMerged;
        fr.bpe_strings = newMerged;
      }
    }

    // Whole-text semantic data only: recompute merge, normalization and fit from the API copy.
    const sem = this.currentState.semanticData;
    if (sem && !sem.chunkInfos?.length && sem.semanticTokenAttentionFromApi?.length && sem.text) {
      const mergedAttention = mergeAttentionTokensFullyForRendering(
        sem.semanticTokenAttentionFromApi,
        sem.text,
        { digitMerge }
      );
      const normalizedAttention = normalizeTokenScores(mergedAttention);
      const computedSignalFit = findSignalThresholdWithLog(normalizedAttention);
      sem.token_attention = normalizedAttention;
      sem.signalFitResult = computedSignalFit ?? undefined;
    }

    // Refresh the cached surprisal statistics from the (possibly re-merged) tokens.
    const infoResult = this.currentState.infoDensityData?.result as FrontendAnalyzeResult | undefined;
    const safeText = this.currentState.infoDensityData?.request?.text ?? infoResult?.originalText ?? '';
    if (infoResult?.bpeBpeMergedTokens?.length && safeText) {
      const mergedSurprisals = calculateMergedTokenSurprisals(infoResult.bpeBpeMergedTokens);
      this.currentState.currentSurprisals = mergedSurprisals;
      this.currentState.currentTokenAvg = computeAverage(mergedSurprisals);
      this.currentState.currentTokenP90 = computeP90(mergedSurprisals);
    }

    // `ReturnType` requires a type argument; use the return type of computeDisplayResult.
    let displayResult: ReturnType<VisualizationUpdater['computeDisplayResult']>;
    try {
      displayResult = this.computeDisplayResult();
    } catch (e) {
      if (e instanceof TokenBoundaryInconsistentError) {
        // computeDisplayResult cleared semanticData before throwing, so the retry skips the
        // joint branch.
        displayResult = this.computeDisplayResult();
      } else {
        console.error(e);
        return;
      }
    }
    this.deps.highlightController.updateCurrentData(displayResult ? { result: displayResult } : null);
    this.deps.lmf.clearHighlight();
    if (displayResult) this.deps.lmf.update(displayResult);
    this.updateVisualizationInternal();
    this.deps.appStateManager.updateButtonStates();
  }

  /**
   * Sync UI state (query input, text render mode, ...) from the semantic-analysis config.
   * The UI is determined entirely by configuration and does not change with data presence.
   */
  public syncSemanticUiFromConfig(): void {
    const enabled = getSemanticAnalysisEnabled();
    const el = document.getElementById('semantic_analysis_section');
    if (el) el.style.display = enabled ?
'' : 'none'; this.deps.lmf.updateOptions({ semanticAnalysisMode: enabled }, false); if (!enabled) { // 关闭时清除语义数据;统计图由下方 updateVisualizationInternal 统一刷新 this.currentState.semanticData = null; const rawScoreNormedItem = document.getElementById('raw_score_normed_histogram_item'); if (rawScoreNormedItem) rawScoreNormedItem.style.display = 'none'; const matchScoreProgressItem = document.getElementById('match_score_progress_item'); if (matchScoreProgressItem) matchScoreProgressItem.style.display = 'none'; this.updateSemanticDebugInfo(); const displayResult = this.computeDisplayResult(); this.deps.highlightController.updateCurrentData(displayResult ? { result: displayResult } : null); if (!displayResult) { d3.select('#all_result').style('opacity', 0); this.deps.appStateManager.updateState({ hasValidData: false }); } } /** 勾选 / 关闭 Semantic Query 后立即刷新统计图显隐(与 getSemanticAnalysisEnabled 一致) */ this.updateVisualizationInternal(false); // 语义分析配置影响 Upload/Save 的 dataReadyForSave 条件,需始终更新按钮状态 this.deps.appStateManager.updateButtonStates(); } /** * 更新可视化(核心方法) * * @param data 分析响应数据 * @param disableAnimation 是否禁用动画 * @param options 选项 */ updateFromRequest( data: AnalyzeResponse, disableAnimation: boolean = false, options: { enableSave?: boolean } = {} ): void { const { enableSave = true } = options; const abortDueToInvalidResponse = (message: string) => { console.error(message); showAlertDialog(tr('Error'), message); this.deps.appStateManager.updateState({ hasValidData: false }); this.syncSemanticUiFromConfig(); }; try { // 只有 Analyze 触发时开启动画,其它情况保持关闭(默认已关闭) if (!disableAnimation) { this.deps.lmf.updateOptions({ enableRenderAnimation: true }, false); } // Semantic analysis 模式由配置决定 this.deps.lmf.updateOptions({ semanticAnalysisMode: getSemanticAnalysisEnabled(), }, false); d3.select('#all_result').style('opacity', 1).style('display', null); this.deps.appStateManager.setIsAnalyzing(false); this.deps.appStateManager.setGlobalLoading(false); // 
隐藏文本区域的加载状态(会在lmf.update中自动隐藏,但这里提前隐藏以提升体验) this.deps.lmf.hideLoading(); // 验证数据结构 if (!data || !data.result) { console.error('Invalid data structure:', data); throw new Error('Invalid API response structure'); } const result = data.result; // 确保所有必需的字段都存在且类型正确 if (!Array.isArray(result.bpe_strings) || result.bpe_strings.length === 0) { abortDueToInvalidResponse(tr('Returned JSON missing valid bpe_strings array, processing cancelled.')); return; } const predTopkError = validateTokenPredictions(result.bpe_strings as Array<{ pred_topk?: [string, number][] }>); if (predTopkError) { abortDueToInvalidResponse(predTopkError); return; } const probabilityError = validateTokenProbabilities(result.bpe_strings as Array<{ real_topk?: [number, number] }>); if (probabilityError) { abortDueToInvalidResponse(probabilityError); return; } const safeText = data.request.text; const validationError = validateTokenConsistency(result.bpe_strings, safeText, { allowOverlap: true }); if (validationError) { abortDueToInvalidResponse(validationError); return; } const rawSnapshot = createRawSnapshot(data); const originalTokens = result.bpe_strings.map((token) => cloneFrontendToken(token as FrontendToken)); const bpeBpeMergedTokens = mergeTokensForRendering(originalTokens, safeText, { digitMerge: getDigitsMergeEnabled(), }); const mergedValidationError = validateTokenConsistency(bpeBpeMergedTokens, safeText); if (mergedValidationError) { abortDueToInvalidResponse(mergedValidationError); return; } const enhancedResult: FrontendAnalyzeResult = { ...result, originalTokens, bpeBpeMergedTokens, bpe_strings: bpeBpeMergedTokens, originalText: safeText, }; data.result = enhancedResult; // 独立存储信息密度数据(info density 无 debug 信息,隐藏 semantic debug) this.currentState.infoDensityData = data; this.currentState.rawApiResponse = rawSnapshot; this.updateSemanticDebugInfo(); let displayResult: ReturnType; try { displayResult = this.computeDisplayResult(); } catch (e) { if (e instanceof 
TokenBoundaryInconsistentError) { displayResult = this.computeDisplayResult(); } else { throw e; } } this.deps.highlightController.updateCurrentData(displayResult ? { result: displayResult } : null); this.deps.lmf.clearHighlight(); if (displayResult) this.deps.lmf.update(displayResult); const textStats = calculateTextStats(enhancedResult, safeText); const mergedSurprisals = calculateMergedTokenSurprisals(enhancedResult.bpeBpeMergedTokens); // 直方图 / progress:合并后 token;文本指标仍用 textStats(原始 token) this.currentState.currentSurprisals = mergedSurprisals; this.currentState.currentTokenAvg = computeAverage(mergedSurprisals); this.currentState.currentTokenP90 = computeP90(mergedSurprisals); this.currentState.currentTotalSurprisal = textStats.totalSurprisal; // 更新文本指标和模型显示(从分析结果中获取实际使用的模型) const resultModel = data.result.model; this.updateTextMetrics(textStats, resultModel); // Analyze 渲染完成后关闭动画,避免拖拽等二次渲染再次播放 if (!disableAnimation) { // 延迟关闭,确保动画有足够时间完成 // 动画时长估算:初始延迟100ms + 批次处理时间(根据token数量) const tokenCount = enhancedResult.bpe_strings.length; const estimatedAnimationTime = 100 + Math.ceil(tokenCount / 50) * 100; const delayTime = Math.max(2000, estimatedAnimationTime + 500); setTimeout(() => { this.deps.lmf.updateOptions({ enableRenderAnimation: false }, false); }, delayTime); } } catch (error) { console.error('Error updating visualization:', error); this.deps.appStateManager.setIsAnalyzing(false); this.deps.appStateManager.setGlobalLoading(false); this.deps.appStateManager.updateState({ hasValidData: false }); this.syncSemanticUiFromConfig(); showAlertDialog(tr('Error'), 'Error rendering visualization. 
Check console for details.');
      return;
    }

    // Clear the previous selection state.
    this.clearHighlights();
    // Re-render the histograms.
    this.updateVisualizationInternal();
    // Data was processed successfully: mark it as valid (TextMetrics shown, Analyze greyed out).
    this.deps.appStateManager.updateState({ hasValidData: true });
    this.syncSemanticUiFromConfig();
  }

  /**
   * Handles a semantic-analysis response: stores it in `currentState.semanticData`
   * (independently of the info-density data), then computes the display result and renders it.
   *
   * Flow:
   * 1. If `hasSemanticData(res)` is false, the semantic state is cleared, the histograms are
   *    re-rendered, loading is hidden and `true` is returned.
   * 2. In whole-text mode (no `chunkInfos`) the token boundaries are validated against `text`.
   * 3. Whole-text mode merges and normalizes the attention tokens and fits a signal threshold;
   *    chunked mode uses the tokens as delivered and fits one threshold per chunk.
   * 4. `computeDisplayResult()` is run; on failure `semanticData` is reset to `null`.
   *
   * @param res Semantic API response (token attention, optional chunk info and debug info).
   * @param text The text the response refers to; an empty/missing text makes the call fail.
   * @param signalFitResult Optional pre-computed fit; takes precedence over the locally computed one.
   * @returns `true` on success; `false` when the text is empty, validation fails, or computing the
   *          display result throws. Callers should stop any follow-up analysis on `false`.
   */
  public handleSemanticResponse(
    res: {
      model?: string;
      token_attention?: Array<{ offset: [number, number]; raw: string; score: number; rawScore?: number; }>;
      debug_info?: { abbrev?: string; topk_tokens?: string[]; topk_probs?: number[] };
      chunkInfos?: Array<{ startOffset: number; endOffset: number; chunkIndex: number; chunkMatchDegree: number; thresholdResult?: signalFitResult }>;
      full_match_degree?: number;
    },
    text?: string,
    signalFitResult?: signalFitResult | null
  ): boolean {
    const chunkInfos = res?.chunkInfos;
    const tokenAttention = res?.token_attention;
    const currentText = text ?? '';

    if (!hasSemanticData(res)) {
      this.clearSemanticState();
      this.rerenderHistograms();
      this.deps.lmf.hideLoading();
      return true;
    }
    if (!currentText) return false;

    // Whole-text mode (no chunkInfos): token boundaries must be validated against the text.
    if (tokenAttention?.length && !chunkInfos?.length) {
      const err = validateTokenConsistency(tokenAttention, currentText, { allowOverlap: true });
      if (err) {
        showAlertDialog(tr('Error'), err);
        return false;
      }
    }

    // Chunked mode: the assembling side has already applied overlap + digit merge + normalization
    // per chunk, so the full text must NOT be merged or normalized again (that would merge digits
    // across chunks and rescale scores across chunks).
    const isChunkedSemantic = Boolean(chunkInfos?.length);

    // Whole-text mode keeps a copy of the API tokens (with fresh offset tuples) in semanticData.
    const semanticTokenAttentionFromApi =
      !isChunkedSemantic && tokenAttention && tokenAttention.length > 0
        ? tokenAttention.map((t) => ({
            ...t,
            offset: [t.offset[0], t.offset[1]] as [number, number],
          }))
        : undefined;

    const mergedAttention = isChunkedSemantic
      ? (tokenAttention ?? [])
      : mergeAttentionTokensFullyForRendering(tokenAttention ?? [], currentText, {
          digitMerge: getDigitsMergeEnabled(),
        });
    const normalizedAttention = isChunkedSemantic
      ? mergedAttention
      : normalizeTokenScores(mergedAttention);
    const computedSignalFit = isChunkedSemantic
      ? undefined
      : findSignalThresholdWithLog(normalizedAttention);

    // Chunked mode: fit a threshold per chunk over the tokens that overlap the chunk's range.
    // A truthy fit replaces any `thresholdResult` already present on the chunk info.
    const chunkInfosResolved = chunkInfos?.length
      ? chunkInfos.map((info) => {
          const slice = normalizedAttention.filter(
            (t) => t.offset[0] < info.endOffset && t.offset[1] > info.startOffset
          );
          const thresholdResult = slice.length > 0 ? findSignalThresholdWithLog(slice) : null;
          return { ...info, ...(thresholdResult ? { thresholdResult } : {}) };
        })
      : chunkInfos;

    this.currentState.semanticData = {
      text: currentText,
      model: res.model,
      semanticTokenAttentionFromApi,
      token_attention: normalizedAttention,
      signalFitResult: signalFitResult ?? computedSignalFit ?? undefined,
      chunkInfos: chunkInfosResolved,
      full_match_degree: res.full_match_degree,
    };

    // `ReturnType` needs its type argument; derive it from the method that produces the value.
    let displayResult: ReturnType<typeof this.computeDisplayResult>;
    try {
      displayResult = this.computeDisplayResult();
    } catch (e) {
      // Any failure invalidates the semantic data that was just stored.
      this.currentState.semanticData = null;
      if (e instanceof TokenBoundaryInconsistentError) {
        this.deps.lmf.hideLoading();
        this.rerenderHistograms();
        return false;
      }
      showAlertDialog(tr('Error'), e instanceof Error ? e.message : String(e));
      return false;
    }

    d3.select('#all_result').style('opacity', 1).style('display', null);
    this.deps.lmf.hideLoading();
    this.deps.highlightController.updateCurrentData({ result: displayResult });
    this.deps.lmf.clearHighlight();
    this.clearHighlights();
    this.updateVisualizationInternal();
    this.updateSemanticDebugInfo(res.debug_info);
    return true;
  }

  /**
   * Updates the debug info shown below the text rendering area (abbrev + top-k).
   * Calling it without an argument passes `debugInfo: undefined` to the panel helper.
   */
  private updateSemanticDebugInfo(di?: { abbrev?: string; topk_tokens?: string[]; topk_probs?: number[] }): void {
    applySemanticDebugInfoPanel('results', 'semantic_debug_info', { debugInfo: di });
  }

  /**
   * Builds a display result from semantic token attention alone (no info-density data).
   *
   * Each attention token becomes a `FrontendToken` with an empty `pred_topk`; the same token
   * list is shallow-cloned into `bpe_strings`, `originalTokens` and `bpeBpeMergedTokens`.
   *
   * @param res Source of the `model` field.
   * @param tokenAttention Attention tokens; per the original note, `semanticData.token_attention`
   *        has already gone through overlap + digit merge + normalization in handleSemanticResponse.
   * @param text Original text; an empty text yields `null`.
   * @param chunkInfos Optional chunk info, passed through unchanged.
   * @returns The assembled result, or `null` when `text` is empty.
   */
  private buildSemanticOnlyResult(
    res: { model?: string },
    tokenAttention: Array<{ offset: [number, number]; raw: string; score: number; rawScore?: number; }>,
    text: string,
    chunkInfos?: SemanticData['chunkInfos']
  ): (FrontendAnalyzeResult & {
    rawScoresNormed: number[];
    attentionRawScores: number[];
    chunkInfos?: SemanticData['chunkInfos'];
  }) | null {
    const safeText = text ?? '';
    if (!safeText) return null;

    const bpeTokens: FrontendToken[] = tokenAttention.map((t) => ({
      offset: t.offset,
      raw: t.raw,
      pred_topk: [],
    })) as FrontendToken[];
    const rawScoresNormed = tokenAttention.map((t) => t.score);
    const attentionRawScores = tokenAttention.map((t) => getAttentionRawScore(t));
    // Shallow copy so the three token lists do not share row objects.
    const cloneRow = (t: FrontendToken): FrontendToken => ({ ...t });

    return {
      model: res.model,
      bpe_strings: bpeTokens.map(cloneRow),
      originalTokens: bpeTokens.map(cloneRow),
      bpeBpeMergedTokens: bpeTokens.map(cloneRow),
      originalText: safeText,
      rawScoresNormed,
      attentionRawScores,
      chunkInfos
    };
  }

  /**
   * Checks whether the semantic `token_attention` boundaries agree with the info-density tokens.
   * Sparse coverage is allowed: semantic tokens need not cover the whole text.
   *
   * A semantic token is accepted when both its start and end offsets are in the boundary set
   * (offset 0 plus the end offset of every info token). Tokens with invalid offsets are skipped
   * here (left to validateTokenConsistency), as are tokens ending past the last info token.
   *
   * @param tokenAttention Semantic tokens (character offsets into `text`).
   * @param infoMerged Merged info-density tokens.
   * @param text Original text, used for samples and surrounding context.
   * @returns A description of the first misaligned token (with samples and up to 30 characters of
   *          context on each side), or `null` when everything aligns.
   */
  private checkSemanticAlignsWithInfo(
    tokenAttention: Array<{ offset: [number, number]; raw?: string }>,
    infoMerged: Array<{ offset: [number, number] }>,
    text: string
  ): {
    firstBadIdx: number;
    aSample: string;
    bSample: string;
    aNext: string;
    bNext: string;
    textBefore: string;
    textAt: string;
    textAfter: string
  } | null {
    const boundaries = new Set<number>([0]);
    for (const t of infoMerged) boundaries.add(t.offset[1]);
    const infoEnd = infoMerged.length > 0 ? infoMerged[infoMerged.length - 1]!.offset[1] : 0;
    const totalChars = text.length;
    const ctx = 30;
    const maxSampleChars = 20;

    // JSON-escape without the surrounding quotes, so control characters stay visible.
    const esc = (s: string) => JSON.stringify(s).slice(1, -1);
    // Escaped preview of at most `maxSampleChars` characters; the ellipsis is appended only when
    // the text was really truncated (a text of exactly 20 characters gets none).
    const preview = (s: string) =>
      esc(s.slice(0, maxSampleChars)) + (s.length > maxSampleChars ? '…' : '');
    const fmt = (t: { offset: [number, number]; raw?: string }, idx: number) => {
      const raw = t.raw ?? text.slice(t.offset[0], t.offset[1]);
      return `第${idx}个token分词 [字符${t.offset[0]}-${t.offset[1]}] "${preview(raw)}"`;
    };

    for (let i = 0; i < tokenAttention.length; i++) {
      const sem = tokenAttention[i]!;
      const [as, ae] = sem.offset;
      if (as < 0 || ae > totalChars || ae <= as) continue; // handled by validateTokenConsistency
      if (ae > infoEnd) continue; // beyond the range both sides cover; not checked
      if (boundaries.has(as) && boundaries.has(ae)) continue;

      // First misaligned token: collect samples from both tokenizations at this position.
      const infoIdx = infoMerged.findIndex((t) => t.offset[0] <= as && as < t.offset[1]);
      const infoAt = infoIdx >= 0 ? infoMerged[infoIdx]! : null;
      // An empty or missing `raw` falls back to the text slice.
      const semText = sem.raw || text.slice(as, ae);
      const nextSem = tokenAttention[i + 1];
      const nextInfo = infoIdx >= 0 && infoIdx + 1 < infoMerged.length ? infoMerged[infoIdx + 1]! : null;

      return {
        firstBadIdx: i,
        aSample: `第${i}个token分词 [字符${as}-${ae}] "${preview(semText)}"`,
        bSample: infoAt
          ? `同一位置token分词 [字符${infoAt.offset[0]}-${infoAt.offset[1]}] "${preview(text.slice(infoAt.offset[0], infoAt.offset[1]))}"`
          : '无对应',
        aNext: nextSem ? fmt(nextSem, i + 1) : '无',
        bNext: nextInfo ? fmt(nextInfo, infoIdx + 1) : '无',
        textBefore: text.slice(Math.max(0, as - ctx), as),
        textAt: text.slice(as, ae),
        textAfter: text.slice(ae, Math.min(totalChars, ae + ctx)),
      };
    }
    return null;
  }

  /**
   * Combined mode: builds the union of the merged info-density tokens and the semantic tokens
   * that start at or after the end of the info-density range, so that the rendering range and
   * the truncation boundary stay consistent.
   *
   * Scores for the info-density part come from `mapTokenAttentionToMerged`. The "beyond" tokens
   * are re-normalized among themselves, using `getAttentionRawScore` as the input score.
   *
   * @param bpeMerged Merged info-density tokens.
   * @param tokenAttention Semantic attention tokens.
   * @returns `unionTokens` plus the per-token normalized and raw scores, index-aligned with
   *          `unionTokens`; entries are `undefined` where no finite score is available.
   */
  private mergeBpeWithSemanticBeyond(
    bpeMerged: FrontendToken[],
    tokenAttention: Array<{ offset: [number, number]; raw: string; score: number; rawScore?: number; }>
  ): {
    unionTokens: FrontendToken[];
    scoresForUnion: (number | undefined)[];
    rawScoresForUnion: (number | undefined)[];
  } {
    const infoEnd = bpeMerged.length > 0 ? bpeMerged[bpeMerged.length - 1]!.offset[1] : 0;
    const beyond = tokenAttention.filter((t) => t.offset[0] >= infoEnd);

    // The info-density part is scored the same way in both branches.
    const { scores: infoScores, rawScores: infoRawScores } = this.mapTokenAttentionToMerged(
      bpeMerged,
      tokenAttention
    );

    if (beyond.length === 0) {
      return {
        unionTokens: bpeMerged,
        scoresForUnion: infoScores,
        rawScoresForUnion: infoRawScores,
      };
    }

    // `beyond` was already overlap + digit merged in handleSemanticResponse; re-normalize within
    // this segment using the raw scores.
    const beyondRenormed = normalizeTokenScores(
      beyond.map((t) => ({ ...t, score: getAttentionRawScore(t) }))
    );
    const semanticAsFrontend: FrontendToken[] = beyondRenormed.map((t) => ({
      offset: [t.offset[0], t.offset[1]],
      raw: t.raw,
      real_topk: [0, 1] as [number, number],
      pred_topk: [],
    }));
    const unionTokens = [...bpeMerged, ...semanticAsFrontend];

    const beyondScores: (number | undefined)[] = beyondRenormed.map((t) =>
      Number.isFinite(t.score) ? t.score : undefined
    );
    const beyondRawScores: (number | undefined)[] = beyondRenormed.map((t) => {
      const r = getAttentionRawScore(t);
      return Number.isFinite(r) ? r : undefined;
    });

    const scoresForUnion = [...infoScores, ...beyondScores];
    const rawScoresForUnion = [...infoRawScores, ...beyondRawScores];
    return { unionTokens, scoresForUnion, rawScoresForUnion };
  }

  /**
   * Maps `token_attention` (offsets are character offsets into the original text) onto the
   * merged tokens with a two-pointer sweep, O(N + M).
   *
   * Each merged token receives the overlap-weighted average of the scores of the attention
   * tokens intersecting it; merged tokens with no overlap get `undefined`.
   *
   * Precondition: both arrays are sorted by offset in ascending order.
   *
   * @param bpeBpeMergedTokens Merged tokens to map onto.
   * @param tokenAttention Attention tokens carrying `score` (and optionally `rawScore`).
   * @returns `scores` and `rawScores`, index-aligned with `bpeBpeMergedTokens`.
   */
  private mapTokenAttentionToMerged(
    bpeBpeMergedTokens: Array<{ offset: [number, number] }>,
    tokenAttention: Array<{ offset: [number, number]; score: number; rawScore?: number }>
  ): {
    scores: (number | undefined)[];
    rawScores: (number | undefined)[];
  } {
    const n = bpeBpeMergedTokens.length;
    const scores: number[] = new Array(n).fill(0);
    const rawScores: number[] = new Array(n).fill(0);
    const weights: number[] = new Array(n).fill(0);

    let j = 0;
    for (const attn of tokenAttention) {
      const [as, ae] = attn.offset;
      const rawPart = getAttentionRawScore(attn);
      // Skip every merged token that ends at or before the start of the current attention token.
      while (j < n && bpeBpeMergedTokens[j].offset[1] <= as) j++;
      for (let k = j; k < n && bpeBpeMergedTokens[k].offset[0] < ae; k++) {
        const [s, e] = bpeBpeMergedTokens[k].offset;
        const overlap = Math.min(e, ae) - Math.max(s, as);
        // For well-formed, sorted spans the loop conditions already imply overlap > 0; the guard
        // only keeps degenerate (empty or inverted) spans from distorting the weighted average.
        if (overlap <= 0) continue;
        scores[k] += attn.score * overlap;
        rawScores[k] += rawPart * overlap;
        weights[k] += overlap;
      }
    }

    // Weighted sum -> weighted mean; tokens that received no weight stay undefined.
    const norm = (vals: number[]) =>
      vals.map((v, i) => (weights[i] > 0 ? v / weights[i] : undefined));
    return {
      scores: norm(scores),
      rawScores: norm(rawScores),
    };
  }
}