// Source: InfoRadar/client/src/ts/utils/visualizationUpdater.ts
// Last commit ff38960 by dqy08: "移除 jina-colbert-v2 相关" (remove jina-colbert-v2 related code)
/**
* 可视化更新模块
* 负责处理分析结果的可视化更新逻辑
*/
import * as d3 from 'd3';
import type { AnalyzeResponse, FrontendAnalyzeResult, FrontendToken } from '../api/GLTR_API';
import type { GLTR_Text_Box } from '../vis/GLTR_Text_Box';
import type { HighlightController } from '../controllers/highlightController';
import type { TextInputController } from '../controllers/textInputController';
import type { Histogram } from '../vis/Histogram';
import type { ScatterPlot } from '../vis/ScatterPlot';
import type { AppStateManager } from './appStateManager';
import {
cloneFrontendToken,
mergeTokensForRendering,
createRawSnapshot
} from './tokenUtils';
import type { ColbertToken } from './semanticOverlayUtils';
import { decodeColbertTokens, computeTokenSimilarities, cosToColorScale, queryEmbeddingFromColbertTokens } from './semanticOverlayUtils';
import {
validateTokenConsistency,
validateTokenProbabilities,
validateTokenPredictions
} from './dataValidation';
import {
calculateTextStats,
type TextStats
} from './textStatistics';
import { getTokenSurprisalHistogramConfig, getSurprisalProgressConfig, getSemanticCosHistogramConfig } from "./visualizationConfigs";
import { showAlertDialog } from '../ui/dialog';
import { tr } from '../lang/i18n-lite';
import { getSemanticAnalysisEnabled } from './semanticAnalysisManager';
/**
 * Collaborators that VisualizationUpdater drives when it renders an analysis result.
 * All of them are supplied once through the VisualizationUpdater constructor.
 */
export interface VisualizationDependencies {
/** Text box view: receives the enhanced result via update(), plus option, loading and highlight-reset calls. */
lmf: GLTR_Text_Box;
/** Highlight controller: told about the current result and asked to clear highlights after a new analysis. */
highlightController: HighlightController;
/** Text input controller: receives the computed text statistics and the model name to display. */
textInputController: TextInputController;
/** Histogram fed with the per-token surprisal values. */
stats_frac: Histogram;
/** Histogram fed with the per-token cos values produced by a semantic query. */
stats_semantic_cos: Histogram;
/** Scatter plot fed with the per-token surprisal series. */
stats_surprisal_progress: ScatterPlot;
/** Application state manager: analyzing/loading flags, hasValidData/hasSemanticIndex state and button states. */
appStateManager: AppStateManager;
/** Color scale passed to the token surprisal histogram. */
surprisalColorScale: d3.ScaleSequential<string>;
/**
 * Optional callback invoked with the semantic index model name, or with
 * undefined when no semantic index is available (used by the UI to display
 * "index model: xxx").
 */
onSemanticIndexModelUpdate?: (model: string | undefined) => void;
}
/**
 * Snapshot of the data most recently accepted by VisualizationUpdater.
 * Every field is null until the first response has been processed successfully.
 */
export interface CurrentDataState {
/** Last response that passed validation; its `result` has been replaced by the enhanced (merged-token) result. */
currentData: AnalyzeResponse | null;
/** Snapshot taken with createRawSnapshot() before the response is modified; a later semantic index is merged into its result. */
rawApiResponse: AnalyzeResponse | null;
/** Per-token surprisal values from calculateTextStats(), kept so charts can be re-rendered. */
currentSurprisals: number[] | null;
/** Token average reported by calculateTextStats() (tokenAverage). */
currentTokenAvg: number | null;
/** Token P90 reported by calculateTextStats() (tokenP90). */
currentTokenP90: number | null;
/** Total surprisal reported by calculateTextStats() (totalSurprisal). */
currentTotalSurprisal: number | null;
}
/**
 * Visualization update manager.
 *
 * Validates analyze responses, builds the merged-token result that gets
 * rendered, pushes it to the text box, histograms and scatter plot, and keeps
 * the last accepted data so charts can be re-rendered (e.g. on theme change)
 * and semantic index / query responses can be merged in afterwards.
 */
export class VisualizationUpdater {
    private deps: VisualizationDependencies;
    private currentState: CurrentDataState;
    /**
     * Handle of the pending timer that switches the render animation back off
     * after an animated update, or null when no such timer is scheduled.
     */
    private animationResetTimer: ReturnType<typeof setTimeout> | null = null;

    constructor(deps: VisualizationDependencies) {
        this.deps = deps;
        this.currentState = {
            currentData: null,
            rawApiResponse: null,
            currentSurprisals: null,
            currentTokenAvg: null,
            currentTokenP90: null,
            currentTotalSurprisal: null
        };
    }

    /**
     * Returns a shallow copy of the current data state.
     */
    getCurrentState(): Readonly<CurrentDataState> {
        return { ...this.currentState };
    }

    /**
     * Returns the raw API response snapshot of the current data, if any.
     */
    getRawApiResponse(): AnalyzeResponse | null {
        return this.currentState.rawApiResponse;
    }

    /**
     * Returns the current (enhanced) data, if any.
     */
    getCurrentData(): AnalyzeResponse | null {
        return this.currentState.currentData;
    }

    /**
     * Returns the current per-token surprisal values, if any.
     */
    getCurrentSurprisals(): number[] | null {
        return this.currentState.currentSurprisals;
    }

    /**
     * Forwards text statistics and the model name to the text input controller.
     */
    private updateTextMetrics(stats: TextStats | null, modelName?: string | null | undefined): void {
        this.deps.textInputController.updateTextMetrics(stats, modelName);
    }

    /**
     * Clears highlights through the highlight controller.
     */
    private clearHighlights(): void {
        this.deps.highlightController.clearHighlights();
    }

    /**
     * Cancels the pending "disable render animation" timer, if one exists.
     */
    private cancelAnimationReset(): void {
        if (this.animationResetTimer !== null) {
            clearTimeout(this.animationResetTimer);
            this.animationResetTimer = null;
        }
    }

    /**
     * Shared cleanup for a failed update: marks the data as invalid, clears the
     * semantic index model shown in the UI and re-syncs the semantic UI.
     *
     * @param animationWasEnabled true when this update had switched the render
     *   animation on; it is switched back off here because the success path
     *   (which normally schedules the switch-off) will not run.
     */
    private resetAfterFailedUpdate(animationWasEnabled: boolean): void {
        if (animationWasEnabled) {
            this.cancelAnimationReset();
            this.deps.lmf.updateOptions({ enableRenderAnimation: false }, false);
        }
        this.deps.appStateManager.updateState({ hasValidData: false, hasSemanticIndex: false });
        this.deps.onSemanticIndexModelUpdate?.(undefined);
        this.syncSemanticUiFromConfig();
    }

    /**
     * Re-renders the histograms and the surprisal progress plot from the
     * cached state (internal implementation).
     */
    private rerenderHistogramsInternal(): void {
        const currentSurprisals = this.currentState.currentSurprisals;
        const currentTokenAvg = this.currentState.currentTokenAvg;
        const currentTokenP90 = this.currentState.currentTokenP90;

        if (currentSurprisals) {
            // Token surprisal histogram. Binning comes from the config helper
            // (original note: 19 steps for [0,1), [1,2), ..., [17,18), [18,∞)).
            const tokenHistogramConfig = getTokenSurprisalHistogramConfig();
            this.deps.stats_frac.update({
                ...tokenHistogramConfig,
                data: currentSurprisals,
                colorScale: this.deps.surprisalColorScale,
                averageValue: currentTokenAvg ?? undefined,
                p90Value: currentTokenP90 ?? undefined,
            });
            // Keep the histogram title in the main view in sync with the config label.
            const titleElement = document.getElementById('token_histogram_title');
            if (titleElement) {
                titleElement.textContent = tokenHistogramConfig.label;
            }
        }

        if (currentSurprisals && currentSurprisals.length > 0) {
            const surprisalProgressConfig = getSurprisalProgressConfig();
            this.deps.stats_surprisal_progress.update({
                ...surprisalProgressConfig,
                data: currentSurprisals,
            });
            // Keep the surprisal progress title in the main view in sync with the config label.
            const surprisalProgressTitleElement = document.getElementById('surprisal_progress_title');
            if (surprisalProgressTitleElement && surprisalProgressConfig.label) {
                surprisalProgressTitleElement.textContent = surprisalProgressConfig.label;
            }
        }

        // Semantic analysis: render the cos histogram only when cos values exist,
        // otherwise hide its container. (Original note: colors follow
        // match score = max(cos, 0), so bins below 0 use the color of 0.)
        const tokensWithCos = (this.currentState.currentData?.result as FrontendAnalyzeResult & { semanticColbertTokens?: Array<{ cos?: number }> })?.semanticColbertTokens;
        const cosValues = tokensWithCos?.map((t) => t.cos).filter((c): c is number => typeof c === 'number');
        const cosHistogramItem = document.getElementById('semantic_cos_histogram_item');
        if (cosValues && cosValues.length > 0) {
            const cosConfig = getSemanticCosHistogramConfig();
            const cosAvg = cosValues.reduce((a, b) => a + b, 0) / cosValues.length;
            this.deps.stats_semantic_cos.update({
                ...cosConfig,
                data: cosValues,
                colorScale: cosToColorScale,
                averageValue: cosAvg,
            });
            const cosTitleEl = document.getElementById('semantic_cos_histogram_title');
            if (cosTitleEl) cosTitleEl.textContent = cosConfig.label;
            if (cosHistogramItem) cosHistogramItem.style.display = '';
        } else if (cosHistogramItem) {
            cosHistogramItem.style.display = 'none';
        }
    }

    /**
     * Re-renders the histograms (public entry point for external callers,
     * e.g. after a theme switch).
     */
    public rerenderHistograms(): void {
        this.rerenderHistogramsInternal();
    }

    /**
     * Syncs UI state (semantic analysis section, text rendering mode, button
     * states) with the semantic analysis configuration. The UI is driven by
     * the configuration only, not by whether data is present.
     */
    public syncSemanticUiFromConfig(): void {
        const enabled = getSemanticAnalysisEnabled();
        const el = document.getElementById('semantic_analysis_section');
        if (el) el.style.display = enabled ? '' : 'none';
        this.deps.lmf.updateOptions({ semanticAnalysisMode: enabled }, false);
        // The semantic analysis setting affects the Upload/Save readiness
        // condition, so button states are always refreshed.
        this.deps.appStateManager.updateButtonStates();
    }

    /**
     * Updates every visualization from an analyze response (core method).
     *
     * The response is validated first; on any validation failure or exception
     * an error dialog is shown, the data is marked invalid and nothing is
     * stored. On success `data.result` is replaced in place by the enhanced
     * (merged-token) result and the state is cached.
     *
     * @param data analyze response to render
     * @param disableAnimation when false (default) the render animation is
     *   enabled for this update and switched off again afterwards
     * @param options accepted for backward compatibility; currently not read
     */
    updateFromRequest(
        data: AnalyzeResponse,
        disableAnimation: boolean = false,
        options: { enableSave?: boolean } = {}
    ): void {
        const dataHasSemantic = !!(data?.result as { semantic_index?: unknown })?.semantic_index;
        const semanticEnabled = getSemanticAnalysisEnabled();
        const hasSemanticAnalysis = semanticEnabled && dataHasSemantic;
        const animationEnabled = !disableAnimation;

        const abortDueToInvalidResponse = (message: string): void => {
            console.error(message);
            showAlertDialog(tr('Error'), message);
            this.resetAfterFailedUpdate(animationEnabled);
        };

        try {
            // Animation is enabled only for Analyze-triggered updates; all
            // other renders keep it off.
            if (animationEnabled) {
                // Drop the switch-off timer of a previous update so it cannot
                // disable the animation in the middle of this one.
                this.cancelAnimationReset();
                this.deps.lmf.updateOptions({ enableRenderAnimation: true }, false);
            }
            // Semantic analysis mode follows the configuration, not the data.
            this.deps.lmf.updateOptions({ semanticAnalysisMode: semanticEnabled }, false);
            d3.select('#all_result').style('opacity', 1).style('display', null);
            this.deps.appStateManager.setIsAnalyzing(false);
            this.deps.appStateManager.setGlobalLoading(false);
            // Hide the text area's loading state right away.
            this.deps.lmf.hideLoading();

            // Validate the overall structure.
            if (!data || !data.result) {
                console.error('Invalid data structure:', data);
                throw new Error('Invalid API response structure');
            }
            const result = data.result;

            // Validate required fields and their contents.
            if (!Array.isArray(result.bpe_strings) || result.bpe_strings.length === 0) {
                abortDueToInvalidResponse(tr('Returned JSON missing valid bpe_strings array, processing cancelled.'));
                return;
            }
            const predTopkError = validateTokenPredictions(result.bpe_strings as Array<{ pred_topk?: [string, number][] }>);
            if (predTopkError) {
                abortDueToInvalidResponse(predTopkError);
                return;
            }
            const probabilityError = validateTokenProbabilities(result.bpe_strings as Array<{ real_topk?: [number, number] }>);
            if (probabilityError) {
                abortDueToInvalidResponse(probabilityError);
                return;
            }
            const safeText = data.request.text;
            const validationError = validateTokenConsistency(result.bpe_strings, safeText, { allowOverlap: true });
            if (validationError) {
                abortDueToInvalidResponse(validationError);
                return;
            }

            // Snapshot the raw response before `data.result` is replaced below.
            const rawSnapshot = createRawSnapshot(data);
            const originalTokens = result.bpe_strings.map((token) => cloneFrontendToken(token as FrontendToken));
            const { mergedTokens, originalToMergedMap } = mergeTokensForRendering(originalTokens, safeText);
            const mergedValidationError = validateTokenConsistency(mergedTokens, safeText);
            if (mergedValidationError) {
                abortDueToInvalidResponse(mergedValidationError);
                return;
            }

            const si = (result as { semantic_index?: { colbert_tokens?: ColbertToken[] } }).semantic_index;
            const semanticColbertTokens = hasSemanticAnalysis && si?.colbert_tokens
                ? decodeColbertTokens(si.colbert_tokens)
                : undefined;
            const enhancedResult: FrontendAnalyzeResult & { semanticColbertTokens?: ColbertToken[] } = {
                ...result,
                originalTokens,
                mergedTokens,
                originalToMergedMap,
                bpe_strings: mergedTokens,
                // Original text, used to extract token text from offsets.
                originalText: safeText,
                ...(semanticColbertTokens && { semanticColbertTokens }),
            };
            data.result = enhancedResult;

            // Store the data now that it has passed validation.
            this.currentState.currentData = data;
            this.currentState.rawApiResponse = rawSnapshot;
            this.deps.highlightController.updateCurrentData({ result: enhancedResult });

            // New analysis data: drop the previous highlight, then render.
            this.deps.lmf.clearHighlight();
            this.deps.lmf.update(enhancedResult);

            const textStats = calculateTextStats(enhancedResult, safeText);
            // Cache the statistics so charts can be re-rendered on theme change.
            this.currentState.currentSurprisals = textStats.tokenSurprisals;
            this.currentState.currentTokenAvg = textStats.tokenAverage;
            this.currentState.currentTokenP90 = textStats.tokenP90;
            this.currentState.currentTotalSurprisal = textStats.totalSurprisal;

            // Show the metrics together with the model reported by the result.
            const resultModel = data.result.model;
            this.updateTextMetrics(textStats, resultModel);

            // Switch the animation off after it has had time to finish, so
            // later re-renders (e.g. dragging) do not replay it.
            if (animationEnabled) {
                // Estimate: 100ms initial delay + 100ms per batch of 50 tokens,
                // with a 500ms margin and a 2000ms floor.
                const tokenCount = enhancedResult.bpe_strings.length;
                const estimatedAnimationTime = 100 + Math.ceil(tokenCount / 50) * 100;
                const delayTime = Math.max(2000, estimatedAnimationTime + 500);
                this.animationResetTimer = setTimeout(() => {
                    this.animationResetTimer = null;
                    this.deps.lmf.updateOptions({ enableRenderAnimation: false }, false);
                }, delayTime);
            }
        } catch (error) {
            console.error('Error updating visualization:', error);
            this.deps.appStateManager.setIsAnalyzing(false);
            this.deps.appStateManager.setGlobalLoading(false);
            this.resetAfterFailedUpdate(animationEnabled);
            showAlertDialog(tr('Error'), 'Error rendering visualization. Check console for details.');
            return;
        }

        // Clear the previous selection state.
        this.clearHighlights();
        // Re-render the histograms.
        this.rerenderHistogramsInternal();
        // Data processed successfully: mark it as valid.
        this.deps.appStateManager.updateState({ hasValidData: true, hasSemanticIndex: dataHasSemantic });
        this.deps.onSemanticIndexModelUpdate?.(dataHasSemantic ? (data?.result as { semantic_index?: { model?: string } })?.semantic_index?.model : undefined);
        this.syncSemanticUiFromConfig();
    }

    /**
     * Semantic index response: decodes colbert_tokens and merges them into the
     * current data. Does nothing when there is no current data or the response
     * carries no tokens.
     *
     * @param semanticIndex semantic index payload (model name and colbert tokens)
     */
    public handleSemanticIndexResponse(semanticIndex: { model?: string; colbert_tokens?: ColbertToken[] }): void {
        const data = this.currentState.currentData;
        const raw = this.currentState.rawApiResponse;
        if (!data?.result || !raw) return;
        const colbertTokens = semanticIndex?.colbert_tokens;
        if (!colbertTokens?.length) return;

        const decodedTokens = decodeColbertTokens(colbertTokens);
        const result = data.result as FrontendAnalyzeResult & { semanticColbertTokens?: ColbertToken[] };
        const enhancedResult: FrontendAnalyzeResult & { semanticColbertTokens?: ColbertToken[] } = {
            ...result,
            semanticColbertTokens: decodedTokens,
        };
        this.currentState.currentData = { ...data, result: enhancedResult };
        // The raw snapshot keeps the undecoded semantic index payload.
        this.currentState.rawApiResponse = {
            ...raw,
            result: { ...raw.result, semantic_index: semanticIndex } as typeof raw.result,
        };

        this.deps.highlightController.updateCurrentData({ result: enhancedResult });
        this.deps.lmf.updateOptions({ enableRenderAnimation: false }, false);
        this.deps.lmf.update(enhancedResult);
        this.rerenderHistogramsInternal();
        this.deps.appStateManager.updateState({ hasSemanticIndex: true });
        this.deps.onSemanticIndexModelUpdate?.(semanticIndex.model);
        this.syncSemanticUiFromConfig();
    }

    /**
     * Semantic query response: derives a query embedding from the response's
     * colbert_tokens, computes each current token's cos similarity and match
     * score against it, then re-renders the text and the cos histogram.
     * (Original note: Qwen3-Embedding uses last-token pooling, other models
     * use the average; that logic lives in queryEmbeddingFromColbertTokens.)
     *
     * Does nothing when there is no current data, no token carries an
     * embedding vector, or no query embedding can be derived.
     *
     * @param semanticIndex semantic index payload computed for the query text
     */
    public handleSemanticQueryResponse(semanticIndex: { model?: string; colbert_tokens?: ColbertToken[] }): void {
        const data = this.currentState.currentData;
        if (!data?.result) return;
        const result = data.result as FrontendAnalyzeResult & { semanticColbertTokens?: ColbertToken[] };
        const tokens = result.semanticColbertTokens;
        if (!tokens?.length || !tokens.some((t) => t.embeddingVec)) return;

        const queryTokens = semanticIndex?.colbert_tokens;
        const queryVec = queryTokens ? queryEmbeddingFromColbertTokens(queryTokens) : null;
        if (!queryVec) return;

        const { cos, matchScore } = computeTokenSimilarities(tokens, queryVec);
        const tokensWithSimilarities: Array<ColbertToken & { cos: number; matchScore: number }> = tokens.map((t, i) => ({
            ...t,
            cos: cos[i],
            matchScore: matchScore[i],
        }));
        const enhancedResult: FrontendAnalyzeResult & { semanticColbertTokens?: typeof tokensWithSimilarities } = {
            ...result,
            semanticColbertTokens: tokensWithSimilarities,
        };
        this.currentState.currentData = { ...data, result: enhancedResult };

        this.deps.highlightController.updateCurrentData({ result: enhancedResult });
        this.deps.lmf.updateOptions({ enableRenderAnimation: false }, false);
        this.deps.lmf.update(enhancedResult);
        // Refresh the cos histogram.
        this.rerenderHistogramsInternal();
    }
}