|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import * as d3 from 'd3'; |
|
|
import type { AnalyzeResponse, FrontendAnalyzeResult, FrontendToken } from '../api/GLTR_API'; |
|
|
import type { GLTR_Text_Box } from '../vis/GLTR_Text_Box'; |
|
|
import type { HighlightController } from '../controllers/highlightController'; |
|
|
import type { TextInputController } from '../controllers/textInputController'; |
|
|
import type { Histogram } from '../vis/Histogram'; |
|
|
import type { ScatterPlot } from '../vis/ScatterPlot'; |
|
|
import type { AppStateManager } from './appStateManager'; |
|
|
import { |
|
|
cloneFrontendToken, |
|
|
mergeTokensForRendering, |
|
|
createRawSnapshot |
|
|
} from './tokenUtils'; |
|
|
import type { ColbertToken } from './semanticOverlayUtils'; |
|
|
import { decodeColbertTokens, computeTokenSimilarities, cosToColorScale, queryEmbeddingFromColbertTokens } from './semanticOverlayUtils'; |
|
|
import { |
|
|
validateTokenConsistency, |
|
|
validateTokenProbabilities, |
|
|
validateTokenPredictions |
|
|
} from './dataValidation'; |
|
|
import { |
|
|
calculateTextStats, |
|
|
type TextStats |
|
|
} from './textStatistics'; |
|
|
import { getTokenSurprisalHistogramConfig, getSurprisalProgressConfig, getSemanticCosHistogramConfig } from "./visualizationConfigs"; |
|
|
import { showAlertDialog } from '../ui/dialog'; |
|
|
import { tr } from '../lang/i18n-lite'; |
|
|
import { getSemanticAnalysisEnabled } from './semanticAnalysisManager'; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Collaborators that {@link VisualizationUpdater} drives whenever analysis
 * data arrives or needs to be redrawn.
 */
export interface VisualizationDependencies {
    /** Token text view: receives option changes, loading/highlight resets and the result to render. */
    lmf: GLTR_Text_Box;

    /** Receives the current result and is asked to clear highlights after an update. */
    highlightController: HighlightController;

    /** Receives the computed text statistics together with the model name. */
    textInputController: TextInputController;

    /** Histogram fed with the per-token surprisal values. */
    stats_frac: Histogram;

    /** Histogram fed with the per-token semantic `cos` values, when any exist. */
    stats_semantic_cos: Histogram;

    /** Scatter plot fed with the per-token surprisal values. */
    stats_surprisal_progress: ScatterPlot;

    /** Application state: analyzing/loading flags, data-validity flags and button states. */
    appStateManager: AppStateManager;

    /** Color scale handed to the token surprisal histogram. */
    surprisalColorScale: d3.ScaleSequential<string>;

    /**
     * Optional callback invoked with the model name of the semantic index,
     * or with `undefined` when no semantic index is available.
     */
    onSemanticIndexModelUpdate?: (model: string | undefined) => void;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Data cached by {@link VisualizationUpdater} between renders.
 * Every field is `null` until the first successful update.
 */
export interface CurrentDataState {
    /** Latest response, with `result` replaced by the enhanced (merged-token) result. */
    currentData: AnalyzeResponse | null;

    /** Snapshot of the response taken via `createRawSnapshot` before it was enhanced. */
    rawApiResponse: AnalyzeResponse | null;

    /** Per-token surprisal values taken from the computed text statistics. */
    currentSurprisals: number[] | null;

    /** Token average taken from the computed text statistics. */
    currentTokenAvg: number | null;

    /** Token P90 value taken from the computed text statistics. */
    currentTokenP90: number | null;

    /** Total surprisal taken from the computed text statistics. */
    currentTotalSurprisal: number | null;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Applies analysis responses to the text view, the histograms / scatter plot
 * and the application state, and caches the data needed to redraw them later.
 */
export class VisualizationUpdater {
    private deps: VisualizationDependencies;
    private currentState: CurrentDataState;

    /**
     * Handle of the pending timer that switches render animation back off,
     * or `null` when none is scheduled. Kept so a newer update can cancel it.
     */
    private animationResetTimer: ReturnType<typeof setTimeout> | null = null;

    /**
     * @param deps Views, controllers and callbacks this updater drives.
     */
    constructor(deps: VisualizationDependencies) {
        this.deps = deps;
        this.currentState = {
            currentData: null,
            rawApiResponse: null,
            currentSurprisals: null,
            currentTokenAvg: null,
            currentTokenP90: null,
            currentTotalSurprisal: null
        };
    }

    /**
     * @returns A shallow copy of the cached state; nested objects are shared with the updater.
     */
    getCurrentState(): Readonly<CurrentDataState> {
        return { ...this.currentState };
    }

    /**
     * @returns The cached raw response snapshot, or `null` before the first successful update.
     */
    getRawApiResponse(): AnalyzeResponse | null {
        return this.currentState.rawApiResponse;
    }

    /**
     * @returns The cached response carrying the enhanced result, or `null` before the first successful update.
     */
    getCurrentData(): AnalyzeResponse | null {
        return this.currentState.currentData;
    }

    /**
     * @returns The cached per-token surprisal values, or `null` before the first successful update.
     */
    getCurrentSurprisals(): number[] | null {
        return this.currentState.currentSurprisals;
    }

    /** Forwards the text statistics and model name to the text input controller. */
    private updateTextMetrics(stats: TextStats | null, modelName?: string | null | undefined): void {
        this.deps.textInputController.updateTextMetrics(stats, modelName);
    }

    /** Asks the highlight controller to clear its highlights. */
    private clearHighlights(): void {
        this.deps.highlightController.clearHighlights();
    }

    /**
     * Flags the application state as having no valid data and no semantic
     * index, reports "no semantic model" to the optional callback, and
     * re-syncs the semantic UI. Shared by every failure path.
     */
    private markDataInvalid(): void {
        this.deps.appStateManager.updateState({ hasValidData: false, hasSemanticIndex: false });
        this.deps.onSemanticIndexModelUpdate?.(undefined);
        this.syncSemanticUiFromConfig();
    }

    /**
     * Schedules render animation to be switched off after `delayMs`.
     * Any reset still pending from an earlier update is cancelled first, so
     * it cannot fire while the newer render is still inside its own window.
     */
    private scheduleAnimationReset(delayMs: number): void {
        if (this.animationResetTimer !== null) {
            clearTimeout(this.animationResetTimer);
        }
        this.animationResetTimer = setTimeout(() => {
            this.animationResetTimer = null;
            this.deps.lmf.updateOptions({ enableRenderAnimation: false }, false);
        }, delayMs);
    }

    /**
     * Redraws the token surprisal histogram, the surprisal progress plot and
     * the semantic cos histogram from the cached state, and refreshes their
     * title elements when those exist in the document.
     */
    private rerenderHistogramsInternal(): void {
        const { currentSurprisals, currentTokenAvg, currentTokenP90 } = this.currentState;

        if (currentSurprisals) {
            const tokenHistogramConfig = getTokenSurprisalHistogramConfig();
            this.deps.stats_frac.update({
                ...tokenHistogramConfig,
                data: currentSurprisals,
                colorScale: this.deps.surprisalColorScale,
                averageValue: currentTokenAvg ?? undefined,
                p90Value: currentTokenP90 ?? undefined,
            });

            const titleElement = document.getElementById('token_histogram_title');
            if (titleElement) {
                titleElement.textContent = tokenHistogramConfig.label;
            }
        }

        // The progress plot is only updated when there is at least one value.
        if (currentSurprisals && currentSurprisals.length > 0) {
            const surprisalProgressConfig = getSurprisalProgressConfig();
            this.deps.stats_surprisal_progress.update({
                ...surprisalProgressConfig,
                data: currentSurprisals,
            });

            const surprisalProgressTitleElement = document.getElementById('surprisal_progress_title');
            if (surprisalProgressTitleElement && surprisalProgressConfig.label) {
                surprisalProgressTitleElement.textContent = surprisalProgressConfig.label;
            }
        }

        // Tokens only carry a numeric `cos` after a semantic query has been applied.
        const tokensWithCos = (this.currentState.currentData?.result as FrontendAnalyzeResult & { semanticColbertTokens?: Array<{ cos?: number }> })?.semanticColbertTokens;
        const cosValues = tokensWithCos?.map((t) => t.cos).filter((c): c is number => typeof c === 'number');
        const cosHistogramItem = document.getElementById('semantic_cos_histogram_item');

        if (cosValues && cosValues.length > 0) {
            const cosConfig = getSemanticCosHistogramConfig();
            const cosAvg = cosValues.reduce((a, b) => a + b, 0) / cosValues.length;
            this.deps.stats_semantic_cos.update({
                ...cosConfig,
                data: cosValues,
                colorScale: cosToColorScale,
                averageValue: cosAvg,
            });

            const cosTitleEl = document.getElementById('semantic_cos_histogram_title');
            if (cosTitleEl) cosTitleEl.textContent = cosConfig.label;
            if (cosHistogramItem) cosHistogramItem.style.display = '';
        } else if (cosHistogramItem) {
            // Nothing to plot: hide the histogram container.
            cosHistogramItem.style.display = 'none';
        }
    }

    /** Redraws all histograms and plots from the cached state. */
    public rerenderHistograms(): void {
        this.rerenderHistogramsInternal();
    }

    /**
     * Brings the semantic-analysis UI in line with the current configuration:
     * shows or hides `#semantic_analysis_section`, passes the flag to the
     * text view as `semanticAnalysisMode`, and refreshes the button states.
     */
    public syncSemanticUiFromConfig(): void {
        const enabled = getSemanticAnalysisEnabled();
        const el = document.getElementById('semantic_analysis_section');
        if (el) el.style.display = enabled ? '' : 'none';
        this.deps.lmf.updateOptions({ semanticAnalysisMode: enabled }, false);

        this.deps.appStateManager.updateButtonStates();
    }

    /**
     * Validates an analysis response, builds the enhanced result (original
     * tokens, merged tokens, index map and optional decoded semantic tokens),
     * stores it and redraws every view.
     *
     * On a validation failure or an exception, an alert dialog is shown, the
     * application state is flagged as invalid and the cached data is left as
     * it was.
     *
     * @param data Response to visualize. Its `result` is replaced in place by the enhanced result.
     * @param disableAnimation When `true`, render animation is neither enabled nor scheduled to be switched off.
     * @param options Accepted for caller compatibility; nothing in this method reads it.
     */
    updateFromRequest(
        data: AnalyzeResponse,
        disableAnimation: boolean = false,
        options: { enableSave?: boolean } = {}
    ): void {
        const dataHasSemantic = !!(data?.result as { semantic_index?: unknown })?.semantic_index;
        const hasSemanticAnalysis = getSemanticAnalysisEnabled() && dataHasSemantic;

        // Reports a validation failure to the user and flags the state as invalid.
        const abortDueToInvalidResponse = (message: string) => {
            console.error(message);
            showAlertDialog(tr('Error'), message);
            this.markDataInvalid();
        };

        try {
            if (!disableAnimation) {
                this.deps.lmf.updateOptions({ enableRenderAnimation: true }, false);
            }

            this.deps.lmf.updateOptions({ semanticAnalysisMode: getSemanticAnalysisEnabled() }, false);

            // Reveal the result area and leave every loading state.
            d3.select('#all_result').style('opacity', 1).style('display', null);
            this.deps.appStateManager.setIsAnalyzing(false);
            this.deps.appStateManager.setGlobalLoading(false);

            this.deps.lmf.hideLoading();

            if (!data || !data.result) {
                console.error('Invalid data structure:', data);
                throw new Error('Invalid API response structure');
            }

            const result = data.result;

            if (!Array.isArray(result.bpe_strings) || result.bpe_strings.length === 0) {
                abortDueToInvalidResponse(tr('Returned JSON missing valid bpe_strings array, processing cancelled.'));
                return;
            }
            const predTopkError = validateTokenPredictions(result.bpe_strings as Array<{ pred_topk?: [string, number][] }>);
            if (predTopkError) {
                abortDueToInvalidResponse(predTopkError);
                return;
            }
            const probabilityError = validateTokenProbabilities(result.bpe_strings as Array<{ real_topk?: [number, number] }>);
            if (probabilityError) {
                abortDueToInvalidResponse(probabilityError);
                return;
            }

            const safeText = data.request.text;
            // Raw tokens are validated with overlap allowed; merged tokens are validated without that option.
            const validationError = validateTokenConsistency(result.bpe_strings, safeText, { allowOverlap: true });
            if (validationError) {
                abortDueToInvalidResponse(validationError);
                return;
            }

            // Snapshot the response before `data.result` is replaced below.
            const rawSnapshot = createRawSnapshot(data);
            const originalTokens = result.bpe_strings.map((token) => cloneFrontendToken(token as FrontendToken));
            const { mergedTokens, originalToMergedMap } = mergeTokensForRendering(originalTokens, safeText);
            const mergedValidationError = validateTokenConsistency(mergedTokens, safeText);
            if (mergedValidationError) {
                abortDueToInvalidResponse(mergedValidationError);
                return;
            }

            const si = (result as { semantic_index?: { colbert_tokens?: ColbertToken[] } }).semantic_index;
            const semanticColbertTokens = hasSemanticAnalysis && si?.colbert_tokens
                ? decodeColbertTokens(si.colbert_tokens)
                : undefined;

            const enhancedResult: FrontendAnalyzeResult & { semanticColbertTokens?: ColbertToken[] } = {
                ...result,
                originalTokens,
                mergedTokens,
                originalToMergedMap,
                bpe_strings: mergedTokens,
                originalText: safeText,
                ...(semanticColbertTokens && { semanticColbertTokens }),
            };
            data.result = enhancedResult;

            this.currentState.currentData = data;
            this.currentState.rawApiResponse = rawSnapshot;

            this.deps.highlightController.updateCurrentData({ result: enhancedResult });

            this.deps.lmf.clearHighlight();

            this.deps.lmf.update(enhancedResult);

            const textStats = calculateTextStats(enhancedResult, safeText);

            this.currentState.currentSurprisals = textStats.tokenSurprisals;
            this.currentState.currentTokenAvg = textStats.tokenAverage;
            this.currentState.currentTokenP90 = textStats.tokenP90;
            this.currentState.currentTotalSurprisal = textStats.totalSurprisal;

            const resultModel = data.result.model;
            this.updateTextMetrics(textStats, resultModel);

            if (!disableAnimation) {
                // Keep animation enabled for an estimated animation time plus a margin, and never less than 2s.
                const tokenCount = enhancedResult.bpe_strings.length;
                const estimatedAnimationTime = 100 + Math.ceil(tokenCount / 50) * 100;
                const delayTime = Math.max(2000, estimatedAnimationTime + 500);

                this.scheduleAnimationReset(delayTime);
            }
        } catch (error) {
            console.error('Error updating visualization:', error);
            this.deps.appStateManager.setIsAnalyzing(false);
            this.deps.appStateManager.setGlobalLoading(false);
            this.markDataInvalid();
            showAlertDialog(tr('Error'), 'Error rendering visualization. Check console for details.');
            return;
        }

        this.clearHighlights();

        this.rerenderHistogramsInternal();

        this.deps.appStateManager.updateState({ hasValidData: true, hasSemanticIndex: dataHasSemantic });

        this.deps.onSemanticIndexModelUpdate?.(dataHasSemantic ? (data?.result as { semantic_index?: { model?: string } })?.semantic_index?.model : undefined);
        this.syncSemanticUiFromConfig();
    }

    /**
     * Attaches a semantic index to the data that is already displayed.
     *
     * Does nothing when no data is cached or when the index carries no
     * tokens. Otherwise the decoded tokens are added to the current result,
     * the index is recorded on the raw snapshot, and the views are redrawn
     * with render animation switched off.
     *
     * @param semanticIndex Semantic index payload (model name and encoded tokens).
     */
    public handleSemanticIndexResponse(semanticIndex: { model?: string; colbert_tokens?: ColbertToken[] }): void {
        const data = this.currentState.currentData;
        const raw = this.currentState.rawApiResponse;
        if (!data?.result || !raw) return;

        const colbertTokens = semanticIndex?.colbert_tokens;
        if (!colbertTokens?.length) return;

        const decodedTokens = decodeColbertTokens(colbertTokens);
        const result = data.result as FrontendAnalyzeResult & { semanticColbertTokens?: ColbertToken[] };
        const enhancedResult: FrontendAnalyzeResult & { semanticColbertTokens?: ColbertToken[] } = {
            ...result,
            semanticColbertTokens: decodedTokens,
        };

        this.currentState.currentData = { ...data, result: enhancedResult };
        this.currentState.rawApiResponse = {
            ...raw,
            result: { ...raw.result, semantic_index: semanticIndex } as typeof raw.result,
        };
        this.deps.highlightController.updateCurrentData({ result: enhancedResult });
        this.deps.lmf.updateOptions({ enableRenderAnimation: false }, false);
        this.deps.lmf.update(enhancedResult);
        this.rerenderHistogramsInternal();
        this.deps.appStateManager.updateState({ hasSemanticIndex: true });
        this.deps.onSemanticIndexModelUpdate?.(semanticIndex.model);
        this.syncSemanticUiFromConfig();
    }

    /**
     * Scores the displayed semantic tokens against a query and redraws.
     *
     * Does nothing when no data is cached, when no cached semantic token has
     * an `embeddingVec`, or when no query embedding can be derived from the
     * payload. Otherwise each semantic token gains `cos` and `matchScore`
     * and the views are redrawn with render animation switched off.
     *
     * @param semanticIndex Query payload whose tokens are turned into a query embedding.
     */
    public handleSemanticQueryResponse(semanticIndex: { model?: string; colbert_tokens?: ColbertToken[] }): void {
        const data = this.currentState.currentData;
        if (!data?.result) return;

        const result = data.result as FrontendAnalyzeResult & { semanticColbertTokens?: ColbertToken[] };
        const tokens = result.semanticColbertTokens;
        if (!tokens?.length || !tokens.some((t) => t.embeddingVec)) return;

        const queryTokens = semanticIndex?.colbert_tokens;
        const queryVec = queryTokens ? queryEmbeddingFromColbertTokens(queryTokens) : null;
        if (!queryVec) return;

        const { cos, matchScore } = computeTokenSimilarities(tokens, queryVec);
        const tokensWithSimilarities: Array<ColbertToken & { cos: number; matchScore: number }> = tokens.map((t, i) => ({
            ...t,
            cos: cos[i],
            matchScore: matchScore[i],
        }));

        const enhancedResult: FrontendAnalyzeResult & { semanticColbertTokens?: typeof tokensWithSimilarities } = {
            ...result,
            semanticColbertTokens: tokensWithSimilarities,
        };

        this.currentState.currentData = { ...data, result: enhancedResult };
        this.deps.highlightController.updateCurrentData({ result: enhancedResult });
        this.deps.lmf.updateOptions({ enableRenderAnimation: false }, false);
        this.deps.lmf.update(enhancedResult);

        this.rerenderHistogramsInternal();
    }
}
|
|
|
|
|
|