Spaces:

Girlz
/

TokenTrace

Running

File size: 12,491 Bytes
/**
 * 语义搜索控制器
 * 负责执行语义分析（整段 / 分块模式）
 */

import * as d3 from 'd3';
import type { TextAnalysisAPI } from '../../shared/api/GLTR_API';
import { isSemanticFromCache } from '../../shared/api/GLTR_API';
import type { AppStateManager } from '../../features/analysis/appStateManager';
import type { VisualizationUpdater } from '../../features/analysis/visualizationUpdater';
import type { GLTR_Text_Box } from '../../shared/vis/GLTR_Text_Box';
import { SEMANTIC_CHUNK_BYTES } from '../core/constants';
import { getSemanticMatchThreshold } from '../cross/semanticThresholdManager';
import { getDigitsMergeEnabled } from '../cross/digitsMergeManager';
import {
    getAttentionRawScore,
    mergeAttentionTokensFullyForRendering,
    normalizeTokenScores,
    splitTextToChunks,
} from '../cross/semanticUtils';
import type { signalFitResult } from '../../features/analysis/signalThresholdDetector';
import { CHUNK_SEARCH_HOLD_MS } from '../vis/constants';
import * as semanticResultCache from '../cross/semanticResultCache';

function isChunkSemanticallyCached(chunkText: string, query: string, submode?: string): boolean {
    if (submode === 'hybrid') {
        return !!semanticResultCache.get(chunkText, query, 'count')
            && !!semanticResultCache.get(chunkText, query, 'fill_blank');
    }
    return !!semanticResultCache.get(chunkText, query, submode);
}

/** 可中止的短时等待（abort 时提前结束，不抛错） */
function delayAbortable(ms: number, signal: AbortSignal): Promise<void> {
    return new Promise((resolve) => {
        const id = window.setTimeout(resolve, ms);
        const onAbort = () => {
            window.clearTimeout(id);
            resolve();
        };
        if (signal.aborted) {
            onAbort();
            return;
        }
        signal.addEventListener('abort', onAbort, { once: true });
    });
}

export interface SemanticSearchControllerDeps {
    getQuery: () => string;
    getText: () => string;
    getSubmode: () => string | undefined;
    isChunkedMode: () => boolean;
    api: TextAnalysisAPI;
    appStateManager: AppStateManager;
    visualizationUpdater: VisualizationUpdater;
    lmf: GLTR_Text_Box;
    showToast: (message: string, type: 'success' | 'error') => void;
    showSemanticError: (message?: string) => void;
    onSearchStart: (query: string) => void;
    finishSemanticSearch: (query: string, matchDegree: number | null, fromCache: boolean) => void;
    tr: (key: string) => string;
    extractErrorMessage: (err: unknown, fallback: string) => string;
}

export class SemanticSearchController {
    private deps: SemanticSearchControllerDeps;
    private abortController: AbortController | null = null;

    constructor(deps: SemanticSearchControllerDeps) {
        this.deps = deps;
    }

    abort(): void {
        this.abortController?.abort();
    }

    run(): void {
        void this.runSemanticSearchBase(async ({ query, text, submode, signal }) => {
            if (this.deps.isChunkedMode()) {
                await this.runChunked({ query, text, submode, signal });
            } else {
                await this.runWhole({ query, text, submode, signal });
            }
        });
    }

    private async runSemanticSearchBase(
        execute: (params: { query: string; text: string; submode: string | undefined; signal: AbortSignal }) => Promise<void>
    ): Promise<void> {
        const query = this.deps.getQuery();
        if (!query) return;
        const text = this.deps.getText();
        if (!text) {
            this.deps.showToast(this.deps.tr('Please enter text first'), 'error');
            return;
        }
        this.abortController = new AbortController();
        const signal = this.abortController.signal;
        this.deps.onSearchStart(query);
        try {
            this.deps.appStateManager.setSemanticSearching(true);
            d3.select('#semantic_match_degree').style('display', 'none');
            d3.select('#semantic_search_loader').style('visibility', 'visible');
            d3.select('#all_result').style('opacity', 1).style('display', null);
            this.deps.lmf.setTextOnly(text);
            this.deps.visualizationUpdater.updateHistogramVisibilityForPending('semantic', text, this.deps.isChunkedMode());
            await execute({ query, text, submode: this.deps.getSubmode(), signal });
        } catch (err) {
            if (err instanceof Error && err.name === 'AbortError') {
                this.deps.lmf.hideLoading();
                this.deps.visualizationUpdater.rerenderHistograms();
                return;
            }
            this.deps.showToast(
                this.deps.extractErrorMessage(err, this.deps.tr('Semantic analysis failed')),
                'error'
            );
            this.deps.lmf.hideLoading();
            this.deps.visualizationUpdater.rerenderHistograms();
        } finally {
            this.abortController = null;
            this.deps.appStateManager.setSemanticSearching(false);
            d3.select('#semantic_search_loader').style('visibility', 'hidden');
        }
    }

    private async runWhole(params: { query: string; text: string; submode: string | undefined; signal: AbortSignal }): Promise<void> {
        const { query, text, submode, signal } = params;
        const onProgress = (step: number, totalSteps: number, stage: string, percentage?: number) => {
            const progressText = percentage !== undefined && percentage !== null
                ? `Step ${step}/${totalSteps}:\t ${stage} ${percentage}%`
                : `Step ${step}/${totalSteps}:\t ${stage}`;
            d3.select('#semantic_progress').text(progressText).style('display', 'inline-block');
        };
        const res = await this.deps.api.analyzeSemantic(query, text, { onProgress, submode, debug_info: true, signal });
        if (res?.success && res?.token_attention) {
            this.deps.visualizationUpdater.handleSemanticResponse(res, text);
            const md = res?.full_match_degree;
            this.deps.finishSemanticSearch(query, md != null && typeof md === 'number' ? md : null, isSemanticFromCache(res));
        } else {
            this.deps.showSemanticError(res?.message);
        }
    }

    private async runChunked(params: { query: string; text: string; submode: string | undefined; signal: AbortSignal }): Promise<void> {
        const { query, text, submode, signal } = params;
        const chunks = splitTextToChunks(text, SEMANTIC_CHUNK_BYTES);
        if (chunks.length === 0) {
            this.deps.visualizationUpdater.handleSemanticResponse({ token_attention: [] }, text, undefined);
            this.deps.finishSemanticSearch(query, null, true);
            return;
        }
        /** 各 chunk 内已 overlap+digit+normalize，仅做 offset 平移后拼接，全文不再合并/归一化 */
        const allChunkProcessedTokens: Array<{
            offset: [number, number];
            raw: string;
            score: number;
            rawScore?: number;
        }> = [];
        const chunkInfos: Array<{ startOffset: number; endOffset: number; chunkIndex: number; chunkMatchDegree: number; thresholdResult?: signalFitResult }> = [];
        let maxMatchDegree = 0;
        let allFromCache = true;
        let aborted = false;
        let lastChunkFromCache = false;
        /** 上一块上色后的 hold 期间已预发起的下一块分析 */
        let pendingNextAnalysis: ReturnType<TextAnalysisAPI['analyzeSemantic']> | null = null;
        /** hold 结束后已滚到下一块，本轮循环开头无需再滚 */
        let scrollDoneForIndex: number | null = null;

        const needsAutoScroll = chunks.some((c) => !isChunkSemanticallyCached(c.text, query, submode));
        if (needsAutoScroll) {
            this.deps.lmf.beginChunkSearchAutoScroll();
        }
        try {
        for (let i = 0; i < chunks.length; i++) {
            if (signal.aborted) break;
            const chunk = chunks[i];
            d3.select('#semantic_progress').text(`Chunk ${i + 1}/${chunks.length}`).style('display', 'inline-block');

            const res = pendingNextAnalysis
                ? await pendingNextAnalysis
                : await this.deps.api.analyzeSemantic(query, chunk.text, { submode, signal });
            pendingNextAnalysis = null;
            // 上色/直方图仍以本块返回的 isSemanticFromCache(res) 为准，从首个非缓存块起才刷新 UI。
            // isChunkSemanticallyCached 仅用于滚动跟随与预取，与 API 读同一套 semanticResultCache。
            if (signal.aborted) {
                aborted = true;
                break;
            }
            if (!res?.success) {
                this.deps.showSemanticError(res?.message);
                aborted = true;
                break;
            }
            lastChunkFromCache = isSemanticFromCache(res);
            if (!lastChunkFromCache) allFromCache = false;
            const matchDegree = res.full_match_degree ?? 0;
            maxMatchDegree = Math.max(maxMatchDegree, matchDegree);
            const matched = matchDegree >= getSemanticMatchThreshold();
            const merged = mergeAttentionTokensFullyForRendering(res.token_attention ?? [], chunk.text, {
                digitMerge: getDigitsMergeEnabled(),
            });
            const normalized = normalizeTokenScores(merged);
            const tokens = matched
                ? normalized
                : normalized.map((t) => ({ ...t, rawScore: getAttentionRawScore(t), score: 0 }));

            chunkInfos.push({
                startOffset: chunk.startOffset,
                endOffset: chunk.startOffset + chunk.text.length,
                chunkIndex: i,
                chunkMatchDegree: matchDegree,
            });
            const tokensOffsetAdjusted = tokens.map(t => ({
                ...t,
                offset: [t.offset[0] + chunk.startOffset, t.offset[1] + chunk.startOffset] as [number, number],
            }));
            allChunkProcessedTokens.push(...tokensOffsetAdjusted);
            if (!lastChunkFromCache) {
                if (scrollDoneForIndex !== i) {
                    this.deps.lmf.followSearchingChunk(chunk.startOffset);
                }
                scrollDoneForIndex = null;
                if (!this.deps.visualizationUpdater.handleSemanticResponse(
                    { token_attention: allChunkProcessedTokens, chunkInfos, debug_info: undefined },
                    text,
                    undefined
                )) {
                    aborted = true;
                    this.deps.showSemanticError();
                    break;
                }
                const nextIndex = i + 1;
                if (nextIndex < chunks.length) {
                    const nextChunk = chunks[nextIndex]!;
                    pendingNextAnalysis = this.deps.api.analyzeSemantic(query, nextChunk.text, { submode, signal });
                    await delayAbortable(CHUNK_SEARCH_HOLD_MS, signal);
                    if (signal.aborted) {
                        aborted = true;
                        break;
                    }
                    if (!isChunkSemanticallyCached(nextChunk.text, query, submode)) {
                        this.deps.lmf.followSearchingChunk(nextChunk.startOffset);
                        scrollDoneForIndex = nextIndex;
                    }
                }
            }
        }

        if (!aborted) {
            if (lastChunkFromCache) {
                this.deps.visualizationUpdater.handleSemanticResponse(
                    { token_attention: allChunkProcessedTokens, chunkInfos, debug_info: undefined },
                    text,
                    undefined
                );
            }
            if (!allFromCache) {
                await delayAbortable(CHUNK_SEARCH_HOLD_MS, signal);
            }
            if (!signal.aborted) {
                const threshold = getSemanticMatchThreshold();
                const firstMatch = chunkInfos.find((c) => c.chunkMatchDegree >= threshold);
                if (firstMatch) {
                    this.deps.lmf.scrollToChunkStart(firstMatch.startOffset);
                }
                this.deps.finishSemanticSearch(query, maxMatchDegree, allFromCache);
            }
        }
        } finally {
            if (needsAutoScroll) {
                this.deps.lmf.endChunkSearchAutoScroll();
            }
        }
    }
}