| |
| |
| |
| |
|
|
| import {FrontendAnalyzeResult} from "../api/GLTR_API"; |
| import {TokenFragmentRect} from "./types"; |
|
|
/**
 * One text node of the container, described in two coordinate systems:
 * Unicode code points (the unit token offsets are resolved in) and UTF-16
 * code units (the unit DOM `Range` boundaries use).
 */
interface TextNodeIndexEntry {
  /** The DOM text node this entry describes. */
  readonly node: Text;
  /** Code-point offset of the node's first character within the whole container text. */
  readonly startOffset: number;
  /** Code-point offset just past the node's last character (exclusive). */
  readonly endOffset: number;
  /** UTF-16 code-unit offset of the node's start within the whole container text. */
  readonly utf16Start: number;
  /** UTF-16 code-unit offset just past the node's end (exclusive). */
  readonly utf16End: number;
  /**
   * Maps a node-local code-point offset to the node-local UTF-16 offset.
   * Holds one more entry than the node has characters, so the position
   * after the last character can be looked up as well.
   */
  readonly charToUtf16Map: readonly number[];
}
|
|
/**
 * Converts token character offsets into rectangles relative to a container
 * element.
 *
 * Token offsets are treated as Unicode code-point offsets into the
 * concatenated text of every text node below the container. They are
 * translated to DOM `Range` boundaries (which count UTF-16 code units) through
 * a lazily built index of the container's text nodes.
 *
 * The index is cached; call {@link resetIndex} whenever the container's text
 * nodes change, otherwise stale nodes/offsets are used.
 */
export class TokenPositionCalculator {
  private textNodeIndex?: TextNodeIndexEntry[];
  private baseNode: HTMLElement;

  /**
   * @param baseNode Container element whose text nodes are indexed and whose
   *   bounding box is the origin of every returned rectangle.
   */
  constructor(baseNode: HTMLElement) {
    this.baseNode = baseNode;
  }

  /**
   * @returns The product of the CSS `zoom` values of the container and its
   *   ancestors (1 when none of them sets a zoom).
   */
  getZoom(): number {
    return this.getAccumulatedZoom(this.baseNode);
  }

  /**
   * Multiplies the computed CSS `zoom` of `element` and of each ancestor,
   * visiting at most 50 elements.
   *
   * Empty, `normal` and unparseable values are ignored; zero or negative
   * values are ignored with a warning.
   */
  private getAccumulatedZoom(element: HTMLElement): number {
    const MAX_DEPTH = 50;
    let zoom = 1;
    let current: HTMLElement | null = element;
    let depth = 0;

    while (current && depth < MAX_DEPTH) {
      const style = window.getComputedStyle(current);

      if (typeof style.zoom === 'string' && style.zoom !== '' && style.zoom !== 'normal') {
        const elementZoom = parseFloat(style.zoom);

        if (elementZoom > 0) {
          zoom *= elementZoom;
        } else if (elementZoom <= 0) {
          // NaN fails both comparisons and is skipped silently.
          console.warn(`[TokenPositionCalculator] Invalid zoom value: ${style.zoom}`, current);
        }
      }

      current = current.parentElement;
      depth++;
    }

    // Warn only when the walk was actually cut short. A chain of exactly
    // MAX_DEPTH elements reaches the root (current === null) and is complete.
    if (current) {
      console.warn(`[TokenPositionCalculator] DOM depth exceeded ${MAX_DEPTH}, stopping zoom calculation`);
    }

    return zoom;
  }

  /**
   * Computes the on-screen fragments of each token, relative to the
   * container's top-left corner and divided by the accumulated zoom.
   *
   * A token yields one entry per non-empty client rect of its text range (for
   * example one per line when it wraps). `fragmentIndex` and `fragmentCount`
   * number only the fragments that are actually returned.
   *
   * @param rd Analysis result; `rd.bpe_strings[i].offset` is read as the
   *   `[start, end)` code-point span of token `i`.
   * @param fromTokenIndex Tokens with a smaller index are skipped.
   * @returns Fragment rectangles in token order. Tokens with an empty or
   *   invalid span are skipped silently; tokens whose span cannot be located
   *   in the container text are skipped with a warning.
   */
  calculateTokenPositions(rd: FrontendAnalyzeResult, fromTokenIndex = 0): TokenFragmentRect[] {
    if (!this.baseNode) return [];

    const positions: TokenFragmentRect[] = [];
    const containerRect = this.baseNode.getBoundingClientRect();
    const zoom = this.getAccumulatedZoom(this.baseNode);

    rd.bpe_strings.forEach((tokenObj, index) => {
      if (index < fromTokenIndex) return;

      const [start, end] = tokenObj.offset;
      // Written positively so NaN offsets are rejected too.
      if (!(start >= 0 && end > start)) return;

      const startResult = this.findNodeAndOffset(start);
      // Resolve the end just after the token's last character. Looking up
      // `end` itself lands at offset 0 of the *next* text node whenever the
      // token ends on a node boundary, which stretches the range across
      // whatever sits between the two nodes.
      const endResult = this.findEndBoundary(end);

      if (!startResult || !endResult) {
        console.warn(`⚠️ 无法找到token ${index} 的位置 (${start}, ${end})`);
        return;
      }

      const range = document.createRange();
      range.setStart(startResult.node, startResult.offset);
      range.setEnd(endResult.node, endResult.offset);

      const rectList = Array.from(range.getClientRects());
      const candidates = rectList.length > 0 ? rectList : [range.getBoundingClientRect()];
      // Drop empty rects before numbering so indices are contiguous and
      // fragmentCount matches the number of emitted fragments.
      const fragments = candidates.filter(
        (rect) => Boolean(rect) && rect.width !== 0 && rect.height !== 0
      );

      fragments.forEach((rect, fragmentIndex) => {
        const tokenPos = {
          tokenIndex: index,
          fragmentIndex,
          fragmentCount: fragments.length,
          rectKey: `${index}-${fragmentIndex}`,
          x: (rect.left - containerRect.left) / zoom,
          y: (rect.top - containerRect.top) / zoom,
          width: rect.width / zoom,
          height: rect.height / zoom
        };
        positions.push(tokenPos);
      });
    });

    return positions;
  }

  /**
   * Walks every text node below the container in document order and records,
   * for each one, its span in code points and in UTF-16 code units plus a
   * per-node code-point to UTF-16 offset table. Replaces any cached index.
   */
  buildTextNodeIndex(): void {
    if (!this.baseNode) {
      this.textNodeIndex = undefined;
      return;
    }

    const index: TextNodeIndexEntry[] = [];
    let charOffset = 0;
    let utf16Offset = 0;

    const walker = document.createTreeWalker(this.baseNode, NodeFilter.SHOW_TEXT, null);

    for (let next = walker.nextNode(); next; next = walker.nextNode()) {
      const node = next as Text;
      const text = node.textContent ?? '';

      // charToUtf16Map[i] is the UTF-16 offset of the i-th code point; the
      // final entry is the position after the last character. Iterating a
      // string with for...of yields whole code points, so an astral character
      // advances the UTF-16 position by 2.
      const charToUtf16Map: number[] = [0];
      let utf16Pos = 0;
      for (const char of text) {
        utf16Pos += char.length;
        charToUtf16Map.push(utf16Pos);
      }
      const charLength = charToUtf16Map.length - 1;

      index.push({
        node,
        startOffset: charOffset,
        endOffset: charOffset + charLength,
        utf16Start: utf16Offset,
        utf16End: utf16Offset + text.length,
        charToUtf16Map
      });

      charOffset += charLength;
      utf16Offset += text.length;
    }

    this.textNodeIndex = index;
  }

  /**
   * Resolves a code-point offset in the container text to a DOM position.
   *
   * The result is the text node containing the character at `globalOffset`
   * and the UTF-16 offset of that character inside the node. An offset equal
   * to the total text length resolves to the end of the last text node.
   * Builds the index on first use.
   *
   * @param globalOffset Code-point offset from the start of the container text.
   * @returns The node and node-local UTF-16 offset, or `null` when the
   *   container has no text nodes or the offset is negative, fractional,
   *   NaN, or past the end of the text.
   */
  findNodeAndOffset(globalOffset: number): { node: Text, offset: number } | null {
    const hit = this.locateCharacter(globalOffset);
    if (hit) {
      return { node: hit.entry.node, offset: hit.entry.charToUtf16Map[hit.local] };
    }

    const index = this.textNodeIndex;
    if (!index || index.length === 0) {
      return null;
    }

    // No character starts at the very end of the text, but it is still a
    // valid boundary position.
    const last = index[index.length - 1];
    if (globalOffset !== last.endOffset) {
      return null;
    }
    return { node: last.node, offset: last.charToUtf16Map[last.endOffset - last.startOffset] };
  }

  /**
   * Discards the cached text-node index; it is rebuilt on the next lookup.
   */
  resetIndex(): void {
    this.textNodeIndex = undefined;
  }

  /**
   * Resolves an exclusive end offset to the position right after character
   * `endOffset - 1`, so the boundary stays in the node holding that character.
   */
  private findEndBoundary(endOffset: number): { node: Text, offset: number } | null {
    const hit = this.locateCharacter(endOffset - 1);
    if (!hit) {
      return null;
    }
    return { node: hit.entry.node, offset: hit.entry.charToUtf16Map[hit.local + 1] };
  }

  /**
   * Binary-searches the index (building it if missing or empty) for the text
   * node that contains the character at `charOffset`. Empty text nodes never
   * match because their span contains no character.
   */
  private locateCharacter(charOffset: number): { entry: TextNodeIndexEntry, local: number } | null {
    if (!Number.isInteger(charOffset)) {
      return null;
    }

    if (!this.textNodeIndex || this.textNodeIndex.length === 0) {
      this.buildTextNodeIndex();
    }
    const index = this.textNodeIndex ?? [];

    let low = 0;
    let high = index.length - 1;
    while (low <= high) {
      const mid = Math.floor((low + high) / 2);
      const entry = index[mid];

      if (charOffset < entry.startOffset) {
        high = mid - 1;
      } else if (charOffset >= entry.endOffset) {
        low = mid + 1;
      } else {
        return { entry, local: charOffset - entry.startOffset };
      }
    }
    return null;
  }
}
|
|
|
|