| | |
/** Matches the opening of a dollar-delimited mhchem chemical expression: `$\ce{`. */
const MHCHEM_CE_REGEX = /\$\\ce\{/g;
/** Matches the opening of a dollar-delimited mhchem physical-unit expression: `$\pu{`. */
const MHCHEM_PU_REGEX = /\$\\pu\{/g;
/**
 * Matches a complete `\\ce{...}` expression (backslash already doubled, no `}` inside).
 * The first alternative consumes expressions that are already wrapped in `$$...$$`,
 * so that their inner `$` delimiters are never mistaken for a single-dollar expression;
 * the second alternative matches the single-dollar form `$\\ce{...}$`.
 */
const MHCHEM_CE_ESCAPED_REGEX = /\$\$\\\\ce\{[^}]*\}\$\$|\$\\\\ce\{[^}]*\}\$/g;
/** Same as `MHCHEM_CE_ESCAPED_REGEX`, for `\\pu{...}` expressions. */
const MHCHEM_PU_ESCAPED_REGEX = /\$\$\\\\pu\{[^}]*\}\$\$|\$\\\\pu\{[^}]*\}\$/g;
/**
 * Matches a `$` that introduces a currency amount. The `$` must not be preceded by `\` or `$`
 * nor followed by `$`, and must be followed by digits (optional `,ddd` groups, optional
 * decimals, optional K/M/B suffix) that end at whitespace, end of input, or a
 * non-alphanumeric character. Only the `$` itself is consumed.
 */
const CURRENCY_REGEX =
  /(?<![\\$])\$(?!\$)(?=\d+(?:,\d{3})*(?:\.\d+)?(?:[KMBkmb])?(?:\s|$|[^a-zA-Z\d]))/g;
/**
 * Matches a single-line `$...$` span whose delimiters are neither escaped with `\` nor part
 * of `$$`, and whose closing `$` does not directly follow a backtick. Group 1 is the content.
 */
const SINGLE_DOLLAR_REGEX = /(?<!\\)\$(?!\$)((?:[^$\n]|\\[$])+?)(?<!\\)(?<!`)\$(?!\$)/g;

/**
 * Rewrites mhchem expressions (`\ce{...}` and `\pu{...}`) that sit directly after a `$`.
 *
 * Two things happen, in this order:
 * 1. The backslash of every `$\ce{` / `$\pu{` is doubled.
 * 2. Every complete single-dollar expression (`$\\ce{...}$`) is promoted to `$$\\ce{...}$$`.
 *    Expressions that are already delimited by `$$...$$` are left as they are instead of
 *    receiving a third pair of dollar signs.
 *
 * @param text - Text that may contain mhchem expressions.
 * @returns The text with mhchem expressions escaped and single-dollar ones promoted.
 */
function escapeMhchem(text: string): string {
  // Well-formed display expressions are matched only so they get skipped here.
  const promoteInline = (match: string): string =>
    match.startsWith('$$') ? match : `$${match}$`;

  // Replacer functions are used throughout so that `$` in the output is never
  // interpreted as a special replacement pattern.
  return text
    .replace(MHCHEM_CE_REGEX, () => '$\\\\ce{')
    .replace(MHCHEM_PU_REGEX, () => '$\\\\pu{')
    .replace(MHCHEM_CE_ESCAPED_REGEX, promoteInline)
    .replace(MHCHEM_PU_ESCAPED_REGEX, promoteInline);
}
| |
|
| | |
| | |
| | |
| | |
| | |
/**
 * Finds the index ranges of `content` that are code: fenced blocks delimited by
 * triple backticks and inline spans delimited by single backticks.
 *
 * - Single backticks inside an open fence are ignored.
 * - A fence that is never closed covers everything up to the end of the input, so
 *   partially received (still streaming) code blocks are protected as well.
 * - An inline span that is never closed produces no region.
 *
 * @param content - The text to scan.
 * @returns `[start, end]` pairs (both inclusive, delimiters included), sorted by
 *   `start` and non-overlapping, which makes the result safe to binary-search.
 */
function findCodeBlockRegions(content: string): Array<[number, number]> {
  const regions: Array<[number, number]> = [];
  let inlineStart = -1;
  let fenceStart = -1;

  for (let i = 0; i < content.length; i++) {
    if (content[i] !== '`') continue;

    if (content.startsWith('```', i)) {
      if (fenceStart === -1) {
        fenceStart = i;
      } else {
        regions.push([fenceStart, i + 2]);
        fenceStart = -1;
      }
      i += 2; // skip the remaining two backticks of the fence marker
    } else if (fenceStart === -1) {
      if (inlineStart === -1) {
        inlineStart = i;
      } else {
        regions.push([inlineStart, i]);
        inlineStart = -1;
      }
    }
  }

  // An unterminated fence runs to the end of the input.
  if (fenceStart !== -1) {
    regions.push([fenceStart, content.length - 1]);
  }

  // An inline span that encloses a fence is recorded after that fence, so the raw
  // list can be out of order and overlapping. Normalise it: sort, then merge.
  regions.sort((a, b) => a[0] - b[0]);
  const merged: Array<[number, number]> = [];
  for (const [start, end] of regions) {
    const last = merged[merged.length - 1];
    if (last !== undefined && start <= last[1]) {
      last[1] = Math.max(last[1], end);
    } else {
      merged.push([start, end]);
    }
  }

  return merged;
}
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
/**
 * Tells whether `position` lies inside one of the given code regions.
 *
 * The lookup is a binary search, so `codeRegions` is expected to be ordered by start
 * index with no overlaps. Both ends of a region count as inside.
 *
 * @param position - Index into the text being processed.
 * @param codeRegions - `[start, end]` pairs (inclusive) describing code spans.
 * @returns `true` when some region contains `position`, otherwise `false`.
 */
function isInCodeBlock(position: number, codeRegions: Array<[number, number]>): boolean {
  let lo = 0;
  let hi = codeRegions.length - 1;

  while (lo <= hi) {
    const middle = lo + Math.floor((hi - lo) / 2);
    const [regionStart, regionEnd] = codeRegions[middle];

    if (position < regionStart) {
      // Target is left of this region.
      hi = middle - 1;
    } else if (position >= regionStart && position <= regionEnd) {
      return true;
    } else {
      // Target is right of this region.
      lo = middle + 1;
    }
  }

  return false;
}
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
/**
 * Prepares text containing `$` characters for math rendering.
 *
 * Steps, in order:
 * 1. Return the input untouched when it contains no `$`.
 * 2. Escape mhchem expressions when `\ce{` or `\pu{` is present.
 * 3. Escape currency dollar signs (`$` becomes `\$`) outside code regions.
 * 4. Convert remaining single-dollar spans (`$...$`) outside code regions to `$$...$$`.
 *
 * Code regions are located again after step 3: every escaped currency sign inserts a
 * backslash and shifts all later offsets, so regions computed beforehand would no longer
 * line up with the text that step 4 scans.
 *
 * @param content - The raw text.
 * @returns The processed text.
 */
export function preprocessLaTeX(content: string): string {
  // Nothing to do without any dollar sign.
  if (!content.includes('$')) return content;

  let processed = content;
  if (content.includes('\\ce{') || content.includes('\\pu{')) {
    processed = escapeMhchem(content);
  }

  let codeRegions = findCodeBlockRegions(processed);
  let match: RegExpExecArray | null;

  // Pass 1: escape currency dollar signs that are not inside code.
  const escapedParts: string[] = [];
  let lastIndex = 0;

  // The regex is a shared global-flag instance; start every run from the beginning.
  CURRENCY_REGEX.lastIndex = 0;
  while ((match = CURRENCY_REGEX.exec(processed)) !== null) {
    if (isInCodeBlock(match.index, codeRegions)) continue;
    escapedParts.push(processed.substring(lastIndex, match.index), '\\$');
    lastIndex = match.index + 1;
  }

  // lastIndex only moves when at least one dollar sign was escaped.
  if (lastIndex > 0) {
    escapedParts.push(processed.substring(lastIndex));
    processed = escapedParts.join('');
    // Offsets shifted by the inserted backslashes; refresh the regions for pass 2.
    codeRegions = findCodeBlockRegions(processed);
  }

  // Pass 2: promote single-dollar math to double-dollar math outside code.
  const result: string[] = [];
  lastIndex = 0;

  SINGLE_DOLLAR_REGEX.lastIndex = 0;
  while ((match = SINGLE_DOLLAR_REGEX.exec(processed)) !== null) {
    if (isInCodeBlock(match.index, codeRegions)) continue;
    result.push(processed.substring(lastIndex, match.index), `$$${match[1]}$$`);
    lastIndex = match.index + match[0].length;
  }
  result.push(processed.substring(lastIndex));

  return result.join('');
}
| |
|