| type QuantifierRead = { |
| consumed: number; |
| minRepeat: number; |
| maxRepeat: number | null; |
| }; |
|
|
| type TokenState = { |
| containsRepetition: boolean; |
| hasAmbiguousAlternation: boolean; |
| minLength: number; |
| maxLength: number; |
| }; |
|
|
| type ParseFrame = { |
| lastToken: TokenState | null; |
| containsRepetition: boolean; |
| hasAlternation: boolean; |
| branchMinLength: number; |
| branchMaxLength: number; |
| altMinLength: number | null; |
| altMaxLength: number | null; |
| }; |
|
|
| type PatternToken = |
| | { kind: "simple-token" } |
| | { kind: "group-open" } |
| | { kind: "group-close" } |
| | { kind: "alternation" } |
| | { kind: "quantifier"; quantifier: QuantifierRead }; |
|
|
| const SAFE_REGEX_CACHE_MAX = 256; |
| const SAFE_REGEX_TEST_WINDOW = 2048; |
| const safeRegexCache = new Map<string, RegExp | null>(); |
|
|
| function createParseFrame(): ParseFrame { |
| return { |
| lastToken: null, |
| containsRepetition: false, |
| hasAlternation: false, |
| branchMinLength: 0, |
| branchMaxLength: 0, |
| altMinLength: null, |
| altMaxLength: null, |
| }; |
| } |
|
|
| function addLength(left: number, right: number): number { |
| if (!Number.isFinite(left) || !Number.isFinite(right)) { |
| return Number.POSITIVE_INFINITY; |
| } |
| return left + right; |
| } |
|
|
| function multiplyLength(length: number, factor: number): number { |
| if (!Number.isFinite(length)) { |
| return factor === 0 ? 0 : Number.POSITIVE_INFINITY; |
| } |
| return length * factor; |
| } |
|
|
| function recordAlternative(frame: ParseFrame): void { |
| if (frame.altMinLength === null || frame.altMaxLength === null) { |
| frame.altMinLength = frame.branchMinLength; |
| frame.altMaxLength = frame.branchMaxLength; |
| return; |
| } |
| frame.altMinLength = Math.min(frame.altMinLength, frame.branchMinLength); |
| frame.altMaxLength = Math.max(frame.altMaxLength, frame.branchMaxLength); |
| } |
|
|
| function readQuantifier(source: string, index: number): QuantifierRead | null { |
| const ch = source[index]; |
| const consumed = source[index + 1] === "?" ? 2 : 1; |
| if (ch === "*") { |
| return { consumed, minRepeat: 0, maxRepeat: null }; |
| } |
| if (ch === "+") { |
| return { consumed, minRepeat: 1, maxRepeat: null }; |
| } |
| if (ch === "?") { |
| return { consumed, minRepeat: 0, maxRepeat: 1 }; |
| } |
| if (ch !== "{") { |
| return null; |
| } |
|
|
| let i = index + 1; |
| while (i < source.length && /\d/.test(source[i])) { |
| i += 1; |
| } |
| if (i === index + 1) { |
| return null; |
| } |
|
|
| const minRepeat = Number.parseInt(source.slice(index + 1, i), 10); |
| let maxRepeat: number | null = minRepeat; |
| if (source[i] === ",") { |
| i += 1; |
| const maxStart = i; |
| while (i < source.length && /\d/.test(source[i])) { |
| i += 1; |
| } |
| maxRepeat = i === maxStart ? null : Number.parseInt(source.slice(maxStart, i), 10); |
| } |
|
|
| if (source[i] !== "}") { |
| return null; |
| } |
| i += 1; |
| if (source[i] === "?") { |
| i += 1; |
| } |
| if (maxRepeat !== null && maxRepeat < minRepeat) { |
| return null; |
| } |
|
|
| return { consumed: i - index, minRepeat, maxRepeat }; |
| } |
|
|
| function tokenizePattern(source: string): PatternToken[] { |
| const tokens: PatternToken[] = []; |
| let inCharClass = false; |
|
|
| for (let i = 0; i < source.length; i += 1) { |
| const ch = source[i]; |
|
|
| if (ch === "\\") { |
| i += 1; |
| tokens.push({ kind: "simple-token" }); |
| continue; |
| } |
|
|
| if (inCharClass) { |
| if (ch === "]") { |
| inCharClass = false; |
| } |
| continue; |
| } |
|
|
| if (ch === "[") { |
| inCharClass = true; |
| tokens.push({ kind: "simple-token" }); |
| continue; |
| } |
|
|
| if (ch === "(") { |
| tokens.push({ kind: "group-open" }); |
| continue; |
| } |
|
|
| if (ch === ")") { |
| tokens.push({ kind: "group-close" }); |
| continue; |
| } |
|
|
| if (ch === "|") { |
| tokens.push({ kind: "alternation" }); |
| continue; |
| } |
|
|
| const quantifier = readQuantifier(source, i); |
| if (quantifier) { |
| tokens.push({ kind: "quantifier", quantifier }); |
| i += quantifier.consumed - 1; |
| continue; |
| } |
|
|
| tokens.push({ kind: "simple-token" }); |
| } |
|
|
| return tokens; |
| } |
|
|
| function analyzeTokensForNestedRepetition(tokens: PatternToken[]): boolean { |
| const frames: ParseFrame[] = [createParseFrame()]; |
|
|
| const emitToken = (token: TokenState) => { |
| const frame = frames[frames.length - 1]; |
| frame.lastToken = token; |
| if (token.containsRepetition) { |
| frame.containsRepetition = true; |
| } |
| frame.branchMinLength = addLength(frame.branchMinLength, token.minLength); |
| frame.branchMaxLength = addLength(frame.branchMaxLength, token.maxLength); |
| }; |
|
|
| const emitSimpleToken = () => { |
| emitToken({ |
| containsRepetition: false, |
| hasAmbiguousAlternation: false, |
| minLength: 1, |
| maxLength: 1, |
| }); |
| }; |
|
|
| for (const token of tokens) { |
| if (token.kind === "simple-token") { |
| emitSimpleToken(); |
| continue; |
| } |
|
|
| if (token.kind === "group-open") { |
| frames.push(createParseFrame()); |
| continue; |
| } |
|
|
| if (token.kind === "group-close") { |
| if (frames.length > 1) { |
| const frame = frames.pop() as ParseFrame; |
| if (frame.hasAlternation) { |
| recordAlternative(frame); |
| } |
| const groupMinLength = frame.hasAlternation |
| ? (frame.altMinLength ?? 0) |
| : frame.branchMinLength; |
| const groupMaxLength = frame.hasAlternation |
| ? (frame.altMaxLength ?? 0) |
| : frame.branchMaxLength; |
| emitToken({ |
| containsRepetition: frame.containsRepetition, |
| hasAmbiguousAlternation: |
| frame.hasAlternation && |
| frame.altMinLength !== null && |
| frame.altMaxLength !== null && |
| frame.altMinLength !== frame.altMaxLength, |
| minLength: groupMinLength, |
| maxLength: groupMaxLength, |
| }); |
| } |
| continue; |
| } |
|
|
| if (token.kind === "alternation") { |
| const frame = frames[frames.length - 1]; |
| frame.hasAlternation = true; |
| recordAlternative(frame); |
| frame.branchMinLength = 0; |
| frame.branchMaxLength = 0; |
| frame.lastToken = null; |
| continue; |
| } |
|
|
| const frame = frames[frames.length - 1]; |
| const previousToken = frame.lastToken; |
| if (!previousToken) { |
| continue; |
| } |
| if (previousToken.containsRepetition) { |
| return true; |
| } |
| if (previousToken.hasAmbiguousAlternation && token.quantifier.maxRepeat === null) { |
| return true; |
| } |
|
|
| const previousMinLength = previousToken.minLength; |
| const previousMaxLength = previousToken.maxLength; |
| previousToken.minLength = multiplyLength(previousToken.minLength, token.quantifier.minRepeat); |
| previousToken.maxLength = |
| token.quantifier.maxRepeat === null |
| ? Number.POSITIVE_INFINITY |
| : multiplyLength(previousToken.maxLength, token.quantifier.maxRepeat); |
| previousToken.containsRepetition = true; |
| frame.containsRepetition = true; |
| frame.branchMinLength = frame.branchMinLength - previousMinLength + previousToken.minLength; |
|
|
| const branchMaxBase = |
| Number.isFinite(frame.branchMaxLength) && Number.isFinite(previousMaxLength) |
| ? frame.branchMaxLength - previousMaxLength |
| : Number.POSITIVE_INFINITY; |
| frame.branchMaxLength = addLength(branchMaxBase, previousToken.maxLength); |
| } |
|
|
| return false; |
| } |
|
|
| function testRegexFromStart(regex: RegExp, value: string): boolean { |
| regex.lastIndex = 0; |
| return regex.test(value); |
| } |
|
|
| export function testRegexWithBoundedInput( |
| regex: RegExp, |
| input: string, |
| maxWindow = SAFE_REGEX_TEST_WINDOW, |
| ): boolean { |
| if (maxWindow <= 0) { |
| return false; |
| } |
| if (input.length <= maxWindow) { |
| return testRegexFromStart(regex, input); |
| } |
| const head = input.slice(0, maxWindow); |
| if (testRegexFromStart(regex, head)) { |
| return true; |
| } |
| return testRegexFromStart(regex, input.slice(-maxWindow)); |
| } |
|
|
| export function hasNestedRepetition(source: string): boolean { |
| |
| |
| return analyzeTokensForNestedRepetition(tokenizePattern(source)); |
| } |
|
|
| export function compileSafeRegex(source: string, flags = ""): RegExp | null { |
| const trimmed = source.trim(); |
| if (!trimmed) { |
| return null; |
| } |
| const cacheKey = `${flags}::${trimmed}`; |
| if (safeRegexCache.has(cacheKey)) { |
| return safeRegexCache.get(cacheKey) ?? null; |
| } |
|
|
| let compiled: RegExp | null = null; |
| if (!hasNestedRepetition(trimmed)) { |
| try { |
| compiled = new RegExp(trimmed, flags); |
| } catch { |
| compiled = null; |
| } |
| } |
|
|
| safeRegexCache.set(cacheKey, compiled); |
| if (safeRegexCache.size > SAFE_REGEX_CACHE_MAX) { |
| const oldestKey = safeRegexCache.keys().next().value; |
| if (oldestKey) { |
| safeRegexCache.delete(oldestKey); |
| } |
| } |
| return compiled; |
| } |
|
|