type QuantifierRead = { consumed: number; minRepeat: number; maxRepeat: number | null; }; type TokenState = { containsRepetition: boolean; hasAmbiguousAlternation: boolean; minLength: number; maxLength: number; }; type ParseFrame = { lastToken: TokenState | null; containsRepetition: boolean; hasAlternation: boolean; branchMinLength: number; branchMaxLength: number; altMinLength: number | null; altMaxLength: number | null; }; type PatternToken = | { kind: "simple-token" } | { kind: "group-open" } | { kind: "group-close" } | { kind: "alternation" } | { kind: "quantifier"; quantifier: QuantifierRead }; const SAFE_REGEX_CACHE_MAX = 256; const SAFE_REGEX_TEST_WINDOW = 2048; const safeRegexCache = new Map(); function createParseFrame(): ParseFrame { return { lastToken: null, containsRepetition: false, hasAlternation: false, branchMinLength: 0, branchMaxLength: 0, altMinLength: null, altMaxLength: null, }; } function addLength(left: number, right: number): number { if (!Number.isFinite(left) || !Number.isFinite(right)) { return Number.POSITIVE_INFINITY; } return left + right; } function multiplyLength(length: number, factor: number): number { if (!Number.isFinite(length)) { return factor === 0 ? 0 : Number.POSITIVE_INFINITY; } return length * factor; } function recordAlternative(frame: ParseFrame): void { if (frame.altMinLength === null || frame.altMaxLength === null) { frame.altMinLength = frame.branchMinLength; frame.altMaxLength = frame.branchMaxLength; return; } frame.altMinLength = Math.min(frame.altMinLength, frame.branchMinLength); frame.altMaxLength = Math.max(frame.altMaxLength, frame.branchMaxLength); } function readQuantifier(source: string, index: number): QuantifierRead | null { const ch = source[index]; const consumed = source[index + 1] === "?" ? 2 : 1; if (ch === "*") { return { consumed, minRepeat: 0, maxRepeat: null }; } if (ch === "+") { return { consumed, minRepeat: 1, maxRepeat: null }; } if (ch === "?") { return { consumed, minRepeat: 0, maxRepeat: 1 }; } if (ch !== "{") { return null; } let i = index + 1; while (i < source.length && /\d/.test(source[i])) { i += 1; } if (i === index + 1) { return null; } const minRepeat = Number.parseInt(source.slice(index + 1, i), 10); let maxRepeat: number | null = minRepeat; if (source[i] === ",") { i += 1; const maxStart = i; while (i < source.length && /\d/.test(source[i])) { i += 1; } maxRepeat = i === maxStart ? null : Number.parseInt(source.slice(maxStart, i), 10); } if (source[i] !== "}") { return null; } i += 1; if (source[i] === "?") { i += 1; } if (maxRepeat !== null && maxRepeat < minRepeat) { return null; } return { consumed: i - index, minRepeat, maxRepeat }; } function tokenizePattern(source: string): PatternToken[] { const tokens: PatternToken[] = []; let inCharClass = false; for (let i = 0; i < source.length; i += 1) { const ch = source[i]; if (ch === "\\") { i += 1; tokens.push({ kind: "simple-token" }); continue; } if (inCharClass) { if (ch === "]") { inCharClass = false; } continue; } if (ch === "[") { inCharClass = true; tokens.push({ kind: "simple-token" }); continue; } if (ch === "(") { tokens.push({ kind: "group-open" }); continue; } if (ch === ")") { tokens.push({ kind: "group-close" }); continue; } if (ch === "|") { tokens.push({ kind: "alternation" }); continue; } const quantifier = readQuantifier(source, i); if (quantifier) { tokens.push({ kind: "quantifier", quantifier }); i += quantifier.consumed - 1; continue; } tokens.push({ kind: "simple-token" }); } return tokens; } function analyzeTokensForNestedRepetition(tokens: PatternToken[]): boolean { const frames: ParseFrame[] = [createParseFrame()]; const emitToken = (token: TokenState) => { const frame = frames[frames.length - 1]; frame.lastToken = token; if (token.containsRepetition) { frame.containsRepetition = true; } frame.branchMinLength = addLength(frame.branchMinLength, token.minLength); frame.branchMaxLength = addLength(frame.branchMaxLength, token.maxLength); }; const emitSimpleToken = () => { emitToken({ containsRepetition: false, hasAmbiguousAlternation: false, minLength: 1, maxLength: 1, }); }; for (const token of tokens) { if (token.kind === "simple-token") { emitSimpleToken(); continue; } if (token.kind === "group-open") { frames.push(createParseFrame()); continue; } if (token.kind === "group-close") { if (frames.length > 1) { const frame = frames.pop() as ParseFrame; if (frame.hasAlternation) { recordAlternative(frame); } const groupMinLength = frame.hasAlternation ? (frame.altMinLength ?? 0) : frame.branchMinLength; const groupMaxLength = frame.hasAlternation ? (frame.altMaxLength ?? 0) : frame.branchMaxLength; emitToken({ containsRepetition: frame.containsRepetition, hasAmbiguousAlternation: frame.hasAlternation && frame.altMinLength !== null && frame.altMaxLength !== null && frame.altMinLength !== frame.altMaxLength, minLength: groupMinLength, maxLength: groupMaxLength, }); } continue; } if (token.kind === "alternation") { const frame = frames[frames.length - 1]; frame.hasAlternation = true; recordAlternative(frame); frame.branchMinLength = 0; frame.branchMaxLength = 0; frame.lastToken = null; continue; } const frame = frames[frames.length - 1]; const previousToken = frame.lastToken; if (!previousToken) { continue; } if (previousToken.containsRepetition) { return true; } if (previousToken.hasAmbiguousAlternation && token.quantifier.maxRepeat === null) { return true; } const previousMinLength = previousToken.minLength; const previousMaxLength = previousToken.maxLength; previousToken.minLength = multiplyLength(previousToken.minLength, token.quantifier.minRepeat); previousToken.maxLength = token.quantifier.maxRepeat === null ? Number.POSITIVE_INFINITY : multiplyLength(previousToken.maxLength, token.quantifier.maxRepeat); previousToken.containsRepetition = true; frame.containsRepetition = true; frame.branchMinLength = frame.branchMinLength - previousMinLength + previousToken.minLength; const branchMaxBase = Number.isFinite(frame.branchMaxLength) && Number.isFinite(previousMaxLength) ? frame.branchMaxLength - previousMaxLength : Number.POSITIVE_INFINITY; frame.branchMaxLength = addLength(branchMaxBase, previousToken.maxLength); } return false; } function testRegexFromStart(regex: RegExp, value: string): boolean { regex.lastIndex = 0; return regex.test(value); } export function testRegexWithBoundedInput( regex: RegExp, input: string, maxWindow = SAFE_REGEX_TEST_WINDOW, ): boolean { if (maxWindow <= 0) { return false; } if (input.length <= maxWindow) { return testRegexFromStart(regex, input); } const head = input.slice(0, maxWindow); if (testRegexFromStart(regex, head)) { return true; } return testRegexFromStart(regex, input.slice(-maxWindow)); } export function hasNestedRepetition(source: string): boolean { // Conservative parser: tokenize first, then check if repeated tokens/groups are repeated again. // Non-goal: complete regex AST support; keep strict enough for config safety checks. return analyzeTokensForNestedRepetition(tokenizePattern(source)); } export function compileSafeRegex(source: string, flags = ""): RegExp | null { const trimmed = source.trim(); if (!trimmed) { return null; } const cacheKey = `${flags}::${trimmed}`; if (safeRegexCache.has(cacheKey)) { return safeRegexCache.get(cacheKey) ?? null; } let compiled: RegExp | null = null; if (!hasNestedRepetition(trimmed)) { try { compiled = new RegExp(trimmed, flags); } catch { compiled = null; } } safeRegexCache.set(cacheKey, compiled); if (safeRegexCache.size > SAFE_REGEX_CACHE_MAX) { const oldestKey = safeRegexCache.keys().next().value; if (oldestKey) { safeRegexCache.delete(oldestKey); } } return compiled; }