| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| import { SHELL_KEYWORDS } from './bashParser.js' |
| import type { Node } from './parser.js' |
| import { PARSE_ABORTED, parseCommandRaw } from './parser.js' |
|
|
/**
 * One I/O redirection attached to a command.
 */
export type Redirect = {
  /** Redirection operator. */
  op: '>' | '>>' | '<' | '<<' | '>&' | '>|' | '<&' | '&>' | '&>>' | '<<<'
  /** Redirect target with quoting/escapes resolved to a literal string. */
  target: string
  /** Explicit file descriptor written before the operator, if any. */
  fd?: number
}

/**
 * A single command extracted from the AST in a form suitable for
 * rule matching.
 */
export type SimpleCommand = {
  /** Resolved argument vector (quotes and escapes removed). */
  argv: string[]
  /** `NAME=value` assignments that prefix the command. */
  envVars: { name: string; value: string }[]
  /** Redirections that apply to this command. */
  redirects: Redirect[]
  /** Source text of the command (or a re-quoted rendering of `argv`). */
  text: string
}

/**
 * Outcome of the security parse:
 * - `simple`: every command was extracted into `commands`.
 * - `too-complex`: the input uses a construct that cannot be statically
 *   resolved; `reason` explains why and `nodeType` names the offending node
 *   when known.
 * - `parse-unavailable`: the parser produced no tree.
 */
export type ParseForSecurityResult =
  | { kind: 'simple'; commands: SimpleCommand[] }
  | { kind: 'too-complex'; reason: string; nodeType?: string }
  | { kind: 'parse-unavailable' }
|
|
| |
| |
| |
| |
| |
| |
| |
/**
 * Container node types whose children are walked recursively by
 * `collectCommands` (which handles `redirected_statement` through a
 * dedicated branch before consulting this set).
 */
const STRUCTURAL_TYPES = new Set([
  'program',
  'list',
  'pipeline',
  'redirected_statement',
])

/**
 * Operator/separator token types that appear between the statements of a
 * structural node. They carry no command of their own and are skipped while
 * walking children.
 */
const SEPARATOR_TYPES = new Set(['&&', '||', '|', ';', '&', '|&', '\n'])
|
|
| |
| |
| |
| |
| |
| |
| |
/**
 * Sentinel string for a value produced at runtime by a command substitution.
 * NOTE(review): the site that inserts it is outside the visible part of the
 * file — confirm against walkString.
 */
const CMDSUB_PLACEHOLDER = '__CMDSUB_OUTPUT__'

/**
 * Sentinel stored in the variable scope for a variable that is known to be
 * set but whose value is not statically known (for example a `for` loop
 * variable or a name assigned by `read`).
 */
const VAR_PLACEHOLDER = '__TRACKED_VAR__'

/**
 * Report whether `value` embeds either runtime-unknown sentinel.
 *
 * @param value - Resolved string to inspect.
 * @returns `true` when the string contains `CMDSUB_PLACEHOLDER` or
 *   `VAR_PLACEHOLDER` anywhere inside it.
 */
function containsAnyPlaceholder(value: string): boolean {
  return value.includes(CMDSUB_PLACEHOLDER) || value.includes(VAR_PLACEHOLDER)
}
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
/**
 * Matches a space, tab, newline, `*`, `?` or `[` anywhere in a string.
 * NOTE(review): not referenced in the visible part of the file; presumably
 * flags variable values that would word-split or glob when expanded
 * unquoted — confirm at the use site.
 */
const BARE_VAR_UNSAFE_RE = /[ \t\n*?[]/

// Option shapes for `-i`, `-o`, `-e` / `--input`, `--output`, `--error`.
// NOTE(review): the names suggest stdbuf(1) flags; the consumers are outside
// the visible part of the file.
/** Short flag alone (`-o`); its value would be the next argument. */
const STDBUF_SHORT_SEP_RE = /^-[ioe]$/
/** Short flag with at least one character fused to it (`-oL`). */
const STDBUF_SHORT_FUSED_RE = /^-[ioe]./
/** Long flag with `=` (`--output=L`). */
const STDBUF_LONG_RE = /^--(input|output|error)=/
|
|
| |
| |
| |
| |
| |
| |
| |
| |
/**
 * Names of shell/environment variables treated as safe to reference even
 * when no assignment to them was tracked.
 * NOTE(review): the lookup lives outside the visible part of the file
 * (presumably resolveSimpleExpansion) — confirm the exact semantics there.
 */
const SAFE_ENV_VARS = new Set([
  'HOME',
  'PWD',
  'OLDPWD',
  'USER',
  'LOGNAME',
  'SHELL',
  'PATH',
  'HOSTNAME',
  'UID',
  'EUID',
  'PPID',
  'RANDOM',
  'SECONDS',
  'LINENO',
  'TMPDIR',

  'BASH_VERSION',
  'BASHPID',
  'SHLVL',
  'HISTFILE',
  'IFS',
])
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
/**
 * Single-character special parameter names (as in `$?`, `$$`, `$!`, `$#`,
 * `$0`, `$-`).
 * NOTE(review): the consumer is outside the visible part of the file.
 */
const SPECIAL_VAR_NAMES = new Set([
  '?',
  '$',
  '!',
  '#',
  '0',
  '-',
])
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
/**
 * Node types enumerated for `nodeTypeId`. Insertion order is significant:
 * `nodeTypeId` derives a numeric id from each entry's position, so new
 * entries must be appended at the end to keep existing ids stable.
 */
const DANGEROUS_TYPES = new Set([
  'command_substitution',
  'process_substitution',
  'expansion',
  'simple_expansion',
  'brace_expression',
  'subshell',
  'compound_statement',
  'for_statement',
  'while_statement',
  'until_statement',
  'if_statement',
  'case_statement',
  'function_definition',
  'test_command',
  'ansi_c_string',
  'translated_string',
  'herestring_redirect',
  'heredoc_redirect',
])

/** `DANGEROUS_TYPES` as an array, so a position can be looked up. */
const DANGEROUS_TYPE_IDS = [...DANGEROUS_TYPES]

/**
 * Map a node type name to a small stable integer.
 *
 * @param nodeType - AST node type, if any.
 * @returns `-2` when `nodeType` is missing or empty, `-1` for `'ERROR'`,
 *   the 1-based position in `DANGEROUS_TYPES` for listed types, and `0` for
 *   any other type.
 */
export function nodeTypeId(nodeType: string | undefined): number {
  if (!nodeType) return -2
  if (nodeType === 'ERROR') return -1
  const i = DANGEROUS_TYPE_IDS.indexOf(nodeType)
  return i >= 0 ? i + 1 : 0
}
|
|
| |
| |
| |
| |
/**
 * Redirect operator token types accepted inside a `file_redirect` node,
 * mapped to the corresponding `Redirect['op']` value. `<<` (heredoc) is
 * deliberately absent: heredocs are handled by a separate walker.
 */
const REDIRECT_OPS: Record<string, Redirect['op']> = {
  '>': '>',
  '>>': '>>',
  '<': '<',
  '>&': '>&',
  '<&': '<&',
  '>|': '>|',
  '&>': '&>',
  '&>>': '&>>',
  '<<<': '<<<',
}
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
/**
 * Matches `{…,…}` or `{…..…}` with no whitespace or nested braces inside,
 * i.e. text shaped like a brace expansion (`{a,b}`, `{1..3}`).
 */
const BRACE_EXPANSION_RE = /\{[^{}\s]*(,|\.\.)[^{}\s]*\}/

/**
 * Matches ASCII control characters other than tab (0x09) and newline
 * (0x0A): 0x00–0x08, 0x0B–0x1F and DEL (0x7F).
 */
const CONTROL_CHAR_RE = /[\x00-\x08\x0B-\x1F\x7F]/

/**
 * Matches non-ASCII whitespace and invisible space-like code points
 * (NBSP, Ogham space, U+2000–U+200B, line/paragraph separators, narrow and
 * medium spaces, ideographic space, BOM).
 */
const UNICODE_WHITESPACE_RE =
  /[\u00A0\u1680\u2000-\u200B\u2028\u2029\u202F\u205F\u3000\uFEFF]/

/**
 * Matches a backslash followed by a space or tab, or a backslash-newline
 * that directly follows a character other than space, tab, newline or
 * backslash (a line continuation in the middle of a word).
 */
const BACKSLASH_WHITESPACE_RE = /\\[ \t]|[^ \t\n\\]\\\n/

/** Matches `~[` (reported as zsh dynamic directory syntax). */
const ZSH_TILDE_BRACKET_RE = /~\[/

/**
 * Matches `=` immediately followed by a letter or underscore at the start
 * of the input or after whitespace, `;`, `&` or `|` (reported as zsh `=cmd`
 * expansion). `NAME=value` does not match because `=` follows a name
 * character.
 */
const ZSH_EQUALS_EXPANSION_RE = /(?:^|[\s;&|])=[a-zA-Z_]/

/**
 * Matches `{` followed, before any `}`, by a single or double quote.
 * Applied to the command after `maskBracesInQuotedContexts` has blanked
 * braces that sit inside quotes.
 */
const BRACE_WITH_QUOTE_RE = /\{[^}]*['"]/
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
/**
 * Return `cmd` with every `{` that sits inside single or double quotes
 * replaced by a space; all other characters (and the string length) are
 * preserved.
 *
 * Quote tracking rules implemented here:
 * - inside `'…'` nothing is special except the closing `'`;
 * - inside `"…"` a backslash escapes only `"` and `\` (the pair is copied
 *   through untouched);
 * - outside quotes a backslash escapes the next character, whatever it is.
 *
 * @param cmd - Raw command text.
 * @returns The masked text, or `cmd` itself when it contains no `{`.
 */
function maskBracesInQuotedContexts(cmd: string): string {
  // Fast path: nothing to mask.
  if (!cmd.includes('{')) return cmd
  const out: string[] = []
  let inSingle = false
  let inDouble = false
  let i = 0
  while (i < cmd.length) {
    const c = cmd[i]!
    if (inSingle) {
      // No escapes inside single quotes; only `'` ends the context.
      if (c === "'") inSingle = false
      out.push(c === '{' ? ' ' : c)
      i++
    } else if (inDouble) {
      if (c === '\\' && (cmd[i + 1] === '"' || cmd[i + 1] === '\\')) {
        // Escaped quote/backslash: copy the pair so the quote does not
        // close the context.
        out.push(c, cmd[i + 1]!)
        i += 2
      } else {
        if (c === '"') inDouble = false
        out.push(c === '{' ? ' ' : c)
        i++
      }
    } else if (c === '\\' && i + 1 < cmd.length) {
      // Unquoted escape: the next character is literal and cannot open a
      // quote; a `{` here stays visible to the caller's check.
      out.push(c, cmd[i + 1]!)
      i += 2
    } else {
      if (c === "'") inSingle = true
      else if (c === '"') inDouble = true
      out.push(c)
      i++
    }
  }
  return out.join('')
}
|
|
/**
 * The `$` character, built from its code point rather than written as a
 * literal. NOTE(review): presumably avoids a literal dollar sign in the
 * source for tooling reasons — confirm before simplifying.
 */
const DOLLAR = String.fromCharCode(0x24)
|
|
| |
| |
| |
| |
| |
| |
/**
 * Parse `cmd` and extract its commands for security checks.
 *
 * @param cmd - Raw shell command text.
 * @returns `simple` with an empty command list for the empty string (the
 *   parser is not invoked); `parse-unavailable` when `parseCommandRaw`
 *   yields `null`; otherwise whatever `parseForSecurityFromAst` returns for
 *   the parsed root.
 */
export async function parseForSecurity(
  cmd: string,
): Promise<ParseForSecurityResult> {
  if (cmd === '') return { kind: 'simple', commands: [] }
  const root = await parseCommandRaw(cmd)
  return root === null
    ? { kind: 'parse-unavailable' }
    : parseForSecurityFromAst(cmd, root)
}
|
|
| |
| |
| |
| |
| |
| |
/**
 * Run the textual pre-checks on `cmd`, then walk an already-parsed tree.
 *
 * The pre-checks operate on the raw text and run before the tree is
 * inspected, so they apply even when parsing was aborted. Each one returns
 * `too-complex` with its own reason; the first match wins.
 *
 * @param cmd - Raw shell command text the tree was parsed from.
 * @param root - Parsed root node, or `PARSE_ABORTED` when the parser gave up.
 * @returns `too-complex` when a pre-check fires or parsing was aborted,
 *   `simple` with no commands for whitespace-only input, otherwise the
 *   result of walking `root`.
 */
export function parseForSecurityFromAst(
  cmd: string,
  root: Node | typeof PARSE_ABORTED,
): ParseForSecurityResult {
  if (CONTROL_CHAR_RE.test(cmd)) {
    return { kind: 'too-complex', reason: 'Contains control characters' }
  }
  if (UNICODE_WHITESPACE_RE.test(cmd)) {
    return { kind: 'too-complex', reason: 'Contains Unicode whitespace' }
  }
  if (BACKSLASH_WHITESPACE_RE.test(cmd)) {
    return {
      kind: 'too-complex',
      reason: 'Contains backslash-escaped whitespace',
    }
  }
  if (ZSH_TILDE_BRACKET_RE.test(cmd)) {
    return {
      kind: 'too-complex',
      reason: 'Contains zsh ~[ dynamic directory syntax',
    }
  }
  if (ZSH_EQUALS_EXPANSION_RE.test(cmd)) {
    return {
      kind: 'too-complex',
      reason: 'Contains zsh =cmd equals expansion',
    }
  }
  // Braces inside quotes are blanked first so only unquoted `{` can
  // trigger the brace-with-quote check.
  if (BRACE_WITH_QUOTE_RE.test(maskBracesInQuotedContexts(cmd))) {
    return {
      kind: 'too-complex',
      reason: 'Contains brace with quote character (expansion obfuscation)',
    }
  }

  // Whitespace-only input contains no commands.
  const trimmed = cmd.trim()
  if (trimmed === '') {
    return { kind: 'simple', commands: [] }
  }

  if (root === PARSE_ABORTED) {
    // No tree to analyse: fail closed rather than report the input as safe.
    return {
      kind: 'too-complex',
      reason:
        'Parser aborted (timeout or resource limit) — possible adversarial input',
      nodeType: 'PARSE_ABORT',
    }
  }

  return walkProgram(root)
}
|
|
/**
 * Walk the whole tree from `root`, starting with an empty command list and
 * an empty variable scope.
 *
 * @param root - Root AST node.
 * @returns `simple` with all extracted commands, or the first non-simple
 *   result reported by `collectCommands`.
 */
function walkProgram(root: Node): ParseForSecurityResult {
  const commands: SimpleCommand[] = []
  // Variable name → tracked value, filled in as assignments are walked.
  const varScope = new Map<string, string>()
  const err = collectCommands(root, commands, varScope)
  if (err) return err
  return { kind: 'simple', commands }
}
|
|
| |
| |
| |
| |
/**
 * Recursively walk a statement-level AST node, appending every command it
 * contains to `commands` and recording variable assignments in `varScope`.
 *
 * Any node type without an explicit branch below is rejected through
 * `tooComplex`, so the walker fails closed on unknown constructs.
 *
 * @param node - AST node to walk.
 * @param commands - Accumulator for extracted commands (mutated).
 * @param varScope - Variable name → tracked value (mutated by assignments
 *   that stay visible to later statements).
 * @returns `null` on success, or the first non-simple result encountered.
 */
function collectCommands(
  node: Node,
  commands: SimpleCommand[],
  varScope: Map<string, string>,
): ParseForSecurityResult | null {
  if (node.type === 'command') {
    const result = walkCommand(node, [], commands, varScope)
    if (result.kind !== 'simple') return result
    commands.push(...result.commands)
    return null
  }

  if (node.type === 'redirected_statement') {
    return walkRedirectedStatement(node, commands, varScope)
  }

  if (node.type === 'comment') {
    return null
  }

  if (STRUCTURAL_TYPES.has(node.type)) {
    // Scope handling while walking children:
    // - a pipeline never writes to the caller's scope (it walks a copy);
    // - a list containing `||` or `&` snapshots the incoming scope up front;
    // - after `||`, `|`, `|&` or `&` the following statement gets a fresh
    //   copy of that snapshot (or of `varScope` when there is none), so
    //   assignments on one side of those operators are not assumed on the
    //   other side and do not reach the caller's scope afterwards.
    const isPipeline = node.type === 'pipeline'
    const needsSnapshot =
      !isPipeline &&
      node.children.some(c => !!c && (c.type === '||' || c.type === '&'))
    const snapshot = needsSnapshot ? new Map(varScope) : null
    let scope = isPipeline ? new Map(varScope) : varScope
    for (const child of node.children) {
      if (!child) continue
      if (SEPARATOR_TYPES.has(child.type)) {
        if (
          child.type === '||' ||
          child.type === '|' ||
          child.type === '|&' ||
          child.type === '&'
        ) {
          scope = new Map(snapshot ?? varScope)
        }
        continue
      }
      const err = collectCommands(child, commands, scope)
      if (err) return err
    }
    return null
  }

  if (node.type === 'negated_command') {
    // `! cmd`: skip the `!` token and walk the first real child.
    for (const child of node.children) {
      if (!child) continue
      if (child.type === '!') continue
      return collectCommands(child, commands, varScope)
    }
    return null
  }

  if (node.type === 'declaration_command') {
    // export / local / readonly / declare / typeset: emitted as a command
    // whose argv starts with the keyword.
    const argv: string[] = []
    for (const child of node.children) {
      if (!child) continue
      switch (child.type) {
        case 'export':
        case 'local':
        case 'readonly':
        case 'declare':
        case 'typeset':
          argv.push(child.text)
          break
        case 'word':
        case 'number':
        case 'raw_string':
        case 'string':
        case 'concatenation': {
          const arg = walkArgument(child, commands, varScope)
          if (typeof arg !== 'string') return arg
          const isDeclareLike =
            argv[0] === 'declare' ||
            argv[0] === 'typeset' ||
            argv[0] === 'local'
          // Flags containing n/i/a/A change how the assignment is
          // interpreted, so the tracked value could not be trusted.
          if (isDeclareLike && /^-[a-zA-Z]*[niaA]/.test(arg)) {
            return {
              kind: 'too-complex',
              reason: `declare flag ${arg} changes assignment semantics (nameref/integer/array)`,
              nodeType: 'declaration_command',
            }
          }
          // A non-flag argument with `[` before any `=` is an array
          // subscript.
          if (isDeclareLike && arg[0] !== '-' && /^[^=]*\[/.test(arg)) {
            return {
              kind: 'too-complex',
              reason: `declare positional '${arg}' contains array subscript — bash evaluates $(cmd) in subscripts`,
              nodeType: 'declaration_command',
            }
          }
          argv.push(arg)
          break
        }
        case 'variable_assignment': {
          const ev = walkVariableAssignment(child, commands, varScope)
          if ('kind' in ev) return ev
          // The assignment stays visible to later statements.
          applyVarToScope(varScope, ev)
          argv.push(`${ev.name}=${ev.value}`)
          break
        }
        case 'variable_name':
          // Bare name without a value, e.g. `export FOO`.
          argv.push(child.text)
          break
        default:
          return tooComplex(child)
      }
    }
    commands.push({ argv, envVars: [], redirects: [], text: node.text })
    return null
  }

  if (node.type === 'variable_assignment') {
    // Standalone `NAME=value`: recorded in scope only; no command is
    // emitted for it.
    const ev = walkVariableAssignment(node, commands, varScope)
    if ('kind' in ev) return ev
    applyVarToScope(varScope, ev)
    return null
  }

  if (node.type === 'for_statement') {
    let loopVar: string | null = null
    let doGroup: Node | null = null
    for (const child of node.children) {
      if (!child) continue
      if (child.type === 'variable_name') {
        loopVar = child.text
      } else if (child.type === 'do_group') {
        doGroup = child
      } else if (
        child.type === 'for' ||
        child.type === 'in' ||
        child.type === 'select' ||
        child.type === ';'
      ) {
        continue
      } else if (child.type === 'command_substitution') {
        // `for x in $(cmd)`: the inner command(s) are extracted too.
        const err = collectCommandSubstitution(child, commands, varScope)
        if (err) return err
      } else {
        // Any other iteration item must be a resolvable argument; the
        // resolved value itself is not needed.
        const arg = walkArgument(child, commands, varScope)
        if (typeof arg !== 'string') return arg
      }
    }
    if (loopVar === null || doGroup === null) return tooComplex(node)
    if (loopVar === 'PS4' || loopVar === 'IFS') {
      return {
        kind: 'too-complex',
        reason: `${loopVar} as loop variable bypasses assignment validation`,
        nodeType: 'for_statement',
      }
    }
    // The loop variable is set but its value is unknown. It is recorded in
    // the caller's scope as well; the body itself walks a copy, so
    // assignments made inside the body do not reach the caller's scope.
    varScope.set(loopVar, VAR_PLACEHOLDER)
    const bodyScope = new Map(varScope)
    for (const c of doGroup.children) {
      if (!c) continue
      if (c.type === 'do' || c.type === 'done' || c.type === ';') continue
      const err = collectCommands(c, commands, bodyScope)
      if (err) return err
    }
    return null
  }

  if (node.type === 'if_statement' || node.type === 'while_statement') {
    // Children seen before `then` (always, for `while`, which has no
    // `then` token) are condition statements and share the caller's scope.
    // Bodies and branches walk copies so their assignments stay local.
    let seenThen = false
    for (const child of node.children) {
      if (!child) continue
      if (
        child.type === 'if' ||
        child.type === 'fi' ||
        child.type === 'else' ||
        child.type === 'elif' ||
        child.type === 'while' ||
        child.type === 'until' ||
        child.type === ';'
      ) {
        continue
      }
      if (child.type === 'then') {
        seenThen = true
        continue
      }
      if (child.type === 'do_group') {
        const bodyScope = new Map(varScope)
        for (const c of child.children) {
          if (!c) continue
          if (c.type === 'do' || c.type === 'done' || c.type === ';') continue
          const err = collectCommands(c, commands, bodyScope)
          if (err) return err
        }
        continue
      }
      if (child.type === 'elif_clause' || child.type === 'else_clause') {
        const branchScope = new Map(varScope)
        for (const c of child.children) {
          if (!c) continue
          if (
            c.type === 'elif' ||
            c.type === 'else' ||
            c.type === 'then' ||
            c.type === ';'
          ) {
            continue
          }
          const err = collectCommands(c, commands, branchScope)
          if (err) return err
        }
        continue
      }
      const targetScope = seenThen ? new Map(varScope) : varScope
      const before = commands.length
      const err = collectCommands(child, commands, targetScope)
      if (err) return err
      if (!seenThen) {
        // Every `read NAME…` extracted from the condition marks NAME as
        // set-but-unknown, so the body may reference it.
        for (let i = before; i < commands.length; i++) {
          const c = commands[i]
          if (c?.argv[0] === 'read') {
            for (const a of c.argv.slice(1)) {
              if (!a.startsWith('-') && /^[A-Za-z_][A-Za-z0-9_]*$/.test(a)) {
                // If NAME already holds a known literal, the `read` cannot
                // be proven to run, so neither the old literal nor the
                // placeholder can be trusted: reject.
                const existing = varScope.get(a)
                if (
                  existing !== undefined &&
                  !containsAnyPlaceholder(existing)
                ) {
                  return {
                    kind: 'too-complex',
                    reason: `'read ${a}' in condition may not execute (||/pipeline/subshell); cannot prove it overwrites tracked literal '${existing}'`,
                    // Report the statement actually being walked
                    // (if_statement or while_statement).
                    nodeType: node.type,
                  }
                }
                varScope.set(a, VAR_PLACEHOLDER)
              }
            }
          }
        }
      }
    }
    return null
  }

  if (node.type === 'subshell') {
    // `( … )`: sees the caller's variables, but its own assignments are
    // confined to a copy.
    const innerScope = new Map(varScope)
    for (const child of node.children) {
      if (!child) continue
      if (child.type === '(' || child.type === ')') continue
      const err = collectCommands(child, commands, innerScope)
      if (err) return err
    }
    return null
  }

  if (node.type === 'test_command') {
    // `[[ … ]]` / `[ … ]`: emitted as a command whose argv[0] is `[[`,
    // followed by the flattened operators and operands.
    const argv: string[] = ['[[']
    for (const child of node.children) {
      if (!child) continue
      if (child.type === '[[' || child.type === ']]') continue
      if (child.type === '[' || child.type === ']') continue
      const err = walkTestExpr(child, argv, commands, varScope)
      if (err) return err
    }
    commands.push({ argv, envVars: [], redirects: [], text: node.text })
    return null
  }

  if (node.type === 'unset_command') {
    const argv: string[] = []
    for (const child of node.children) {
      if (!child) continue
      switch (child.type) {
        case 'unset':
          argv.push(child.text)
          break
        case 'variable_name':
          argv.push(child.text)
          // The variable is no longer tracked after `unset NAME`.
          varScope.delete(child.text)
          break
        case 'word': {
          const arg = walkArgument(child, commands, varScope)
          if (typeof arg !== 'string') return arg
          argv.push(arg)
          break
        }
        default:
          return tooComplex(child)
      }
    }
    commands.push({ argv, envVars: [], redirects: [], text: node.text })
    return null
  }

  return tooComplex(node)
}
|
|
| |
| |
| |
| |
| |
/**
 * Flatten a test expression (the inside of `[[ … ]]` / `[ … ]`) into `argv`.
 *
 * @param node - Expression node to flatten.
 * @param argv - Output argument vector (mutated in source order).
 * @param innerCommands - Accumulator for commands discovered while
 *   resolving operands.
 * @param varScope - Tracked variable values used to resolve operands.
 * @returns `null` on success, or the first non-simple result encountered.
 */
function walkTestExpr(
  node: Node,
  argv: string[],
  innerCommands: SimpleCommand[],
  varScope: Map<string, string>,
): ParseForSecurityResult | null {
  switch (node.type) {
    // Composite expressions: recurse into every child.
    case 'unary_expression':
    case 'binary_expression':
    case 'negated_expression':
    case 'parenthesized_expression': {
      for (const c of node.children) {
        if (!c) continue
        const err = walkTestExpr(c, argv, innerCommands, varScope)
        if (err) return err
      }
      return null
    }
    // Operator and punctuation tokens are copied verbatim.
    case 'test_operator':
    case '!':
    case '(':
    case ')':
    case '&&':
    case '||':
    case '==':
    case '=':
    case '!=':
    case '<':
    case '>':
    case '=~':
      argv.push(node.text)
      return null
    // Pattern operands are kept as raw text; they are not resolved as
    // arguments.
    case 'regex':
    case 'extglob_pattern':
      argv.push(node.text)
      return null
    default: {
      // Everything else is an operand and must pass the argument allowlist.
      const arg = walkArgument(node, innerCommands, varScope)
      if (typeof arg !== 'string') return arg
      argv.push(arg)
      return null
    }
  }
}
|
|
| |
| |
| |
| |
| |
| |
/**
 * Walk a `redirected_statement`: resolve its redirects, walk the wrapped
 * statement, and attach the redirects to the last command that statement
 * produced.
 *
 * @param node - The `redirected_statement` node.
 * @param commands - Accumulator for extracted commands (mutated).
 * @param varScope - Tracked variable values.
 * @returns `null` on success, or the first non-simple result encountered.
 */
function walkRedirectedStatement(
  node: Node,
  commands: SimpleCommand[],
  varScope: Map<string, string>,
): ParseForSecurityResult | null {
  const redirects: Redirect[] = []
  let innerCommand: Node | null = null

  for (const child of node.children) {
    if (!child) continue
    if (child.type === 'file_redirect') {
      const r = walkFileRedirect(child, commands, varScope)
      if ('kind' in r) return r
      redirects.push(r)
    } else if (child.type === 'heredoc_redirect') {
      // Heredocs are only validated; they add no Redirect entry.
      const r = walkHeredocRedirect(child)
      if (r) return r
    } else if (
      child.type === 'command' ||
      child.type === 'pipeline' ||
      child.type === 'list' ||
      child.type === 'negated_command' ||
      child.type === 'declaration_command' ||
      child.type === 'unset_command'
    ) {
      innerCommand = child
    } else {
      return tooComplex(child)
    }
  }

  if (!innerCommand) {
    // Redirect with no command (e.g. `> file`): emit an empty-argv command
    // so the redirect targets are still reported.
    commands.push({ argv: [], envVars: [], redirects, text: node.text })
    return null
  }

  const before = commands.length
  const err = collectCommands(innerCommand, commands, varScope)
  if (err) return err
  // Attach only when the inner statement actually emitted a command.
  if (commands.length > before && redirects.length > 0) {
    const last = commands[commands.length - 1]
    if (last) last.redirects.push(...redirects)
  }
  return null
}
|
|
| |
| |
| |
| |
/**
 * Resolve a `file_redirect` node into a `Redirect`.
 *
 * @param node - The `file_redirect` node.
 * @param innerCommands - Accumulator for commands discovered while
 *   resolving the target.
 * @param varScope - Tracked variable values used to resolve the target.
 * @returns The resolved redirect, or a non-simple result when the target or
 *   shape cannot be resolved statically.
 */
function walkFileRedirect(
  node: Node,
  innerCommands: SimpleCommand[],
  varScope: Map<string, string>,
): Redirect | ParseForSecurityResult {
  let op: Redirect['op'] | null = null
  let target: string | null = null
  let fd: number | undefined

  for (const child of node.children) {
    if (!child) continue
    if (child.type === 'file_descriptor') {
      fd = Number(child.text)
    } else if (Object.prototype.hasOwnProperty.call(REDIRECT_OPS, child.type)) {
      // Own-property lookup: a bare `in` would also accept inherited keys
      // such as `toString`, yielding a non-operator value.
      op = REDIRECT_OPS[child.type] ?? null
    } else if (child.type === 'word' || child.type === 'number') {
      // A word/number with children embeds something other than plain
      // text; its .text cannot be trusted as the literal target.
      if (child.children.length > 0) return tooComplex(child)
      // Brace-expansion-shaped text does not name a single literal target.
      if (BRACE_EXPANSION_RE.test(child.text)) return tooComplex(child)
      // Unquoted context: `\X` stands for `X`.
      target = child.text.replace(/\\(.)/g, '$1')
    } else if (child.type === 'raw_string') {
      target = stripRawString(child.text)
    } else if (child.type === 'string') {
      const s = walkString(child, innerCommands, varScope)
      if (typeof s !== 'string') return s
      target = s
    } else if (child.type === 'concatenation') {
      const s = walkArgument(child, innerCommands, varScope)
      if (typeof s !== 'string') return s
      target = s
    } else {
      return tooComplex(child)
    }
  }

  if (!op || target === null) {
    return {
      kind: 'too-complex',
      reason: 'Unrecognized redirect shape',
      nodeType: node.type,
    }
  }
  return { op, target, fd }
}
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
/**
 * Validate a `heredoc_redirect`. Only heredocs whose delimiter is quoted
 * (`'EOF'`, `"EOF"`) or backslash-escaped (`\EOF`) are accepted, and their
 * body must consist solely of `heredoc_content` nodes.
 *
 * @param node - The `heredoc_redirect` node.
 * @returns `null` when the heredoc is acceptable, otherwise a `too-complex`
 *   result.
 */
function walkHeredocRedirect(node: Node): ParseForSecurityResult | null {
  let startText: string | null = null
  let body: Node | null = null

  for (const child of node.children) {
    if (!child) continue
    if (child.type === 'heredoc_start') startText = child.text
    else if (child.type === 'heredoc_body') body = child
    else if (
      child.type === '<<' ||
      child.type === '<<-' ||
      child.type === 'heredoc_end' ||
      child.type === 'file_descriptor'
    ) {
      // Expected structural tokens: nothing to check.
    } else {
      // Anything else attached to the heredoc node is not analysed here;
      // reject it.
      return tooComplex(child)
    }
  }

  const isQuoted =
    startText !== null &&
    ((startText.startsWith("'") && startText.endsWith("'")) ||
      (startText.startsWith('"') && startText.endsWith('"')) ||
      startText.startsWith('\\'))

  if (!isQuoted) {
    return {
      kind: 'too-complex',
      reason: 'Heredoc with unquoted delimiter undergoes shell expansion',
      nodeType: 'heredoc_redirect',
    }
  }

  if (body) {
    // Any non-content child in the body is rejected.
    for (const child of body.children) {
      if (!child) continue
      if (child.type !== 'heredoc_content') {
        return tooComplex(child)
      }
    }
  }
  return null
}
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
/**
 * Validate a `herestring_redirect` (`<<< word`). The content must resolve
 * through the argument allowlist and must not match `NEWLINE_HASH_RE`.
 * No `Redirect` entry is produced; the function only accepts or rejects.
 *
 * @param node - The `herestring_redirect` node.
 * @param innerCommands - Accumulator for commands discovered while
 *   resolving the content.
 * @param varScope - Tracked variable values used to resolve the content.
 * @returns `null` when acceptable, otherwise a non-simple result.
 */
function walkHerestringRedirect(
  node: Node,
  innerCommands: SimpleCommand[],
  varScope: Map<string, string>,
): ParseForSecurityResult | null {
  for (const child of node.children) {
    if (!child) continue
    if (child.type === '<<<') continue
    const content = walkArgument(child, innerCommands, varScope)
    if (typeof content !== 'string') return content
    if (NEWLINE_HASH_RE.test(content)) return tooComplex(child)
  }
  return null
}
|
|
| |
| |
| |
| |
| |
/**
 * Convert a `command` node into a single `SimpleCommand`.
 *
 * @param node - The `command` node.
 * @param extraRedirects - Redirects to place ahead of the command's own.
 * @param innerCommands - Accumulator for commands discovered while
 *   resolving arguments.
 * @param varScope - Tracked variable values used to resolve expansions.
 * @returns `simple` with exactly one command, or the first non-simple
 *   result produced while resolving a child.
 */
function walkCommand(
  node: Node,
  extraRedirects: Redirect[],
  innerCommands: SimpleCommand[],
  varScope: Map<string, string>,
): ParseForSecurityResult {
  const argv: string[] = []
  const envVars: { name: string; value: string }[] = []
  const redirects: Redirect[] = [...extraRedirects]

  for (const child of node.children) {
    if (!child) continue

    switch (child.type) {
      case 'variable_assignment': {
        const ev = walkVariableAssignment(child, innerCommands, varScope)
        if ('kind' in ev) return ev
        // Prefix assignment (`FOO=x cmd`): recorded on the command only;
        // it is not applied to varScope.
        envVars.push({ name: ev.name, value: ev.value })
        break
      }
      case 'command_name': {
        // Resolve the wrapped node when there is one, else the
        // command_name node itself.
        const arg = walkArgument(
          child.children[0] ?? child,
          innerCommands,
          varScope,
        )
        if (typeof arg !== 'string') return arg
        argv.push(arg)
        break
      }
      case 'word':
      case 'number':
      case 'raw_string':
      case 'string':
      case 'concatenation':
      case 'arithmetic_expansion': {
        const arg = walkArgument(child, innerCommands, varScope)
        if (typeof arg !== 'string') return arg
        argv.push(arg)
        break
      }
      case 'simple_expansion': {
        // Bare `$VAR` argument. NOTE(review): the third argument is
        // presumably an "inside string" flag — confirm against
        // resolveSimpleExpansion.
        const v = resolveSimpleExpansion(child, varScope, false)
        if (typeof v !== 'string') return v
        argv.push(v)
        break
      }
      case 'file_redirect': {
        const r = walkFileRedirect(child, innerCommands, varScope)
        if ('kind' in r) return r
        redirects.push(r)
        break
      }
      case 'herestring_redirect': {
        // Validated only; contributes nothing to argv or redirects.
        const err = walkHerestringRedirect(child, innerCommands, varScope)
        if (err) return err
        break
      }
      default:
        return tooComplex(child)
    }
  }

  // Choose the `text` reported for the command. When the source contains a
  // `$name` reference or a newline, the raw text is replaced by a rendering
  // of the resolved argv, single-quoting any argument that is empty or
  // contains shell-special characters. Otherwise the original source text
  // is kept.
  const text =
    /\$[A-Za-z_]/.test(node.text) || node.text.includes('\n')
      ? argv
          .map(a =>
            a === '' || /["'\\ \t\n$`;|&<>(){}*?[\]~#]/.test(a)
              ? `'${a.replace(/'/g, "'\\''")}'`
              : a,
          )
          .join(' ')
      : node.text
  return {
    kind: 'simple',
    commands: [{ argv, envVars, redirects, text }],
  }
}
| |
| /** |
| * Recurse into a command_substitution node's inner command(s). If the inner |
| * command(s) parse cleanly (simple), add them to the innerCommands |
| * accumulator and return null (success). If the inner command is itself |
| * too-complex (e.g., nested arith expansion, process sub), return the error. |
| * This enables recursive permission checking: `echo $(git rev-parse HEAD)` |
| * extracts BOTH `echo $(git rev-parse HEAD)` (outer) AND `git rev-parse HEAD` |
| * (inner) — permission rules must match BOTH for the whole command to allow. |
| */ |
| function collectCommandSubstitution( |
| csNode: Node, |
| innerCommands: SimpleCommand[], |
| varScope: Map<string, string>, |
| ): ParseForSecurityResult | null { |
| // Vars set BEFORE the $() are visible inside (bash subshell semantics), |
| // but vars set INSIDE don't leak out. Pass a COPY of the outer scope so |
| // inner assignments don't mutate the outer map. |
| const innerScope = new Map(varScope) |
| // command_substitution children: `$(` or `` ` ``, inner statement(s), `)` |
| for (const child of csNode.children) { |
| if (!child) continue |
| if (child.type === '$(' || child.type === '`' || child.type === ')') { |
| continue |
| } |
| const err = collectCommands(child, innerCommands, innerScope) |
| if (err) return err |
| } |
| return null |
| } |
| |
| /** |
| * Convert an argument node to its literal string value. Quotes are resolved. |
| * This function implements the argument-position allowlist. |
| */ |
| function walkArgument( |
| node: Node | null, |
| innerCommands: SimpleCommand[], |
| varScope: Map<string, string>, |
| ): string | ParseForSecurityResult { |
| if (!node) { |
| return { kind: 'too-complex', reason: 'Null argument node' } |
| } |
| |
| switch (node.type) { |
| case 'word': { |
| // Unescape backslash sequences. In unquoted context, bash's quote |
| // removal turns `\X` → `X` for any character X. tree-sitter preserves |
| // the raw text. Required for checkSemantics: `\eval` must match |
| // EVAL_LIKE_BUILTINS, `\zmodload` must match ZSH_DANGEROUS_BUILTINS. |
| // Also makes argv accurate: `find -exec {} \;` → argv has `;` not |
| // `\;`. (Deny-rule matching on .text already worked via downstream |
| // splitCommand_DEPRECATED unescaping — see walkCommand comment.) `\<whitespace>` |
| // is already rejected by BACKSLASH_WHITESPACE_RE. |
| if (BRACE_EXPANSION_RE.test(node.text)) { |
| return { |
| kind: 'too-complex', |
| reason: 'Word contains brace expansion syntax', |
| nodeType: 'word', |
| } |
| } |
| return node.text.replace(/\\(.)/g, '$1') |
| } |
| |
| case 'number': |
| // SECURITY: tree-sitter-bash parses `NN#<expansion>` (arithmetic base |
| // syntax) as a `number` node with the expansion as a CHILD. `10#$(cmd)` |
| // is a number node whose .text is the full literal but whose child is a |
| // command_substitution — bash runs the substitution. .text on a node |
| // with children would smuggle the expansion past permission checks. |
| // Plain numbers (`10`, `16#ff`) have zero children. |
| if (node.children.length > 0) { |
| return { |
| kind: 'too-complex', |
| reason: 'Number node contains expansion (NN# arithmetic base syntax)', |
| nodeType: node.children[0]?.type, |
| } |
| } |
| return node.text |
| |
| case 'raw_string': |
| return stripRawString(node.text) |
| |
| case 'string': |
| return walkString(node, innerCommands, varScope) |
| |
| case 'concatenation': { |
| if (BRACE_EXPANSION_RE.test(node.text)) { |
| return { |
| kind: 'too-complex', |
| reason: 'Brace expansion', |
| nodeType: 'concatenation', |
| } |
| } |
| let result = '' |
| for (const child of node.children) { |
| if (!child) continue |
| const part = walkArgument(child, innerCommands, varScope) |
| if (typeof part !== 'string') return part |
| result += part |
| } |
| return result |
| } |
| |
| case 'arithmetic_expansion': { |
| const err = walkArithmetic(node) |
| if (err) return err |
| return node.text |
| } |
| |
| case 'simple_expansion': { |
| // `$VAR` inside a concatenation (e.g., `prefix$VAR`). Same rules |
| // as the bare case in walkCommand: must be tracked or SAFE_ENV_VARS. |
| // inside-concatenation counts as bare arg (the whole concat IS the arg) |
| return resolveSimpleExpansion(node, varScope, false) |
| } |
| |
| // NOTE: command_substitution at arg position (bare or inside concatenation) |
| // is intentionally NOT handled — the output is/becomes-part-of a positional |
| // argument which might be a path or flag. `rm $(foo)` or `rm $(foo)bar` |
| // would hide the real path behind the placeholder. Only $() inside a |
| // `string` node (walkString) is extracted, since the output is embedded |
| // in a longer string rather than BEING the argument. |
| |
| default: |
| return tooComplex(node) |
| } |
| } |
| |
| /** |
| * Extract literal content from a double-quoted string node. A `string` node's |
| * children are `"` delimiters, `string_content` literals, and possibly |
| * expansion nodes. |
| * |
| * tree-sitter quirk: literal newlines inside double quotes are NOT included |
| * in `string_content` node text. bash preserves them. For `"a\nb"`, |
| * tree-sitter produces two `string_content` children (`"a"`, `"b"`) with the |
| * newline in neither. For `"\n#"`, it produces ONE child (`"#"`) with the |
| * leading newline eaten. Concatenating children therefore loses newlines. |
| * |
| * Fix: track child `startIndex` and insert one `\n` per index gap. The gap |
| * between children IS the dropped newline(s). This makes the argv value |
| * match what bash actually sees. |
| */ |
| function walkString( |
| node: Node, |
| innerCommands: SimpleCommand[], |
| varScope: Map<string, string>, |
| ): string | ParseForSecurityResult { |
| let result = '' |
| let cursor = -1 |
| // SECURITY: Track whether the string contains a runtime-unknown |
| // placeholder ($() output or unknown-value tracked var) vs any literal |
| // content. A string that is ONLY a placeholder (`"$(cmd)"`, `"$VAR"` |
| // where VAR holds an unknown sentinel) produces an argv element that IS |
| // the placeholder — which downstream path validation resolves as a |
| // relative filename within cwd, bypassing the check. `cd "$(echo /etc)"` |
| // would pass validation but runtime-cd into /etc. We reject |
| // solo-placeholder strings; placeholders mixed with literal content |
| // (`"prefix: $(cmd)"`) are safe — runtime value can't equal a bare path. |
| let sawDynamicPlaceholder = false |
| let sawLiteralContent = false |
| for (const child of node.children) { |
| if (!child) continue |
| // Index gap between this child and the previous one = dropped newline(s). |
| // Ignore the gap before the first non-delimiter child (cursor === -1). |
| // Skip gap-fill for `"` delimiters: a gap before the closing `"` is the |
| // tree-sitter whitespace-only-string quirk (space/tab, not newline) — let |
| // the Fix C check below catch it as too-complex instead of mis-filling |
| // with `\n` and diverging from bash. |
| if (cursor !== -1 && child.startIndex > cursor && child.type !== '"') { |
| result += '\n'.repeat(child.startIndex - cursor) |
| sawLiteralContent = true |
| } |
| cursor = child.endIndex |
| switch (child.type) { |
| case '"': |
| // Reset cursor after opening quote so the gap between `"` and the |
| // first content child is captured. |
| cursor = child.endIndex |
| break |
| case 'string_content': |
| // Bash double-quote escape rules (NOT the generic /\\(.)/g used for |
| // unquoted words in walkArgument): inside "...", a backslash only |
| // escapes $ ` " \ — other sequences like \n stay literal. So |
| // `"fix \"bug\""` → `fix "bug"`, but `"a\nb"` → `a\nb` (backslash |
| // kept). tree-sitter preserves the raw escapes in .text; we resolve |
| // them here so argv matches what bash actually passes. |
| result += child.text.replace(/\\([$`"\\])/g, '$1') |
| sawLiteralContent = true |
| break |
| case DOLLAR: |
| // A bare dollar sign before closing quote or a non-name char is |
| // literal in bash. tree-sitter emits it as a standalone node. |
| result += DOLLAR |
| sawLiteralContent = true |
| break |
| case 'command_substitution': { |
| // Carve-out: `$(cat <<'EOF' ... EOF)` is safe. The quoted-delimiter |
| // heredoc body is literal (no expansion), and `cat` just prints it. |
| // The substitution result is therefore a known static string. This |
| // pattern is the idiomatic way to pass multi-line content to tools |
| // like `gh pr create --body`. We replace the substitution with a |
| // placeholder argv value — the actual content doesn't matter for |
| // permission checking, only that it IS static. |
| const heredocBody = extractSafeCatHeredoc(child) |
| if (heredocBody === 'DANGEROUS') return tooComplex(child) |
| if (heredocBody !== null) { |
| // SECURITY: the body IS the substitution result. Previously we |
| // dropped it → `rm "$(cat <<'EOF'\n/etc/passwd\nEOF)"` produced |
| // argv ['rm',''] while bash runs `rm /etc/passwd`. validatePath('') |
| // resolves to cwd → allowed. Every path-constrained command |
| // bypassed via this. Now: append the body (trailing LF trimmed — |
| // bash $() strips trailing newlines). |
| // |
| // Tradeoff: bodies with internal newlines are multi-line text |
| // (markdown, scripts) which cannot be valid paths — safe to drop |
| // to avoid NEWLINE_HASH_RE false positives on `## Summary`. A |
| // single-line body (like `/etc/passwd`) MUST go into argv so |
| // downstream path validation sees the real target. |
| const trimmed = heredocBody.replace(/\n+$/, '') |
| if (trimmed.includes('\n')) { |
| sawLiteralContent = true |
| break |
| } |
| result += trimmed |
| sawLiteralContent = true |
| break |
| } |
| // General $() inside "...": recurse into inner command(s). If they |
| // parse cleanly, they become additional subcommands that the |
| // permission system must match rules against. The outer argv gets |
| // the original $() text as placeholder (runtime-determined value). |
| // `echo "SHA: $(git rev-parse HEAD)"` → extracts BOTH |
| // `echo "SHA: $(...)"` AND `git rev-parse HEAD` — both must match |
| // permission rules. ~27% of too-complex in top-5k ant cmds. |
| const err = collectCommandSubstitution(child, innerCommands, varScope) |
| if (err) return err |
| result += CMDSUB_PLACEHOLDER |
| sawDynamicPlaceholder = true |
| break |
| } |
| case 'simple_expansion': { |
| // `$VAR` inside "...". Tracked/safe vars resolve; untracked reject. |
| const v = resolveSimpleExpansion(child, varScope, true) |
| if (typeof v !== 'string') return v |
| // VAR_PLACEHOLDER = runtime-unknown (loop var, read var, $() output, |
| // SAFE_ENV_VARS, special vars). Any other string = actual literal |
| // value from a tracked static var (e.g. VAR=/tmp → v='/tmp'). |
| if (v === VAR_PLACEHOLDER) sawDynamicPlaceholder = true |
| else sawLiteralContent = true |
| result += v |
| break |
| } |
| case 'arithmetic_expansion': { |
| const err = walkArithmetic(child) |
| if (err) return err |
| result += child.text |
| // Validated to be literal-numeric — static content. |
| sawLiteralContent = true |
| break |
| } |
| default: |
| // expansion (${...}) inside "..." |
| return tooComplex(child) |
| } |
| } |
| // SECURITY: Reject solo-placeholder strings. `"$(cmd)"` or `"$VAR"` (where |
| // VAR holds an unknown value) would produce an argv element that IS the |
| // placeholder — which bypasses downstream path validation (validatePath |
| // resolves placeholders as relative filenames within cwd). Only allow |
| // placeholders embedded alongside literal content (`"prefix: $(cmd)"`). |
| if (sawDynamicPlaceholder && !sawLiteralContent) { |
| return tooComplex(node) |
| } |
| // SECURITY: tree-sitter-bash quirk — a double-quoted string containing |
| // ONLY whitespace (` "`, `" "`, `"\t"`) produces NO string_content child; |
| // the whitespace is attributed to the closing `"` node's text. Our loop |
| // only adds to `result` from string_content/expansion children, so we'd |
| // return "" when bash sees " ". Detect: we saw no content children |
| // (both flags false — neither literal nor placeholder added) but the |
| // source span is longer than bare `""`. Genuine `""` has text.length==2. |
| // `"$V"` with V="" doesn't hit this — the simple_expansion child sets |
| // sawLiteralContent via the `else` branch even when v is empty. |
| if (!sawLiteralContent && !sawDynamicPlaceholder && node.text.length > 2) { |
| return tooComplex(node) |
| } |
| return result |
| } |
| |
/**
 * Allowlist for childless (leaf) tokens found inside `$(( … ))`.
 *
 * Accepted leaves:
 *  - integer literals: decimal, `0x…` hex, and bash `base#digits`
 *  - operator / parenthesis / comma tokens
 *  - the `$((` and `))` delimiters themselves
 *
 * Any other leaf text — most importantly a variable name — does not match
 * and causes the caller to reject the expansion.
 */
const ARITH_LEAF_RE =
  /^(?:[0-9]+|0[xX][0-9a-fA-F]+|[0-9]+#[0-9a-zA-Z]+|[-+*/%^&|~!<>=?:(),]+|<<|>>|\*\*|&&|\|\||[<>=!]=|\$\(\(|\)\))$/
| |
/**
 * Validate an `arithmetic_expansion` subtree, accepting only expressions
 * built from numeric literals and operators.
 *
 * Variables and substitutions are refused: bash arithmetic evaluates variable
 * VALUES recursively, so with x='a[$(cmd)]' the expansion $((x)) runs cmd
 * (arithmetic injection — https://www.vidarholen.net/contents/blog/?p=716).
 *
 * On success the caller places the whole `$((…))` source text into argv as a
 * literal string; bash turns it into an integer at runtime, and the static
 * text will not match any sensitive path/deny pattern.
 *
 * @param node the arithmetic_expansion node, or a nested expression node
 *   reached through recursion.
 * @returns null when every leaf is allowlisted; otherwise a too-complex
 *   result describing the first offending node.
 */
function walkArithmetic(node: Node): ParseForSecurityResult | null {
  for (const part of node.children) {
    if (!part) continue

    // Leaf token: must be an allowlisted literal/operator.
    const isLeaf = part.children.length === 0
    if (isLeaf) {
      if (ARITH_LEAF_RE.test(part.text)) continue
      return {
        kind: 'too-complex',
        reason: `Arithmetic expansion references variable or non-literal: ${part.text}`,
        nodeType: 'arithmetic_expansion',
      }
    }

    // Interior node: only plain expression wrappers may be descended into.
    const isNestedExpression =
      part.type === 'binary_expression' ||
      part.type === 'unary_expression' ||
      part.type === 'ternary_expression' ||
      part.type === 'parenthesized_expression'
    if (!isNestedExpression) return tooComplex(part)

    const failure = walkArithmetic(part)
    if (failure) return failure
  }
  return null
}
| |
/**
 * Recognise a command_substitution of the exact form
 * `$(cat <<'DELIM' … DELIM)` and hand back the heredoc body.
 *
 * Expected tree-sitter shape:
 *   command_substitution
 *     $(
 *     redirected_statement
 *       command → command_name → word "cat"   (that single child only)
 *       heredoc_redirect
 *         <<
 *         heredoc_start 'DELIM'                (quoted)
 *         heredoc_body                         (pure heredoc_content)
 *         heredoc_end
 *     )
 *
 * @returns
 *  - the heredoc body text when the shape matches;
 *  - `null` for any structural deviation (arguments to cat, extra commands,
 *    a heredoc that walkHeredocRedirect rejects, …);
 *  - `'DANGEROUS'` when the shape matches but the body references a
 *    `/proc/…/environ` path or contains a `system(` call.
 */
function extractSafeCatHeredoc(subNode: Node): string | 'DANGEROUS' | null {
  // Between `$(` and `)` there must be exactly one redirected_statement.
  const inner = subNode.children.filter(
    c => c && c.type !== '$(' && c.type !== ')',
  )
  if (inner.length !== 1) return null
  const stmt = inner[0]
  if (!stmt || stmt.type !== 'redirected_statement') return null

  // The statement may hold only a bare `cat` command and heredoc redirects.
  let foundCat = false
  let heredocText: string | null = null
  for (const part of stmt.children) {
    if (!part) continue
    switch (part.type) {
      case 'command': {
        // Bare `cat` only — no arguments, no env-var prefixes.
        const pieces = part.children.filter(c => c)
        const only = pieces.length === 1 ? pieces[0] : null
        if (only?.type !== 'command_name' || only.text !== 'cat') return null
        foundCat = true
        break
      }
      case 'heredoc_redirect': {
        // Delegate delimiter/body validation to the shared heredoc walker;
        // it yields null on success and a rejection otherwise.
        if (walkHeredocRedirect(part) !== null) return null
        for (const piece of part.children) {
          if (piece?.type === 'heredoc_body') heredocText = piece.text
        }
        break
      }
      default:
        return null
    }
  }
  if (!foundCat || heredocText === null) return null

  // SECURITY: through substitution the body becomes an argv value of the
  // OUTER command, so a body of `/proc/self/environ` is semantically
  // `cat /proc/self/environ`. checkSemantics never inspects the body.
  // Answering `null` here would send the caller down its generic
  // command-substitution path, which extracts the inner `cat` without
  // looking at the body text — so a distinct sentinel is returned to let the
  // caller reject outright.
  if (PROC_ENVIRON_RE.test(heredocText)) return 'DANGEROUS'
  // Same reasoning for jq's system(): argv is checked, the heredoc body is
  // not. Applied unconditionally because the outer command is unknown here.
  if (/\bsystem\s*\(/.test(heredocText)) return 'DANGEROUS'
  return heredocText
}
| |
/**
 * Walk a `variable_assignment` node (`NAME=value` / `NAME+=value`).
 *
 * @param node the variable_assignment node.
 * @param innerCommands receives commands extracted from any `$()` in the
 *   value (passed through to the substitution/argument walkers).
 * @param varScope tracked variables used to resolve `$OTHER` on the RHS.
 * @returns `{ name, value, isAppend }` on success, or a too-complex result
 *   when the value cannot be resolved or the assignment is one of the
 *   rejected forms (invalid name, IFS, unsafe PS4, tilde in value).
 */
function walkVariableAssignment(
  node: Node,
  innerCommands: SimpleCommand[],
  varScope: Map<string, string>,
): { name: string; value: string; isAppend: boolean } | ParseForSecurityResult {
  // Every rejection produced directly by this function has the same shape.
  const reject = (reason: string): ParseForSecurityResult => ({
    kind: 'too-complex',
    reason,
    nodeType: 'variable_assignment',
  })

  let name: string | null = null
  let value = ''
  let isAppend = false

  for (const part of node.children) {
    if (!part) continue
    switch (part.type) {
      case 'variable_name':
        name = part.text
        break
      case '=':
      case '+=':
        // tree-sitter emits `+=` as its own operator node (`PATH+=":/new"`);
        // it must be consumed here rather than reaching walkArgument.
        isAppend = part.type === '+='
        break
      case 'command_substitution': {
        // `VAR=$(cmd)`: the output is merely stored as a string (it is not a
        // positional argument), but the inner command still runs, so it is
        // collected for permission checks.
        const failure = collectCommandSubstitution(
          part,
          innerCommands,
          varScope,
        )
        if (failure) return failure
        value = CMDSUB_PLACEHOLDER
        break
      }
      case 'simple_expansion': {
        // `VAR=$OTHER`: an assignment RHS is neither word-split nor
        // glob-expanded by bash, so resolve with insideString=true to avoid
        // over-rejecting on BARE_VAR_UNSAFE_RE. A later bare use of VAR is
        // where that check applies. A VAR_PLACEHOLDER result is stored as-is.
        const resolved = resolveSimpleExpansion(part, varScope, true)
        if (typeof resolved !== 'string') return resolved
        value = resolved
        break
      }
      default: {
        const resolved = walkArgument(part, innerCommands, varScope)
        if (typeof resolved !== 'string') return resolved
        value = resolved
        break
      }
    }
  }

  if (name === null) return reject('Variable assignment without name')

  // SECURITY: tree-sitter-bash accepts names such as `1VAR=value` as a
  // variable_assignment, but bash only recognises [A-Za-z_][A-Za-z0-9_]* and
  // otherwise executes the word as a COMMAND. Never treat that as inert.
  if (!/^[A-Za-z_][A-Za-z0-9_]*$/.test(name)) {
    return reject(`Invalid variable name (bash treats as command): ${name}`)
  }

  // SECURITY: a custom IFS changes how later unquoted $VAR expansions split
  // (`IFS=: && VAR=a:b && rm $VAR` → `rm a b`). BARE_VAR_UNSAFE_RE only
  // covers the default IFS characters, so this cannot be modelled.
  if (name === 'IFS') {
    return reject(
      'IFS assignment changes word-splitting — cannot model statically',
    )
  }

  // SECURITY: PS4 is prompt-expanded for every command traced after
  // `set -x`, so `PS4='$(id)' && set -x && :` runs id while argv only shows
  // `set -x` and `:`. Policy is an ALLOWLIST, not a blocklist:
  //   1. `+=` is refused outright — no dependence on scope tracking;
  //   2. placeholder-derived values are refused — unknowable at analysis time;
  //   3. after removing `${identifier}` references (value reads only), the
  //      remainder must consist solely of [A-Za-z0-9 _+:./=[\]-]. That leaves
  //      no bare `$`, no backslash (blocks octal escapes like \044), no
  //      backtick and no parentheses.
  // A legitimate `PS4='+${BASH_SOURCE}:${LINENO}: '` still passes.
  if (name === 'PS4') {
    if (isAppend) {
      return reject(
        'PS4 += cannot be statically verified — combine into a single PS4= assignment',
      )
    }
    if (containsAnyPlaceholder(value)) {
      return reject(
        'PS4 value derived from cmdsub/variable — runtime unknowable',
      )
    }
    const withoutVarRefs = value.replace(/\$\{[A-Za-z_][A-Za-z0-9_]*\}/g, '')
    if (!/^[A-Za-z0-9 _+:./=[\]-]*$/.test(withoutVarRefs)) {
      return reject(
        'PS4 value outside safe charset — only ${VAR} refs and [A-Za-z0-9 _+:.=/[]-] allowed',
      )
    }
  }

  // SECURITY: bash performs tilde expansion at ASSIGNMENT time for an
  // unquoted `VAR=~/x` (and after `=` / `:` inside the value, e.g.
  // PATH=~/bin:~/sbin), while this walker only sees the literal `~/x`.
  // Conservatively refuse any value that contains `~`.
  if (value.includes('~')) {
    return reject(
      'Tilde in assignment value — bash may expand at assignment time',
    )
  }

  return { name, value, isAppend }
}
| |
/**
 * Resolve a `simple_expansion` (`$VAR`) node to the string that should stand
 * in for it in argv.
 *
 * Outcomes:
 *  - tracked variable holding a pure literal → that literal itself, so
 *    downstream path validation / checkSemantics see the REAL value
 *    (`VAR=/etc && rm $VAR` → argv ['rm', '/etc']);
 *  - tracked variable whose value contains a placeholder, a SAFE_ENV_VARS
 *    name, or a special variable ($?, $$, $@, $1, …) → VAR_PLACEHOLDER, and
 *    only when inside a string;
 *  - everything else → too-complex.
 *
 * @param node the simple_expansion node.
 * @param varScope variables assigned earlier, mapped to their tracked value.
 * @param insideString true when the expansion sits inside a `string` node
 *   ("…$VAR…") rather than being a bare / concatenated argument. As a bare
 *   argument an unknown runtime value IS the argument (`cd $HOME/../x` would
 *   hide the real path behind a placeholder), whereas `echo "Home: $HOME"`
 *   merely embeds text.
 * @returns the resolved string, or a too-complex result.
 */
function resolveSimpleExpansion(
  node: Node,
  varScope: Map<string, string>,
  insideString: boolean,
): string | ParseForSecurityResult {
  // The first name-bearing child decides both the name and its kind.
  const nameNode = node.children.find(
    c => c?.type === 'variable_name' || c?.type === 'special_variable_name',
  )
  if (!nameNode) return tooComplex(node)
  const varName = nameNode.text
  const isSpecial = nameNode.type === 'special_variable_name'

  const tracked = varScope.get(varName)
  if (tracked !== undefined) {
    const isLiteral = !containsAnyPlaceholder(tracked)

    if (insideString) {
      // Non-literal values become VAR_PLACEHOLDER (the string walker has its
      // own gate against a string that is nothing but a placeholder). A
      // literal — including '' — is correct as-is: `"$V"` yields exactly one
      // argument in bash.
      return isLiteral ? tracked : VAR_PLACEHOLDER
    }

    // SECURITY (bare argument): bash word-splits on $IFS and glob-expands.
    //  - non-literal value: the runtime path/flag would be hidden;
    //  - '' : splitting yields ZERO fields, so `V="" && $V eval x` runs
    //    `eval x` while argv would carry a phantom "" in front;
    //  - IFS/glob characters: `VAR="-rf /" && rm $VAR` becomes `rm -rf /`.
    if (!isLiteral || tracked === '' || BARE_VAR_UNSAFE_RE.test(tracked)) {
      return tooComplex(node)
    }
    return tracked
  }

  // Untracked from here on: the value is shell-controlled and unknown, which
  // is acceptable only when embedded in a string.
  if (!insideString) return tooComplex(node)
  if (SAFE_ENV_VARS.has(varName)) return VAR_PLACEHOLDER
  const isKnownSpecial =
    isSpecial && (SPECIAL_VAR_NAMES.has(varName) || /^[0-9]+$/.test(varName))
  return isKnownSpecial ? VAR_PLACEHOLDER : tooComplex(node)
}
| |
/**
 * Record a variable assignment in the tracked scope, honouring `+=` append
 * semantics.
 *
 * The stored value is either the exact literal the variable is known to hold
 * or VAR_PLACEHOLDER ("value not statically known").
 *
 * SECURITY: the result is non-literal — and stored as VAR_PLACEHOLDER so a
 * later bare `$VAR` rejects — whenever:
 *  - either side of the assignment contains a placeholder
 *    (`VAR=/etc && VAR+=$(cmd)` must not leave VAR looking static), or
 *  - `+=` targets a variable that is NOT tracked. Bash appends to whatever
 *    the variable holds at runtime, which may be inherited from the
 *    environment (`HOME+=x` → `/home/userx`), so treating the base as ''
 *    would record a "literal" that differs from what bash actually uses.
 *
 * @param varScope scope map to update in place.
 * @param ev the assignment: target name, resolved value, and whether it was
 *   written with `+=`.
 */
function applyVarToScope(
  varScope: Map<string, string>,
  ev: { name: string; value: string; isAppend: boolean },
): void {
  let next = ev.value
  if (ev.isAppend) {
    const existing = varScope.get(ev.name)
    // Unknown base value → the combined value is unknown too.
    next = existing === undefined ? VAR_PLACEHOLDER : existing + ev.value
  }
  varScope.set(
    ev.name,
    containsAnyPlaceholder(next) ? VAR_PLACEHOLDER : next,
  )
}
| |
/**
 * Drop the first and last character of `text` (the surrounding quote
 * characters of a raw string) and return what lies between them.
 * Inputs shorter than two characters yield ''.
 */
function stripRawString(text: string): string {
  const lastIndex = text.length - 1
  return text.slice(1, lastIndex)
}
| |
/**
 * Build the too-complex result for a node that cannot be analysed.
 * The reason distinguishes parse errors, node types listed in
 * DANGEROUS_TYPES, and any other unhandled node type; the node's type is
 * always reported as `nodeType`.
 */
function tooComplex(node: Node): ParseForSecurityResult {
  const { type } = node
  let reason: string
  if (type === 'ERROR') {
    reason = 'Parse error'
  } else if (DANGEROUS_TYPES.has(type)) {
    reason = `Contains ${type}`
  } else {
    reason = `Unhandled node type: ${type}`
  }
  return { kind: 'too-complex', reason, nodeType: type }
}
| |
| // ──────────────────────────────────────────────────────────────────────────── |
| // Post-argv semantic checks |
| // |
| // Everything above answers "can we tokenize?". Everything below answers |
| // "is the resulting argv dangerous in ways that don't involve parsing?". |
| // These are checks on argv[0] or argv content that the old bashSecurity.ts |
| // validators performed but which have nothing to do with parser |
| // differentials. They're here (not in bashSecurity.ts) because they operate |
| // on SimpleCommand and need to run for every extracted command. |
| // ──────────────────────────────────────────────────────────────────────────── |
| |
/**
 * Names of zsh module builtins. They are zsh internals loaded through
 * zmodload rather than binaries on PATH. BashTool runs through the user's
 * default shell (often zsh) and these parse as ordinary `command` nodes with
 * no distinguishing syntax, so matching by name is the only way to spot them.
 */
const ZSH_DANGEROUS_BUILTINS = new Set<string>([
  // module loading / emulation mode
  'zmodload', 'emulate',
  // zsh/system file-descriptor primitives
  'sysopen', 'sysread', 'syswrite', 'sysseek',
  // pty / network
  'zpty', 'ztcp', 'zsocket',
  // zsh/files builtin file operations
  'zf_rm', 'zf_mv', 'zf_ln', 'zf_chmod', 'zf_chown',
  'zf_mkdir', 'zf_rmdir', 'zf_chgrp',
])
| |
/**
 * Builtins that run their arguments as code or otherwise step outside the
 * argv abstraction. `eval "rm -rf /"` has argv ['eval', 'rm -rf /'], which
 * looks harmless to flag validation yet executes the string. These are
 * treated the same way as command substitution.
 */
const EVAL_LIKE_BUILTINS = new Set<string>([
  'eval', 'source', '.', 'exec', 'command', 'builtin', 'fc',

  // `coproc rm -rf /` starts rm as a coprocess, but tree-sitter yields a
  // plain command with argv[0]='coproc' — rules and path validation would
  // look at 'coproc' instead of 'rm'.
  'coproc',

  // Zsh precommand modifiers (`noglob cmd args`): argv[0] is the modifier
  // and the real command is argv[1], so argv[0] matching would miss it.
  'noglob', 'nocorrect',

  // `trap 'cmd' SIGNAL` runs cmd as shell code on signal/exit; EXIT fires at
  // the end of every BashTool invocation, so execution is guaranteed.
  'trap',

  // `enable -f /path/lib.so name` dlopens an arbitrary .so as a builtin —
  // native code execution.
  'enable',

  // `mapfile -C callback -c N` / `readarray -C callback`: the callback runs
  // as shell code every N input lines.
  'mapfile', 'readarray',

  // `hash -p /path cmd` poisons bash's command-lookup cache, so a later
  // `cmd` in the same command resolves to /path rather than via PATH.
  'hash',

  // `bind -x '"key":cmd'` and `complete -C cmd` are interactive-only
  // callbacks (low impact in non-interactive BashTool shells, blocked for
  // consistency). `compgen -C cmd` is NOT interactive-only: it executes the
  // -C argument immediately to generate completions.
  'bind', 'complete', 'compgen',

  // `alias name='cmd'`: aliases are off by default in non-interactive bash
  // but `shopt -s expand_aliases` turns them on. Also defense-in-depth for
  // an alias followed by a use of its name in the same command.
  'alias',

  // `let EXPR` evaluates EXPR arithmetically, just like $(( EXPR )). Array
  // subscripts expand $(cmd) at evaluation time even when the argument was
  // single-quoted: `let 'x=a[$(id)]'` runs id. tree-sitter sees the
  // raw_string as an opaque leaf and `let` as a plain command node.
  'let',
])
| |
/**
 * Builtins that re-parse a NAME operand internally and arithmetically
 * evaluate `arr[EXPR]` subscripts — including $(cmd) inside the subscript —
 * even when the argv element came from a single-quoted raw_string.
 * `test -v 'a[$(id)]'` → tree-sitter sees an opaque leaf, bash runs id.
 *
 * Maps: builtin name → set of flags whose NEXT argument is a NAME.
 *
 * The record has a null prototype on purpose: it is indexed by a command
 * name, and on an ordinary object literal names such as `constructor`,
 * `toString` or `__proto__` would resolve to inherited Object.prototype
 * members instead of `undefined`, handing callers a non-Set value.
 */
const SUBSCRIPT_EVAL_FLAGS: Record<string, Set<string>> = Object.assign(
  Object.create(null),
  {
    test: new Set(['-v', '-R']),
    '[': new Set(['-v', '-R']),
    '[[': new Set(['-v', '-R']),
    printf: new Set(['-v']),
    read: new Set(['-a']),
    unset: new Set(['-v']),
    // bash 5.1+: `wait -p VAR [id...]` stores the waited PID into VAR. When
    // VAR is `arr[EXPR]`, bash arithmetically evaluates the subscript —
    // running $(cmd) even from a single-quoted raw_string. Verified on
    // bash 5.3.9: `: & wait -p 'a[$(id)]' %1` executes id.
    wait: new Set(['-p']),
  },
)
| |
/**
 * Arithmetic comparison operators of `[[ ARG1 OP ARG2 ]]`. Per the bash
 * manual, "When used with [[, Arg1 and Arg2 are evaluated as arithmetic
 * expressions." That evaluation recursively expands array subscripts, so
 * `[[ 'a[$(id)]' -eq 0 ]]` runs `id` although tree-sitter sees the operand
 * as an opaque raw_string leaf.
 *
 * In contrast to -v/-R (unary, NAME follows the flag) these operators are
 * binary: the subscript may sit on EITHER side, so the "next argument" logic
 * of SUBSCRIPT_EVAL_FLAGS does not cover them.
 *
 * `[` / `test` are not vulnerable (bash reports "integer expression
 * expected"), but the test_command handler normalizes argv[0]='[[' for both
 * forms, so they receive this check as well — mild over-blocking on the
 * safe side.
 */
const TEST_ARITH_CMP_OPS = new Set<string>([
  '-eq', '-ne',
  '-lt', '-le',
  '-gt', '-ge',
])
| |
/**
 * Builtins for which EVERY non-flag positional argument is a NAME that bash
 * re-parses, arithmetically evaluating any subscript — no flag needed.
 *
 *  - `read 'a[$(id)]'` runs id: each positional is a variable to assign
 *    into, and `arr[EXPR]` is valid there.
 *  - `unset NAME...` behaves the same way. tree-sitter's unset_command
 *    handler currently rejects raw_string children before this point, so the
 *    entry is defense-in-depth.
 *
 * Deliberately excluded: printf (positionals are FORMAT/data) and test/[
 * (operands are values; only -v/-R take a NAME). declare/typeset/local are
 * dealt with in declaration_command and never arrive as plain commands.
 */
const BARE_SUBSCRIPT_NAME_BUILTINS = new Set<string>(['read', 'unset'])
| |
/**
 * `read` flags whose FOLLOWING argument is data (prompt, delimiter, count,
 * fd, …) rather than a NAME, so that `read -p '[foo] ' var` is not flagged
 * because of the `[` in the prompt text. `-a` is left out on purpose: its
 * operand IS a NAME.
 */
const READ_DATA_FLAGS = new Set<string>([
  '-p',
  '-d',
  '-n',
  '-N',
  '-t',
  '-u',
  '-i',
])
| |
| // SHELL_KEYWORDS imported from bashParser.ts — shell reserved words can never |
| // be legitimate argv[0]; if they appear, the parser mis-parsed a compound |
| // command. Reject to avoid nonsense argv reaching downstream. |
| |
/**
 * Matches a reference to a procfs `environ` file anywhere in a string.
 * The middle part is `.*` rather than `[^/]*` because Linux resolves `..`
 * inside procfs: `/proc/self/../self/environ` works and has to be caught.
 */
const PROC_ENVIRON_RE = /\/proc\/.*\/environ/
| |
/**
 * Detects a newline followed (after optional spaces/tabs) by `#` inside an
 * argv element, env var value, or redirect target. Downstream,
 * stripSafeWrappers re-tokenizes `.text` line by line and reads a `#` after
 * a newline as the start of a comment, which would hide the arguments that
 * follow it.
 */
const NEWLINE_HASH_RE = /\n[ \t]*#/
| |
/**
 * Outcome of the post-argv semantic checks: either success, or a failure
 * carrying a human-readable reason.
 */
export type SemanticCheckResult =
  | { ok: true }
  | { ok: false; reason: string }
| |
| /** |
| * Post-argv semantic checks. Run after parseForSecurity returns 'simple' to |
| * catch commands that tokenize fine but are dangerous by name or argument |
| * content. Returns the first failure or {ok: true}. |
| */ |
| export function checkSemantics(commands: SimpleCommand[]): SemanticCheckResult { |
| for (const cmd of commands) { |
| // Strip safe wrapper commands (nohup, time, timeout N, nice -n N) so |
| // `nohup eval "..."` and `timeout 5 jq 'system(...)'` are checked |
| // against the wrapped command, not the wrapper. Inlined here to avoid |
| // circular import with bashPermissions.ts. |
| let a = cmd.argv |
| for (;;) { |
| if (a[0] === 'time' || a[0] === 'nohup') { |
| a = a.slice(1) |
| } else if (a[0] === 'timeout') { |
| // `timeout 5`, `timeout 5s`, `timeout 5.5`, plus optional GNU flags |
| // preceding the duration. Long: --foreground, --kill-after=N, |
| // --signal=SIG, --preserve-status. Short: -k DUR, -s SIG, -v (also |
| // fused: -k5, -sTERM). |
| // SECURITY (SAST Mar 2026): the previous loop only skipped `--long` |
| // flags, so `timeout -k 5 10 eval ...` broke out with name='timeout' |
| // and the wrapped eval was never checked. Now handle known short |
| // flags AND fail closed on any unrecognized flag — an unknown flag |
| // means we can't locate the wrapped command, so we must not silently |
| // fall through to name='timeout'. |
| let i = 1 |
| while (i < a.length) { |
| const arg = a[i]! |
| if ( |
| arg === '--foreground' || |
| arg === '--preserve-status' || |
| arg === '--verbose' |
| ) { |
| i++ // known no-value long flags |
| } else if (/^--(?:kill-after|signal)=[A-Za-z0-9_.+-]+$/.test(arg)) { |
| i++ // --kill-after=5, --signal=TERM (value fused with =) |
| } else if ( |
| (arg === '--kill-after' || arg === '--signal') && |
| a[i + 1] && |
| /^[A-Za-z0-9_.+-]+$/.test(a[i + 1]!) |
| ) { |
| i += 2 // --kill-after 5, --signal TERM (space-separated) |
| } else if (arg.startsWith('--')) { |
| // Unknown long flag, OR --kill-after/--signal with non-allowlisted |
| // value (e.g. placeholder from $() substitution). Fail closed. |
| return { |
| ok: false, |
| reason: `timeout with ${arg} flag cannot be statically analyzed`, |
| } |
| } else if (arg === '-v') { |
| i++ // --verbose, no argument |
| } else if ( |
| (arg === '-k' || arg === '-s') && |
| a[i + 1] && |
| /^[A-Za-z0-9_.+-]+$/.test(a[i + 1]!) |
| ) { |
| i += 2 // -k DURATION / -s SIGNAL — separate value |
| } else if (/^-[ks][A-Za-z0-9_.+-]+$/.test(arg)) { |
| i++ // fused: -k5, -sTERM |
| } else if (arg.startsWith('-')) { |
| // Unknown flag OR -k/-s with non-allowlisted value — can't locate |
| // wrapped cmd. Reject, don't fall through to name='timeout'. |
| return { |
| ok: false, |
| reason: `timeout with ${arg} flag cannot be statically analyzed`, |
| } |
| } else { |
| break // non-flag — should be the duration |
| } |
| } |
| if (a[i] && /^\d+(?:\.\d+)?[smhd]?$/.test(a[i]!)) { |
| a = a.slice(i + 1) |
| } else if (a[i]) { |
| // SECURITY (PR #21503 round 3): a[i] exists but doesn't match our |
| // duration regex. GNU timeout parses via xstrtod() (libc strtod) and |
| // accepts `.5`, `+5`, `5e-1`, `inf`, `infinity`, hex floats — none |
| // of which match `/^\d+(\.\d+)?[smhd]?$/`. Empirically verified: |
| // `timeout .5 echo ok` works. Previously this branch `break`ed |
| // (fail-OPEN) so `timeout .5 eval "id"` with `Bash(timeout:*)` left |
| // name='timeout' and eval was never checked. Now fail CLOSED — |
| // consistent with the unknown-FLAG handling above (lines ~1895,1912). |
| return { |
| ok: false, |
| reason: `timeout duration '${a[i]}' cannot be statically analyzed`, |
| } |
| } else { |
| break // no more args — `timeout` alone, inert |
| } |
| } else if (a[0] === 'nice') { |
| // `nice cmd`, `nice -n N cmd`, `nice -N cmd` (legacy). All run cmd |
| // at a lower priority. argv[0] check must see the wrapped cmd. |
| if (a[1] === '-n' && a[2] && /^-?\d+$/.test(a[2])) { |
| a = a.slice(3) |
| } else if (a[1] && /^-\d+$/.test(a[1])) { |
| a = a.slice(2) // `nice -10 cmd` |
| } else if (a[1] && /[$(`]/.test(a[1])) { |
| // SECURITY: walkArgument returns node.text for arithmetic_expansion, |
| // so `nice $((0-5)) jq ...` has a[1]='$((0-5))'. Bash expands it to |
| // '-5' (legacy nice syntax) and execs jq; we'd slice(1) here and |
| // set name='$((0-5))' which skips the jq system() check entirely. |
| // Fail closed — mirrors the timeout-duration fail-closed above. |
| return { |
| ok: false, |
| reason: `nice argument '${a[1]}' contains expansion — cannot statically determine wrapped command`, |
| } |
| } else { |
| a = a.slice(1) // bare `nice cmd` |
| } |
| } else if (a[0] === 'env') { |
| // `env [VAR=val...] [-i] [-0] [-v] [-u NAME...] cmd args` runs cmd. |
| // argv[0] check must see cmd, not env. Skip known-safe forms only. |
| // SECURITY: -S splits a string into argv (mini-shell) — must reject. |
| // -C/-P change cwd/PATH — wrapped cmd runs elsewhere, reject. |
| // Any OTHER flag → reject (fail-closed, not fail-open to name='env'). |
| let i = 1 |
| while (i < a.length) { |
| const arg = a[i]! |
| if (arg.includes('=') && !arg.startsWith('-')) { |
| i++ // VAR=val assignment |
| } else if (arg === '-i' || arg === '-0' || arg === '-v') { |
| i++ // flags with no argument |
| } else if (arg === '-u' && a[i + 1]) { |
| i += 2 // -u NAME unsets; takes one arg |
| } else if (arg.startsWith('-')) { |
| // -S (argv splitter), -C (altwd), -P (altpath), --anything, |
| // or unknown flag. Can't model — reject the whole command. |
| return { |
| ok: false, |
| reason: `env with ${arg} flag cannot be statically analyzed`, |
| } |
| } else { |
| break // the wrapped command |
| } |
| } |
| if (i < a.length) { |
| a = a.slice(i) |
| } else { |
| break // `env` alone (no wrapped cmd) — inert, name='env' |
| } |
| } else if (a[0] === 'stdbuf') { |
| // `stdbuf -o0 cmd` (fused), `stdbuf -o 0 cmd` (space-separated), |
| // multiple flags (`stdbuf -o0 -eL cmd`), long forms (`--output=0`). |
| // SECURITY: previous handling only stripped ONE flag and fell through |
| // to slice(2) for anything unrecognized, so `stdbuf --output 0 eval` |
| // → ['0','eval',...] → name='0' hid eval. Now iterate all known flag |
| // forms and fail closed on any unknown flag. |
| let i = 1 |
| while (i < a.length) { |
| const arg = a[i]! |
| if (STDBUF_SHORT_SEP_RE.test(arg) && a[i + 1]) { |
| i += 2 // -o MODE (space-separated) |
| } else if (STDBUF_SHORT_FUSED_RE.test(arg)) { |
| i++ // -o0 (fused) |
| } else if (STDBUF_LONG_RE.test(arg)) { |
| i++ // --output=MODE (fused long) |
| } else if (arg.startsWith('-')) { |
| // --output MODE (space-separated long) or unknown flag. GNU |
| // stdbuf long options use `=` syntax, but getopt_long also |
| // accepts space-separated — we can't enumerate safely, reject. |
| return { |
| ok: false, |
| reason: `stdbuf with ${arg} flag cannot be statically analyzed`, |
| } |
| } else { |
| break // the wrapped command |
| } |
| } |
| if (i > 1 && i < a.length) { |
| a = a.slice(i) |
| } else { |
| break // `stdbuf` with no flags or no wrapped cmd — inert |
| } |
| } else { |
| break |
| } |
| } |
| const name = a[0] |
| if (name === undefined) continue |
| |
| // SECURITY: Empty command name. Quoted empty (`"" cmd`) is harmless — |
| // bash tries to exec "" and fails with "command not found". But an |
| // UNQUOTED empty expansion at command position (`V="" && $V cmd`) is a |
| // bypass: bash drops the empty field and runs `cmd` as argv[0], while |
| // our name="" skips every builtin check below. resolveSimpleExpansion |
| // rejects the $V case; this catches any other path to empty argv[0] |
| // (concatenation of empties, walkString whitespace-quirk, future bugs). |
| if (name === '') { |
| return { |
| ok: false, |
| reason: 'Empty command name — argv[0] may not reflect what bash runs', |
| } |
| } |
| |
| // Defense-in-depth: argv[0] should never be a placeholder after the |
| // var-tracking fix (static vars return real value, unknown vars reject). |
| // But if a bug upstream ever lets one through, catch it here — a |
| // placeholder-as-command-name means runtime-determined command → unsafe. |
| if (name.includes(CMDSUB_PLACEHOLDER) || name.includes(VAR_PLACEHOLDER)) { |
| return { |
| ok: false, |
| reason: 'Command name is runtime-determined (placeholder argv[0])', |
| } |
| } |
| |
| // argv[0] starts with an operator/flag: this is a fragment, not a |
| // command. Likely a line-continuation leak or a mistake. |
| if (name.startsWith('-') || name.startsWith('|') || name.startsWith('&')) { |
| return { |
| ok: false, |
| reason: 'Command appears to be an incomplete fragment', |
| } |
| } |
| |
| // SECURITY: builtins that re-parse a NAME operand internally. bash |
| // arithmetically evaluates `arr[EXPR]` in NAME position, running $(cmd) |
| // in the subscript even when the argv element arrived from a |
| // single-quoted raw_string (opaque leaf to tree-sitter). Two forms: |
| // separate (`printf -v NAME`) and fused (`printf -vNAME`, getopt-style). |
| // `printf '[%s]' x` stays safe — `[` in format string, not after `-v`. |
| const dangerFlags = SUBSCRIPT_EVAL_FLAGS[name] |
| if (dangerFlags !== undefined) { |
| for (let i = 1; i < a.length; i++) { |
| const arg = a[i]! |
| // Separate form: `-v` then NAME in next arg. |
| if (dangerFlags.has(arg) && a[i + 1]?.includes('[')) { |
| return { |
| ok: false, |
| reason: `'${name} ${arg}' operand contains array subscript — bash evaluates $(cmd) in subscripts`, |
| } |
| } |
| // Combined short flags: `-ra` is bash shorthand for `-r -a`. |
| // Check if any danger flag character appears in a combined flag |
| // string. The danger flag's NAME operand is the next argument. |
| if ( |
| arg.length > 2 && |
| arg[0] === '-' && |
| arg[1] !== '-' && |
| !arg.includes('[') |
| ) { |
| for (const flag of dangerFlags) { |
| if (flag.length === 2 && arg.includes(flag[1]!)) { |
| if (a[i + 1]?.includes('[')) { |
| return { |
| ok: false, |
| reason: `'${name} ${flag}' (combined in '${arg}') operand contains array subscript — bash evaluates $(cmd) in subscripts`, |
| } |
| } |
| } |
| } |
| } |
| // Fused form: `-vNAME` in one arg. Only short-option flags fuse |
| // (getopt), so check -v/-a/-R. `[[` uses test_operator nodes only. |
| for (const flag of dangerFlags) { |
| if ( |
| flag.length === 2 && |
| arg.startsWith(flag) && |
| arg.length > 2 && |
| arg.includes('[') |
| ) { |
| return { |
| ok: false, |
| reason: `'${name} ${flag}' (fused) operand contains array subscript — bash evaluates $(cmd) in subscripts`, |
| } |
| } |
| } |
| } |
| } |
| |
| // SECURITY: `[[ ARG OP ARG ]]` arithmetic comparison. bash evaluates |
| // BOTH operands as arithmetic expressions, recursively expanding |
| // `arr[$(cmd)]` subscripts even from single-quoted raw_string. Check |
| // the operand adjacent to each arith-cmp operator on BOTH sides — |
| // SUBSCRIPT_EVAL_FLAGS's "flag then next-arg" pattern can't express |
| // "either side of a binary op". String comparisons (==/!=/=~) do NOT |
| // trigger arithmetic eval — `[[ 'a[x]' == y ]]` is a literal string cmp. |
| if (name === '[[') { |
| // i starts at 2: a[0]='[[' (contains '['), a[1] is the first real |
| // operand. A binary op can't appear before index 2. |
| for (let i = 2; i < a.length; i++) { |
| if (!TEST_ARITH_CMP_OPS.has(a[i]!)) continue |
| if (a[i - 1]?.includes('[') || a[i + 1]?.includes('[')) { |
| return { |
| ok: false, |
| reason: `'[[ ... ${a[i]} ... ]]' operand contains array subscript — bash arithmetically evaluates $(cmd) in subscripts`, |
| } |
| } |
| } |
| } |
| |
| // SECURITY: `read`/`unset` treat EVERY bare positional as a NAME — |
| // no flag needed. `read 'a[$(id)]' <<< data` executes id even though |
| // argv[1] arrived from a single-quoted raw_string and no -a flag is |
| // present. Same primitive as SUBSCRIPT_EVAL_FLAGS but the trigger is |
| // positional, not flag-gated. Skip operands of read's data-taking |
| // flags (-p PROMPT etc.) to avoid blocking `read -p '[foo] ' var`. |
| if (BARE_SUBSCRIPT_NAME_BUILTINS.has(name)) { |
| let skipNext = false |
| for (let i = 1; i < a.length; i++) { |
| const arg = a[i]! |
| if (skipNext) { |
| skipNext = false |
| continue |
| } |
| if (arg[0] === '-') { |
| if (name === 'read') { |
| if (READ_DATA_FLAGS.has(arg)) { |
| skipNext = true |
| } else if (arg.length > 2 && arg[1] !== '-') { |
| // Combined short flag like `-rp`. Getopt-style: first |
| // data-flag char consumes rest-of-arg as its operand |
| // (`-p[foo]` → prompt=`[foo]`), or next-arg if last |
| // (`-rp '[foo]'` → prompt=`[foo]`). So skipNext iff a |
| // data-flag char appears at the END after only no-arg |
| // flags like `-r`/`-s`. |
| for (let j = 1; j < arg.length; j++) { |
| if (READ_DATA_FLAGS.has('-' + arg[j])) { |
| if (j === arg.length - 1) skipNext = true |
| break |
| } |
| } |
| } |
| } |
| continue |
| } |
| if (arg.includes('[')) { |
| return { |
| ok: false, |
| reason: `'${name}' positional NAME '${arg}' contains array subscript — bash evaluates $(cmd) in subscripts`, |
| } |
| } |
| } |
| } |
| |
| // SECURITY: Shell reserved keywords as argv[0] indicate a tree-sitter |
| // mis-parse. `! for i in a; do :; done` parses as `command "for i in a"` |
| // + `command "do :"` + `command "done"` — tree-sitter fails to recognize |
| // `for` after `!` as a compound command start. Reject: keywords can never |
| // be legitimate command names, and argv like ['do','false'] is nonsense. |
| if (SHELL_KEYWORDS.has(name)) { |
| return { |
| ok: false, |
| reason: `Shell keyword '${name}' as command name — tree-sitter mis-parse`, |
| } |
| } |
| |
| // Check argv (not .text) to catch both single-quote (`'\n#'`) and |
| // double-quote (`"\n#"`) variants. Env vars and redirects are also |
| // part of the .text span so the same downstream bug applies. |
| // Heredoc bodies are excluded from argv so markdown `##` headers |
| // don't trigger this. |
| // TODO: remove once downstream path validation operates on argv. |
| for (const arg of cmd.argv) { |
| if (arg.includes('\n') && NEWLINE_HASH_RE.test(arg)) { |
| return { |
| ok: false, |
| reason: |
| 'Newline followed by # inside a quoted argument can hide arguments from path validation', |
| } |
| } |
| } |
| for (const ev of cmd.envVars) { |
| if (ev.value.includes('\n') && NEWLINE_HASH_RE.test(ev.value)) { |
| return { |
| ok: false, |
| reason: |
| 'Newline followed by # inside an env var value can hide arguments from path validation', |
| } |
| } |
| } |
| for (const r of cmd.redirects) { |
| if (r.target.includes('\n') && NEWLINE_HASH_RE.test(r.target)) { |
| return { |
| ok: false, |
| reason: |
| 'Newline followed by # inside a redirect target can hide arguments from path validation', |
| } |
| } |
| } |
| |
| // jq's system() built-in executes arbitrary shell commands, and flags |
| // like --from-file can read arbitrary files into jq variables. On the |
| // legacy path these are caught by validateJqCommand in bashSecurity.ts, |
| // but that validator is gated behind `astSubcommands === null` and |
| // never runs when the AST parse succeeds. Mirror the checks here so |
| // the AST path has the same defence. |
| if (name === 'jq') { |
| for (const arg of a) { |
| if (/\bsystem\s*\(/.test(arg)) { |
| return { |
| ok: false, |
| reason: |
| 'jq command contains system() function which executes arbitrary commands', |
| } |
| } |
| } |
| if ( |
| a.some(arg => |
| /^(?:-[fL](?:$|[^A-Za-z])|--(?:from-file|rawfile|slurpfile|library-path)(?:$|=))/.test( |
| arg, |
| ), |
| ) |
| ) { |
| return { |
| ok: false, |
| reason: |
| 'jq command contains dangerous flags that could execute code or read arbitrary files', |
| } |
| } |
| } |
| |
| if (ZSH_DANGEROUS_BUILTINS.has(name)) { |
| return { |
| ok: false, |
| reason: `Zsh builtin '${name}' can bypass security checks`, |
| } |
| } |
| |
| if (EVAL_LIKE_BUILTINS.has(name)) { |
| // `command -v foo` / `command -V foo` are POSIX existence checks that |
| // only print paths — they never execute argv[1]. Bare `command foo` |
| // does bypass function/alias lookup (the concern), so keep blocking it. |
| if (name === 'command' && (a[1] === '-v' || a[1] === '-V')) { |
| // fall through to remaining checks |
| } else if ( |
| name === 'fc' && |
| !a.slice(1).some(arg => /^-[^-]*[es]/.test(arg)) |
| ) { |
| // `fc -l`, `fc -ln` list history — safe. `fc -e ed` invokes an |
| // editor then executes. `fc -s [pat=rep]` RE-EXECUTES the last |
| // matching command (optionally with substitution) — as dangerous |
| // as eval. Block only short-opts containing `e` or `s`, rather than |
| // all of `fc`, to avoid introducing FPs for `fc -l` (list history). |
| } else if ( |
| name === 'compgen' && |
| !a.slice(1).some(arg => /^-[^-]*[CFW]/.test(arg)) |
| ) { |
| // `compgen -c/-f/-v` only list completions — safe. `compgen -C cmd` |
| // immediately executes cmd; `-F func` calls a shell function; `-W list` |
| // word-expands its argument (including $(cmd) even from single-quoted |
| // raw_string). Block any short-opt containing C/F/W (case-sensitive: |
| // -c/-f are safe). |
| } else { |
| return { |
| ok: false, |
| reason: `'${name}' evaluates arguments as shell code`, |
| } |
| } |
| } |
| |
| // /proc/*/environ exposes env vars (including secrets) of other processes. |
| // Check argv and redirect targets — `cat /proc/self/environ` and |
| // `cat < /proc/self/environ` both read it. |
| for (const arg of cmd.argv) { |
| if (arg.includes('/proc/') && PROC_ENVIRON_RE.test(arg)) { |
| return { |
| ok: false, |
| reason: 'Accesses /proc/*/environ which may expose secrets', |
| } |
| } |
| } |
| for (const r of cmd.redirects) { |
| if (r.target.includes('/proc/') && PROC_ENVIRON_RE.test(r.target)) { |
| return { |
| ok: false, |
| reason: 'Accesses /proc/*/environ which may expose secrets', |
| } |
| } |
| } |
| } |
| return { ok: true } |
| } |
| |