|
|
""" |
|
|
Pattern-based security vulnerability detector using regular expressions. |
|
|
|
|
|
Detects hardcoded secrets, credentials, and sensitive information patterns. |
|
|
""" |
|
|
|
|
|
import re |
|
|
from typing import Dict, List, Any |
|
|
|
|
|
|
|
|
# Registry of detection rules, keyed by pattern name. Every entry carries:
#   "regex"       - pattern source string (scan_patterns applies it per line
#                   with re.IGNORECASE)
#   "severity"    - one of CRITICAL / HIGH / MEDIUM / LOW
#   "title"       - short headline for the finding
#   "description" - one-sentence explanation of the finding
SECURITY_PATTERNS: Dict[str, Dict[str, str]] = {
    # --- Cloud / service credentials -------------------------------------
    "aws_access_key": {
        # Assignment to AWS_ACCESS_KEY_ID followed by "AKIA" + 16 characters.
        "regex": r"(?:AWS_ACCESS_KEY_ID|aws_access_key_id)\s*[:=]\s*['\"]?(AKIA[0-9A-Z]{16})['\"]?",
        "severity": "CRITICAL",
        "title": "Hardcoded AWS Access Key detected",
        "description": "AWS Access Key is hardcoded in the source code.",
    },
    "aws_secret_key": {
        # Assignment to AWS_SECRET_ACCESS_KEY followed by a 40-character value.
        "regex": r"(?:AWS_SECRET_ACCESS_KEY|aws_secret_access_key)\s*[:=]\s*['\"]?([A-Za-z0-9/+=]{40})['\"]?",
        "severity": "CRITICAL",
        "title": "Hardcoded AWS Secret Key detected",
        "description": "AWS Secret Access Key is hardcoded in the source code.",
    },
    "api_key": {
        # api_key / apikey / api_secret assigned a quoted value of 20+ chars.
        "regex": r"(?:api[_-]?key|apikey|api[_-]?secret)\s*[:=]\s*['\"]([a-zA-Z0-9_\-]{20,})['\"]",
        "severity": "HIGH",
        "title": "Hardcoded API key detected",
        "description": "API key is directly hardcoded in the source code.",
    },
    "github_token": {
        # "ghp_" or "ghs_" prefix followed by at least 36 alphanumerics.
        "regex": r"\b(gh[ps]_[a-zA-Z0-9]{36,})\b",
        "severity": "HIGH",
        "title": "GitHub Personal Access Token detected",
        "description": "GitHub personal access token is exposed in the source code.",
    },
    "jwt_token": {
        # Three dot-separated base64url segments; the first two start "eyJ".
        "regex": r"\b(eyJ[a-zA-Z0-9_-]*\.eyJ[a-zA-Z0-9_-]*\.[a-zA-Z0-9_-]+)\b",
        "severity": "HIGH",
        "title": "Hardcoded JWT token detected",
        "description": "JWT token is hardcoded in the source code.",
    },
    "password": {
        # password / passwd / pwd assigned a quoted value of 4+ characters.
        "regex": r"(?:password|passwd|pwd)\s*[:=]\s*['\"]([^'\"]{4,})['\"]",
        "severity": "MEDIUM",
        "title": "Hardcoded password detected",
        "description": "Password is directly written in the source code.",
    },
    # --- Personal data ---------------------------------------------------
    "ssn": {
        # Six digits, a hyphen, then seven digits.
        "regex": r"\b(\d{6}[-]\d{7})\b",
        "severity": "MEDIUM",
        "title": "Social Security Number pattern detected",
        "description": "Data matching SSN format found in the source code.",
    },
    "credit_card": {
        # Four groups of four digits, optionally separated by "-" or space.
        "regex": r"\b(\d{4}[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4})\b",
        "severity": "MEDIUM",
        "title": "Credit card number pattern detected",
        "description": "Data matching credit card number format found.",
    },
    "phone_number": {
        # Leading 0, then 1-2 digits, 3-4 digits and 4 digits.
        "regex": r"\b(0\d{1,2}[-\s]?\d{3,4}[-\s]?\d{4})\b",
        "severity": "LOW",
        "title": "Phone number pattern detected",
        "description": "Phone number is included in the source code.",
    },
    # --- Infrastructure secrets ------------------------------------------
    "database_url": {
        # scheme://user:password@ ... The user part may be empty so that
        # password-only URLs such as "redis://:secret@host" are caught too.
        "regex": r"(?:postgresql|mysql|mongodb|redis)://([^:]*):([^@]+)@",
        "severity": "CRITICAL",
        "title": "Database connection string contains credentials",
        "description": "Database connection string includes username and password.",
    },
    "private_key": {
        # PEM header of a private key: bare PKCS#8, encrypted PKCS#8,
        # OpenSSH, or the algorithm-specific RSA / EC / DSA variants.
        "regex": r"-----BEGIN (?:RSA |EC |DSA |OPENSSH |ENCRYPTED )?PRIVATE KEY-----",
        "severity": "CRITICAL",
        "title": "Hardcoded private key detected",
        "description": "Cryptographic private key is directly included in the source code.",
    },
}
|
|
|
|
|
|
|
|
def is_false_positive(line: str, pattern_type: str) -> bool:
    """
    Check if a detected pattern is likely a false positive.

    A line is treated as a false positive when any of these hold:
      * it is a ``#`` comment line (after stripping leading whitespace);
      * it contains a test/placeholder indicator word (case-insensitive
        substring match);
      * a pattern-specific heuristic for ``credit_card``, ``phone_number``
        or ``password`` applies.

    Args:
        line: The line of code containing the match
        pattern_type: Type of pattern detected

    Returns:
        True if likely a false positive, False otherwise
    """
    if line.strip().startswith("#"):
        return True

    # Indicators are compared against the lowercased line, so every entry
    # must itself be lowercase ("todo"/"fixme" could never match otherwise).
    test_indicators = (
        "example",
        "test",
        "dummy",
        "fake",
        "sample",
        "mock",
        "placeholder",
        "todo",
        "fixme",
        "xxx",
        "000",
    )
    line_lower = line.lower()
    if any(indicator in line_lower for indicator in test_indicators):
        return True

    # A year-like number (19xx / 20xx) anywhere on the line suggests the
    # digit run is date-related rather than a card number.
    if pattern_type == "credit_card" and re.search(r"(19|20)\d{2}", line):
        return True

    # Lines mentioning "date" or "time" are assumed to hold timestamps.
    if pattern_type == "phone_number" and (
        "date" in line_lower or "time" in line_lower
    ):
        return True

    # A line ending in an empty password assignment (e.g. password = "").
    if pattern_type == "password" and re.search(
        r'password\s*[:=]\s*["\']?\s*["\']?$', line
    ):
        return True

    return False
|
|
|
|
|
|
|
|
def scan_patterns(file_path: str, code: str) -> List[Dict[str, Any]]:
    """
    Scan code for security vulnerability patterns.

    Every line of ``code`` is checked against every entry in
    ``SECURITY_PATTERNS`` (case-insensitively). Lines rejected by
    ``is_false_positive`` are skipped. One finding is emitted per regex
    match, so a line with two matches of the same pattern yields two
    identical findings.

    Args:
        file_path: Path to the file being scanned (for reference)
        code: Source code to scan

    Returns:
        List of vulnerability dictionaries with the keys ``id``,
        ``severity``, ``title``, ``description``, ``line_number``
        (1-based), ``code_snippet`` (stripped line, capped at 100
        characters), ``pattern_type``, ``file_path`` and ``scanner``.
    """
    # Compile each rule once per call instead of once per line.
    compiled_patterns = [
        (name, info, re.compile(info["regex"], re.IGNORECASE))
        for name, info in SECURITY_PATTERNS.items()
    ]

    vulnerabilities: List[Dict[str, Any]] = []

    for line_num, line in enumerate(code.split("\n"), start=1):
        for pattern_name, pattern_info, compiled in compiled_patterns:
            match_count = sum(1 for _ in compiled.finditer(line))
            if not match_count:
                continue

            # The verdict depends only on the line and the pattern, so one
            # check covers every match of this pattern on this line.
            if is_false_positive(line, pattern_name):
                continue

            code_snippet = line.strip()
            if len(code_snippet) > 100:
                code_snippet = code_snippet[:97] + "..."

            for _ in range(match_count):
                vulnerabilities.append(
                    {
                        "id": f"pattern-{pattern_name}",
                        "severity": pattern_info["severity"],
                        "title": pattern_info["title"],
                        "description": pattern_info["description"],
                        "line_number": line_num,
                        "code_snippet": code_snippet,
                        "pattern_type": pattern_name,
                        "file_path": file_path,
                        "scanner": "pattern_detector",
                    }
                )

    return vulnerabilities
|
|
|
|
|
|
|
|
def get_pattern_info(pattern_type: str) -> Dict[str, str]:
    """
    Look up the registry entry for a pattern type.

    Args:
        pattern_type: Type of security pattern

    Returns:
        The matching ``SECURITY_PATTERNS`` entry, or a generic MEDIUM
        fallback (which has no "regex" key) when the type is unknown
    """
    if pattern_type in SECURITY_PATTERNS:
        return SECURITY_PATTERNS[pattern_type]

    # Unknown pattern type: hand back a generic placeholder record.
    fallback = {
        "severity": "MEDIUM",
        "title": "Security pattern detected",
        "description": "Unknown security pattern found.",
    }
    return fallback
|
|
|
|
|
|
|
|
def list_available_patterns() -> List[str]:
    """
    Return the names of every registered security pattern.

    Returns:
        A new list of pattern names, in registry order
    """
    return [pattern_name for pattern_name in SECURITY_PATTERNS]
|
|
|
|
|
|
|
|
def get_patterns_by_severity(severity: str) -> List[str]:
    """
    Get patterns filtered by severity level.

    Args:
        severity: Severity level (CRITICAL, HIGH, MEDIUM, LOW); matched
            case-insensitively

    Returns:
        List of pattern names with matching severity
    """
    # Normalise once up front rather than once per registry entry.
    wanted = severity.upper()
    return [
        name
        for name, info in SECURITY_PATTERNS.items()
        if info["severity"] == wanted
    ]