File size: 7,546 Bytes
f8e78b2 41329d5 f8e78b2 41329d5 f8e78b2 41329d5 f8e78b2 41329d5 f8e78b2 41329d5 f8e78b2 41329d5 f8e78b2 41329d5 f8e78b2 41329d5 f8e78b2 41329d5 f8e78b2 41329d5 f8e78b2 41329d5 f8e78b2 41329d5 f8e78b2 41329d5 f8e78b2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 |
"""
Pattern-based security vulnerability detector using regular expressions.
Detects hardcoded secrets, credentials, and sensitive information patterns.
"""
import re
from typing import Dict, List, Any
# Registry of detection rules, keyed by pattern name.
#
# Every entry carries the same four string fields:
#   regex       - pattern source; scan_patterns() applies it to one line at a
#                 time with re.IGNORECASE, so character classes such as
#                 [0-9A-Z] also accept lower-case letters at scan time.
#   severity    - one of CRITICAL / HIGH / MEDIUM / LOW.
#   title       - short headline copied into each finding.
#   description - one-sentence explanation copied into each finding.
SECURITY_PATTERNS: Dict[str, Dict[str, str]] = {
    # Key name, ':' or '=', optional quote, then "AKIA" + 16 alphanumerics.
    "aws_access_key": {
        "regex": r"(?:AWS_ACCESS_KEY_ID|aws_access_key_id)\s*[:=]\s*['\"]?(AKIA[0-9A-Z]{16})['\"]?",
        "severity": "CRITICAL",
        "title": "Hardcoded AWS Access Key detected",
        "description": "AWS Access Key is hardcoded in the source code.",
    },
    # Key name, ':' or '=', optional quote, then exactly 40 base64-style chars.
    "aws_secret_key": {
        "regex": r"(?:AWS_SECRET_ACCESS_KEY|aws_secret_access_key)\s*[:=]\s*['\"]?([A-Za-z0-9/+=]{40})['\"]?",
        "severity": "CRITICAL",
        "title": "Hardcoded AWS Secret Key detected",
        "description": "AWS Secret Access Key is hardcoded in the source code.",
    },
    # Quoted value of 20+ word/hyphen characters assigned to an api key name.
    "api_key": {
        "regex": r"(?:api[_-]?key|apikey|api[_-]?secret)\s*[:=]\s*['\"]([a-zA-Z0-9_\-]{20,})['\"]",
        "severity": "HIGH",
        "title": "Hardcoded API key detected",
        "description": "API key is directly hardcoded in the source code.",
    },
    # "ghp_" / "ghs_" prefix followed by at least 36 alphanumerics.
    "github_token": {
        "regex": r"\b(gh[ps]_[a-zA-Z0-9]{36,})\b",
        "severity": "HIGH",
        "title": "GitHub Personal Access Token detected",
        "description": "GitHub personal access token is exposed in the source code.",
    },
    # Three dot-separated base64url segments; the first two start with "eyJ".
    "jwt_token": {
        "regex": r"\b(eyJ[a-zA-Z0-9_-]*\.eyJ[a-zA-Z0-9_-]*\.[a-zA-Z0-9_-]+)\b",
        "severity": "HIGH",
        "title": "Hardcoded JWT token detected",
        "description": "JWT token is hardcoded in the source code.",
    },
    # Quoted value of 4+ characters assigned to a password-like name.
    "password": {
        "regex": r"(?:password|passwd|pwd)\s*[:=]\s*['\"]([^'\"]{4,})['\"]",
        "severity": "MEDIUM",
        "title": "Hardcoded password detected",
        "description": "Password is directly written in the source code.",
    },
    # NOTE(review): this matches 6 digits, a hyphen, then 7 digits - not the
    # US 3-2-4 SSN layout. It looks like a national-ID format such as the
    # South Korean resident registration number; confirm the intended target
    # before rewording the title/description.
    "ssn": {
        "regex": r"\b(\d{6}[-]\d{7})\b",
        "severity": "MEDIUM",
        "title": "Social Security Number pattern detected",
        "description": "Data matching SSN format found in the source code.",
    },
    # Four groups of four digits, optionally separated by '-' or whitespace.
    "credit_card": {
        "regex": r"\b(\d{4}[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4})\b",
        "severity": "MEDIUM",
        "title": "Credit card number pattern detected",
        "description": "Data matching credit card number format found.",
    },
    # Leading 0 plus 1-2 digits, then 3-4 digits, then 4 digits.
    "phone_number": {
        "regex": r"\b(0\d{1,2}[-\s]?\d{3,4}[-\s]?\d{4})\b",
        "severity": "LOW",
        "title": "Phone number pattern detected",
        "description": "Phone number is included in the source code.",
    },
    # scheme://user:password@ for the listed database schemes.
    "database_url": {
        "regex": r"(?:postgresql|mysql|mongodb|redis)://([^:]+):([^@]+)@",
        "severity": "CRITICAL",
        "title": "Database connection string contains credentials",
        "description": "Database connection string includes username and password.",
    },
    # PEM-style header. "OPENSSH " is included so that keys stored in the
    # OpenSSH private key format are reported alongside RSA/EC/DSA/PKCS#8.
    "private_key": {
        "regex": r"-----BEGIN (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----",
        "severity": "CRITICAL",
        "title": "Hardcoded private key detected",
        "description": "Cryptographic private key is directly included in the source code.",
    },
}
# Substrings that mark a line as example/test data. They are compared against
# the lower-cased line, so every entry here must itself be lower-case.
_TEST_INDICATORS = (
    "example",
    "test",
    "dummy",
    "fake",
    "sample",
    "mock",
    "placeholder",
    "todo",
    "fixme",
    "xxx",
    "000",
)


def is_false_positive(line: str, pattern_type: str) -> bool:
    """
    Check if a detected pattern is likely a false positive.

    The checks run in order and the first one that applies wins:

    1. The stripped line starts with ``#`` (a comment line).
    2. The lower-cased line contains any marker from ``_TEST_INDICATORS``.
    3. A check specific to ``pattern_type`` (credit_card, phone_number,
       password); other pattern types have no extra check.

    Args:
        line: The line of code containing the match
        pattern_type: Type of pattern detected

    Returns:
        True if likely a false positive, False otherwise
    """
    # Whole-line comments are skipped outright.
    if line.strip().startswith("#"):
        return True

    # Obvious example/test values. The match is a plain substring test, so
    # e.g. "test" also hits "latest".
    line_lower = line.lower()
    if any(indicator in line_lower for indicator in _TEST_INDICATORS):
        return True

    if pattern_type == "credit_card":
        # Any 19xx/20xx digit run on the line is taken as a year, i.e. the
        # digits are more likely a date range or version than a card number.
        return re.search(r"(19|20)\d{2}", line) is not None

    if pattern_type == "phone_number":
        # Lines that talk about dates/times usually hold timestamps.
        return "date" in line_lower or "time" in line_lower

    if pattern_type == "password":
        # Line ends in an empty assignment such as `password = ""`.
        return re.search(r'password\s*[:=]\s*["\']?\s*["\']?$', line) is not None

    return False
def scan_patterns(file_path: str, code: str) -> List[Dict[str, Any]]:
    """
    Scan code for security vulnerability patterns.

    The code is split on "\\n" and every line is tested against every entry
    in SECURITY_PATTERNS (case-insensitively). Lines rejected by
    is_false_positive() produce no findings for that pattern.

    Args:
        file_path: Path to the file being scanned (for reference)
        code: Source code to scan

    Returns:
        List of vulnerability dictionaries, one per regex match, with keys
        id, severity, title, description, line_number (1-based),
        code_snippet, pattern_type, file_path and scanner.
    """
    vulnerabilities: List[Dict[str, Any]] = []

    for line_num, line in enumerate(code.split("\n"), start=1):
        # The snippet depends only on the line, so build it lazily once.
        code_snippet = None

        for pattern_name, pattern_info in SECURITY_PATTERNS.items():
            matches = list(re.finditer(pattern_info["regex"], line, re.IGNORECASE))
            if not matches:
                continue

            # The false-positive verdict depends only on (line, pattern), so
            # it is evaluated once rather than once per match.
            if is_false_positive(line, pattern_name):
                continue

            if code_snippet is None:
                # NOTE: the snippet is the stripped source line, truncated to
                # 100 characters. It is NOT masked, so a reported secret
                # appears verbatim in the finding.
                code_snippet = line.strip()
                if len(code_snippet) > 100:
                    code_snippet = code_snippet[:97] + "..."

            # One finding per match, so a line with the same kind of secret
            # twice is reported twice.
            for _ in matches:
                vulnerabilities.append(
                    {
                        "id": f"pattern-{pattern_name}",
                        "severity": pattern_info["severity"],
                        "title": pattern_info["title"],
                        "description": pattern_info["description"],
                        "line_number": line_num,
                        "code_snippet": code_snippet,
                        "pattern_type": pattern_name,
                        "file_path": file_path,
                        "scanner": "pattern_detector",
                    }
                )

    return vulnerabilities
def get_pattern_info(pattern_type: str) -> Dict[str, str]:
    """
    Look up the registry entry for one pattern type.

    Args:
        pattern_type: Name of the security pattern to look up

    Returns:
        The matching SECURITY_PATTERNS entry, or a generic MEDIUM-severity
        placeholder (without a "regex" key) when the name is not registered
    """
    if pattern_type in SECURITY_PATTERNS:
        return SECURITY_PATTERNS[pattern_type]

    # Unknown name: hand back a fresh generic description.
    fallback = {
        "severity": "MEDIUM",
        "title": "Security pattern detected",
        "description": "Unknown security pattern found.",
    }
    return fallback
def list_available_patterns() -> List[str]:
    """
    Return the names of every registered security pattern.

    Returns:
        A new list of the SECURITY_PATTERNS keys, in registry order
    """
    # Unpacking a dict yields its keys.
    pattern_names = [*SECURITY_PATTERNS]
    return pattern_names
def get_patterns_by_severity(severity: str) -> List[str]:
    """
    Get patterns filtered by severity level.

    Args:
        severity: Severity level (CRITICAL, HIGH, MEDIUM, LOW); matched
            case-insensitively

    Returns:
        List of pattern names with matching severity, in registry order
    """
    # Normalise once instead of once per registry entry.
    wanted = severity.upper()
    return [
        name
        for name, info in SECURITY_PATTERNS.items()
        if info["severity"] == wanted
    ]