simple-security-scanner / src /scanner /pattern_detector.py
garibong's picture
Translate scanner messages to English
41329d5
"""
Pattern-based security vulnerability detector using regular expressions.
Detects hardcoded secrets, credentials, and sensitive information patterns.
"""
import re
from typing import Dict, List, Any
# Registry of secret/PII detection rules, keyed by pattern name.
# Each entry provides:
#   "regex"       - pattern source; scan_patterns() applies it one line at a
#                   time with re.IGNORECASE, so the literal case shown in the
#                   patterns below is not enforced during a scan
#   "severity"    - one of CRITICAL / HIGH / MEDIUM / LOW
#   "title"       - short headline for a finding
#   "description" - one-sentence explanation for a finding
SECURITY_PATTERNS = {
    # AWS_ACCESS_KEY_ID assigned (":" or "=") an "AKIA" + 16 character value;
    # surrounding quotes are optional. Group 1 is the key id.
    "aws_access_key": {
        "regex": r"(?:AWS_ACCESS_KEY_ID|aws_access_key_id)\s*[:=]\s*['\"]?(AKIA[0-9A-Z]{16})['\"]?",
        "severity": "CRITICAL",
        "title": "Hardcoded AWS Access Key detected",
        "description": "AWS Access Key is hardcoded in the source code.",
    },
    # AWS_SECRET_ACCESS_KEY assigned a 40 character value drawn from
    # letters, digits, "/", "+" and "="; quotes are optional. Group 1 is the secret.
    "aws_secret_key": {
        "regex": r"(?:AWS_SECRET_ACCESS_KEY|aws_secret_access_key)\s*[:=]\s*['\"]?([A-Za-z0-9/+=]{40})['\"]?",
        "severity": "CRITICAL",
        "title": "Hardcoded AWS Secret Key detected",
        "description": "AWS Secret Access Key is hardcoded in the source code.",
    },
    # api_key / api-key / apikey / api_secret assigned a *quoted* value of at
    # least 20 characters (letters, digits, "_" or "-"). Group 1 is the value.
    "api_key": {
        "regex": r"(?:api[_-]?key|apikey|api[_-]?secret)\s*[:=]\s*['\"]([a-zA-Z0-9_\-]{20,})['\"]",
        "severity": "HIGH",
        "title": "Hardcoded API key detected",
        "description": "API key is directly hardcoded in the source code.",
    },
    # Stand-alone token starting with "ghp_" or "ghs_" followed by 36 or more
    # alphanumeric characters; no assignment context is required.
    "github_token": {
        "regex": r"\b(gh[ps]_[a-zA-Z0-9]{36,})\b",
        "severity": "HIGH",
        "title": "GitHub Personal Access Token detected",
        "description": "GitHub personal access token is exposed in the source code.",
    },
    # Three dot-separated segments of letters, digits, "_" and "-", where the
    # first two segments both start with "eyJ".
    "jwt_token": {
        "regex": r"\b(eyJ[a-zA-Z0-9_-]*\.eyJ[a-zA-Z0-9_-]*\.[a-zA-Z0-9_-]+)\b",
        "severity": "HIGH",
        "title": "Hardcoded JWT token detected",
        "description": "JWT token is hardcoded in the source code.",
    },
    # password / passwd / pwd assigned a quoted value of at least 4 characters
    # that contains no quote characters. Group 1 is the value.
    "password": {
        "regex": r"(?:password|passwd|pwd)\s*[:=]\s*['\"]([^'\"]{4,})['\"]",
        "severity": "MEDIUM",
        "title": "Hardcoded password detected",
        "description": "Password is directly written in the source code.",
    },
    # Six digits, a hyphen, then seven digits.
    # NOTE(review): this 6-7 digit layout is not the US 3-2-4 SSN format; it
    # looks like a Korean resident registration number - confirm the intent.
    "ssn": {
        "regex": r"\b(\d{6}[-]\d{7})\b",
        "severity": "MEDIUM",
        "title": "Social Security Number pattern detected",
        "description": "Data matching SSN format found in the source code.",
    },
    # Sixteen digits in four groups of four, each pair of groups optionally
    # separated by one hyphen or whitespace character. No checksum validation.
    "credit_card": {
        "regex": r"\b(\d{4}[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4})\b",
        "severity": "MEDIUM",
        "title": "Credit card number pattern detected",
        "description": "Data matching credit card number format found.",
    },
    # "0" plus 1-2 digits, then 3-4 digits, then 4 digits, with an optional
    # hyphen or whitespace character between the groups.
    "phone_number": {
        "regex": r"\b(0\d{1,2}[-\s]?\d{3,4}[-\s]?\d{4})\b",
        "severity": "LOW",
        "title": "Phone number pattern detected",
        "description": "Phone number is included in the source code.",
    },
    # postgresql / mysql / mongodb / redis URL with "user:password@" after the
    # scheme. Group 1 is the text before the first ":", group 2 the text up to "@".
    "database_url": {
        "regex": r"(?:postgresql|mysql|mongodb|redis)://([^:]+):([^@]+)@",
        "severity": "CRITICAL",
        "title": "Database connection string contains credentials",
        "description": "Database connection string includes username and password.",
    },
    # "-----BEGIN PRIVATE KEY-----" header line, optionally labelled RSA, EC
    # or DSA. Other labels are not covered by this alternation. No capture group.
    "private_key": {
        "regex": r"-----BEGIN (?:RSA |EC |DSA )?PRIVATE KEY-----",
        "severity": "CRITICAL",
        "title": "Hardcoded private key detected",
        "description": "Cryptographic private key is directly included in the source code.",
    },
}
def is_false_positive(line: str, pattern_type: str) -> bool:
    """
    Check if a detected pattern is likely a false positive.

    The decision is made from the whole line, not from the matched text:
    comment lines and lines containing a test/placeholder marker are always
    rejected, then a few pattern-specific heuristics are applied.

    Args:
        line: The line of code containing the match
        pattern_type: Type of pattern detected (a SECURITY_PATTERNS key)

    Returns:
        True if likely a false positive, False otherwise
    """
    # Lines that are entirely a "#" comment are never reported.
    if line.strip().startswith("#"):
        return True

    # Markers are compared against the lower-cased line, so every marker must
    # itself be lower-case ("TODO"/"FIXME" in upper case could never match).
    test_indicators = (
        "example",
        "test",
        "dummy",
        "fake",
        "sample",
        "mock",
        "placeholder",
        "todo",
        "fixme",
        "xxx",
        "000",
    )
    line_lower = line.lower()
    if any(indicator in line_lower for indicator in test_indicators):
        return True

    # Pattern-specific heuristics. pattern_type selects at most one of them.
    if pattern_type == "credit_card":
        # Anything that looks like a year (19xx / 20xx) suggests a date range
        # or version string rather than a card number.
        return re.search(r"(19|20)\d{2}", line) is not None

    if pattern_type == "phone_number":
        # Digits on a line that mentions dates or times are assumed to be
        # timestamps rather than phone numbers.
        return "date" in line_lower or "time" in line_lower

    if pattern_type == "password":
        # "password =" followed by nothing (or only empty quotes) at the end
        # of the line carries no actual secret.
        return re.search(r'password\s*[:=]\s*["\']?\s*["\']?$', line) is not None

    return False
def scan_patterns(file_path: str, code: str) -> List[Dict[str, Any]]:
    """
    Scan code line by line for security vulnerability patterns.

    Every regex in SECURITY_PATTERNS is applied case-insensitively to each
    line. One finding is emitted per regex match, unless is_false_positive()
    rejects the line for that pattern.

    Args:
        file_path: Path to the file being scanned (only copied into findings)
        code: Source code to scan

    Returns:
        List of vulnerability dictionaries with the keys id, severity, title,
        description, line_number, code_snippet, pattern_type, file_path and
        scanner. Line numbers are 1-based.
    """
    # Compile every pattern once instead of resolving it again for each line.
    compiled_patterns = [
        (name, info, re.compile(info["regex"], re.IGNORECASE))
        for name, info in SECURITY_PATTERNS.items()
    ]

    vulnerabilities: List[Dict[str, Any]] = []
    for line_num, line in enumerate(code.split("\n"), start=1):
        for pattern_name, pattern_info, pattern in compiled_patterns:
            match_count = sum(1 for _ in pattern.finditer(line))
            # The false-positive verdict depends only on the line and the
            # pattern, so it is evaluated once rather than once per match.
            if match_count == 0 or is_false_positive(line, pattern_name):
                continue

            # NOTE: the snippet is the raw line (stripped and truncated to
            # 100 characters); the matched secret is NOT masked here, so
            # consumers must treat findings as sensitive.
            code_snippet = line.strip()
            if len(code_snippet) > 100:
                code_snippet = code_snippet[:97] + "..."

            # One independent finding per match on the line.
            for _ in range(match_count):
                vulnerabilities.append(
                    {
                        "id": f"pattern-{pattern_name}",
                        "severity": pattern_info["severity"],
                        "title": pattern_info["title"],
                        "description": pattern_info["description"],
                        "line_number": line_num,
                        "code_snippet": code_snippet,
                        "pattern_type": pattern_name,
                        "file_path": file_path,
                        "scanner": "pattern_detector",
                    }
                )
    return vulnerabilities
def get_pattern_info(pattern_type: str) -> Dict[str, str]:
    """
    Look up the registry entry for a pattern type.

    Args:
        pattern_type: Type of security pattern

    Returns:
        The SECURITY_PATTERNS entry for a known type; otherwise a generic
        MEDIUM-severity placeholder with severity, title and description.
    """
    try:
        return SECURITY_PATTERNS[pattern_type]
    except KeyError:
        # Unknown pattern type: hand back a generic description instead.
        fallback = {
            "severity": "MEDIUM",
            "title": "Security pattern detected",
            "description": "Unknown security pattern found.",
        }
        return fallback
def list_available_patterns() -> List[str]:
    """
    Return the names of every registered security pattern.

    Returns:
        New list of SECURITY_PATTERNS keys, in registry order
    """
    return [pattern_name for pattern_name in SECURITY_PATTERNS]
def get_patterns_by_severity(severity: str) -> List[str]:
    """
    Select the registered patterns that carry a given severity.

    Args:
        severity: Severity level (CRITICAL, HIGH, MEDIUM, LOW); compared
            after upper-casing, so any letter case is accepted

    Returns:
        List of pattern names with matching severity, in registry order
    """
    wanted = severity.upper()
    selected: List[str] = []
    for pattern_name, pattern_info in SECURITY_PATTERNS.items():
        if pattern_info["severity"] == wanted:
            selected.append(pattern_name)
    return selected