File size: 7,546 Bytes
f8e78b2
 
 
 
 
 
 
 
 
 
 
 
 
 
41329d5
 
f8e78b2
 
 
 
41329d5
 
f8e78b2
 
 
 
41329d5
 
f8e78b2
 
 
 
41329d5
 
f8e78b2
 
 
 
41329d5
 
f8e78b2
 
 
 
41329d5
 
f8e78b2
41329d5
f8e78b2
 
41329d5
 
f8e78b2
 
 
 
41329d5
 
f8e78b2
 
 
 
41329d5
 
f8e78b2
 
 
 
41329d5
 
f8e78b2
 
 
 
41329d5
 
f8e78b2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41329d5
 
f8e78b2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
"""
Pattern-based security vulnerability detector using regular expressions.

Detects hardcoded secrets, credentials, and sensitive information patterns.
"""

import re
from typing import Dict, List, Any

# Registry of security patterns, keyed by pattern name.
#
# Each entry is a dict with:
#   "regex"       - pattern source string (scan_patterns applies it per line
#                   with re.IGNORECASE, so character classes such as [0-9A-Z]
#                   also accept lowercase letters at scan time)
#   "severity"    - one of CRITICAL / HIGH / MEDIUM / LOW
#   "title"       - short finding headline
#   "description" - one-sentence explanation of the finding
SECURITY_PATTERNS = {
    # AWS_ACCESS_KEY_ID assigned (":" or "=") an optionally quoted value of
    # "AKIA" followed by exactly 16 characters from [0-9A-Z].
    "aws_access_key": {
        "regex": r"(?:AWS_ACCESS_KEY_ID|aws_access_key_id)\s*[:=]\s*['\"]?(AKIA[0-9A-Z]{16})['\"]?",
        "severity": "CRITICAL",
        "title": "Hardcoded AWS Access Key detected",
        "description": "AWS Access Key is hardcoded in the source code.",
    },
    # AWS_SECRET_ACCESS_KEY assigned an optionally quoted run of 40 characters
    # from [A-Za-z0-9/+=].
    "aws_secret_key": {
        "regex": r"(?:AWS_SECRET_ACCESS_KEY|aws_secret_access_key)\s*[:=]\s*['\"]?([A-Za-z0-9/+=]{40})['\"]?",
        "severity": "CRITICAL",
        "title": "Hardcoded AWS Secret Key detected",
        "description": "AWS Secret Access Key is hardcoded in the source code.",
    },
    # api_key / api-key / apikey / api_secret / api-secret assigned a QUOTED
    # value of at least 20 characters from [a-zA-Z0-9_-]; unquoted values are
    # not matched.
    "api_key": {
        "regex": r"(?:api[_-]?key|apikey|api[_-]?secret)\s*[:=]\s*['\"]([a-zA-Z0-9_\-]{20,})['\"]",
        "severity": "HIGH",
        "title": "Hardcoded API key detected",
        "description": "API key is directly hardcoded in the source code.",
    },
    # Standalone token starting with "ghp_" or "ghs_" followed by 36 or more
    # alphanumeric characters.
    "github_token": {
        "regex": r"\b(gh[ps]_[a-zA-Z0-9]{36,})\b",
        "severity": "HIGH",
        "title": "GitHub Personal Access Token detected",
        "description": "GitHub personal access token is exposed in the source code.",
    },
    # Three dot-separated segments of [a-zA-Z0-9_-]; the first two must start
    # with "eyJ" and the third must be non-empty.
    "jwt_token": {
        "regex": r"\b(eyJ[a-zA-Z0-9_-]*\.eyJ[a-zA-Z0-9_-]*\.[a-zA-Z0-9_-]+)\b",
        "severity": "HIGH",
        "title": "Hardcoded JWT token detected",
        "description": "JWT token is hardcoded in the source code.",
    },
    # password / passwd / pwd assigned a QUOTED value of at least 4 characters
    # that contains no quote character.
    "password": {
        "regex": r"(?:password|passwd|pwd)\s*[:=]\s*['\"]([^'\"]{4,})['\"]",
        "severity": "MEDIUM",
        "title": "Hardcoded password detected",
        "description": "Password is directly written in the source code.",
    },
    # Six digits, a hyphen, then seven digits.
    # NOTE(review): this is not the US SSN 3-2-4 digit layout; it looks like a
    # different national ID format - confirm the intended format and title.
    "ssn": {
        "regex": r"\b(\d{6}[-]\d{7})\b",
        "severity": "MEDIUM",
        "title": "Social Security Number pattern detected",
        "description": "Data matching SSN format found in the source code.",
    },
    # Sixteen digits in four groups of four, each group optionally separated
    # by a single hyphen or whitespace character. No checksum validation.
    "credit_card": {
        "regex": r"\b(\d{4}[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4})\b",
        "severity": "MEDIUM",
        "title": "Credit card number pattern detected",
        "description": "Data matching credit card number format found.",
    },
    # A leading "0" plus 1-2 digits, then 3-4 digits, then 4 digits, with an
    # optional hyphen or whitespace between the groups.
    "phone_number": {
        "regex": r"\b(0\d{1,2}[-\s]?\d{3,4}[-\s]?\d{4})\b",
        "severity": "LOW",
        "title": "Phone number pattern detected",
        "description": "Phone number is included in the source code.",
    },
    # postgresql / mysql / mongodb / redis URL containing "user:password@";
    # group 1 is the user part, group 2 the password part.
    "database_url": {
        "regex": r"(?:postgresql|mysql|mongodb|redis)://([^:]+):([^@]+)@",
        "severity": "CRITICAL",
        "title": "Database connection string contains credentials",
        "description": "Database connection string includes username and password.",
    },
    # PEM header line for an unlabelled, RSA, EC or DSA private key.
    # NOTE(review): headers such as "OPENSSH PRIVATE KEY" or
    # "ENCRYPTED PRIVATE KEY" are not matched by this pattern.
    "private_key": {
        "regex": r"-----BEGIN (?:RSA |EC |DSA )?PRIVATE KEY-----",
        "severity": "CRITICAL",
        "title": "Hardcoded private key detected",
        "description": "Cryptographic private key is directly included in the source code.",
    },
}


def is_false_positive(line: str, pattern_type: str) -> bool:
    """
    Check if a detected pattern is likely a false positive.

    A line is treated as a false positive when any of the following holds:

    * it is a whole-line ``#`` comment;
    * it contains a test/placeholder indicator word (case-insensitive
      substring match, e.g. "example", "todo", "000");
    * a pattern-specific heuristic applies (year-like number on a
      credit-card hit, "date"/"time" on a phone-number hit, or an empty
      assignment on a password hit).

    Args:
        line: The line of code containing the match
        pattern_type: Type of pattern detected

    Returns:
        True if likely a false positive, False otherwise
    """
    # Whole-line comments are skipped; trailing comments are not.
    if line.lstrip().startswith("#"):
        return True

    line_lower = line.lower()

    # Indicators are compared against the lowercased line, so every entry
    # must itself be lowercase (uppercase entries could never match).
    test_indicators = (
        "example",
        "test",
        "dummy",
        "fake",
        "sample",
        "mock",
        "placeholder",
        "todo",
        "fixme",
        "xxx",
        "000",
    )
    if any(indicator in line_lower for indicator in test_indicators):
        return True

    # Pattern-specific false positive checks
    if pattern_type == "credit_card":
        # Common false positive: date ranges, version numbers.
        # Any 19xx / 20xx run anywhere in the line counts as a year.
        return re.search(r"(19|20)\d{2}", line) is not None

    if pattern_type == "phone_number":
        # Skip if the line looks date/time related.
        return "date" in line_lower or "time" in line_lower

    if pattern_type == "password":
        # Skip when the assignment carries no value (nothing, or only
        # quotes, up to end of line). Case-insensitive and covering the
        # same names as the detection pattern (password / passwd / pwd).
        empty_assignment = re.search(
            r'(?:password|passwd|pwd)\s*[:=]\s*["\']?\s*["\']?$',
            line,
            re.IGNORECASE,
        )
        return empty_assignment is not None

    return False


def scan_patterns(file_path: str, code: str) -> List[Dict[str, Any]]:
    """
    Scan code for security vulnerability patterns.

    The code is split on "\\n" and every line is tested against every entry
    of SECURITY_PATTERNS (case-insensitively). One finding is emitted per
    regex match, unless ``is_false_positive`` rejects the line for that
    pattern, in which case all of that pattern's matches on the line are
    dropped.

    Args:
        file_path: Path to the file being scanned (for reference; the file
            is not opened here, the value is only copied into each finding)
        code: Source code to scan

    Returns:
        List of vulnerability dictionaries, ordered by line number, then by
        pattern order in SECURITY_PATTERNS. Each dictionary has the keys
        id, severity, title, description, line_number, code_snippet,
        pattern_type, file_path and scanner.
    """
    # Compile every pattern once per call instead of once per line.
    compiled_patterns = [
        (name, info, re.compile(info["regex"], re.IGNORECASE))
        for name, info in SECURITY_PATTERNS.items()
    ]

    vulnerabilities: List[Dict[str, Any]] = []

    for line_num, line in enumerate(code.split("\n"), start=1):
        for pattern_name, pattern_info, compiled in compiled_patterns:
            match_count = sum(1 for _ in compiled.finditer(line))
            if match_count == 0:
                continue

            # The false-positive verdict depends only on the line and the
            # pattern type, so it is evaluated once rather than per match.
            if is_false_positive(line, pattern_name):
                continue

            # NOTE: the snippet is the stripped line truncated to 100
            # characters for display; it is NOT masked, so it may still
            # contain the detected secret.
            code_snippet = line.strip()
            if len(code_snippet) > 100:
                code_snippet = code_snippet[:97] + "..."

            # One (distinct) finding dict per match on this line.
            vulnerabilities.extend(
                {
                    "id": f"pattern-{pattern_name}",
                    "severity": pattern_info["severity"],
                    "title": pattern_info["title"],
                    "description": pattern_info["description"],
                    "line_number": line_num,
                    "code_snippet": code_snippet,
                    "pattern_type": pattern_name,
                    "file_path": file_path,
                    "scanner": "pattern_detector",
                }
                for _ in range(match_count)
            )

    return vulnerabilities


def get_pattern_info(pattern_type: str) -> Dict[str, str]:
    """
    Look up the metadata registered for a pattern type.

    Args:
        pattern_type: Name of the security pattern (a SECURITY_PATTERNS key)

    Returns:
        The SECURITY_PATTERNS entry for a known pattern type (the registry's
        own dict, including its "regex" key). For an unknown type, a generic
        fallback dict with only "severity", "title" and "description".
    """
    try:
        return SECURITY_PATTERNS[pattern_type]
    except KeyError:
        # Unknown pattern type: hand back a generic MEDIUM-severity record.
        fallback_info = {
            "severity": "MEDIUM",
            "title": "Security pattern detected",
            "description": "Unknown security pattern found.",
        }
        return fallback_info


def list_available_patterns() -> List[str]:
    """
    Return the names of every registered security pattern.

    Returns:
        A new list of SECURITY_PATTERNS keys, in registry order
    """
    return [pattern_name for pattern_name in SECURITY_PATTERNS]


def get_patterns_by_severity(severity: str) -> List[str]:
    """
    Select the pattern names registered at a given severity.

    Args:
        severity: Severity level (CRITICAL, HIGH, MEDIUM, LOW); it is
            upper-cased before comparison, so any letter case is accepted

    Returns:
        List of pattern names whose severity equals the requested level,
        in registry order (empty if none match)
    """
    selected: List[str] = []
    for pattern_name, pattern_info in SECURITY_PATTERNS.items():
        if pattern_info["severity"] != severity.upper():
            continue
        selected.append(pattern_name)
    return selected