File size: 6,308 Bytes
4b445f6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
"""
Bandit Static Analysis Tool
=============================

Bandit is an open-source Python security linter. It parses Python code into an
Abstract Syntax Tree (AST) and checks each node against a set of security rules.

What Bandit catches:
- SQL injection patterns (string formatting in SQL calls)
- Use of eval(), exec(), os.system() (command injection risk)
- Hardcoded passwords and bind addresses
- Use of insecure hash functions (MD5, SHA1)
- Insecure temp file creation
- SSL/TLS verification disabled (requests.get(verify=False))
- Use of pickle (deserialization attacks)

What Bandit CANNOT catch:
- Business logic flaws
- Missing authentication/authorization
- Cross-file data flow (it analyzes one file at a time)
- Vulnerabilities in non-Python code

That's why we combine Bandit (mechanical pattern matching) with the LLM (semantic
understanding). Bandit provides high-confidence, low-noise signals that anchor the
LLM's analysis.

How it works:
1. We write the changed Python files to a temp directory
2. Run `bandit -r <dir> -f json -ll --quiet` as a subprocess
   (`-ll` limits the report to medium severity and above)
3. Parse the JSON output into a human-readable summary
4. Feed this summary into the LLM's prompt as additional context
"""

from __future__ import annotations

import asyncio
import json
import subprocess
import tempfile
from pathlib import Path

import structlog

logger = structlog.get_logger()


async def run_bandit(file_contents: dict[str, str]) -> str:
    """
    Run Bandit security analysis on Python files.

    The blocking work (writing files, running the Bandit subprocess, parsing
    its output) is executed in the event loop's default executor so that this
    coroutine does not stall other tasks while Bandit runs.

    Args:
        file_contents: dict of {filepath: source_code} for changed files

    Returns:
        A formatted string summarizing Bandit's findings, suitable for
        including in an LLM prompt. Returns an empty string if there are no
        Python files, if Bandit is unavailable, times out, or fails, or if it
        produces no output. Returns a short "no issues" message when Bandit
        ran and reported nothing.
    """
    # Filter to only Python files — Bandit only understands Python
    python_files = {
        path: content
        for path, content in file_contents.items()
        if path.endswith(".py")
    }

    if not python_files:
        return ""

    try:
        # subprocess.run() and the file writes are blocking; push them onto a
        # worker thread instead of running them on the event loop.
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, _run_bandit_sync, python_files)
    except subprocess.TimeoutExpired:
        logger.warning("Bandit timed out after 30 seconds")
        return ""
    except FileNotFoundError:
        # Bandit not installed — this is OK, the LLM can still analyze
        logger.warning("Bandit not found in PATH — skipping static analysis")
        return ""
    except json.JSONDecodeError as e:
        logger.warning("Bandit produced unparseable JSON", error=str(e))
        return ""
    except Exception as e:
        # Best-effort: static analysis is supplementary, never fatal.
        logger.warning("Bandit analysis failed", error=str(e))
        return ""


def _run_bandit_sync(python_files: dict[str, str]) -> str:
    """Write the files to a temp directory, run Bandit on it, return the summary.

    Synchronous and blocking; intended to be called from a worker thread.
    Exceptions (timeout, missing executable, bad JSON) propagate to the caller.
    """
    # We need files on disk because Bandit operates on the filesystem.
    # TemporaryDirectory removes the directory and its contents on exit.
    with tempfile.TemporaryDirectory(prefix="ninjacg_bandit_") as tmpdir:
        # Resolve once so the containment check compares canonical paths
        # (the temp dir itself may sit behind a symlink).
        root = Path(tmpdir).resolve()

        written = 0
        for filepath, content in python_files.items():
            # Recreate the directory structure (e.g., src/auth/login.py),
            # but refuse absolute paths or ".." segments that would place the
            # file outside the temp directory.
            target = (root / filepath).resolve()
            if root not in target.parents:
                logger.warning(
                    "Skipping file path outside Bandit temp dir",
                    filepath=filepath,
                )
                continue
            target.parent.mkdir(parents=True, exist_ok=True)
            target.write_text(content, encoding="utf-8")
            written += 1

        if not written:
            return ""

        # Run Bandit as a subprocess
        # -r: recursive (scan all files in directory)
        # -f json: output as JSON (machine-parseable)
        # -ll: only report medium severity and above
        # --quiet: suppress progress bar
        result = subprocess.run(
            [
                "bandit",
                "-r", str(root),
                "-f", "json",
                "-ll",
                "--quiet",
            ],
            capture_output=True,
            text=True,
            timeout=30,  # Kill if it takes too long
        )

        # Bandit exit codes:
        # 0 = no issues found
        # 1 = issues found (this is NOT an error)
        # 2+ = actual error
        if result.returncode > 1:
            logger.warning("Bandit returned error", stderr=result.stderr[:500])
            return ""

        if not result.stdout.strip():
            return ""

        # Parse the JSON output
        bandit_output = json.loads(result.stdout)
        findings = bandit_output.get("results", [])

        if not findings:
            return "Bandit static analysis: No security issues detected."

        summary = _format_bandit_findings(findings, str(root), python_files)
        logger.info("Bandit analysis complete", findings_count=len(findings))
        return summary


def _format_bandit_findings(
    findings: list, tmpdir: str, python_files: dict[str, str]
) -> str:
    """Render Bandit's JSON findings as a human-readable summary for the LLM."""
    summary_lines = [
        f"Bandit static analysis found {len(findings)} issue(s):\n"
    ]

    for i, finding in enumerate(findings, 1):
        # Map the temp file path back to the original file path
        temp_path = finding.get("filename", "")
        original_path = _map_temp_to_original(temp_path, tmpdir, python_files)

        severity = finding.get("issue_severity", "UNKNOWN")
        confidence = finding.get("issue_confidence", "UNKNOWN")
        text = finding.get("issue_text", "")
        test_id = finding.get("test_id", "")
        line_no = finding.get("line_number", 0)
        # Tolerate an explicit null so one odd finding can't sink the report.
        code = (finding.get("code") or "").strip()

        summary_lines.append(
            f"{i}. [{severity}/{confidence}] {text}\n"
            f"   File: {original_path}, Line: {line_no}\n"
            f"   Test: {test_id}\n"
            f"   Code: {code}\n"
        )

    return "\n".join(summary_lines)


def _map_temp_to_original(
    temp_path: str, tmpdir: str, original_files: dict[str, str]
) -> str:
    """Map a temp directory path back to the original file path."""
    try:
        # The temp path looks like: /tmp/ninjacg_bandit_xxx/src/auth/login.py
        # We need to strip the tmpdir prefix to get: src/auth/login.py
        relative = str(Path(temp_path).relative_to(tmpdir))
        # Normalize path separators
        relative = relative.replace("\\", "/")
        # Verify it's one of our original files
        if relative in original_files:
            return relative
    except (ValueError, Exception):
        pass
    # Fallback: return the filename only
    return Path(temp_path).name