"""
Bandit Static Analysis Tool
=============================
Bandit is an open-source Python security linter. It parses Python code into an
Abstract Syntax Tree (AST) and checks each node against a set of security rules.
What Bandit catches:
- SQL injection patterns (string formatting in SQL calls)
- Use of eval(), exec(), os.system() (command injection risk)
- Hardcoded passwords and bind addresses
- Use of insecure hash functions (MD5, SHA1)
- Insecure temp file creation
- SSL/TLS verification disabled (requests.get(verify=False))
- Use of pickle (deserialization attacks)
What Bandit CANNOT catch:
- Business logic flaws
- Missing authentication/authorization
- Cross-file data flow (it analyzes one file at a time)
- Vulnerabilities in non-Python code
That's why we combine Bandit (mechanical pattern matching) with the LLM (semantic
understanding). Bandit provides high-confidence, low-noise signals that anchor the
LLM's analysis.
How it works:
1. We write the changed Python files to a temp directory
2. Run `bandit -r
-f json` as a subprocess
3. Parse the JSON output into a human-readable summary
4. Feed this summary into the LLM's prompt as additional context
"""
from __future__ import annotations
import json
import subprocess
import tempfile
from pathlib import Path
import structlog
logger = structlog.get_logger()
async def run_bandit(file_contents: dict[str, str]) -> str:
"""
Run Bandit security analysis on Python files.
Args:
file_contents: dict of {filepath: source_code} for changed files
Returns:
A formatted string summarizing Bandit's findings, suitable for
including in an LLM prompt. Returns empty string if no Python
files or no findings.
"""
# Filter to only Python files — Bandit only understands Python
python_files = {
path: content
for path, content in file_contents.items()
if path.endswith(".py")
}
if not python_files:
return ""
try:
# Create a temp directory and write the Python files there.
# We need files on disk because Bandit operates on the filesystem.
# tempfile.mkdtemp() creates a secure temp dir that only we can access.
with tempfile.TemporaryDirectory(prefix="ninjacg_bandit_") as tmpdir:
tmpdir_path = Path(tmpdir)
for filepath, content in python_files.items():
# Recreate the directory structure (e.g., src/auth/login.py)
file_path = tmpdir_path / filepath
file_path.parent.mkdir(parents=True, exist_ok=True)
file_path.write_text(content, encoding="utf-8")
# Run Bandit as a subprocess
# -r: recursive (scan all files in directory)
# -f json: output as JSON (machine-parseable)
# -ll: only report medium severity and above
# --quiet: suppress progress bar
result = subprocess.run(
[
"bandit",
"-r", str(tmpdir_path),
"-f", "json",
"-ll",
"--quiet",
],
capture_output=True,
text=True,
timeout=30, # Kill if it takes too long
)
# Bandit exit codes:
# 0 = no issues found
# 1 = issues found (this is NOT an error)
# 2+ = actual error
if result.returncode > 1:
logger.warning("Bandit returned error", stderr=result.stderr[:500])
return ""
if not result.stdout.strip():
return ""
# Parse the JSON output
bandit_output = json.loads(result.stdout)
findings = bandit_output.get("results", [])
if not findings:
return "Bandit static analysis: No security issues detected."
# Format findings as a human-readable summary for the LLM
summary_lines = [
f"Bandit static analysis found {len(findings)} issue(s):\n"
]
for i, finding in enumerate(findings, 1):
# Map the temp file path back to the original file path
temp_path = finding.get("filename", "")
original_path = _map_temp_to_original(temp_path, tmpdir, python_files)
severity = finding.get("issue_severity", "UNKNOWN")
confidence = finding.get("issue_confidence", "UNKNOWN")
text = finding.get("issue_text", "")
test_id = finding.get("test_id", "")
line_no = finding.get("line_number", 0)
code = finding.get("code", "").strip()
summary_lines.append(
f"{i}. [{severity}/{confidence}] {text}\n"
f" File: {original_path}, Line: {line_no}\n"
f" Test: {test_id}\n"
f" Code: {code}\n"
)
summary = "\n".join(summary_lines)
logger.info("Bandit analysis complete", findings_count=len(findings))
return summary
except subprocess.TimeoutExpired:
logger.warning("Bandit timed out after 30 seconds")
return ""
except FileNotFoundError:
# Bandit not installed — this is OK, the LLM can still analyze
logger.warning("Bandit not found in PATH — skipping static analysis")
return ""
except Exception as e:
logger.warning("Bandit analysis failed", error=str(e))
return ""
def _map_temp_to_original(
temp_path: str, tmpdir: str, original_files: dict[str, str]
) -> str:
"""Map a temp directory path back to the original file path."""
try:
# The temp path looks like: /tmp/ninjacg_bandit_xxx/src/auth/login.py
# We need to strip the tmpdir prefix to get: src/auth/login.py
relative = str(Path(temp_path).relative_to(tmpdir))
# Normalize path separators
relative = relative.replace("\\", "/")
# Verify it's one of our original files
if relative in original_files:
return relative
except (ValueError, Exception):
pass
# Fallback: return the filename only
return Path(temp_path).name