"""
Bandit Static Analysis Tool
=============================
Bandit is an open-source Python security linter. It parses Python code into an
Abstract Syntax Tree (AST) and checks each node against a set of security rules.
What Bandit catches:
- SQL injection patterns (string formatting in SQL calls)
- Use of eval(), exec(), os.system() (command injection risk)
- Hardcoded passwords and bind addresses
- Use of insecure hash functions (MD5, SHA1)
- Insecure temp file creation
- SSL/TLS verification disabled (requests.get(verify=False))
- Use of pickle (deserialization attacks)
What Bandit CANNOT catch:
- Business logic flaws
- Missing authentication/authorization
- Cross-file data flow (it analyzes one file at a time)
- Vulnerabilities in non-Python code
That's why we combine Bandit (mechanical pattern matching) with the LLM (semantic
understanding). Bandit provides high-confidence, low-noise signals that anchor the
LLM's analysis.
How it works:
1. We write the changed Python files to a temp directory
2. Run `bandit -r <dir> -f json -ll --quiet` as a subprocess
3. Parse the JSON output into a human-readable summary
4. Feed this summary into the LLM's prompt as additional context
"""
from __future__ import annotations
import asyncio
import json
import subprocess
import tempfile
from pathlib import Path

import structlog
logger = structlog.get_logger()
async def run_bandit(file_contents: dict[str, str]) -> str:
    """
    Run Bandit security analysis on Python files.

    The changed files are written into a private temporary directory, the
    Bandit CLI is executed against that directory in a worker thread (so the
    event loop is not blocked while Bandit runs), and the JSON report is
    turned into a plain-text summary.

    Args:
        file_contents: dict of {filepath: source_code} for changed files.
            Only keys ending in ".py" are analysed. Keys that would resolve
            outside the temporary scan directory (absolute paths, ".."
            traversal) are skipped with a warning.

    Returns:
        A formatted string summarizing Bandit's findings, suitable for
        including in an LLM prompt. Returns a one-line "no issues" message
        when Bandit ran and reported nothing. Returns an empty string when
        there is nothing to scan, or when Bandit is missing, times out,
        exits with an error, or produces unparseable output.
    """
    # Filter to only Python files — Bandit only understands Python
    python_files = {
        path: content
        for path, content in file_contents.items()
        if path.endswith(".py")
    }
    if not python_files:
        return ""

    try:
        # We need files on disk because Bandit operates on the filesystem.
        # TemporaryDirectory creates a directory readable only by the current
        # user and removes it (with its contents) when the block exits.
        with tempfile.TemporaryDirectory(prefix="ninjacg_bandit_") as tmpdir:
            tmpdir_path = Path(tmpdir)
            # Resolved form is used only for the containment check below;
            # Bandit is still given the unresolved path so the filenames it
            # reports can be mapped back using `tmpdir`.
            scan_root = tmpdir_path.resolve()

            written = 0
            for filepath, content in python_files.items():
                # Recreate the directory structure (e.g., src/auth/login.py).
                # File paths come from the change under review, so refuse
                # anything that would land outside the scan directory.
                destination = (scan_root / filepath).resolve()
                if scan_root not in destination.parents:
                    logger.warning(
                        "Bandit skipped file outside scan directory",
                        path=filepath,
                    )
                    continue
                destination.parent.mkdir(parents=True, exist_ok=True)
                destination.write_text(content, encoding="utf-8")
                written += 1

            if not written:
                # Nothing was scanned, so a "no issues" message would be
                # misleading.
                return ""

            # subprocess.run blocks; run it in the default executor so other
            # coroutines keep making progress while Bandit works.
            result = await asyncio.get_running_loop().run_in_executor(
                None, _invoke_bandit, tmpdir_path
            )

            # Bandit exit codes:
            #   0 = no issues found
            #   1 = issues found (this is NOT an error)
            #   anything else (including negative = killed by a signal)
            #   is an actual error
            if result.returncode not in (0, 1):
                logger.warning("Bandit returned error", stderr=result.stderr[:500])
                return ""

            if not result.stdout.strip():
                return ""

            # Parse the JSON output
            bandit_output = json.loads(result.stdout)
            findings = bandit_output.get("results", [])
            if not findings:
                return "Bandit static analysis: No security issues detected."

            # Format findings as a human-readable summary for the LLM
            summary_lines = [
                f"Bandit static analysis found {len(findings)} issue(s):\n"
            ]
            for i, finding in enumerate(findings, 1):
                # Map the temp file path back to the original file path
                temp_path = finding.get("filename", "")
                original_path = _map_temp_to_original(temp_path, tmpdir, python_files)
                severity = finding.get("issue_severity", "UNKNOWN")
                confidence = finding.get("issue_confidence", "UNKNOWN")
                text = finding.get("issue_text", "")
                test_id = finding.get("test_id", "")
                line_no = finding.get("line_number", 0)
                code = finding.get("code", "").strip()
                summary_lines.append(
                    f"{i}. [{severity}/{confidence}] {text}\n"
                    f"   File: {original_path}, Line: {line_no}\n"
                    f"   Test: {test_id}\n"
                    f"   Code: {code}\n"
                )

            summary = "\n".join(summary_lines)
            logger.info("Bandit analysis complete", findings_count=len(findings))
            return summary

    except subprocess.TimeoutExpired:
        logger.warning("Bandit timed out after 30 seconds")
        return ""
    except FileNotFoundError:
        # Bandit not installed — this is OK, the LLM can still analyze
        logger.warning("Bandit not found in PATH — skipping static analysis")
        return ""
    except Exception as e:
        # Best-effort boundary: static analysis must never break the caller.
        logger.warning("Bandit analysis failed", error=str(e))
        return ""


def _invoke_bandit(scan_dir: Path) -> subprocess.CompletedProcess:
    """Run the Bandit CLI on *scan_dir* and return the completed process.

    This call blocks until Bandit exits or the 30-second timeout elapses.

    Raises:
        FileNotFoundError: if the ``bandit`` executable is not on PATH.
        subprocess.TimeoutExpired: if Bandit runs longer than 30 seconds.
    """
    # -r: recursive (scan all files in directory)
    # -f json: output as JSON (machine-parseable)
    # -ll: only report medium severity and above
    # --quiet: suppress progress bar
    return subprocess.run(
        [
            "bandit",
            "-r", str(scan_dir),
            "-f", "json",
            "-ll",
            "--quiet",
        ],
        capture_output=True,
        text=True,
        timeout=30,  # Kill if it takes too long
    )
def _map_temp_to_original(
    temp_path: str, tmpdir: str, original_files: dict[str, str]
) -> str:
    """Map a temp directory path back to the original file path.

    Args:
        temp_path: Path of a file as reported by the scanner, e.g.
            ``/tmp/ninjacg_bandit_xxx/src/auth/login.py``.
        tmpdir: The temp directory the files were written under.
        original_files: Mapping whose keys are the caller's original paths.

    Returns:
        The matching key from *original_files*, exactly as the caller spelled
        it. Keys are matched after normalising separators and redundant
        ``./`` components, so ``./src/a.py`` and ``src\\a.py`` both match a
        temp file at ``<tmpdir>/src/a.py``. If *temp_path* is not under
        *tmpdir*, or no key matches, only the file name is returned.
    """
    try:
        # Strip the tmpdir prefix: .../ninjacg_bandit_xxx/src/auth/login.py
        # becomes src/auth/login.py. Separators are normalised to "/".
        relative = str(Path(temp_path).relative_to(tmpdir)).replace("\\", "/")
    except (TypeError, ValueError):
        # Not located under tmpdir (or not path-like): fall back to the name.
        return Path(temp_path).name

    # Fast path: the caller's key already has the normalised spelling.
    if relative in original_files:
        return relative

    # Otherwise compare against each key in normalised form, so a different
    # spelling of the same relative path still maps back to the caller's key.
    for original in original_files:
        if Path(original.replace("\\", "/")).as_posix() == relative:
            return original

    # Fallback: return the filename only
    return Path(temp_path).name
|