""" Bandit Static Analysis Tool ============================= Bandit is an open-source Python security linter. It parses Python code into an Abstract Syntax Tree (AST) and checks each node against a set of security rules. What Bandit catches: - SQL injection patterns (string formatting in SQL calls) - Use of eval(), exec(), os.system() (command injection risk) - Hardcoded passwords and bind addresses - Use of insecure hash functions (MD5, SHA1) - Insecure temp file creation - SSL/TLS verification disabled (requests.get(verify=False)) - Use of pickle (deserialization attacks) What Bandit CANNOT catch: - Business logic flaws - Missing authentication/authorization - Cross-file data flow (it analyzes one file at a time) - Vulnerabilities in non-Python code That's why we combine Bandit (mechanical pattern matching) with the LLM (semantic understanding). Bandit provides high-confidence, low-noise signals that anchor the LLM's analysis. How it works: 1. We write the changed Python files to a temp directory 2. Run `bandit -r -f json` as a subprocess 3. Parse the JSON output into a human-readable summary 4. Feed this summary into the LLM's prompt as additional context """ from __future__ import annotations import json import subprocess import tempfile from pathlib import Path import structlog logger = structlog.get_logger() async def run_bandit(file_contents: dict[str, str]) -> str: """ Run Bandit security analysis on Python files. Args: file_contents: dict of {filepath: source_code} for changed files Returns: A formatted string summarizing Bandit's findings, suitable for including in an LLM prompt. Returns empty string if no Python files or no findings. """ # Filter to only Python files — Bandit only understands Python python_files = { path: content for path, content in file_contents.items() if path.endswith(".py") } if not python_files: return "" try: # Create a temp directory and write the Python files there. # We need files on disk because Bandit operates on the filesystem. # tempfile.mkdtemp() creates a secure temp dir that only we can access. with tempfile.TemporaryDirectory(prefix="ninjacg_bandit_") as tmpdir: tmpdir_path = Path(tmpdir) for filepath, content in python_files.items(): # Recreate the directory structure (e.g., src/auth/login.py) file_path = tmpdir_path / filepath file_path.parent.mkdir(parents=True, exist_ok=True) file_path.write_text(content, encoding="utf-8") # Run Bandit as a subprocess # -r: recursive (scan all files in directory) # -f json: output as JSON (machine-parseable) # -ll: only report medium severity and above # --quiet: suppress progress bar result = subprocess.run( [ "bandit", "-r", str(tmpdir_path), "-f", "json", "-ll", "--quiet", ], capture_output=True, text=True, timeout=30, # Kill if it takes too long ) # Bandit exit codes: # 0 = no issues found # 1 = issues found (this is NOT an error) # 2+ = actual error if result.returncode > 1: logger.warning("Bandit returned error", stderr=result.stderr[:500]) return "" if not result.stdout.strip(): return "" # Parse the JSON output bandit_output = json.loads(result.stdout) findings = bandit_output.get("results", []) if not findings: return "Bandit static analysis: No security issues detected." # Format findings as a human-readable summary for the LLM summary_lines = [ f"Bandit static analysis found {len(findings)} issue(s):\n" ] for i, finding in enumerate(findings, 1): # Map the temp file path back to the original file path temp_path = finding.get("filename", "") original_path = _map_temp_to_original(temp_path, tmpdir, python_files) severity = finding.get("issue_severity", "UNKNOWN") confidence = finding.get("issue_confidence", "UNKNOWN") text = finding.get("issue_text", "") test_id = finding.get("test_id", "") line_no = finding.get("line_number", 0) code = finding.get("code", "").strip() summary_lines.append( f"{i}. [{severity}/{confidence}] {text}\n" f" File: {original_path}, Line: {line_no}\n" f" Test: {test_id}\n" f" Code: {code}\n" ) summary = "\n".join(summary_lines) logger.info("Bandit analysis complete", findings_count=len(findings)) return summary except subprocess.TimeoutExpired: logger.warning("Bandit timed out after 30 seconds") return "" except FileNotFoundError: # Bandit not installed — this is OK, the LLM can still analyze logger.warning("Bandit not found in PATH — skipping static analysis") return "" except Exception as e: logger.warning("Bandit analysis failed", error=str(e)) return "" def _map_temp_to_original( temp_path: str, tmpdir: str, original_files: dict[str, str] ) -> str: """Map a temp directory path back to the original file path.""" try: # The temp path looks like: /tmp/ninjacg_bandit_xxx/src/auth/login.py # We need to strip the tmpdir prefix to get: src/auth/login.py relative = str(Path(temp_path).relative_to(tmpdir)) # Normalize path separators relative = relative.replace("\\", "/") # Verify it's one of our original files if relative in original_files: return relative except (ValueError, Exception): pass # Fallback: return the filename only return Path(temp_path).name