Spaces:

NinjainPJs
/

ninja-code-guard

Running

File size: 6,308 Bytes

4b445f6

"""
Bandit Static Analysis Tool
=============================

Bandit is an open-source Python security linter. It parses Python code into an
Abstract Syntax Tree (AST) and checks each node against a set of security rules.

What Bandit catches:
- SQL injection patterns (string formatting in SQL calls)
- Use of eval(), exec(), os.system() (command injection risk)
- Hardcoded passwords and bind addresses
- Use of insecure hash functions (MD5, SHA1)
- Insecure temp file creation
- SSL/TLS verification disabled (requests.get(verify=False))
- Use of pickle (deserialization attacks)

What Bandit CANNOT catch:
- Business logic flaws
- Missing authentication/authorization
- Cross-file data flow (it analyzes one file at a time)
- Vulnerabilities in non-Python code

That's why we combine Bandit (mechanical pattern matching) with the LLM (semantic
understanding). Bandit provides high-confidence, low-noise signals that anchor the
LLM's analysis.

How it works:
1. We write the changed Python files to a temp directory
2. Run `bandit -r <dir> -f json -ll --quiet` as a subprocess
3. Parse the JSON output into a human-readable summary
4. Feed this summary into the LLM's prompt as additional context
"""

from __future__ import annotations

import asyncio
import json
import subprocess
import tempfile
from pathlib import Path

import structlog

logger = structlog.get_logger()


async def run_bandit(file_contents: dict[str, str]) -> str:
    """
    Run Bandit security analysis on Python files.

    The blocking work (writing files to disk and running the ``bandit``
    subprocess) is executed in the event loop's default executor so other
    coroutines keep running while the scan is in progress.

    Args:
        file_contents: dict of {filepath: source_code} for changed files

    Returns:
        A formatted string summarizing Bandit's findings, suitable for
        including in an LLM prompt. Returns an empty string if there are no
        Python files to scan or if Bandit could not produce a result (not
        installed, timed out, exited with an error, or printed nothing).
        Returns a short "No security issues detected" message when the scan
        succeeds without findings.
    """
    # Filter to only Python files — Bandit only understands Python
    python_files = {
        path: content
        for path, content in file_contents.items()
        if path.endswith(".py")
    }

    if not python_files:
        return ""

    try:
        # subprocess.run() and file writes block; keep them off the event loop.
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, _scan_with_bandit, python_files)
    except subprocess.TimeoutExpired:
        logger.warning("Bandit timed out after 30 seconds")
        return ""
    except FileNotFoundError:
        # Bandit not installed — this is OK, the LLM can still analyze
        logger.warning("Bandit not found in PATH — skipping static analysis")
        return ""
    except Exception as e:
        # Static analysis is best-effort: never let it break the review.
        logger.warning("Bandit analysis failed", error=str(e))
        return ""


def _scan_with_bandit(python_files: dict[str, str]) -> str:
    """Write the files to a temp directory, run Bandit on it, and summarize.

    This function is synchronous and blocking; ``run_bandit`` calls it from
    an executor thread. Exceptions (e.g. ``subprocess.TimeoutExpired``, or
    ``FileNotFoundError`` when the ``bandit`` executable is missing) are left
    to propagate so the caller can log them.
    """
    # We need files on disk because Bandit operates on the filesystem.
    # TemporaryDirectory removes the directory again when the block exits.
    with tempfile.TemporaryDirectory(prefix="ninjacg_bandit_") as tmpdir:
        tmpdir_path = Path(tmpdir)
        resolved_root = tmpdir_path.resolve()

        written = 0
        for filepath, content in python_files.items():
            # Recreate the directory structure (e.g., src/auth/login.py).
            # File paths come from the caller; an absolute path or one with
            # ".." components would otherwise be written outside the temp
            # directory, so anything that resolves outside it is skipped.
            target = (tmpdir_path / filepath).resolve()
            try:
                target.relative_to(resolved_root)
            except ValueError:
                logger.warning(
                    "Skipping file path outside Bandit scan directory",
                    path=filepath,
                )
                continue

            target.parent.mkdir(parents=True, exist_ok=True)
            target.write_text(content, encoding="utf-8")
            written += 1

        if not written:
            # Nothing was scanned, so there is nothing to report.
            return ""

        # Run Bandit as a subprocess
        # -r: recursive (scan all files in directory)
        # -f json: output as JSON (machine-parseable)
        # -ll: only report medium severity and above
        # --quiet: suppress progress bar
        result = subprocess.run(
            [
                "bandit",
                "-r", str(tmpdir_path),
                "-f", "json",
                "-ll",
                "--quiet",
            ],
            capture_output=True,
            text=True,
            timeout=30,  # Kill if it takes too long
        )

        # Bandit exit codes:
        # 0 = no issues found
        # 1 = issues found (this is NOT an error)
        # 2+ = actual error
        if result.returncode > 1:
            logger.warning("Bandit returned error", stderr=result.stderr[:500])
            return ""

        if not result.stdout.strip():
            return ""

        # Parse the JSON output
        bandit_output = json.loads(result.stdout)
        findings = bandit_output.get("results", [])

        if not findings:
            return "Bandit static analysis: No security issues detected."

        summary = _format_bandit_findings(findings, tmpdir, python_files)
        logger.info("Bandit analysis complete", findings_count=len(findings))
        return summary


def _format_bandit_findings(
    findings: list, tmpdir: str, python_files: dict[str, str]
) -> str:
    """Format Bandit's ``results`` entries as a human-readable summary for the LLM."""
    summary_lines = [
        f"Bandit static analysis found {len(findings)} issue(s):\n"
    ]

    for i, finding in enumerate(findings, 1):
        # Map the temp file path back to the original file path
        temp_path = finding.get("filename", "")
        original_path = _map_temp_to_original(temp_path, tmpdir, python_files)

        severity = finding.get("issue_severity", "UNKNOWN")
        confidence = finding.get("issue_confidence", "UNKNOWN")
        text = finding.get("issue_text", "")
        test_id = finding.get("test_id", "")
        line_no = finding.get("line_number", 0)
        code = finding.get("code", "").strip()

        summary_lines.append(
            f"{i}. [{severity}/{confidence}] {text}\n"
            f"   File: {original_path}, Line: {line_no}\n"
            f"   Test: {test_id}\n"
            f"   Code: {code}\n"
        )

    return "\n".join(summary_lines)


def _map_temp_to_original(
    temp_path: str, tmpdir: str, original_files: dict[str, str]
) -> str:
    """Map a temp directory path back to the original file path.

    Args:
        temp_path: Path reported for a finding, expected to be located
            under ``tmpdir`` (e.g. /tmp/ninjacg_bandit_xxx/src/auth/login.py).
        tmpdir: The temp directory the files were written to.
        original_files: dict keyed by the original file paths.

    Returns:
        The matching key of ``original_files`` when one is found; otherwise
        just the file name component of ``temp_path``.
    """
    try:
        # Strip the tmpdir prefix to get e.g. src/auth/login.py, using
        # forward slashes regardless of platform.
        relative = Path(temp_path).relative_to(tmpdir).as_posix()
    except ValueError:
        # temp_path is not located under tmpdir
        return Path(temp_path).name

    # Verify it's one of our original files
    if relative in original_files:
        return relative

    # Original keys may be spelled differently from the on-disk path
    # (e.g. "./src/a.py" or "src//a.py"); compare their normalized forms and
    # return the key exactly as the caller supplied it.
    for original in original_files:
        if Path(original).as_posix() == relative:
            return original

    # Fallback: return the filename only
    return Path(temp_path).name