Spaces:

NinjainPJs
/

ninja-code-guard

Running

File size: 4,198 Bytes

4b445f6

"""
detect-secrets Tool
====================

detect-secrets scans code for hardcoded credentials: API keys, passwords,
database connection strings, AWS access keys, private keys, etc.

Why a dedicated tool for secrets?
- Hardcoded secrets are the #1 most common security finding in code reviews
- They're easy to detect with regex/entropy analysis but easy to miss manually
- detect-secrets uses both pattern matching AND Shannon entropy analysis:
  - Pattern matching: finds things that LOOK like API keys (e.g., "sk_live_...")
  - Entropy analysis: finds random-looking strings that might be secrets
    (high entropy = lots of randomness = probably a key, not a variable name)

What Shannon entropy means:
- "hello" has low entropy (~2.8 bits/char) — predictable, probably not a secret
- "a3f8g2kx9m" has high entropy (~3.9 bits/char) — random, might be a secret
- detect-secrets flags strings above a configurable entropy threshold

We run this on the PR diff specifically (not full files) because we only care
about NEWLY introduced secrets, not pre-existing ones.
"""

from __future__ import annotations

import json
import subprocess
import tempfile
from pathlib import Path

import structlog

logger = structlog.get_logger()


async def run_detect_secrets(file_contents: dict[str, str]) -> str:
    """
    Scan changed files for hardcoded secrets.

    Args:
        file_contents: dict of {filepath: source_code}

    Returns:
        A formatted string listing detected secrets, suitable for
        including in an LLM prompt. Empty string if no secrets found.
    """
    if not file_contents:
        return ""

    try:
        with tempfile.TemporaryDirectory(prefix="ninjacg_secrets_") as tmpdir:
            tmpdir_path = Path(tmpdir)

            for filepath, content in file_contents.items():
                file_path = tmpdir_path / filepath
                file_path.parent.mkdir(parents=True, exist_ok=True)
                file_path.write_text(content, encoding="utf-8")

            # Run detect-secrets scan
            # --all-files: scan all file types
            # --force-use-all-plugins: use every detection plugin
            result = subprocess.run(
                [
                    "detect-secrets", "scan",
                    str(tmpdir_path),
                    "--all-files",
                ],
                capture_output=True,
                text=True,
                timeout=30,
            )

            if result.returncode != 0 and not result.stdout:
                logger.warning("detect-secrets error", stderr=result.stderr[:500])
                return ""

            if not result.stdout.strip():
                return ""

            scan_results = json.loads(result.stdout)
            results_map = scan_results.get("results", {})

            # Count total secrets found
            total_secrets = sum(len(secrets) for secrets in results_map.values())

            if total_secrets == 0:
                return "detect-secrets scan: No hardcoded secrets detected."

            # Format findings
            summary_lines = [
                f"detect-secrets found {total_secrets} potential secret(s):\n"
            ]

            for file_path, secrets in results_map.items():
                # Map temp path back to original
                try:
                    relative = str(Path(file_path).relative_to(tmpdir)).replace("\\", "/")
                except ValueError:
                    relative = Path(file_path).name

                for secret in secrets:
                    secret_type = secret.get("type", "Unknown")
                    line_no = secret.get("line_number", 0)
                    summary_lines.append(
                        f"- {secret_type} in {relative} at line {line_no}"
                    )

            summary = "\n".join(summary_lines)
            logger.info("detect-secrets scan complete", secrets_found=total_secrets)
            return summary

    except FileNotFoundError:
        logger.warning("detect-secrets not found in PATH — skipping")
        return ""
    except Exception as e:
        logger.warning("detect-secrets scan failed", error=str(e))
        return ""