File size: 4,198 Bytes
4b445f6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
"""
detect-secrets Tool
====================

detect-secrets scans code for hardcoded credentials: API keys, passwords,
database connection strings, AWS access keys, private keys, etc.

Why a dedicated tool for secrets?
- Hardcoded secrets are the #1 most common security finding in code reviews
- They're easy to detect with regex/entropy analysis but easy to miss manually
- detect-secrets uses both pattern matching AND Shannon entropy analysis:
  - Pattern matching: finds things that LOOK like API keys (e.g., "sk_live_...")
  - Entropy analysis: finds random-looking strings that might be secrets
    (high entropy = lots of randomness = probably a key, not a variable name)

What Shannon entropy means:
- "hello" has low entropy (~1.9 bits/char) — predictable, probably not a secret
- "a3f8g2kx9m" has high entropy (~3.3 bits/char) — random, might be a secret
- detect-secrets flags strings above a configurable entropy threshold

We run this on the PR diff specifically (not full files) because we only care
about NEWLY introduced secrets, not pre-existing ones.
"""

from __future__ import annotations

import asyncio
import functools
import json
import subprocess
import tempfile
from pathlib import Path

import structlog

logger = structlog.get_logger()


def _is_within(root: Path, candidate: Path) -> bool:
    """Return True when *candidate* is a strict descendant of *root*.

    Both paths are expected to be resolved already. The root itself is
    rejected because it is a directory and cannot be written as a file.
    """
    if candidate == root:
        return False
    try:
        candidate.relative_to(root)
    except ValueError:
        return False
    return True


def _display_path(raw_path: str, root: Path) -> str:
    """Map a path reported by the scanner back to a repo-style relative path.

    Absolute paths are made relative to *root* (falling back to the bare file
    name when they lie outside it); relative paths are kept as reported.
    Backslashes are normalised to forward slashes.
    """
    reported = Path(raw_path)
    if reported.is_absolute():
        try:
            reported = reported.resolve().relative_to(root)
        except ValueError:
            return reported.name
    return str(reported).replace("\\", "/")


async def run_detect_secrets(file_contents: dict[str, str]) -> str:
    """
    Scan changed files for hardcoded secrets.

    The files are written into a temporary directory, the ``detect-secrets``
    CLI is run over that directory in a worker thread (so the event loop is
    not blocked while the scanner runs), and its JSON output is turned into a
    short text summary.

    Args:
        file_contents: dict of {filepath: source_code}. Paths that are
            absolute or that would escape the scan directory (e.g. via
            ``..``) are skipped with a warning instead of being written.

    Returns:
        A formatted string suitable for including in an LLM prompt:
        - a bullet list of findings when secrets are detected,
        - "detect-secrets scan: No hardcoded secrets detected." when the
          scan ran and found nothing,
        - an empty string when there is nothing to scan, the tool is
          missing, it times out, or the scan fails for any other reason.
    """
    if not file_contents:
        return ""

    try:
        with tempfile.TemporaryDirectory(prefix="ninjacg_secrets_") as tmpdir:
            # Resolve once so containment checks and result-path mapping are
            # not confused by a symlinked temp location.
            root = Path(tmpdir).resolve()

            written = 0
            for filepath, content in file_contents.items():
                target = (root / filepath).resolve()
                # File paths originate from the PR and are untrusted: never
                # write outside the temporary scan directory.
                if not _is_within(root, target):
                    logger.warning(
                        "detect-secrets skipping unsafe path", filepath=filepath
                    )
                    continue
                target.parent.mkdir(parents=True, exist_ok=True)
                target.write_text(content, encoding="utf-8")
                written += 1

            if not written:
                return ""

            # Run detect-secrets scan
            # --all-files: scan all file types (not only git-tracked ones)
            #
            # cwd is set to the scan root so result paths can be interpreted
            # relative to it.
            # NOTE(review): some detect-secrets releases appear to resolve
            # scanned paths against the working directory — confirm against
            # the pinned version.
            scan = functools.partial(
                subprocess.run,
                [
                    "detect-secrets", "scan",
                    str(root),
                    "--all-files",
                ],
                capture_output=True,
                text=True,
                timeout=30,
                cwd=str(root),
            )
            # subprocess.run blocks; hand it to the default executor so other
            # coroutines keep running while the scanner works.
            result = await asyncio.get_running_loop().run_in_executor(None, scan)

            if result.returncode != 0 and not result.stdout:
                logger.warning("detect-secrets error", stderr=result.stderr[:500])
                return ""

            if not result.stdout.strip():
                return ""

            scan_results = json.loads(result.stdout)
            results_map = scan_results.get("results", {})

            # Count total secrets found
            total_secrets = sum(len(secrets) for secrets in results_map.values())

            if total_secrets == 0:
                return "detect-secrets scan: No hardcoded secrets detected."

            # Format findings
            summary_lines = [
                f"detect-secrets found {total_secrets} potential secret(s):\n"
            ]

            for file_path, secrets in results_map.items():
                # Map temp path back to original
                relative = _display_path(file_path, root)

                for secret in secrets:
                    secret_type = secret.get("type", "Unknown")
                    line_no = secret.get("line_number", 0)
                    summary_lines.append(
                        f"- {secret_type} in {relative} at line {line_no}"
                    )

            summary = "\n".join(summary_lines)
            logger.info("detect-secrets scan complete", secrets_found=total_secrets)
            return summary

    except FileNotFoundError:
        logger.warning("detect-secrets not found in PATH — skipping")
        return ""
    except subprocess.TimeoutExpired:
        logger.warning("detect-secrets scan timed out")
        return ""
    except Exception as e:
        # Best-effort tool: a scanner failure must not break the caller.
        logger.warning("detect-secrets scan failed", error=str(e))
        return ""