Spaces:
Running
Running
| """ | |
| detect-secrets Tool | |
| ==================== | |
| detect-secrets scans code for hardcoded credentials: API keys, passwords, | |
| database connection strings, AWS access keys, private keys, etc. | |
| Why a dedicated tool for secrets? | |
| - Hardcoded secrets are the #1 most common security finding in code reviews | |
| - They're easy to detect with regex/entropy analysis but easy to miss manually | |
| - detect-secrets uses both pattern matching AND Shannon entropy analysis: | |
| - Pattern matching: finds things that LOOK like API keys (e.g., "sk_live_...") | |
| - Entropy analysis: finds random-looking strings that might be secrets | |
| (high entropy = lots of randomness = probably a key, not a variable name) | |
| What Shannon entropy means: | |
| - "hello" has low entropy (~2.8 bits/char) — predictable, probably not a secret | |
| - "a3f8g2kx9m" has high entropy (~3.9 bits/char) — random, might be a secret | |
| - detect-secrets flags strings above a configurable entropy threshold | |
| We run this on the PR diff specifically (not full files) because we only care | |
| about NEWLY introduced secrets, not pre-existing ones. | |
| """ | |
| from __future__ import annotations | |
| import json | |
| import subprocess | |
| import tempfile | |
| from pathlib import Path | |
| import structlog | |
| logger = structlog.get_logger() | |
| async def run_detect_secrets(file_contents: dict[str, str]) -> str: | |
| """ | |
| Scan changed files for hardcoded secrets. | |
| Args: | |
| file_contents: dict of {filepath: source_code} | |
| Returns: | |
| A formatted string listing detected secrets, suitable for | |
| including in an LLM prompt. Empty string if no secrets found. | |
| """ | |
| if not file_contents: | |
| return "" | |
| try: | |
| with tempfile.TemporaryDirectory(prefix="ninjacg_secrets_") as tmpdir: | |
| tmpdir_path = Path(tmpdir) | |
| for filepath, content in file_contents.items(): | |
| file_path = tmpdir_path / filepath | |
| file_path.parent.mkdir(parents=True, exist_ok=True) | |
| file_path.write_text(content, encoding="utf-8") | |
| # Run detect-secrets scan | |
| # --all-files: scan all file types | |
| # --force-use-all-plugins: use every detection plugin | |
| result = subprocess.run( | |
| [ | |
| "detect-secrets", "scan", | |
| str(tmpdir_path), | |
| "--all-files", | |
| ], | |
| capture_output=True, | |
| text=True, | |
| timeout=30, | |
| ) | |
| if result.returncode != 0 and not result.stdout: | |
| logger.warning("detect-secrets error", stderr=result.stderr[:500]) | |
| return "" | |
| if not result.stdout.strip(): | |
| return "" | |
| scan_results = json.loads(result.stdout) | |
| results_map = scan_results.get("results", {}) | |
| # Count total secrets found | |
| total_secrets = sum(len(secrets) for secrets in results_map.values()) | |
| if total_secrets == 0: | |
| return "detect-secrets scan: No hardcoded secrets detected." | |
| # Format findings | |
| summary_lines = [ | |
| f"detect-secrets found {total_secrets} potential secret(s):\n" | |
| ] | |
| for file_path, secrets in results_map.items(): | |
| # Map temp path back to original | |
| try: | |
| relative = str(Path(file_path).relative_to(tmpdir)).replace("\\", "/") | |
| except ValueError: | |
| relative = Path(file_path).name | |
| for secret in secrets: | |
| secret_type = secret.get("type", "Unknown") | |
| line_no = secret.get("line_number", 0) | |
| summary_lines.append( | |
| f"- {secret_type} in {relative} at line {line_no}" | |
| ) | |
| summary = "\n".join(summary_lines) | |
| logger.info("detect-secrets scan complete", secrets_found=total_secrets) | |
| return summary | |
| except FileNotFoundError: | |
| logger.warning("detect-secrets not found in PATH — skipping") | |
| return "" | |
| except Exception as e: | |
| logger.warning("detect-secrets scan failed", error=str(e)) | |
| return "" | |