Spaces:

NinjainPJs
/

ninja-code-guard

Running

App Files Files Community

ninja-code-guard / app /tools /detect_secrets_tool.py

NinjainPJs

initial - commit

4b445f6 3 months ago

raw

history blame contribute delete

4.2 kB

	"""
	detect-secrets Tool
	====================

	detect-secrets scans code for hardcoded credentials: API keys, passwords,
	database connection strings, AWS access keys, private keys, etc.

	Why a dedicated tool for secrets?
	- Hardcoded secrets are the #1 most common security finding in code reviews
	- They're easy to detect with regex/entropy analysis but easy to miss manually
	- detect-secrets uses both pattern matching AND Shannon entropy analysis:
	    - Pattern matching: finds things that LOOK like API keys (e.g., "sk_live_...")
	    - Entropy analysis: finds random-looking strings that might be secrets
	      (high entropy = lots of randomness = probably a key, not a variable name)

	What Shannon entropy means:
	- "hello" has low entropy (~1.9 bits/char) — predictable, probably not a secret
	- "a3f8g2kx9m" has high entropy (~3.3 bits/char) — random, might be a secret
	- detect-secrets flags strings above a configurable entropy threshold

	We run this on the PR diff specifically (not full files) because we only care
	about NEWLY introduced secrets, not pre-existing ones.
	"""

	from __future__ import annotations

	import asyncio
	import json
	import subprocess
	import tempfile
	from pathlib import Path

	import structlog

	# Module-level structlog logger; run_detect_secrets uses it for warnings and the scan summary.
	logger = structlog.get_logger()


	def _safe_target(root: Path, filepath: str) -> Path | None:
	    """Return the location for *filepath* under *root*, or None if it would escape *root*."""
	    candidate = (root / filepath).resolve()
	    if candidate == root:
	        # An empty or "." path would name the scan directory itself, not a file.
	        return None
	    try:
	        candidate.relative_to(root)
	    except ValueError:
	        # Absolute paths and ".." segments resolve outside the sandbox.
	        return None
	    return candidate


	async def run_detect_secrets(file_contents: dict[str, str]) -> str:
	    """
	    Scan changed files for hardcoded secrets.

	    The files are materialised in a temporary directory, scanned with the
	    ``detect-secrets`` CLI, and the JSON report is condensed into a short
	    text summary.

	    Args:
	        file_contents: dict of {filepath: source_code}. File paths are
	            treated as untrusted: any path that would land outside the
	            temporary scan directory is skipped with a warning.

	    Returns:
	        A formatted string listing detected secrets (type, file, line),
	        suitable for including in an LLM prompt. When the scan ran and
	        found nothing, a one-line "no secrets detected" message. An empty
	        string when there was nothing to scan, when ``detect-secrets`` is
	        not installed, or when the scan failed for any reason; failures
	        are logged and never raised to the caller.
	    """
	    if not file_contents:
	        return ""

	    try:
	        with tempfile.TemporaryDirectory(prefix="ninjacg_secrets_") as tmpdir:
	            # Resolve symlinks once so the containment check and the later
	            # path mapping both compare against the same canonical root.
	            scan_root = Path(tmpdir).resolve()

	            written = 0
	            for filepath, content in file_contents.items():
	                target = _safe_target(scan_root, filepath)
	                if target is None:
	                    logger.warning(
	                        "detect-secrets skipping unsafe path", path=filepath
	                    )
	                    continue
	                target.parent.mkdir(parents=True, exist_ok=True)
	                target.write_text(content, encoding="utf-8")
	                written += 1

	            if not written:
	                # Nothing was scanned, so do not report a clean result.
	                return ""

	            # --all-files: scan all file types
	            command = ["detect-secrets", "scan", str(scan_root), "--all-files"]

	            # subprocess.run blocks for up to 30 s; run it on the default
	            # executor so the event loop stays responsive meanwhile.
	            loop = asyncio.get_running_loop()
	            result = await loop.run_in_executor(
	                None,
	                lambda: subprocess.run(
	                    command,
	                    capture_output=True,
	                    text=True,
	                    timeout=30,
	                ),
	            )

	            if result.returncode != 0 and not result.stdout:
	                logger.warning("detect-secrets error", stderr=result.stderr[:500])
	                return ""

	            if not result.stdout.strip():
	                return ""

	            scan_results = json.loads(result.stdout)
	            results_map = scan_results.get("results", {})

	            total_secrets = sum(len(found) for found in results_map.values())
	            if total_secrets == 0:
	                return "detect-secrets scan: No hardcoded secrets detected."

	            summary_lines = [
	                f"detect-secrets found {total_secrets} potential secret(s):\n"
	            ]

	            for reported_path, found in results_map.items():
	                # Map the temp path back to the original repo-relative path;
	                # fall back to the bare file name if it is not under the root.
	                try:
	                    relative = (
	                        Path(reported_path).resolve().relative_to(scan_root).as_posix()
	                    )
	                except ValueError:
	                    relative = Path(reported_path).name

	                for secret in found:
	                    secret_type = secret.get("type", "Unknown")
	                    line_no = secret.get("line_number", 0)
	                    summary_lines.append(
	                        f"- {secret_type} in {relative} at line {line_no}"
	                    )

	            summary = "\n".join(summary_lines)
	            logger.info("detect-secrets scan complete", secrets_found=total_secrets)
	            return summary

	    except FileNotFoundError:
	        logger.warning("detect-secrets not found in PATH — skipping")
	        return ""
	    except Exception as e:
	        # Best-effort tool: timeouts, bad JSON, and I/O errors degrade to
	        # "no output" rather than failing the caller's review run.
	        logger.warning("detect-secrets scan failed", error=str(e))
	        return ""