File size: 2,515 Bytes
5e21013
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
from pathlib import Path
import re
import subprocess
import sys

# Directory two levels above the one containing this script; it is used as
# the working directory for git and as the base for every tracked path.
# NOTE(review): presumably the repository root — confirm against the
# script's location in the tree.
root = Path(__file__).resolve().parents[2]

# List every tracked path, one per line.
# `core.quotePath=false` makes git print non-ASCII file names verbatim.
# With the default setting such names come back wrapped in double quotes
# with octal escapes, which never match a file on disk, so those files
# would be skipped without being scanned.
# Git emits path bytes as stored (normally UTF-8), so decode explicitly
# instead of relying on the locale encoding.
tracked = subprocess.run(
    ["git", "-c", "core.quotePath=false", "ls-files"],
    cwd=root,
    text=True,
    encoding="utf-8",
    capture_output=True,
)
if tracked.returncode != 0:
    # Surface git's own error message and propagate its exit status.
    sys.stderr.write(tracked.stderr)
    raise SystemExit(tracked.returncode)

# (label, compiled regex) pairs. A file is reported once per label whose
# regex matches anywhere in its text.
#
# This script is itself a scanned `.py` file, so every regex is written so
# that its own source text does not match it (prefixes are followed by a
# group or character class rather than by literal token characters). Keep
# that property when editing patterns or the comments around them.
patterns = [
    ("anthropic_api_key", re.compile(r"\bsk-ant-[A-Za-z0-9_-]{10,}\b")),
    (
        "openai_api_key",
        # Two shapes: prefixed keys (project / service-account / admin),
        # whose body may contain `-` and `_`, and the legacy form where
        # 20+ alphanumerics follow `sk-` directly.
        re.compile(
            r"\bsk-(?:(?:proj|svcacct|admin)-[A-Za-z0-9_-]{20,}|[A-Za-z0-9]{20,})\b"
        ),
    ),
    ("huggingface_token", re.compile(r"\bhf_[A-Za-z0-9]{20,}\b")),
    (
        "github_token",
        # Classic/OAuth/app tokens (gh + one of p, o, u, s, r) and
        # fine-grained personal access tokens.
        re.compile(r"\b(?:gh[pousr]_[A-Za-z0-9]{20,}|github_pat_[A-Za-z0-9_]{20,})\b"),
    ),
    ("aws_access_key", re.compile(r"\bAKIA[0-9A-Z]{16}\b")),
    (
        "private_key",
        # PEM / armor header lines: an optional algorithm or container word
        # before the key label, and an optional trailing BLOCK (PGP armor).
        re.compile(
            r"-----BEGIN (?:(?:RSA|EC|OPENSSH|DSA|ENCRYPTED|PGP) )?PRIVATE KEY(?: BLOCK)?-----"
        ),
    ),
    (
        "supabase_service_key_assignment",
        # Only quoted, non-empty assignments are matched.
        re.compile(r'SUPABASE_SERVICE_ROLE_KEY\s*=\s*["\'][^"\']+["\']'),
    ),
]
# File extensions eligible for scanning.
allowed_suffixes = {
    ".js",
    ".json",
    ".jsx",
    ".md",
    ".py",
    ".toml",
    ".ts",
    ".tsx",
    ".txt",
    ".yaml",
    ".yml",
}

# Exact file names that are never scanned.
ignored_names = {"pnpm-lock.yaml"}

# Path prefixes that are never scanned. These are fixture directories:
# their files deliberately contain secret look-alikes (the `AKIA...EXAMPLE`
# keys from the AWS docs, fake `sk-` tokens, dummy private-key headers),
# because the eval harness exists to check that Bee detects or refuses
# such patterns. Running the leak regexes over them would always produce
# false positives. Keep this list as short as possible.
ignored_path_prefixes = ("eval/bee_security_harness/cases/",)
# One "<path>: <label>" entry per (file, pattern) pair that matched.
violations = []

# Walk the tracked paths, filter out what should not be scanned, and match
# the remaining files against every secret pattern.
for line in tracked.stdout.splitlines():
    rel_path = Path(line)
    name = rel_path.name
    if name in ignored_names:
        continue
    # Compare in forward-slash form: the prefixes are written with `/`,
    # while str(Path) uses backslashes on Windows and would never match.
    if rel_path.as_posix().startswith(tuple(ignored_path_prefixes)):
        continue
    is_env_example = name == ".env.example"
    # NOTE(review): tracked `.env*` files other than `.env.example` are
    # skipped rather than scanned — confirm this is intentional.
    if name.startswith(".env") and not is_env_example:
        continue
    # Files without a suffix are scanned. Files with a suffix must be on the
    # allow-list, except `.env.example` (whose suffix is ".example").
    if rel_path.suffix and rel_path.suffix not in allowed_suffixes and not is_env_example:
        continue
    file_path = root / rel_path
    # `git ls-files` also lists entries that are not regular files on disk
    # (submodule directories, symlinks to directories, paths deleted from
    # the working tree). Reading a directory would raise, so require a file.
    if not file_path.is_file():
        continue
    try:
        text = file_path.read_text(encoding="utf-8")
    except UnicodeDecodeError:
        # Not valid UTF-8 text (likely binary content): nothing to match.
        continue
    violations.extend(
        f"{rel_path}: {label}" for label, pattern in patterns if pattern.search(text)
    )

# Print a header and one bullet per recorded hit to stderr, then terminate
# with exit status 1. With no hits the script falls through and exits 0.
if violations:
    report_lines = ["Security check failed:\n"]
    report_lines += [f"- {entry}\n" for entry in violations]
    sys.stderr.write("".join(report_lines))
    sys.exit(1)