File size: 2,081 Bytes
c36ffd2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
"""
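Deterministic pools used by scenario_generator.py to sample usernames, hostnames,
and IP addresses. Public-facing (attacker-looking) IPv4 addresses come from the
RFC 5737 documentation ranges, so the synthetic data can never collide with real
production IPs; internal addresses come from RFC 1918 private space. (No RFC 3849
IPv6 documentation pool is defined in this module.)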
"""

# Pool of 40 account names drawn from by sample_username(): 30 common first
# names followed by 10 IT-flavored service/ops accounts.
# 'root' is deliberately left out of this compromise-target pool because it's
# structurally weird ("root was compromised via ssh brute") even though real
# incidents exist.
USERNAMES = [
    "alice", "bob", "carol", "dave", "eve", "frank", "grace", "heidi",
    "ivan", "judy", "ken", "leo", "mia", "noah", "olivia", "peter",
    "quinn", "ruby", "sam", "tina", "ursula", "vince", "wendy", "xander",
    "yara", "zach", "maya", "theo", "lara", "jonas",
    "devops", "deploy", "ci", "jenkins", "dbuser", "webops", "svc-nginx",
    "releng", "admin2", "ops",
]

# Decoy (benign) user accounts used to populate /etc/passwd and /home.
# None of these names appear in USERNAMES, and no sampler in this module reads
# this list; presumably it is consumed directly by the scenario generator.
DECOY_USERS = [
    "guest", "backup", "mail", "www-data", "postfix", "systemd-network",
]

# Pool of 15 short, server-role-style hostnames drawn from by sample_hostname().
HOSTNAMES = [
    "webhost", "db01", "api-gateway", "worker-03", "cache-redis",
    "staging", "bastion", "ingest", "ci-runner", "monitoring",
    "edge-01", "payments", "search-indexer", "log-agg", "auth-svc",
]

# RFC 5737 "documentation" ranges — reserved for examples, so safe to use as
# fake public addresses.
# Despite the name, entries are not CIDR notation: each is the first three
# octets of a /24 with a trailing dot, and sample_public_ip() appends the
# final host octet.
PUBLIC_CIDRS = [
    "192.0.2.",    # TEST-NET-1
    "198.51.100.", # TEST-NET-2
    "203.0.113.",  # TEST-NET-3
]

# RFC 1918 private space — used for legit internal traffic noise.
# Despite the name, entries are not CIDR notation: each is a three-octet
# prefix with a trailing dot, and sample_internal_ip() appends the final
# host octet.
INTERNAL_CIDRS = [
    "10.0.0.",
    "10.0.1.",
    "172.16.0.",
    "192.168.1.",
]


def sample_public_ip(rng) -> str:
    """Return an attacker-looking IPv4 address from the RFC 5737 test ranges.

    A three-octet prefix is picked from ``PUBLIC_CIDRS`` first, then the last
    octet is drawn with ``rng.randint(2, 254)``; the two draws happen in that
    order.

    Args:
        rng: Random source exposing ``choice`` and ``randint`` (presumably a
            ``random.Random`` instance — confirm against the caller).

    Returns:
        The dotted-quad address as a string.
    """
    prefix = rng.choice(PUBLIC_CIDRS)
    host_octet = rng.randint(2, 254)
    return f"{prefix}{host_octet}"


def sample_internal_ip(rng) -> str:
    """Return a legit-looking internal IPv4 address from RFC 1918 space.

    A three-octet prefix is picked from ``INTERNAL_CIDRS`` first, then the
    last octet is drawn with ``rng.randint(2, 254)``; the two draws happen in
    that order.

    Args:
        rng: Random source exposing ``choice`` and ``randint`` (presumably a
            ``random.Random`` instance — confirm against the caller).

    Returns:
        The dotted-quad address as a string.
    """
    prefix = rng.choice(INTERNAL_CIDRS)
    host_octet = rng.randint(2, 254)
    return f"{prefix}{host_octet}"


def sample_username(rng, exclude=()) -> str:
    """Pick one name from ``USERNAMES``, skipping anything in ``exclude``.

    Args:
        rng: Random source exposing ``choice`` (presumably a ``random.Random``
            instance — confirm against the caller).
        exclude: Container of names that must not be returned. Membership is
            tested with ``in``, so pass a tuple/list/set of names; a bare
            string would be matched by substring instead.

    Returns:
        The chosen username.

    Note:
        If every name is excluded, the empty pool is handed to ``rng.choice``
        unchanged, and whatever that call does with an empty sequence
        propagates to the caller.
    """
    pool = []
    for name in USERNAMES:
        if name in exclude:
            continue
        pool.append(name)
    return rng.choice(pool)


def sample_hostname(rng) -> str:
    """Pick one hostname from ``HOSTNAMES``.

    Args:
        rng: Random source exposing ``choice`` (presumably a ``random.Random``
            instance — confirm against the caller).

    Returns:
        The chosen hostname.
    """
    hostname = rng.choice(HOSTNAMES)
    return hostname