""" Deterministic pools used by scenario_generator.py to sample usernames, hostnames, and IP addresses. Everything public-facing lives in RFC 5737 / RFC 3849 ranges so the synthetic data can never collide with real production IPs. """ # 40 common names + a few IT-flavored accounts. Excludes 'root' from the compromise # target pool because it's structurally weird ("root was compromised via ssh brute") # even though real incidents exist. USERNAMES = [ "alice", "bob", "carol", "dave", "eve", "frank", "grace", "heidi", "ivan", "judy", "ken", "leo", "mia", "noah", "olivia", "peter", "quinn", "ruby", "sam", "tina", "ursula", "vince", "wendy", "xander", "yara", "zach", "maya", "theo", "lara", "jonas", "devops", "deploy", "ci", "jenkins", "dbuser", "webops", "svc-nginx", "releng", "admin2", "ops", ] # Decoy (benign) user accounts used to populate /etc/passwd and /home. DECOY_USERS = [ "guest", "backup", "mail", "www-data", "postfix", "systemd-network", ] HOSTNAMES = [ "webhost", "db01", "api-gateway", "worker-03", "cache-redis", "staging", "bastion", "ingest", "ci-runner", "monitoring", "edge-01", "payments", "search-indexer", "log-agg", "auth-svc", ] # RFC 5737 "documentation" ranges — safe to use, never routable. PUBLIC_CIDRS = [ "192.0.2.", # TEST-NET-1 "198.51.100.", # TEST-NET-2 "203.0.113.", # TEST-NET-3 ] # RFC 1918 private space — used for legit internal traffic noise. INTERNAL_CIDRS = [ "10.0.0.", "10.0.1.", "172.16.0.", "192.168.1.", ] def sample_public_ip(rng) -> str: """Sample an attacker-looking IP from RFC 5737 test ranges.""" return rng.choice(PUBLIC_CIDRS) + str(rng.randint(2, 254)) def sample_internal_ip(rng) -> str: """Sample a legit-looking internal IP from RFC 1918.""" return rng.choice(INTERNAL_CIDRS) + str(rng.randint(2, 254)) def sample_username(rng, exclude=()) -> str: candidates = [u for u in USERNAMES if u not in exclude] return rng.choice(candidates) def sample_hostname(rng) -> str: return rng.choice(HOSTNAMES)