""" Static reference tables for Track B plausibility audit. These are deliberately **small, hand-curated lookups** rather than scraped from the web — they run fully offline at judging time. The tables are tuned against CounterFeint's R1 synthetic data (see `fraud_patterns.py`, `advertiser_profiles.py`, `landing_pages.py`) so a realistic R1-generated fraud ad should *not* trip them, while obviously absurd / gibberish ads clearly will. """ from __future__ import annotations import re from typing import Dict, FrozenSet, List, Set # ----------------------------------------------------------------------------- # Country ↔ TLD plausibility. # # Map ISO country codes to the set of TLDs that are "plausible" (a common # ccTLD plus the gTLDs anyone uses). Ads claiming a US advertiser with a # `.cn` landing page in a fake-crypto category is classic fraudster- # signal-mismatch. # ----------------------------------------------------------------------------- _GLOBAL_TLDS: FrozenSet[str] = frozenset( { "com", "net", "org", "io", "co", "shop", "store", "xyz", "online", "site", } ) VALID_COUNTRY_TLD_PAIRS: Dict[str, FrozenSet[str]] = { "US": _GLOBAL_TLDS | frozenset({"us"}), "UK": _GLOBAL_TLDS | frozenset({"uk", "co.uk"}), "GB": _GLOBAL_TLDS | frozenset({"uk", "co.uk"}), "DE": _GLOBAL_TLDS | frozenset({"de"}), "FR": _GLOBAL_TLDS | frozenset({"fr"}), "ES": _GLOBAL_TLDS | frozenset({"es"}), "IT": _GLOBAL_TLDS | frozenset({"it"}), "NL": _GLOBAL_TLDS | frozenset({"nl"}), "CA": _GLOBAL_TLDS | frozenset({"ca"}), "AU": _GLOBAL_TLDS | frozenset({"au", "com.au"}), "IN": _GLOBAL_TLDS | frozenset({"in"}), "JP": _GLOBAL_TLDS | frozenset({"jp"}), "CN": _GLOBAL_TLDS | frozenset({"cn", "com.cn"}), "RU": _GLOBAL_TLDS | frozenset({"ru"}), "NG": _GLOBAL_TLDS | frozenset({"ng"}), "BR": _GLOBAL_TLDS | frozenset({"br", "com.br"}), "MX": _GLOBAL_TLDS | frozenset({"mx"}), } # TLDs that should make us suspicious whenever they co-occur with a # Western advertiser country in a financial / crypto / health 
category. _HIGH_RISK_TLDS: FrozenSet[str] = frozenset({"cn", "ru", "tk", "ml", "ga", "cf", "xyz"}) # ----------------------------------------------------------------------------- # Category ↔ targeting compatibility. # # Each category has a list of *token* substrings we expect to appear in # plausible targeting strings. E.g. weight-loss targeting kids is an # obvious parameter mismatch. Lookups are lower-cased substring `in` # checks so any reasonable phrasing matches. # ----------------------------------------------------------------------------- CATEGORY_TARGETING_COMPATIBILITY: Dict[str, List[str]] = { "ecommerce": [ "adults", "shoppers", "shopping", "fashion", "home", "kitchen", "beauty", "gift", ], "saas": [ "adults", "professionals", "business", "developers", "technology", "it ", "b2b", ], "local_service": [ "local", "homeowners", "neighborhood", "residents", "adults", ], "education": [ "students", "learners", "adults", "teachers", "parents", "kids ", # note trailing space so we don't match "kidsafe" ], "fitness": [ "adults", "fitness", "athletes", "gym", "workout", "health", ], "fake_giveaway": [ "adults", "18+", "sweepstakes", "rewards", "gift", ], "counterfeit_goods": [ "shoppers", "fashion", "adults", "deals", ], "miracle_cure": [ "adults", "health", "wellness", "weight loss", "senior", ], "advance_fee": [ "adults", "finance", "investing", "entrepreneurs", ], "fake_crypto": [ "adults", "crypto", "investing", "finance", ], "celebrity_endorsement_fraud": [ "adults", "fans", "investing", "lifestyle", ], "clone_brand": [ "shoppers", "fashion", "adults", "bargain", ], "gray_area_supplements": [ "adults", "wellness", "fitness", "health", ], "network_crypto": [ "adults", "crypto", "investing", "finance", ], "network_ecommerce": [ "adults", "shoppers", "shopping", ], "network_fintech": [ "adults", "finance", "investing", "business", ], "network_health": [ "adults", "health", "wellness", ], } # Categories we *never* want to see targeting minors. 
_ADULT_ONLY_CATEGORIES: FrozenSet[str] = frozenset(
    (
        "fake_giveaway",
        "miracle_cure",
        "advance_fee",
        "fake_crypto",
        "celebrity_endorsement_fraud",
        "gray_area_supplements",
        "network_crypto",
        "network_fintech",
    )
)

# Explicit "minor" targeting signals that should trigger a flag for
# adult-only categories.
_MINOR_TARGETING_MARKERS: FrozenSet[str] = frozenset(
    (
        "kids",
        "teens",
        "teenagers",
        "children",
        "minors",
        "under 18",
        "under-18",
        "schoolers",
    )
)

# -----------------------------------------------------------------------------
# Known fraud markers — identifier patterns the R1 environment uses to
# *signal* fraud to the Investigator. A Fraudster that copies these verbatim
# into its ad_copy / landing_page_blurb is "grader-hacking" rather than
# producing plausible adversarial surface text.
# -----------------------------------------------------------------------------
KNOWN_FRAUD_MARKER_REGEXES: List[re.Pattern[str]] = [
    # All patterns are compiled case-insensitively and anchored on word
    # boundaries.
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        r"\btmpl_\d{3,}\b",
        r"\bpmt_ring_\d{3,}\b",
        r"\bring_[a-z0-9]{3,}\b",
        r"\bcfg_[a-f0-9]{3,}\b",
        r"\badv_[a-z0-9]{3,}_fraud\b",
    )
]

# -----------------------------------------------------------------------------
# Deny-list tokens — obvious "adversarial" / prompt-injection markers and
# control characters. If the Fraudster tries to emit these it has clearly
# abandoned the "pretend to be a real advertiser" pretence.
# -----------------------------------------------------------------------------
# NOTE(review): blank ("") entries are deliberately absent from this table.
# `"" in text` is true for every string, so a blank token would make the
# deny-list check flag every ad. If special-token markers (e.g. chat-template
# delimiters) are meant to be listed here, add their literal, lower-case
# text — TODO confirm the intended list.
DENY_LIST_TOKENS: FrozenSet[str] = frozenset(
    {
        "<<>",
        "ignore previous",
        "ignore all previous",
        "disregard instructions",
        "disregard previous",
    }
)


# -----------------------------------------------------------------------------
# Helpers
# -----------------------------------------------------------------------------

# A whole dotted hostname: one or more `label.` groups followed by a final
# 2-6 letter label. The trailing negative lookahead refuses to stop in the
# middle of a longer hostname, so a registrable name is never mistaken for
# the TLD (e.g. "google" in "www.google.com").
_HOSTNAME_RE: re.Pattern[str] = re.compile(
    r"\b((?:[a-z0-9][a-z0-9\-]*\.)+[a-z]{2,6})\b(?!\.[a-z0-9])",
    re.IGNORECASE,
)

# Second-level labels that, followed by a two-letter country code, are
# reported together with it as a compound suffix ("co.uk", "com.au", ...).
_COMPOUND_SECOND_LEVEL_LABELS: FrozenSet[str] = frozenset(
    {"ac", "co", "com", "edu", "gov", "net", "org"}
)


def _tld_of_hostname(hostname: str) -> str:
    """Return the lower-cased TLD (or compound suffix) of a dotted hostname."""
    labels = hostname.lower().split(".")
    tld = labels[-1]
    # Need at least name + second-level + country code for a compound suffix;
    # "co.uk" on its own is just the TLD "uk".
    if (
        len(labels) >= 3
        and len(tld) == 2
        and labels[-2] in _COMPOUND_SECOND_LEVEL_LABELS
    ):
        return f"{labels[-2]}.{tld}"
    return tld


def _normalize_tld(tld: str) -> str:
    """Lower-case a TLD and drop surrounding whitespace and leading dots."""
    return (tld or "").strip().lower().lstrip(".")


def _token_matches(token: str, lowered_text: str) -> bool:
    """
    Test one compatibility token against already lower-cased text.

    Tokens written with trailing whitespace (e.g. ``"kids "``, ``"it "``) are
    matched as whole words, so they hit at the end of the string or before
    punctuation and do not hit inside longer words ("kidsafe", "credit").
    All other tokens are plain substring checks. Blank tokens never match.
    """
    word = token.rstrip()
    if not word:
        return False
    if word == token:
        return token in lowered_text
    return re.search(rf"(?<!\w){re.escape(word)}(?!\w)", lowered_text) is not None


def extract_tlds_from_text(text: str) -> Set[str]:
    """
    Lower-case TLDs found as domain suffixes in free text.

    Each dotted hostname contributes exactly one entry: its last label, or
    the last two labels when they form a compound suffix such as "co.uk" or
    "com.au". Sub-domains and the registrable name are never reported
    ("www.amazon.de" → {"de"}).

    This is a text heuristic, not a URL parser: anything shaped like
    ``name.xx`` with a 2-6 letter final label is picked up, which includes
    file names such as "index.html".

    Returns an empty set for empty / ``None`` input.
    """
    if not text:
        return set()
    return {_tld_of_hostname(host) for host in _HOSTNAME_RE.findall(text)}


def is_tld_plausible_for_country(country: str, tld: str) -> bool:
    """
    True if ``tld`` is an accepted suffix for ``country``.

    Comparison is case-insensitive and ignores a leading dot on the TLD.
    The check is permissive: an empty TLD or a country that has no entry in
    ``VALID_COUNTRY_TLD_PAIRS`` returns True, because there is nothing to
    flag against.
    """
    normalized_tld = _normalize_tld(tld)
    if not normalized_tld:
        return True
    allowed = VALID_COUNTRY_TLD_PAIRS.get((country or "").strip().upper())
    if allowed is None:
        return True  # unknown country, can't flag
    return normalized_tld in allowed


def is_high_risk_tld(tld: str) -> bool:
    """True if ``tld`` (case-insensitive, leading dot ignored) is high-risk."""
    return _normalize_tld(tld) in _HIGH_RISK_TLDS


def is_adult_only_category(category: str) -> bool:
    """True if ``category`` (case-insensitive) must never target minors."""
    return (category or "").strip().lower() in _ADULT_ONLY_CATEGORIES


def targeting_mentions_minors(targeting: str) -> bool:
    """
    True if the targeting string contains any minor-audience marker.

    Markers are matched as lower-cased substrings; empty / ``None``
    targeting returns False.
    """
    if not targeting:
        return False
    lowered = targeting.lower()
    return any(marker in lowered for marker in _MINOR_TARGETING_MARKERS)


def targeting_matches_category(category: str, targeting: str) -> bool:
    """
    Soft compatibility check: True if the targeting string contains ≥1
    category-appropriate keyword, or if the category has no configured
    expectations (unknown category → don't flag).

    A known category with empty / ``None`` targeting returns False. Tokens
    that carry a trailing space in the table are matched as whole words;
    the rest are substring checks on the lower-cased targeting string.
    """
    expected = CATEGORY_TARGETING_COMPATIBILITY.get((category or "").strip().lower())
    if expected is None:
        return True
    if not targeting:
        return False
    lowered = targeting.lower()
    return any(_token_matches(tok, lowered) for tok in expected)


def contains_fraud_marker(text: str) -> bool:
    """True if any known fraud-marker regex matches somewhere in ``text``."""
    if not text:
        return False
    return any(rx.search(text) for rx in KNOWN_FRAUD_MARKER_REGEXES)


def contains_deny_token(text: str) -> bool:
    """
    True if ``text`` contains any deny-list token (case-insensitive).

    Blank tokens are skipped: an empty string is a substring of every text
    and would otherwise flag everything.
    """
    if not text:
        return False
    lowered = text.lower()
    return any(tok in lowered for tok in DENY_LIST_TOKENS if tok)


__all__ = [
    "CATEGORY_TARGETING_COMPATIBILITY",
    "DENY_LIST_TOKENS",
    "KNOWN_FRAUD_MARKER_REGEXES",
    "VALID_COUNTRY_TLD_PAIRS",
    "contains_deny_token",
    "contains_fraud_marker",
    "extract_tlds_from_text",
    "is_adult_only_category",
    "is_high_risk_tld",
    "is_tld_plausible_for_country",
    "targeting_matches_category",
    "targeting_mentions_minors",
]