Spaces:
Sleeping
Sleeping
| """ | |
| Static reference tables for Track B plausibility audit. | |
| These are deliberately **small, hand-curated lookups** rather than scraped | |
| from the web — they run fully offline at judging time. The tables are | |
| tuned against CounterFeint's R1 synthetic data (see `fraud_patterns.py`, | |
| `advertiser_profiles.py`, `landing_pages.py`) so a realistic R1-generated | |
| fraud ad should *not* trip them, while obviously absurd / gibberish ads | |
| clearly will. | |
| """ | |
| from __future__ import annotations | |
| import re | |
| from typing import Dict, FrozenSet, List, Set | |
# -----------------------------------------------------------------------------
# Country ↔ TLD plausibility.
#
# Map ISO country codes (plus the common non-ISO alias `UK`) to the set of
# TLDs that are "plausible" for them: the country's own ccTLD plus the gTLDs
# anyone uses. An ad claiming a US advertiser with a `.cn` landing page in a
# fake-crypto category is a classic fraudster signal mismatch.
# -----------------------------------------------------------------------------
# Generic TLDs that are merged into every country's entry below, i.e. they are
# treated as plausible regardless of the advertiser's country.
_GLOBAL_TLDS: FrozenSet[str] = frozenset(
    (
        "com", "net", "org", "io", "co",
        "shop", "store", "xyz", "online", "site",
    )
)

# Country code -> every TLD suffix considered plausible for that country:
# the global set above plus the country's own suffix(es). "UK" and "GB" are
# both present and map to the same suffixes.
VALID_COUNTRY_TLD_PAIRS: Dict[str, FrozenSet[str]] = {
    country: _GLOBAL_TLDS | frozenset(own_suffixes)
    for country, own_suffixes in (
        ("US", ("us",)),
        ("UK", ("uk", "co.uk")),
        ("GB", ("uk", "co.uk")),
        ("DE", ("de",)),
        ("FR", ("fr",)),
        ("ES", ("es",)),
        ("IT", ("it",)),
        ("NL", ("nl",)),
        ("CA", ("ca",)),
        ("AU", ("au", "com.au")),
        ("IN", ("in",)),
        ("JP", ("jp",)),
        ("CN", ("cn", "com.cn")),
        ("RU", ("ru",)),
        ("NG", ("ng",)),
        ("BR", ("br", "com.br")),
        ("MX", ("mx",)),
    )
}
# TLDs that should raise suspicion whenever they co-occur with a Western
# advertiser country in a financial / crypto / health category.
_HIGH_RISK_TLDS: FrozenSet[str] = frozenset(
    ("cn", "ru", "tk", "ml", "ga", "cf", "xyz")
)
| # ----------------------------------------------------------------------------- | |
| # Category ↔ targeting compatibility. | |
| # | |
| # Each category has a list of *token* substrings we expect to appear in | |
| # plausible targeting strings. E.g. weight-loss targeting kids is an | |
| # obvious parameter mismatch. Lookups are lower-cased substring `in` | |
| # checks so any reasonable phrasing matches. | |
| # ----------------------------------------------------------------------------- | |
# Category name -> lower-case tokens, at least one of which is expected to
# appear in a plausible targeting string for that category.
CATEGORY_TARGETING_COMPATIBILITY: Dict[str, List[str]] = dict(
    # --- ordinary advertiser categories ---
    ecommerce=[
        "adults", "shoppers", "shopping", "fashion",
        "home", "kitchen", "beauty", "gift",
    ],
    saas=[
        "adults", "professionals", "business", "developers", "technology",
        "it ",  # trailing space: a bare "it" would also hit words like "fitness"
        "b2b",
    ],
    local_service=["local", "homeowners", "neighborhood", "residents", "adults"],
    education=[
        "students", "learners", "adults", "teachers", "parents",
        "kids ",  # note trailing space so we don't match "kidsafe"
    ],
    fitness=["adults", "fitness", "athletes", "gym", "workout", "health"],
    # --- fraud categories ---
    fake_giveaway=["adults", "18+", "sweepstakes", "rewards", "gift"],
    counterfeit_goods=["shoppers", "fashion", "adults", "deals"],
    miracle_cure=["adults", "health", "wellness", "weight loss", "senior"],
    advance_fee=["adults", "finance", "investing", "entrepreneurs"],
    fake_crypto=["adults", "crypto", "investing", "finance"],
    celebrity_endorsement_fraud=["adults", "fans", "investing", "lifestyle"],
    clone_brand=["shoppers", "fashion", "adults", "bargain"],
    gray_area_supplements=["adults", "wellness", "fitness", "health"],
    # --- network_* categories ---
    network_crypto=["adults", "crypto", "investing", "finance"],
    network_ecommerce=["adults", "shoppers", "shopping"],
    network_fintech=["adults", "finance", "investing", "business"],
    network_health=["adults", "health", "wellness"],
)
# Categories that must *never* be seen targeting minors.
_ADULT_ONLY_CATEGORIES: FrozenSet[str] = frozenset(
    (
        "fake_giveaway",
        "miracle_cure",
        "advance_fee",
        "fake_crypto",
        "celebrity_endorsement_fraud",
        "gray_area_supplements",
        "network_crypto",
        "network_fintech",
    )
)
# Explicit "minor" targeting phrases; seeing one of these in the targeting of
# an adult-only category should trigger a flag.
_MINOR_TARGETING_MARKERS: FrozenSet[str] = frozenset(
    (
        "kids",
        "teens",
        "teenagers",
        "children",
        "minors",
        "under 18",
        "under-18",
        "schoolers",
    )
)
| # ----------------------------------------------------------------------------- | |
| # Known fraud markers — patterns the R1 environment uses to *signal* fraud to | |
| # the Investigator. A Fraudster copying these verbatim into its ad_copy / | |
| # landing_page_blurb is "grader-hacking" rather than generating plausible | |
| # adversarial surface text. | |
| # ----------------------------------------------------------------------------- | |
# Case-insensitive patterns for the internal identifier shapes listed below;
# each is anchored on word boundaries so it only matches a whole identifier.
KNOWN_FRAUD_MARKER_REGEXES: List[re.Pattern[str]] = [
    re.compile(marker_pattern, re.IGNORECASE)
    for marker_pattern in (
        r"\btmpl_\d{3,}\b",            # tmpl_ followed by 3+ digits
        r"\bpmt_ring_\d{3,}\b",        # pmt_ring_ followed by 3+ digits
        r"\bring_[a-z0-9]{3,}\b",      # ring_ followed by 3+ alphanumerics
        r"\bcfg_[a-f0-9]{3,}\b",       # cfg_ followed by 3+ hex characters
        r"\badv_[a-z0-9]{3,}_fraud\b", # adv_<3+ alphanumerics>_fraud
    )
]
| # ----------------------------------------------------------------------------- | |
| # Deny-list tokens — obvious "adversarial" / prompt-injection markers and | |
| # control characters. If the Fraudster tries to emit these it has clearly | |
| # abandoned the "pretend to be a real advertiser" pretence. | |
| # ----------------------------------------------------------------------------- | |
# All entries are lower-case literal substrings.
DENY_LIST_TOKENS: FrozenSet[str] = frozenset(
    (
        # tag-style markers
        "<bypass>",
        "<adversarial>",
        "<jailbreak>",
        "<system>",
        "<<</>>",
        "</instructions>",
        # instruction-override phrases
        "ignore previous",
        "ignore all previous",
        "disregard instructions",
        "disregard previous",
    )
)
| # ----------------------------------------------------------------------------- | |
| # Helpers | |
| # ----------------------------------------------------------------------------- | |
# Second-level labels that, followed by a two-letter country code, form a
# compound suffix such as ``co.uk``, ``com.au``, ``com.cn`` or ``com.br``.
_COMPOUND_SUFFIX_LABELS: FrozenSet[str] = frozenset(
    {"ac", "co", "com", "edu", "gov", "net", "org"}
)

# A complete dotted hostname: one or more "label." groups followed by a final
# 2-6 letter label. The trailing lookahead rejects a match that stops in the
# middle of a longer hostname (i.e. is directly followed by ".<alnum>").
_HOSTNAME_RE = re.compile(
    r"\b(?:[a-z0-9][a-z0-9\-]*\.)+[a-z]{2,6}\b(?!\.[a-z0-9])",
    re.IGNORECASE,
)


def extract_tlds_from_text(text: str) -> Set[str]:
    """Return the lower-cased TLD suffixes of hostnames found in free text.

    Each hostname is matched as a whole and its suffix is read from the
    *end*, so subdomains do not distort the result: ``www.google.com``
    yields ``com`` (not ``google``) and ``www.amazon.co.uk`` yields
    ``co.uk`` (not ``amazon.co``).

    A two-part suffix is returned only when the hostname has at least three
    labels, ends in a two-letter label, and the label before it is one of
    ``_COMPOUND_SUFFIX_LABELS``; otherwise the last label alone is returned.

    Args:
        text: Arbitrary text that may contain domain names or URLs. Empty
            or ``None`` input is accepted.

    Returns:
        A set of lower-case suffixes without a leading dot; empty when no
        hostname is found.
    """
    if not text:
        return set()

    found: Set[str] = set()
    for host in _HOSTNAME_RE.findall(text):
        labels = host.lower().split(".")
        suffix = labels[-1]
        # Require >= 3 labels so a bare "co.uk" is read as host "co" + TLD "uk".
        if (
            len(labels) >= 3
            and len(suffix) == 2
            and labels[-2] in _COMPOUND_SUFFIX_LABELS
        ):
            suffix = f"{labels[-2]}.{suffix}"
        found.add(suffix)
    return found
def is_tld_plausible_for_country(country: str, tld: str) -> bool:
    """Return whether *tld* is a plausible suffix for an advertiser in *country*.

    The country is upper-cased and the TLD is lower-cased with leading dots
    removed before the lookup in ``VALID_COUNTRY_TLD_PAIRS``. The check is
    deliberately lenient: a missing/empty TLD or a country that has no entry
    in the table yields ``True`` (nothing to flag).
    """
    country_key = (country or "").upper()
    suffix = (tld or "").lower().lstrip(".")

    plausible_suffixes = VALID_COUNTRY_TLD_PAIRS.get(country_key)
    # No TLD to judge, or unknown country to judge against: can't flag.
    if not suffix or plausible_suffixes is None:
        return True
    return suffix in plausible_suffixes
def is_high_risk_tld(tld: str) -> bool:
    """Return True if *tld* (case-insensitive, leading dots ignored) is in ``_HIGH_RISK_TLDS``."""
    suffix = tld.lower().lstrip(".") if tld else ""
    return suffix in _HIGH_RISK_TLDS
def is_adult_only_category(category: str) -> bool:
    """Return True if *category* (case-insensitive) is listed in ``_ADULT_ONLY_CATEGORIES``."""
    category_key = category.lower() if category else ""
    return category_key in _ADULT_ONLY_CATEGORIES
def targeting_mentions_minors(targeting: str) -> bool:
    """Return True if the targeting string contains any minor-audience marker.

    Matching is a case-insensitive substring test against every entry of
    ``_MINOR_TARGETING_MARKERS``. Empty or ``None`` targeting yields ``False``.
    """
    haystack = targeting.lower() if targeting else ""
    if not haystack:
        return False
    for marker in _MINOR_TARGETING_MARKERS:
        if marker in haystack:
            return True
    return False
def _targeting_has_token(lowered: str, token: str) -> bool:
    """Return True if *token* occurs in the already lower-cased *lowered*.

    A token without trailing whitespace is a plain substring test. A token
    written with trailing whitespace (e.g. ``"kids "``) means "the word must
    end here": it matches when the stripped token is followed by any non-word
    character *or* the end of the string, so ``"parents and kids"`` and
    ``"kids, teens"`` match while ``"kidsafe"`` does not.
    """
    stem = token.rstrip()
    if stem == token or not stem:
        return token in lowered
    # (?!\w) accepts whitespace, punctuation and end-of-string alike, which is
    # a superset of the literal "stem + space" substring test.
    return re.search(re.escape(stem) + r"(?!\w)", lowered) is not None


def targeting_matches_category(category: str, targeting: str) -> bool:
    """Soft compatibility check between an ad category and its targeting text.

    Args:
        category: Category name; looked up case-insensitively in
            ``CATEGORY_TARGETING_COMPATIBILITY``.
        targeting: Free-text targeting description; compared lower-cased.

    Returns:
        ``True`` if the category has no configured expectations (unknown
        category → don't flag) or if the targeting contains at least one of
        the category's expected tokens. ``False`` if the category is known
        and the targeting is empty or contains none of its tokens.
    """
    expected = CATEGORY_TARGETING_COMPATIBILITY.get((category or "").lower())
    if expected is None:
        return True
    if not targeting:
        return False
    lowered = targeting.lower()
    return any(_targeting_has_token(lowered, tok) for tok in expected)
def contains_fraud_marker(text: str) -> bool:
    """Return True if any pattern in ``KNOWN_FRAUD_MARKER_REGEXES`` is found in *text*.

    Empty or ``None`` text yields ``False``.
    """
    if not text:
        return False
    for pattern in KNOWN_FRAUD_MARKER_REGEXES:
        if pattern.search(text) is not None:
            return True
    return False
def contains_deny_token(text: str) -> bool:
    """Return True if *text* contains any entry of ``DENY_LIST_TOKENS``.

    The text is lower-cased first and each token is tested as a plain
    substring. Empty or ``None`` text yields ``False``.
    """
    haystack = text.lower() if text else ""
    if not haystack:
        return False
    for token in DENY_LIST_TOKENS:
        if token in haystack:
            return True
    return False
# Names exported by ``from <module> import *``: the four public lookup tables
# followed by the helper functions.
__all__ = [
    # lookup tables
    "CATEGORY_TARGETING_COMPATIBILITY", "DENY_LIST_TOKENS",
    "KNOWN_FRAUD_MARKER_REGEXES", "VALID_COUNTRY_TLD_PAIRS",
    # helper functions
    "contains_deny_token", "contains_fraud_marker",
    "extract_tlds_from_text",
    "is_adult_only_category", "is_high_risk_tld", "is_tld_plausible_for_country",
    "targeting_matches_category", "targeting_mentions_minors",
]