Spaces:
Sleeping
Sleeping
| from __future__ import annotations | |
| from enum import Enum | |
| from typing import Dict, List, Optional | |
| from pydantic import BaseModel | |
| # --------------------------------------------------------------------------- | |
| # OpenEnv base types | |
| # Use real SDK when available; fall back to stubs for local dev without SDK. | |
| # --------------------------------------------------------------------------- | |
try:
    # Preferred path: use the base types shipped with the OpenEnv SDK.
    from openenv.core.env_server import Action, Observation, State  # type: ignore
except ImportError:
    # SDK not importable: define minimal pydantic stand-ins with the same
    # names so the domain models further down can still subclass them.

    class Action(BaseModel):  # type: ignore[no-redef]
        # Stub base for agent actions; declares no fields of its own.
        pass

    class Observation(BaseModel):  # type: ignore[no-redef]
        # Stub base for observations.
        done: bool = False
        reward: Optional[float] = None

    class State(BaseModel):  # type: ignore[no-redef]
        # Stub base for environment state.
        episode_id: str = ""
        step_count: int = 0
| # --------------------------------------------------------------------------- | |
| # Domain types | |
| # --------------------------------------------------------------------------- | |
class ActionType(str, Enum):
    """Actions an agent can request; each member's value is its wire string."""

    INSPECT = "inspect"  # reveal full profile + edges, costs 1 step
    INVESTIGATE_NETWORK = "investigate_network"  # expand graph 1 hop, costs 2 steps
    FLAG = "flag"  # mark account fake (free)
    UNFLAG = "unflag"  # unmark account (free)
    SUBMIT = "submit"  # end episode, trigger scoring

    # Round 2: New tool-call actions
    REVERSE_IMAGE_SEARCH = "reverse_image_search"  # reveal photo_reuse_score, costs 1 step
    ANALYZE_BIO = "analyze_bio"  # reveal bio_template_score, costs 1 step
    CHECK_IP = "check_ip"  # reveal ip_cluster_signal, costs 2 steps
    GET_POLICY = "get_policy"  # get platform policy, costs 0 steps
class AccountStatus(str, Enum):
    """Review status attached to an account; values are plain strings."""

    NORMAL = "normal"
    SUSPECT = "suspect"  # auto-elevated when a neighbor is flagged
    CONFIRMED_FAKE = "confirmed_fake"  # agent explicitly flagged this account
class FakeGangAction(Action):
    # One agent request: which action to perform and, optionally, on which account.
    action_type: ActionType
    # Required for all actions except SUBMIT. This is not enforced by the
    # model itself: the field is optional and defaults to None.
    # NOTE(review): GET_POLICY presumably needs no account_id either - confirm
    # against the environment's action handler.
    account_id: Optional[str] = None
class AccountProfile(BaseModel):
    # Per-account feature record: raw profile attributes, graph-derived
    # features, review status, and the risk-score breakdown.
    account_id: str
    follower_count: int
    following_count: int
    post_count: int
    avg_post_hour: float  # 0-23
    photo_reuse_score: float  # 0-1 - pre-computed: fraction of posts using stolen celebrity photos
    bio_template_score: float  # 0-1 - pre-computed: cosine similarity to known fake bio templates
    account_age_days: int
    name_change_count: int = 0  # incremented by hard-mode evasion events

    # -- Derived graph features (computed at INSPECT time from live graph state) --
    # How many of this account's follows are currently flagged;
    # high value = deep inside a cluster you're already tracking.
    flagged_neighbor_count: int = 0
    # Fraction of follows that also follow back (0-1);
    # real fans: low; fake gangs: high (they mutually inflate each other).
    mutual_follow_rate: float = 0.0
    # Mean photo_reuse_score of inspected follows;
    # gang members cluster: if neighbors are fake, this is high.
    avg_neighbor_photo_reuse: float = 0.0
    # IDs of accounts this account follows (revealed by INSPECT).
    visible_follows: List[str] = []

    # -- Account status --
    status: AccountStatus = AccountStatus.NORMAL

    # -- Full risk breakdown (computed via scoring.py at INSPECT time) --
    fake_risk_score: float = 0.0
    node_risk: float = 0.0
    behavior_risk: float = 0.0
    graph_risk: float = 0.0
    hub_legitimacy_score: float = 0.0

    # -- New raw features (from generator) --
    comment_repeat_score: float = 0.0  # fakes: 0.6-0.9 | decoys: 0.1-0.3 | reals: 0.0-0.08
    shared_ip_count: int = 0  # fakes: 9 (gang shares 1 IP) | reals: 0-1

    # -- Extended runtime graph features --
    inspected_neighbor_count: int = 0  # denominator for flagged_neighbor_ratio
    post_hour_cluster_score: float = 0.0  # hour alignment to flagged cluster mean
    suspicious_mutual_ratio: float = 0.0  # used in hub legitimacy computation
class FakeGangObservation(Observation):
    # Observation payload: the accounts and edges currently visible to the
    # agent plus episode bookkeeping. Every field has a default.
    visible_accounts: List[AccountProfile] = []
    visible_account_ids: List[str] = []  # all account IDs the agent knows exist
    flagged_ids: List[str] = []
    inspected_ids: List[str] = []
    graph_edges: Dict[str, List[str]] = {}  # account_id -> list of accounts it follows
    steps_remaining: int = 0
    evasion_triggered: bool = False
    evasion_count: int = 0
    task: str = "easy"
    message: str = ""
    suspect_ids: List[str] = []  # auto-elevated neighbors of flagged accounts
    platform: str = ""  # Round 2: Platform name (Instagram/Snapchat) - passed from state
class FakeGangState(State):
    # Episode-level state: task name, running score, and the parameters
    # describing the generated network. Every field has a default.
    task: str = "easy"
    score_so_far: float = 0.0
    evasion_count: int = 0
    network_size: int = 0
    gang_size: int = 10
    episode_seed: int = 0
    platform: str = ""  # Round 2: Platform name (Instagram/Snapchat)
| # --------------------------------------------------------------------------- | |
| # Round 2: Platform Policy Model | |
| # --------------------------------------------------------------------------- | |
class PlatformPolicy(BaseModel):
    """Dynamically compiled platform policy from transparency reports."""

    # Required fields: no defaults, so they must be supplied at construction.
    platform: str  # "Instagram" or "Snapchat"
    threshold: float  # theta* - computed Bayesian threshold for flagging
    base_rate: float  # pi - prevalence of fake accounts
    fn_cost_signal: str  # "low" | "medium" | "high" | "critical"
    fp_cost_signal: str  # "low" | "medium" | "high"
    harm_weight: float  # enforcement vs creator balance (0.5-2.0)
    primary_enforcement_signal: str  # "photo_reuse" | "bio_template" | "ip_cluster"
    fp_penalty_weight: float  # C_fp for reward function

    # Optional provenance metadata about how the policy was compiled.
    sources: List[str] = []  # URLs used for extraction
    confidence: float = 0.0  # LLM extraction confidence (0.0-1.0)
    compiled_at: str = ""  # ISO timestamp
    used_fallback: bool = False  # True if fallback policy was used due to extraction failure