Spaces:

Pandago
/

graphstrike-model-training

Sleeping

App Files Files Community

graphstrike-model-training / models.py

Pandago

Upload folder using huggingface_hub

a6f0611 verified 30 days ago

raw

history blame contribute delete

6.32 kB

	from __future__ import annotations

	from enum import Enum
	from typing import Dict, List, Optional

	from pydantic import BaseModel

	# ---------------------------------------------------------------------------
	# OpenEnv base types
	# Use real SDK when available; fall back to stubs for local dev without SDK.
	# ---------------------------------------------------------------------------
	try:
	    # Prefer the real OpenEnv SDK base classes whenever the package is importable.
	    from openenv.core.env_server import Action, Observation, State  # type: ignore
	except ImportError:
	    # SDK not installed: define local pydantic stand-ins under the same names so
	    # the rest of this module can still be imported for local development.

	    class Action(BaseModel):  # type: ignore[no-redef]
	        """Local stand-in for the SDK action base class; declares no fields."""

	    class Observation(BaseModel):  # type: ignore[no-redef]
	        """Local stand-in for the SDK observation base class."""

	        done: bool = False
	        reward: Optional[float] = None

	    class State(BaseModel):  # type: ignore[no-redef]
	        """Local stand-in for the SDK state base class."""

	        episode_id: str = ""
	        step_count: int = 0

	# ---------------------------------------------------------------------------
	# Domain types
	# ---------------------------------------------------------------------------

	class ActionType(str, Enum):
	    """String-valued enumeration of the actions an agent can take.

	    Step costs noted on each member are the budgeted cost of that action.
	    """

	    # --- Investigation -------------------------------------------------------
	    # Reveal full profile + edges; costs 1 step.
	    INSPECT = "inspect"
	    # Expand graph 1 hop; costs 2 steps.
	    INVESTIGATE_NETWORK = "investigate_network"

	    # --- Labelling (free) ----------------------------------------------------
	    # Mark account fake.
	    FLAG = "flag"
	    # Unmark account.
	    UNFLAG = "unflag"

	    # --- Episode control -----------------------------------------------------
	    # End episode, trigger scoring.
	    SUBMIT = "submit"

	    # --- Round 2: new tool-call actions --------------------------------------
	    # Reveal photo_reuse_score; costs 1 step.
	    REVERSE_IMAGE_SEARCH = "reverse_image_search"
	    # Reveal bio_template_score; costs 1 step.
	    ANALYZE_BIO = "analyze_bio"
	    # Reveal ip_cluster_signal; costs 2 steps.
	    CHECK_IP = "check_ip"
	    # Get platform policy; costs 0 steps.
	    GET_POLICY = "get_policy"


	class AccountStatus(str, Enum):
	    """String-valued enumeration of the review status of an account."""

	    NORMAL = "normal"
	    # Auto-elevated when a neighbor is flagged.
	    SUSPECT = "suspect"
	    # Agent explicitly flagged this account.
	    CONFIRMED_FAKE = "confirmed_fake"


	class FakeGangAction(Action):
	    """A single agent action: the action kind plus an optional target account."""

	    action_type: ActionType

	    # Required for all actions except SUBMIT. The model itself does not enforce
	    # this; the field simply defaults to None.
	    # NOTE(review): GET_POLICY presumably needs no account_id either — confirm
	    # against the environment's step handler.
	    account_id: Optional[str] = None


	class AccountProfile(BaseModel):
	    """Profile of one account: raw features, graph features, status and risk scores.

	    Only the first eight fields are required; everything else has a default.
	    """

	    # ── Raw profile features ──
	    account_id: str
	    follower_count: int
	    following_count: int
	    post_count: int
	    # 0–23
	    avg_post_hour: float
	    # 0–1 — pre-computed: fraction of posts using stolen celebrity photos
	    photo_reuse_score: float
	    # 0–1 — pre-computed: cosine similarity to known fake bio templates
	    bio_template_score: float
	    account_age_days: int
	    # Incremented by hard-mode evasion events.
	    name_change_count: int = 0

	    # ── Derived graph features (computed at INSPECT time from live graph state) ──
	    # How many of this account's follows are currently flagged.
	    # High value = deep inside a cluster you're already tracking.
	    flagged_neighbor_count: int = 0
	    # Fraction of follows that also follow back (0–1).
	    # Real fans: low; fake gangs: high (they mutually inflate each other).
	    mutual_follow_rate: float = 0.0
	    # Mean photo_reuse_score of inspected follows.
	    # Gang members cluster: if neighbors are fake, this is high.
	    avg_neighbor_photo_reuse: float = 0.0

	    # IDs of accounts this account follows (revealed by INSPECT).
	    visible_follows: List[str] = []

	    # ── Account status ──
	    status: AccountStatus = AccountStatus.NORMAL

	    # ── Full risk breakdown (computed via scoring.py at INSPECT time) ──
	    fake_risk_score: float = 0.0
	    node_risk: float = 0.0
	    behavior_risk: float = 0.0
	    graph_risk: float = 0.0
	    hub_legitimacy_score: float = 0.0

	    # ── New raw features (from generator) ──
	    # fakes: 0.6-0.9 | decoys: 0.1-0.3 | reals: 0.0-0.08
	    comment_repeat_score: float = 0.0
	    # fakes: 9 (gang shares 1 IP) | reals: 0-1
	    shared_ip_count: int = 0

	    # ── Extended runtime graph features ──
	    # Denominator for flagged_neighbor_ratio.
	    inspected_neighbor_count: int = 0
	    # Hour alignment to flagged cluster mean.
	    post_hour_cluster_score: float = 0.0
	    # Used in hub legitimacy computation.
	    suspicious_mutual_ratio: float = 0.0


	class FakeGangObservation(Observation):
	    """Observation payload for the agent; every field has a default."""

	    # ── Accounts and graph ──
	    visible_accounts: List[AccountProfile] = []
	    # All account IDs the agent knows exist.
	    visible_account_ids: List[str] = []
	    flagged_ids: List[str] = []
	    inspected_ids: List[str] = []
	    # account_id -> list of accounts it follows
	    graph_edges: Dict[str, List[str]] = {}

	    # ── Episode progress ──
	    steps_remaining: int = 0
	    evasion_triggered: bool = False
	    evasion_count: int = 0
	    task: str = "easy"
	    message: str = ""

	    # Auto-elevated neighbors of flagged accounts.
	    suspect_ids: List[str] = []

	    # Round 2: platform name (Instagram/Snapchat) - passed from state.
	    platform: str = ""


	class FakeGangState(State):
	    """Environment state with episode-level fields; every field has a default."""

	    task: str = "easy"
	    score_so_far: float = 0.0
	    evasion_count: int = 0
	    network_size: int = 0
	    gang_size: int = 10
	    episode_seed: int = 0

	    # Round 2: platform name (Instagram/Snapchat).
	    platform: str = ""


	# ---------------------------------------------------------------------------
	# Round 2: Platform Policy Model
	# ---------------------------------------------------------------------------

	class PlatformPolicy(BaseModel):
	    """Dynamically compiled platform policy from transparency reports."""

	    # ── Required fields ──
	    # "Instagram" or "Snapchat"
	    platform: str
	    # θ* - computed Bayesian threshold for flagging
	    threshold: float
	    # π - prevalence of fake accounts
	    base_rate: float
	    # "low" | "medium" | "high" | "critical"
	    fn_cost_signal: str
	    # "low" | "medium" | "high"
	    fp_cost_signal: str
	    # Enforcement vs creator balance (0.5-2.0); the range is not validated here.
	    harm_weight: float
	    # "photo_reuse" | "bio_template" | "ip_cluster"
	    primary_enforcement_signal: str
	    # C_fp for reward function
	    fp_penalty_weight: float

	    # ── Provenance (all optional) ──
	    # URLs used for extraction.
	    sources: List[str] = []
	    # LLM extraction confidence (0.0-1.0).
	    confidence: float = 0.0
	    # ISO timestamp.
	    compiled_at: str = ""
	    # True if fallback policy was used due to extraction failure.
	    used_fallback: bool = False