Spaces:

Pandago
/

graphstrike-model-training

Sleeping

App Files Files Community

graphstrike-model-training / server /scoring.py

Pandago

Upload folder using huggingface_hub

a6f0611 verified about 1 month ago

raw

history blame contribute delete

5.69 kB

	"""Pure-math risk scoring engine for the Fake Gang Detection environment.

	All functions are stateless — no imports from other project modules.
	Implements the formulas from formulas.md exactly.
	"""

	from __future__ import annotations

	import math


	def compute_node_risk(photo_reuse: float, bio_template: float) -> float:
	    """Return the content-based risk score for one account.

	    Blends the stolen-photo signal (weight 0.60) with the copy-paste-bio
	    signal (weight 0.40).

	    Args:
	        photo_reuse: Photo-reuse signal for the account.
	        bio_template: Bio-template signal for the account.

	    Returns:
	        The weighted sum of both signals, rounded to 4 decimal places.
	    """
	    photo_term = 0.60 * photo_reuse
	    bio_term = 0.40 * bio_template
	    return round(photo_term + bio_term, 4)


	def compute_behavior_risk(account_age_days: int, post_hour_cluster_score: float) -> float:
	    """Return the temporal risk score: young account + clustered posting hours.

	    Account age is normalised against one year (365 days) and clamped to
	    [0.0, 1.0], so accounts older than a year contribute no age risk and a
	    negative age (e.g. a creation timestamp in the future) is treated as a
	    brand-new account instead of inflating the score.

	    Args:
	        account_age_days: Age of the account in days.
	        post_hour_cluster_score: Posting-hour cluster signal for the account.

	    Returns:
	        ``0.55 * (1 - age_norm) + 0.45 * post_hour_cluster_score``, rounded
	        to 4 decimal places.
	    """
	    # Clamp on both sides; the lower bound keeps negative ages from
	    # producing an age term greater than its 0.55 weight.
	    age_norm = max(0.0, min(1.0, account_age_days / 365.0))
	    return round(0.55 * (1.0 - age_norm) + 0.45 * post_hour_cluster_score, 4)


	def compute_graph_risk(
	    flagged_neighbor_ratio: float,
	    mutual_follow_rate: float,
	    avg_neighbor_photo_reuse: float,
	) -> float:
	    """Return the structural risk score derived from the follow graph.

	    Args:
	        flagged_neighbor_ratio: Flagged-neighbour signal (weight 0.45).
	        mutual_follow_rate: Mutual-follow signal (weight 0.35).
	        avg_neighbor_photo_reuse: Neighbours' average photo-reuse signal
	            (weight 0.20).

	    Returns:
	        The weighted sum of the three signals, rounded to 4 decimal places.
	    """
	    neighbor_term = 0.45 * flagged_neighbor_ratio
	    mutual_term = 0.35 * mutual_follow_rate
	    photo_term = 0.20 * avg_neighbor_photo_reuse
	    # Summed left to right, in the same order as the weights are listed.
	    combined = neighbor_term + mutual_term + photo_term
	    return round(combined, 4)


	def compute_hub_legitimacy(
	    follower_count: int,
	    following_count: int,
	    account_age_days: int,
	    suspicious_mutual_ratio: float,
	) -> float:
	    """Return the celebrity/hub legitimacy discount for an account.

	    Large, established accounts are unlikely to be fakes: a high value means
	    high legitimacy and is meant to be subtracted from the fake-risk score.

	    Negative counts or ages are treated as zero so that malformed input
	    cannot raise a ``math`` domain error in ``log1p`` or push a normalised
	    term outside [0.0, 1.0]. Results for non-negative inputs are unchanged.

	    Args:
	        follower_count: Number of followers.
	        following_count: Number of accounts followed.
	        account_age_days: Age of the account in days.
	        suspicious_mutual_ratio: Suspicious-mutuals signal; a higher value
	            lowers legitimacy.

	    Returns:
	        Weighted legitimacy score, rounded to 4 decimal places.
	    """
	    F_MAX = 1_000_000  # follower count at which the log-scaled term saturates
	    followers = max(follower_count, 0)
	    following = max(following_count, 0)

	    # Log scale so the term grows with order of magnitude, capped at F_MAX.
	    followers_norm = min(1.0, math.log1p(followers) / math.log1p(F_MAX))
	    # following/followers ratio, saturating at 5:1; max(..., 1) avoids
	    # dividing by zero for accounts with no followers.
	    follow_ratio_norm = min(1.0, (following / max(followers, 1)) / 5.0)
	    # Age normalised against one year, clamped on both sides.
	    age_norm = max(0.0, min(1.0, account_age_days / 365.0))
	    return round(
	        0.45 * followers_norm
	        + 0.25 * (1.0 - follow_ratio_norm)
	        + 0.20 * age_norm
	        + 0.10 * (1.0 - suspicious_mutual_ratio),
	        4,
	    )


	def compute_fake_risk(
	    node_risk: float,
	    behavior_risk: float,
	    graph_risk: float,
	    hub_legitimacy: float,
	) -> float:
	    """Combine the component scores into one fake-risk score in [0.0, 1.0].

	    Graph risk has the largest weight (0.45), followed by node risk (0.30)
	    and behavior risk (0.25). Hub legitimacy is subtracted with weight 0.25
	    so that celebrity-like accounts are discounted.

	    Args:
	        node_risk: Content-based risk component.
	        behavior_risk: Temporal risk component.
	        graph_risk: Structural risk component.
	        hub_legitimacy: Legitimacy discount component.

	    Returns:
	        The clamped composite score, rounded to 4 decimal places.
	    """
	    risk_total = 0.30 * node_risk + 0.25 * behavior_risk + 0.45 * graph_risk
	    discounted = risk_total - 0.25 * hub_legitimacy
	    # Clamp: upper bound first, then lower bound.
	    capped = min(1.0, discounted)
	    clamped = max(0.0, capped)
	    return round(clamped, 4)


	def compute_weighted_fake_risk(
	    node_risk: float,
	    behavior_risk: float,
	    graph_risk: float,
	    hub_legitimacy: float,
	    primary_signal: str = "photo_reuse",
	) -> float:
	    """Compute fake risk with weights tilted toward a platform's primary signal.

	    Round 2: the component matching the platform's primary enforcement
	    signal gets a larger weight. Any unrecognised ``primary_signal`` falls
	    back to the default weights (node 0.30, behavior 0.25, graph 0.45).

	    Args:
	        node_risk: Content-based risk (photo + bio).
	        behavior_risk: Temporal risk (age + post hour).
	        graph_risk: Structural risk (flagged neighbors + mutuals).
	        hub_legitimacy: Celebrity discount factor, subtracted with weight 0.25.
	        primary_signal: Platform priority ("photo_reuse", "bio_template",
	            "ip_cluster").

	    Returns:
	        Weighted risk score clamped to [0.0, 1.0], rounded to 4 decimals.
	    """
	    # Each tuple is (node weight, behavior weight, graph weight).
	    if primary_signal == "ip_cluster":
	        # IP cluster: boost behavior_risk (device/network signals).
	        weights = (0.25, 0.40, 0.35)
	    elif primary_signal in ("photo_reuse", "bio_template"):
	        # Content signals: boost node_risk.
	        weights = (0.45, 0.20, 0.35)
	    else:
	        weights = (0.30, 0.25, 0.45)
	    w_node, w_behavior, w_graph = weights

	    risk_total = w_node * node_risk + w_behavior * behavior_risk + w_graph * graph_risk
	    discounted = risk_total - 0.25 * hub_legitimacy
	    capped = min(1.0, discounted)
	    return round(max(0.0, capped), 4)


	def classify_risk(fake_risk: float, threshold: float = 0.35) -> str:
	    """Translate a fake_risk score into an account status label.

	    Round 2: the platform threshold is a parameter (default 0.35 for
	    backward compatibility).

	    Args:
	        fake_risk: Composite fake-risk score to classify.
	        threshold: Scores strictly below this are "normal"; scores strictly
	            below ``threshold + 0.25`` are "suspect".

	    Returns:
	        "normal", "suspect", or "confirmed_fake".
	    """
	    # Bands are checked in ascending order; each bound is exclusive.
	    bands = (
	        ("normal", threshold),
	        ("suspect", threshold + 0.25),
	    )
	    for status, upper_bound in bands:
	        if fake_risk < upper_bound:
	            return status
	    return "confirmed_fake"


	def grader_score(
	    tp: int,
	    fp: int,
	    fn: int,
	    steps_used: int,
	    max_steps: int,
	    threshold: float = 0.35,
	    fp_penalty_weight: float = 0.5,
	) -> float:
	    """Return the normalised [0.0, 1.0] submission score used by /grader.

	    Round 2: accepts a platform-specific threshold and FP penalty.

	    Win condition (recall >= 0.8 AND precision >= 0.7):
	        score = 0.55 + 0.20 * recall + 0.15 * precision
	                + 0.10 * efficiency + 0.05 * (1 - threshold)
	    Otherwise (partial credit):
	        score = 0.30 * recall + 0.10 * precision

	    Efficiency is the fraction of the step budget left unused. A
	    non-positive ``max_steps`` yields an efficiency of 0.0 instead of
	    raising ``ZeroDivisionError`` or producing a spurious bonus.

	    Args:
	        tp: True positives.
	        fp: False positives.
	        fn: False negatives (accepted for interface compatibility; not used
	            in the score).
	        steps_used: Steps consumed.
	        max_steps: Maximum steps allowed.
	        threshold: Platform threshold (stricter = harder = higher bonus).
	        fp_penalty_weight: Platform FP cost (not used in score, for
	            reference).

	    Returns:
	        The score clamped to [0.0, 1.0], rounded to 4 decimal places.
	    """
	    # NOTE(review): recall uses a fixed denominator of 10 rather than
	    # tp + fn; presumably every task has exactly 10 true fakes - confirm
	    # against formulas.md before changing.
	    recall = tp / 10.0
	    precision = tp / max(tp + fp, 1)  # max(..., 1) avoids 0/0 with no flags

	    if max_steps > 0:
	        efficiency = max(0.0, (max_steps - steps_used) / max_steps)
	    else:
	        # No step budget: nothing can have been saved.
	        efficiency = 0.0

	    # Threshold difficulty bonus: stricter platforms (low threshold) get bonus for high precision
	    # Instagram (0.08) -> factor 0.92, Snapchat (0.74) -> factor 0.26
	    threshold_factor = 1.0 - threshold

	    if recall >= 0.8 and precision >= 0.7:
	        score = (
	            0.55
	            + 0.20 * recall
	            + 0.15 * precision
	            + 0.10 * efficiency
	            + 0.05 * threshold_factor  # Bonus for strict platform
	        )
	    else:
	        score = 0.30 * recall + 0.10 * precision

	    return round(max(0.0, min(1.0, score)), 4)