# fraudshield/fraudshield_env.py
# Author: DevikaJ2005 — commit e4ffe61 "Refine FraudShield snapshot pipeline"
"""FraudShield environment implementation."""
from __future__ import annotations

import uuid
from datetime import datetime, timezone
from typing import Any, Dict, List

from data_loader import FraudDataLoader
from models import (
    DecisionEnum,
    EpisodeState,
    FraudCheckAction,
    FraudCheckObservation,
    ResetResult,
    Reward,
    StepResult,
    TaskDifficulty,
    TransactionData,
)
class FraudShieldEnvironment:
    """OpenEnv-compatible environment for e-commerce fraud review.

    An episode presents a fixed sequence of transaction cases for a chosen
    difficulty.  Each :meth:`step` scores one agent decision with a dense,
    business-cost-weighted reward and advances to the next case; the episode
    ends once every case has been evaluated.
    """

    def __init__(self, data_path: str = "data", seed: int = 42):
        """Initialise bookkeeping; case data is loaded lazily (see ensure_data_loaded).

        Args:
            data_path: Directory containing the committed data snapshot.
            seed: Seed forwarded to the data loader for reproducible sampling.
        """
        self.seed = seed
        self.data_loader = FraudDataLoader(data_path=data_path, seed=seed)
        self.data_loaded = False
        self.episode_id = ""
        self.current_task = TaskDifficulty.EASY
        self.step_count = 0
        self.current_transaction_idx = 0
        self.cumulative_reward = 0.0
        self.correct_predictions = 0
        self.is_done = False
        self.current_cases: List[Dict[str, Any]] = []
        self.ground_truth_labels: List[str] = []
        self.predictions: List[str] = []
        self.confidences: List[float] = []
        # Nominal per-difficulty budgets; reset() overwrites the active entry
        # with the actual number of cases served for the episode.
        self.max_steps = {
            TaskDifficulty.EASY: 24,
            TaskDifficulty.MEDIUM: 36,
            TaskDifficulty.HARD: 48,
        }

    def load_data(self) -> bool:
        """Load the committed snapshot or rebuild it from the local public source CSV."""
        self.data_loaded = self.data_loader.load_data()
        return self.data_loaded

    def load_kaggle_data(self) -> bool:
        """Backward-compatible wrapper for the previous method name."""
        return self.load_data()

    def ensure_data_loaded(self) -> None:
        """Load data on demand so server startup can stay simple.

        Raises:
            RuntimeError: If the data bundle cannot be loaded.
        """
        if not self.data_loaded and not self.load_data():
            raise RuntimeError("FraudShield data bundle could not be loaded.")

    def reset(self, task: str = "easy") -> ResetResult:
        """Start a fresh episode for a given task difficulty.

        Args:
            task: Difficulty name accepted by ``TaskDifficulty``.

        Returns:
            ResetResult holding the first observation and episode metadata.

        Raises:
            RuntimeError: If the data bundle cannot be loaded, or the loader
                returns no cases for ``task``.
            ValueError: If ``task`` is not a valid ``TaskDifficulty`` value.
        """
        self.ensure_data_loaded()
        self.episode_id = f"ep_{uuid.uuid4().hex[:8]}"
        self.current_task = TaskDifficulty(task)
        self.step_count = 0
        self.current_transaction_idx = 0
        self.cumulative_reward = 0.0
        self.correct_predictions = 0
        self.is_done = False
        self.predictions = []
        self.confidences = []
        self.current_cases = self.data_loader.get_task_cases(task)
        if not self.current_cases:
            # Fail loudly here rather than with an opaque IndexError inside
            # _get_observation (and an episode that could never finish).
            raise RuntimeError(f"No cases available for task '{task}'.")
        self.ground_truth_labels = [case["label"] for case in self.current_cases]
        # Episode length is exactly the number of sampled cases.
        self.max_steps[self.current_task] = len(self.current_cases)
        observation = self._get_observation()
        info = {
            "episode_id": self.episode_id,
            "task": task,
            "task_focus": observation.historical_context.get("task_focus") if observation.historical_context else None,
            "data_snapshot": self.data_loader.get_bundle_summary(),
            "max_steps": self.max_steps[self.current_task],
            "num_transactions": len(self.current_cases),
            "fraud_count": sum(1 for label in self.ground_truth_labels if label == "fraud"),
            "legitimate_count": sum(1 for label in self.ground_truth_labels if label == "legitimate"),
        }
        return ResetResult(observation=observation, info=info)

    def step(self, action: FraudCheckAction) -> StepResult:
        """Evaluate one agent action and return the next observation.

        Args:
            action: Agent decision (label + confidence) for the current case.

        Returns:
            StepResult with the reward, done flag, and per-step diagnostics.

        Raises:
            RuntimeError: If called after the episode has finished.
        """
        if self.is_done:
            raise RuntimeError("Episode is done. Call reset() to start a new episode.")
        current_case = self.current_cases[self.current_transaction_idx]
        expected_transaction_id = current_case["transaction_id"]
        # Actions referencing a transaction other than the one shown are
        # penalised and can never count as correct.
        wrong_transaction_id = action.transaction_id != expected_transaction_id
        ground_truth = current_case["label"]
        risk_score = float(current_case["risk_score"])
        business_cost = float(current_case["business_cost"])
        predicted_label = action.decision.value
        is_correct = predicted_label == ground_truth and not wrong_transaction_id
        reward_value, confidence_penalty, reward_reason = self._calculate_reward(
            predicted_label=predicted_label,
            ground_truth=ground_truth,
            confidence=action.confidence,
            risk_score=risk_score,
            business_cost=business_cost,
            wrong_transaction_id=wrong_transaction_id,
        )
        if is_correct:
            self.correct_predictions += 1
        self.predictions.append(predicted_label)
        self.confidences.append(action.confidence)
        self.cumulative_reward += reward_value
        self.step_count += 1
        self.current_transaction_idx += 1
        self.is_done = self.current_transaction_idx >= len(self.current_cases)
        reward = Reward(
            value=reward_value,
            reason=reward_reason,
            is_correct=is_correct,
            ground_truth=DecisionEnum(ground_truth),
            confidence_penalty=confidence_penalty,
            business_impact=business_cost,
        )
        observation = self._get_terminal_observation() if self.is_done else self._get_observation()
        info = {
            "step": self.step_count,
            # step_count was just incremented, so this division is safe.
            "accuracy_so_far": round(self.correct_predictions / self.step_count, 4),
            "cumulative_reward": round(self.cumulative_reward, 4),
            "expected_transaction_id": expected_transaction_id,
            "wrong_transaction_id": wrong_transaction_id,
            "risk_score": risk_score,
            "business_cost": business_cost,
        }
        return StepResult(observation=observation, reward=reward, done=self.is_done, info=info)

    def state(self) -> EpisodeState:
        """Return the current episode state."""
        return EpisodeState(
            episode_id=self.episode_id,
            task_name=self.current_task,
            step_count=self.step_count,
            transactions_evaluated=self.current_transaction_idx,
            cumulative_reward=self.cumulative_reward,
            correct_predictions=self.correct_predictions,
            is_done=self.is_done,
            max_steps=self.max_steps[self.current_task],
        )

    def _calculate_reward(
        self,
        predicted_label: str,
        ground_truth: str,
        confidence: float,
        risk_score: float,
        business_cost: float,
        wrong_transaction_id: bool,
    ) -> tuple[float, float, str]:
        """Apply dense reward shaping with business-cost sensitivity.

        The base reward scales with the business cost of the case (true
        positives on expensive fraud pay more; missed fraud costs more).
        Calibration is rewarded by comparing the agent's confidence to a
        target derived from the case's risk score.  The final value is
        clamped to [-1.0, 1.0].

        Returns:
            Tuple of (clamped reward value, confidence penalty term,
            human-readable reason string).
        """
        is_fraud_case = ground_truth == "fraud"
        predicted_fraud = predicted_label == "fraud"
        if is_fraud_case and predicted_fraud:
            base_reward = 0.68 + (0.16 * business_cost)
        elif not is_fraud_case and not predicted_fraud:
            base_reward = 0.54 + (0.06 * (1.2 - min(business_cost, 1.2)))
        elif is_fraud_case and not predicted_fraud:
            # Missed fraud is the costliest error.
            base_reward = -0.72 - (0.14 * business_cost)
        else:
            # False positive: blocking a legitimate order.
            base_reward = -0.48 - (0.08 * business_cost)
        # A well-calibrated agent should be confident in proportion to the
        # case's risk score (or its complement for legitimate cases).
        target_confidence = risk_score if is_fraud_case else (1.0 - risk_score)
        confidence_penalty = 0.12 - abs(confidence - target_confidence) * 0.24
        if predicted_label != ground_truth:
            # Extra penalty for being confidently wrong.
            confidence_penalty -= 0.04 + (confidence * 0.06)
        if wrong_transaction_id:
            confidence_penalty -= 0.10
        reward_value = max(-1.0, min(1.0, base_reward + confidence_penalty))
        reason_bits = [
            f"predicted={predicted_label}",
            f"actual={ground_truth}",
            f"target_confidence={target_confidence:.2f}",
        ]
        if wrong_transaction_id:
            reason_bits.append("action referenced the wrong transaction_id")
        reward_reason = ", ".join(reason_bits)
        return reward_value, confidence_penalty, reward_reason

    def _get_observation(self) -> FraudCheckObservation:
        """Return the current task observation."""
        current_case = self.current_cases[self.current_transaction_idx]
        return FraudCheckObservation(
            transaction_id=current_case["transaction_id"],
            transaction_data=TransactionData(**current_case["transaction_data"]),
            task_name=self.current_task,
            # 1-based step index for display.
            episode_step=self.step_count + 1,
            historical_context=current_case["historical_context"],
        )

    def _get_terminal_observation(self) -> FraudCheckObservation:
        """Return a terminal observation once the episode completes.

        The transaction payload is a zeroed placeholder; clients should key
        off ``historical_context["episode_done"]``.
        """
        terminal_transaction = TransactionData(
            amount=0.0,
            seller_id="TERMINAL",
            buyer_id="TERMINAL",
            item_category="none",
            item_price=0.0,
            shipping_address="XX",
            seller_account_age_days=0,
            buyer_account_age_days=0,
            payment_method="none",
            device_country="XX",
            # utcnow() is deprecated (3.12+) and naive; use an aware UTC stamp.
            timestamp=datetime.now(timezone.utc).isoformat(),
            is_repeat_buyer=False,
            seller_avg_rating=0.0,
            num_seller_reviews=0,
            previous_fraud_flags=0,
            shipping_speed="none",
            amount_percentile=0.0,
            seller_chargeback_rate_30d=0.0,
            buyer_disputes_90d=0,
            shared_device_accounts_24h=0,
            same_address_orders_24h=0,
        )
        return FraudCheckObservation(
            transaction_id="TERMINAL",
            transaction_data=terminal_transaction,
            task_name=self.current_task,
            episode_step=max(1, self.step_count),
            historical_context={"episode_done": True},
        )