Spaces:

oluinioluwa814
/

Fraud_detection

Sleeping

App Files Files Community

Fraud_detection / main.py

oluinioluwa814

Update main.py

f506b26 verified about 1 month ago

raw

history blame contribute delete

3.1 kB

	import os
	import numpy as np
	import pandas as pd
	from sklearn.ensemble import RandomForestClassifier
	from sklearn.model_selection import train_test_split
	from sklearn.metrics import accuracy_score
	import joblib
	import google.generativeai as genai

	# ================================
	# CONFIG
	# ================================

	# Configure the Gemini client with the key read from the GEMINI_API_KEY
	# environment variable. os.getenv returns None when the variable is unset.
	# NOTE(review): presumably a missing key only fails once a request is
	# actually sent -- confirm against the google-generativeai docs.
	genai.configure(api_key=os.getenv("GEMINI_API_KEY"))

	# Relative path of the joblib file the trained classifier is written to
	# and read back from (resolved against the current working directory).
	MODEL_PATH = "fraud_model.pkl"

	# ================================
	# DATA GENERATION (SIMULATED)
	# ================================

	def generate_dataset(n_samples=2000, seed=42):
	    """Build a simulated transaction dataset.

	    Every column is drawn independently of the others, so the ``fraud``
	    label carries no real signal; the data is only suitable for exercising
	    the pipeline end to end.

	    A private ``RandomState`` is used instead of ``np.random.seed`` so that
	    calling this function does not reset NumPy's process-wide random state.
	    For a given seed it yields the same values the global legacy generator
	    would produce after ``np.random.seed(seed)``.

	    Args:
	        n_samples: Number of rows to generate (default 2000).
	        seed: Seed for the private random generator (default 42).

	    Returns:
	        A DataFrame with columns ``amount`` (uniform in [10, 5000)),
	        ``old_balance`` and ``new_balance`` (uniform in [0, 10000)),
	        ``transactions_per_day`` (integers in [1, 40)) and ``fraud``
	        (0 or 1).
	    """
	    rng = np.random.RandomState(seed)

	    # The draw order below fixes which random numbers land in which column;
	    # keep it stable so a given seed always reproduces the same dataset.
	    return pd.DataFrame({
	        "amount": rng.uniform(10, 5000, n_samples),
	        "old_balance": rng.uniform(0, 10000, n_samples),
	        "new_balance": rng.uniform(0, 10000, n_samples),
	        "transactions_per_day": rng.randint(1, 40, n_samples),
	        "fraud": rng.randint(0, 2, n_samples),
	    })

	# ================================
	# MODEL TRAINING
	# ================================

	def train_model():
	    """Fit a random-forest classifier on the simulated data and save it.

	    The dataset from ``generate_dataset`` is split 80/20 into training and
	    held-out rows, a 200-tree forest (max depth 10) is fitted on the
	    training rows, its held-out accuracy is printed, and the fitted model
	    is written to ``MODEL_PATH`` with joblib.

	    Returns:
	        The fitted ``RandomForestClassifier``.
	    """
	    dataset = generate_dataset()
	    labels = dataset["fraud"]
	    inputs = dataset.drop(columns="fraud")

	    # Fixed random_state keeps the split (and the reported score) repeatable.
	    X_train, X_test, y_train, y_test = train_test_split(
	        inputs, labels, test_size=0.2, random_state=42
	    )

	    forest_params = {"n_estimators": 200, "max_depth": 10, "random_state": 42}
	    classifier = RandomForestClassifier(**forest_params)
	    classifier.fit(X_train, y_train)

	    held_out_predictions = classifier.predict(X_test)
	    acc = accuracy_score(y_test, held_out_predictions)
	    print(f"Model Accuracy: {acc * 100:.2f}%")

	    joblib.dump(classifier, MODEL_PATH)
	    return classifier

	# ================================
	# LOAD OR TRAIN MODEL
	# ================================

	def load_model():
	    """Return the saved classifier, training a fresh one when none exists.

	    Returns:
	        The object stored at ``MODEL_PATH`` if that file is present,
	        otherwise the result of ``train_model()``.
	    """
	    if not os.path.exists(MODEL_PATH):
	        return train_model()
	    # NOTE: joblib.load is pickle-based, so the file at MODEL_PATH must come
	    # from a trusted source.
	    return joblib.load(MODEL_PATH)

	# Module-level classifier, loaded (or trained) once when this file is
	# imported; detect_fraud reads it for every prediction.
	model = load_model()

	# ================================
	# GEMINI EXPLANATION ENGINE
	# ================================

	def explain_prediction(features, prediction):
	    """Ask Gemini for a written analysis of one classified transaction.

	    Args:
	        features: Indexable holding, in order, the amount, old balance,
	            new balance and transactions-per-day of the transaction.
	        prediction: Classifier output; 1 is reported to the LLM as
	            "Fraud", anything else as "Legitimate".

	    Returns:
	        The text of the response from the ``gemini-2.5-flash`` model.
	    """
	    if prediction == 1:
	        verdict = "Fraud"
	    else:
	        verdict = "Legitimate"

	    amount, old_balance, new_balance, tx_per_day = (
	        features[position] for position in range(4)
	    )

	    # The prompt lines are deliberately not re-indented: their leading
	    # whitespace is part of the text sent to the model.
	    prompt = f"""
	You are an AI fraud analyst.

	Transaction Details:
	- Amount: {amount}
	- Old Balance: {old_balance}
	- New Balance: {new_balance}
	- Transactions per day: {tx_per_day}

	Prediction: {verdict}

	Provide a professional fraud analysis explanation.
	"""

	    gemini = genai.GenerativeModel("gemini-2.5-flash")
	    return gemini.generate_content(prompt).text

	# ================================
	# PREDICTION PIPELINE
	# ================================
	def detect_fraud(amount, old_balance, new_balance, transactions_per_day):
	    """Classify one transaction and, when warranted, explain the verdict.

	    Args:
	        amount: Transaction amount.
	        old_balance: Account balance before the transaction.
	        new_balance: Account balance after the transaction.
	        transactions_per_day: Number of transactions made that day.

	    Returns:
	        A tuple ``(pred, fraud_prob, explanation)``: the predicted class
	        label, the probability the model assigns to class 1 (fraud), and
	        either an LLM-written explanation or a fixed "no anomaly" message.
	    """
	    features = np.array(
	        [amount, old_balance, new_balance, transactions_per_day]
	    ).reshape(1, -1)

	    # A model fitted on a DataFrame records its column names and warns when
	    # later handed a bare array, so give it the same labelled layout. The
	    # positional argument order matches the training column order.
	    feature_names = getattr(model, "feature_names_in_", None)
	    if feature_names is not None:
	        model_input = pd.DataFrame(features, columns=list(feature_names))
	    else:
	        model_input = features

	    pred = model.predict(model_input)[0]

	    # predict_proba columns follow model.classes_; look up the column for
	    # label 1 instead of assuming it is index 1. A model that never saw a
	    # fraud example has no such column, which means probability zero.
	    class_probs = model.predict_proba(model_input)[0]
	    fraud_columns = np.flatnonzero(np.asarray(model.classes_) == 1)
	    fraud_prob = class_probs[fraud_columns[0]] if fraud_columns.size else 0.0

	    # Only explain if it's likely fraud OR the model is very unsure (near 0.5)
	    if pred == 1 or (0.4 < fraud_prob < 0.6):
	        explanation = explain_prediction(features[0], pred)
	    else:
	        explanation = "Transaction processed normally; no anomaly detected."

	    return pred, fraud_prob, explanation