"""Simulated fraud-detection pipeline with Gemini-generated explanations.

A random forest is trained on synthetic transactions (or loaded from disk if
a saved model exists) and used to score single transactions. Suspicious or
borderline transactions additionally get a natural-language explanation from
the Gemini API.
"""

import logging
import os

import google.generativeai as genai
import joblib
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

logger = logging.getLogger(__name__)

# ================================
# CONFIG
# ================================
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))

MODEL_PATH = "fraud_model.pkl"
GEMINI_MODEL_NAME = "gemini-2.5-flash"

# Column order used for training; inference must present the same columns.
FEATURE_COLUMNS = [
    "amount",
    "old_balance",
    "new_balance",
    "transactions_per_day",
]
TARGET_COLUMN = "fraud"
FRAUD_LABEL = 1

# Fraud probabilities strictly inside this band count as "model is unsure".
UNCERTAIN_LOW = 0.4
UNCERTAIN_HIGH = 0.6

NO_ANOMALY_MESSAGE = "Transaction processed normally; no anomaly detected."
EXPLANATION_UNAVAILABLE_MESSAGE = (
    "Automated explanation unavailable; review this transaction manually."
)


# ================================
# DATA GENERATION (SIMULATED)
# ================================
def generate_dataset(n_samples: int = 2000, seed: int = 42) -> pd.DataFrame:
    """Return a synthetic transaction dataset.

    All columns, including the ``fraud`` label, are drawn independently at
    random, so the label carries no real signal; the data only exists to
    exercise the pipeline.

    Args:
        n_samples: Number of rows to generate.
        seed: Seed for the random generator.

    Returns:
        DataFrame with the columns in ``FEATURE_COLUMNS`` plus ``fraud``.
    """
    # A private legacy RandomState yields the same stream as
    # ``np.random.seed(seed)`` followed by ``np.random.*`` calls, but without
    # reseeding the process-wide generator as a side effect.
    rng = np.random.RandomState(seed)
    return pd.DataFrame({
        "amount": rng.uniform(10, 5000, n_samples),
        "old_balance": rng.uniform(0, 10000, n_samples),
        "new_balance": rng.uniform(0, 10000, n_samples),
        "transactions_per_day": rng.randint(1, 40, n_samples),
        "fraud": rng.randint(0, 2, n_samples),
    })


# ================================
# MODEL TRAINING
# ================================
def train_model() -> RandomForestClassifier:
    """Train a random forest on the synthetic data and save it to disk.

    Prints the hold-out accuracy and writes the fitted model to
    ``MODEL_PATH``.

    Returns:
        The fitted classifier.
    """
    data = generate_dataset()
    X = data.drop(TARGET_COLUMN, axis=1)
    y = data[TARGET_COLUMN]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    classifier = RandomForestClassifier(
        n_estimators=200,
        max_depth=10,
        random_state=42,
    )
    classifier.fit(X_train, y_train)

    acc = accuracy_score(y_test, classifier.predict(X_test))
    print(f"Model Accuracy: {acc * 100:.2f}%")

    joblib.dump(classifier, MODEL_PATH)
    return classifier


# ================================
# LOAD OR TRAIN MODEL
# ================================
def load_model():
    """Return the model stored at ``MODEL_PATH``, training one if absent."""
    if os.path.exists(MODEL_PATH):
        # NOTE(security): joblib.load unpickles the file and can therefore
        # execute arbitrary code; MODEL_PATH must only ever point at a file
        # this application wrote itself.
        return joblib.load(MODEL_PATH)
    return train_model()


model = load_model()


# ================================
# GEMINI EXPLANATION ENGINE
# ================================
def explain_prediction(features, prediction) -> str:
    """Ask Gemini for a written analysis of one scored transaction.

    Args:
        features: Indexable of four values in ``FEATURE_COLUMNS`` order
            (amount, old balance, new balance, transactions per day).
        prediction: Predicted label; ``1`` is reported as "Fraud", anything
            else as "Legitimate".

    Returns:
        The text of the Gemini response.

    Any exception raised by the Gemini client, or by reading ``response.text``,
    propagates to the caller.
    """
    # Named ``llm`` so it does not shadow the module-level classifier ``model``.
    llm = genai.GenerativeModel(GEMINI_MODEL_NAME)

    prompt = f"""
    You are an AI fraud analyst.

    Transaction Details:
    - Amount: {features[0]}
    - Old Balance: {features[1]}
    - New Balance: {features[2]}
    - Transactions per day: {features[3]}

    Prediction: {"Fraud" if prediction == 1 else "Legitimate"}

    Provide a professional fraud analysis explanation.
    """

    response = llm.generate_content(prompt)
    return response.text


# ================================
# PREDICTION PIPELINE
# ================================
def detect_fraud(amount, old_balance, new_balance, transactions_per_day):
    """Score a single transaction and, when warranted, explain the result.

    Args:
        amount: Transaction amount.
        old_balance: Account balance before the transaction.
        new_balance: Account balance after the transaction.
        transactions_per_day: Number of transactions made that day.

    Returns:
        A ``(pred, fraud_prob, explanation)`` tuple: the predicted label, the
        model's probability for the fraud class, and an explanation string.
    """
    features = np.array(
        [amount, old_balance, new_balance, transactions_per_day]
    ).reshape(1, -1)
    # The forest is fitted on a DataFrame, so predict on one with the same
    # column names; a bare ndarray makes scikit-learn warn about missing
    # feature names on every call.
    frame = pd.DataFrame(features, columns=FEATURE_COLUMNS)

    pred = model.predict(frame)[0]

    # predict_proba columns follow model.classes_, so look the fraud column up
    # instead of assuming it is at index 1.
    probabilities = model.predict_proba(frame)[0]
    classes = list(model.classes_)
    if FRAUD_LABEL in classes:
        fraud_prob = probabilities[classes.index(FRAUD_LABEL)]
    else:
        # The model never saw a fraud example, so it assigns it no probability.
        fraud_prob = 0.0

    # Only explain if it's likely fraud OR the model is very unsure (near 0.5).
    is_uncertain = UNCERTAIN_LOW < fraud_prob < UNCERTAIN_HIGH
    if pred != FRAUD_LABEL and not is_uncertain:
        return pred, fraud_prob, NO_ANOMALY_MESSAGE

    try:
        explanation = explain_prediction(features[0], pred)
    except Exception:
        # The explanation is auxiliary: an API outage or a blocked response
        # must not prevent the fraud decision itself from being returned.
        logger.exception("Gemini explanation failed")
        explanation = EXPLANATION_UNAVAILABLE_MESSAGE

    return pred, fraud_prob, explanation