Spaces:
Sleeping
Sleeping
| import os | |
| import numpy as np | |
| import pandas as pd | |
| from sklearn.ensemble import RandomForestClassifier | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.metrics import accuracy_score | |
| import joblib | |
| import google.generativeai as genai | |
| # ================================ | |
| # CONFIG | |
| # ================================ | |
# Configure the Gemini client from the GEMINI_API_KEY environment variable.
# os.getenv returns None when the variable is unset; nothing here checks for
# that, so a missing key is not reported at this point.
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
# File used by joblib.dump / joblib.load for the persisted classifier.
# The path is relative, so it resolves against the current working directory.
MODEL_PATH = "fraud_model.pkl"
| # ================================ | |
| # DATA GENERATION (SIMULATED) | |
| # ================================ | |
def generate_dataset(n_samples=2000, seed=42):
    """Build a simulated transaction table for training the fraud model.

    Every column, including the ``fraud`` label, is drawn independently at
    random, so the label carries no real signal; the data only exercises the
    pipeline.

    Args:
        n_samples: Number of rows to generate. Defaults to 2000.
        seed: Seed for the private random generator. Defaults to 42.

    Returns:
        A ``pandas.DataFrame`` with columns ``amount``, ``old_balance``,
        ``new_balance``, ``transactions_per_day`` and ``fraud`` (0 or 1).
    """
    # A private RandomState yields the same stream as np.random.seed(seed)
    # followed by the module-level np.random.* calls, but does not reseed the
    # process-wide generator that other code may be relying on.
    rng = np.random.RandomState(seed)
    # Draw order matters for reproducibility: keep the columns in this order.
    return pd.DataFrame({
        "amount": rng.uniform(10, 5000, n_samples),
        "old_balance": rng.uniform(0, 10000, n_samples),
        "new_balance": rng.uniform(0, 10000, n_samples),
        "transactions_per_day": rng.randint(1, 40, n_samples),
        "fraud": rng.randint(0, 2, n_samples),
    })
| # ================================ | |
| # MODEL TRAINING | |
| # ================================ | |
def train_model():
    """Fit the random-forest fraud classifier, report its accuracy, and save it.

    The simulated dataset from ``generate_dataset`` is split 80/20; the forest
    is fitted on the larger part and scored on the held-out part. The accuracy
    is printed and the fitted model is written to ``MODEL_PATH`` with joblib.

    Note that the simulated labels are drawn independently of the features, so
    the printed accuracy is expected to sit near chance level.

    Returns:
        The fitted ``RandomForestClassifier``.
    """
    dataset = generate_dataset()
    target = dataset["fraud"]
    predictors = dataset.drop(columns="fraud")

    # Fixed random_state keeps the split (and therefore the model) reproducible.
    split = train_test_split(predictors, target, test_size=0.2, random_state=42)
    X_train, X_test, y_train, y_test = split

    forest = RandomForestClassifier(n_estimators=200, max_depth=10, random_state=42)
    forest.fit(X_train, y_train)

    held_out_predictions = forest.predict(X_test)
    acc = accuracy_score(y_test, held_out_predictions)
    print(f"Model Accuracy: {acc * 100:.2f}%")

    joblib.dump(forest, MODEL_PATH)
    return forest
| # ================================ | |
| # LOAD OR TRAIN MODEL | |
| # ================================ | |
def load_model():
    """Return the fraud classifier, reusing the saved copy when one exists.

    Returns:
        The object stored at ``MODEL_PATH`` if that path exists; otherwise a
        freshly trained model from ``train_model`` (which also saves it).
    """
    if not os.path.exists(MODEL_PATH):
        return train_model()
    # NOTE(security): joblib.load unpickles the file, which can execute
    # arbitrary code. Only safe while MODEL_PATH is written by this program.
    return joblib.load(MODEL_PATH)
# Built once at import time: loads the saved classifier if MODEL_PATH exists,
# otherwise trains and saves a new one. detect_fraud reads this module-level name.
model = load_model()
| # ================================ | |
| # GEMINI EXPLANATION ENGINE | |
| # ================================ | |
def explain_prediction(features, prediction):
    """Ask Gemini for a written analysis of one scored transaction.

    Args:
        features: Indexable of four values in the order amount, old balance,
            new balance, transactions per day.
        prediction: Predicted label; 1 is reported as "Fraud", anything else
            as "Legitimate".

    Returns:
        The ``text`` of the Gemini response.
    """
    # Named `llm` so it is not confused with the module-level classifier `model`.
    llm = genai.GenerativeModel("gemini-2.5-flash")
    verdict = "Fraud" if prediction == 1 else "Legitimate"
    amount = features[0]
    old_balance = features[1]
    new_balance = features[2]
    per_day = features[3]
    # The prompt lines are flush-left on purpose: they are part of the string.
    prompt = f"""
You are an AI fraud analyst.
Transaction Details:
- Amount: {amount}
- Old Balance: {old_balance}
- New Balance: {new_balance}
- Transactions per day: {per_day}
Prediction: {verdict}
Provide a professional fraud analysis explanation.
"""
    return llm.generate_content(prompt).text
| # ================================ | |
| # PREDICTION PIPELINE | |
| # ================================ | |
def detect_fraud(amount, old_balance, new_balance, transactions_per_day):
    """Score one transaction and attach an explanation when it looks suspicious.

    Args:
        amount: Transaction amount.
        old_balance: Account balance before the transaction.
        new_balance: Account balance after the transaction.
        transactions_per_day: Number of transactions made that day.

    Returns:
        A tuple ``(pred, fraud_prob, explanation)``: the predicted label, the
        model's probability for class 1 (fraud), and either a Gemini-written
        analysis or a fixed "no anomaly" message.
    """
    row = np.array(
        [amount, old_balance, new_balance, transactions_per_day]
    ).reshape(1, -1)

    # A model fitted on a DataFrame remembers its column names and warns when
    # handed a bare array; give it the same names back when it has them.
    fitted_names = getattr(model, "feature_names_in_", None)
    features = row if fitted_names is None else pd.DataFrame(row, columns=fitted_names)

    pred = model.predict(features)[0]

    # predict_proba columns follow model.classes_, so look up the fraud column
    # instead of assuming it sits at index 1.
    class_probs = model.predict_proba(features)[0]
    fraud_columns = np.flatnonzero(model.classes_ == 1)
    fraud_prob = class_probs[fraud_columns[0]] if fraud_columns.size else 0.0

    # Only explain if it's likely fraud OR the model is very unsure (near 0.5);
    # this avoids an external API call for clearly legitimate transactions.
    if pred == 1 or (0.4 < fraud_prob < 0.6):
        explanation = explain_prediction(row[0], pred)
    else:
        explanation = "Transaction processed normally; no anomaly detected."
    return pred, fraud_prob, explanation