# Credit-card fraud detection demo.
#
# Downloads the credit-card transactions CSV (if it is not already on disk),
# trains three detectors (logistic regression, random forest, isolation
# forest) and exposes them through a small Gradio UI with two tabs:
# one to evaluate the models on the held-out split, one to score a single
# transaction entered by hand.

import os
import zipfile

import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import seaborn as sns
from sklearn.ensemble import IsolationForest, RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    classification_report,
    confusion_matrix,
    roc_auc_score,
    roc_curve,
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# -------------------------
# Load Dataset (auto-download from the hosted CSV if not present)
# -------------------------
DATA_URL = "https://storage.googleapis.com/download.tensorflow.org/data/creditcard.csv"
DATA_PATH = "creditcard.csv"
DOWNLOAD_TIMEOUT_S = 60  # passed to requests as the connect/read timeout
N_V_FEATURES = 28  # number of V1..Vn inputs exposed in the UI


def _download_dataset(url, dest):
    """Stream *url* to *dest*, failing loudly on HTTP errors.

    The payload is written to ``<dest>.part`` first and only renamed to
    *dest* once the download finished, so an interrupted or failed download
    never leaves a truncated file that later runs would mistake for the
    real dataset.

    Raises:
        requests.RequestException: on connection problems, timeouts or a
            non-2xx HTTP status.
    """
    partial_path = f"{dest}.part"
    with requests.get(url, stream=True, timeout=DOWNLOAD_TIMEOUT_S) as response:
        response.raise_for_status()
        with open(partial_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=1 << 20):
                f.write(chunk)
    os.replace(partial_path, dest)


if not os.path.exists(DATA_PATH):
    print("Downloading dataset...")
    _download_dataset(DATA_URL, DATA_PATH)

df = pd.read_csv(DATA_PATH)

# -------------------------
# Preprocess
# -------------------------
X = df.drop("Class", axis=1)
y = df["Class"]

# Column order the models are trained on; single-transaction inputs are
# assembled in this same order.
FEATURE_COLUMNS = list(X.columns)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)
# Work on explicit copies so the column assignments below do not write into
# (or warn about) views of X.
X_train = X_train.copy()
X_test = X_test.copy()

# Fit the scaler on the training split only, then apply it to the test split,
# so no test-set statistics leak into preprocessing.
scaler = StandardScaler()
X_train["Amount"] = scaler.fit_transform(X_train[["Amount"]].to_numpy()).ravel()
X_test["Amount"] = scaler.transform(X_test[["Amount"]].to_numpy()).ravel()

# -------------------------
# Train Models
# -------------------------
log_reg = LogisticRegression(max_iter=5000, class_weight="balanced", random_state=42)
log_reg.fit(X_train, y_train)

rf = RandomForestClassifier(n_estimators=200, class_weight="balanced", random_state=42)
rf.fit(X_train, y_train)

iso_forest = IsolationForest(
    n_estimators=200, contamination=0.0017, random_state=42
)
iso_forest.fit(X_train)


# -------------------------
# Evaluation
# -------------------------
def evaluate_models():
    """Return a classification report per model for the held-out test split.

    Returns:
        dict mapping the model's display name to the text produced by
        ``classification_report`` (4 digits of precision).
    """
    predictions = {
        "Logistic Regression": log_reg.predict(X_test),
        "Random Forest": rf.predict(X_test),
        # IsolationForest labels outliers as -1; map that to the fraud class 1
        # and everything else to 0 so it is comparable with y_test.
        "Isolation Forest": np.where(iso_forest.predict(X_test) == -1, 1, 0),
    }
    return {
        name: classification_report(y_test, y_pred, digits=4)
        for name, y_pred in predictions.items()
    }


# -------------------------
# Fraud Prediction Function
# -------------------------
def predict_transaction(amount, time, v_features):
    """Score a single transaction with all three models.

    Args:
        amount: raw (unscaled) transaction amount.
        time: value for the dataset's time column.
        v_features: sequence of the ``N_V_FEATURES`` V-feature values, in
            order V1..Vn.

    Returns:
        dict mapping each model's display name to a human-readable verdict
        string.

    Raises:
        ValueError: if the number of V features is wrong or any input is
            missing (``None``).
    """
    v_features = list(v_features)
    if len(v_features) != N_V_FEATURES:
        raise ValueError(
            f"Expected {N_V_FEATURES} V features, got {len(v_features)}"
        )

    labelled = [("Transaction Amount", amount), ("Time", time)]
    labelled += [(f"V{i}", v) for i, v in enumerate(v_features, start=1)]
    missing = [label for label, value in labelled if value is None]
    if missing:
        raise ValueError(f"Missing value(s) for: {', '.join(missing)}")

    # Only the amount goes through the scaler; the other inputs are used raw.
    scaled_amount = scaler.transform(np.array([[amount]], dtype=float))[0, 0]

    # Same positional layout as the original feature vector:
    # time, V features, amount. A DataFrame carrying the training column
    # names avoids sklearn's feature-name mismatch warning.
    row = [float(time), *(float(v) for v in v_features), scaled_amount]
    data = pd.DataFrame([row], columns=FEATURE_COLUMNS)

    pred_lr = log_reg.predict(data)[0]
    prob_lr = log_reg.predict_proba(data)[0][1]

    pred_rf = rf.predict(data)[0]
    prob_rf = rf.predict_proba(data)[0][1]

    # -1 means "outlier" for IsolationForest; report it as fraud (1).
    pred_if = 1 if iso_forest.predict(data)[0] == -1 else 0

    return {
        "Logistic Regression": f"Fraud={pred_lr} (Prob={prob_lr:.3f})",
        "Random Forest": f"Fraud={pred_rf} (Prob={prob_rf:.3f})",
        "Isolation Forest": f"Fraud={pred_if}",
    }


# -------------------------
# Gradio UI
# -------------------------
def ui_transaction(amount, time, *v_features):
    """Gradio callback: forward the form values to ``predict_transaction``.

    Invalid or missing inputs are re-raised as ``gr.Error`` so the message is
    shown to the user in the UI.
    """
    try:
        return predict_transaction(amount, time, list(v_features))
    except ValueError as exc:
        raise gr.Error(str(exc)) from exc


with gr.Blocks() as demo:
    gr.Markdown(
        "# 💳 Credit Card Fraud Detection\n"
        "Compare Logistic Regression, Random Forest & Isolation Forest"
    )

    with gr.Tab("Evaluate Models"):
        btn = gr.Button("Run Evaluation")
        out = gr.Textbox(lines=15, label="Results")

        def run_eval():
            """Render every model's report as one text block."""
            res = evaluate_models()
            return "\n\n".join([f"{k}:\n{v}" for k, v in res.items()])

        btn.click(run_eval, outputs=out)

    with gr.Tab("Predict a Transaction"):
        amount = gr.Number(label="Transaction Amount")
        time = gr.Number(label="Time (seconds since first transaction)")
        v_inputs = [gr.Number(label=f"V{i}") for i in range(1, N_V_FEATURES + 1)]
        btn_pred = gr.Button("Predict Fraud")
        out_pred = gr.JSON(label="Predictions")

        btn_pred.click(ui_transaction, inputs=[amount, time] + v_inputs, outputs=out_pred)

# Only start the web server when executed as a script, so the models and
# helper functions can be imported without launching the UI.
if __name__ == "__main__":
    demo.launch()