Upload folder using huggingface_hub

Browse files

Files changed (9) hide show

README.md +89 -0
ensemble_model.pkl +3 -0
lr_model.pkl +3 -0
metadata.json +117 -0
nn_model.pkl +3 -0
nn_pytorch_state.pth +3 -0
rf_model.pkl +3 -0
scaler.pkl +3 -0
xgb_model.pkl +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,89 @@

+---
+tags:
+- fraud-detection
+- ensemble-learning
+- e-commerce
+- imbalanced-data
+license: mit
+metrics:
+- accuracy
+- precision
+- recall
+- f1
+- roc_auc
+---
+# E-Commerce Fraud Detection Model
+## Model Description
+This is an ensemble fraud detection system trained on 1.47M e-commerce transactions with a 5.01% fraud rate.
+### Architecture
+**Weighted Ensemble Strategy (70%-30%)**
+- **Stage 1 - Recall Specialists (70% weight):** Logistic Regression + Random Forest
+- **Stage 2 - Precision Specialists (30% weight):** Neural Network + XGBoost
+### Performance Metrics
+| Model | Accuracy | Precision | Recall | F1-Score | AUC-ROC |
+|-------|----------|-----------|--------|----------|---------|
+| Logistic Regression | 0.5723 | 0.0988 | 0.9273 | 0.1786 | 0.8619 |
+| Random Forest | 0.6203 | 0.1075 | 0.8999 | 0.1920 | 0.8712 |
+| Neural Network | 0.9569 | 0.7013 | 0.2442 | 0.3623 | 0.8748 |
+| XGBoost | 0.9558 | 0.6632 | 0.2389 | 0.3513 | 0.8459 |
+| Stacking Ensemble | 0.8973 | 0.2640 | 0.5868 | 0.3642 | 0.8731 |
+### Key Features
+- **52 engineered features** including:
+  - Transaction patterns (amount, quantity, frequency)
+  - Customer behavior (account age, transaction history)
+  - Temporal features (time-based patterns)
+  - Risk indicators (unusual patterns, high-value flags)
+  - Interaction features (multi-dimensional risk signals)
+### Training
+- **Resampling:** ADASYN (1:1 balance)
+- **GPU Acceleration:** RAPIDS cuML, PyTorch, XGBoost
+- **Threshold Optimization:** F-beta score optimization
+- **Validation:** Stratified K-Fold Cross-Validation
+### Usage
+```python
+import joblib
+import numpy as np
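+# Load models.
+# NOTE: .pkl files are pickles; joblib.load can execute arbitrary code, so only load files you trust.
+# Only `scaler` and `ensemble_model` are needed for the prediction below; the individual models are optional.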
+lr_model = joblib.load("lr_model.pkl")
+rf_model = joblib.load("rf_model.pkl")
+nn_model = joblib.load("nn_model.pkl")
+xgb_model = joblib.load("xgb_model.pkl")
+ensemble_model = joblib.load("ensemble_model.pkl")
+scaler = joblib.load("scaler.pkl")
+# Prepare your data: the same 52 features used for training, in the same column order
+# (see the "features" list in metadata.json)
+X = ...  # Your transaction data
+# Scale
+X_scaled = scaler.transform(X)
+# Predict with ensemble
+fraud_proba = ensemble_model.predict_proba(X_scaled)[:, 1]
+fraud_pred = ensemble_model.predict(X_scaled)
+```
+### License
+MIT License
+### Contact
+COMPSCI 4AL3 - Group 34
+- Viransh Shah (shahv47@mcmaster.ca)
+- Ellen Xiong (xionge1@mcmaster.ca)

ensemble_model.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9dcd960fce8d4953dae330162a854d23c24b10bb6c8f5f6e8d5eced636ac1dbe
+size 12840038323

lr_model.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2ebdb69fcc3b77ade500f4257e4fe6a0af37dcbadc72e869e41c7e2c05628574
+size 3012

metadata.json ADDED Viewed

	@@ -0,0 +1,117 @@

+{
+  "version": "v1",
+  "dataset_size": "1.47M transactions",
+  "fraud_rate": "5.01%",
+  "features": [
+    "Transaction Amount",
+    "Quantity",
+    "Customer Age",
+    "Account Age Days",
+    "Transaction Hour",
+    "Total Customer Transactions",
+    "Address Mismatch",
+    "Day of Week",
+    "Month",
+    "Is Weekend",
+    "New Account",
+    "Transaction Amount Ratio",
+    "Avg Daily Transaction Velocity",
+    "Time Since Last Transaction",
+    "Amount Deviation From History",
+    "Product Category Diversity",
+    "Amount Log",
+    "Amount per Quantity",
+    "Amount zscore",
+    "Account Age Weeks",
+    "Quantity Log",
+    "High Amount Flag",
+    "High Quantity Flag",
+    "Unusual Hour Flag",
+    "Amount Age Interaction",
+    "Amount Velocity Interaction",
+    "New Account High Value",
+    "Weekend High Value",
+    "High Risk Profile",
+    "Velocity Deviation",
+    "Suspicious Pattern",
+    "Customer Location",
+    "Payment Method_bank transfer",
+    "Payment Method_credit card",
+    "Payment Method_debit card",
+    "Product Category_electronics",
+    "Product Category_health & beauty",
+    "Product Category_home & garden",
+    "Product Category_toys & games",
+    "Device Used_mobile",
+    "Device Used_tablet",
+    "Hour Bin_Evening",
+    "Hour Bin_Morning",
+    "Hour Bin_Night",
+    "Age Category_Elder",
+    "Age Category_Senior",
+    "Age Category_Young",
+    "Age Category_Young_Adult",
+    "Transaction Size_Medium",
+    "Transaction Size_Small",
+    "Transaction Size_Very_Large",
+    "Transaction Size_Very_Small"
+  ],
+  "num_features": 52,
+  "models": {
+    "Logistic Regression": {
+      "accuracy": 0.5723150314787072,
+      "precision": 0.09879761446710274,
+      "recall": 0.9273203322499097,
+      "f1": 0.17857018672415592,
+      "f2": 0.3463753844493606,
+      "auc": 0.861894376946222
+    },
+    "Random Forest": {
+      "accuracy": 0.6203364668715461,
+      "precision": 0.10747018610769662,
+      "recall": 0.899873600577826,
+      "f1": 0.19200909283553913,
+      "f2": 0.36363702698364053,
+      "auc": 0.8712141180359901
+    },
+    "Neural Network": {
+      "accuracy": 0.9568983855564558,
+      "precision": 0.7013222711952295,
+      "recall": 0.24422174070061395,
+      "f1": 0.3622848724301882,
+      "f2": 0.2808288864433878,
+      "auc": 0.8747753829020086
+    },
+    "XGBoost": {
+      "accuracy": 0.9557623459444291,
+      "precision": 0.6631578947368421,
+      "recall": 0.23889490790899243,
+      "f1": 0.35125448028673834,
+      "f2": 0.2739470741707044,
+      "auc": 0.8459368559003257
+    },
+    "Stacking Ensemble": {
+      "accuracy": 0.8972857252775602,
+      "precision": 0.26401527461813457,
+      "recall": 0.5867641747923438,
+      "f1": 0.36417124285554187,
+      "f2": 0.4714886825304701,
+      "auc": 0.873078448402797
+    }
+  },
+  "ensemble_strategy": {
+    "type": "weighted_combination",
+    "recall_models": [
+      "lr",
+      "rf"
+    ],
+    "precision_models": [
+      "nn",
+      "xgb"
+    ],
+    "weights": {
+      "recall": 0.7,
+      "precision": 0.3
+    }
+  }
+}

nn_model.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:65ebd33b46bd12783184d6d642a7631dfc9f61cb303414e2c8ad309674b7f60d
+size 386

nn_pytorch_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:572f83d79864361ca3ac5b3b439cb0e481a205f7ae3055b0dda59d38257ab1d3
+size 3030263

rf_model.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:de9195317c46710a57f404819c8a78b8f44480c000ce14b65889c13497725cc8
+size 12828131151

scaler.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:df66bdc67d7431549861e43a489f428080486978f2c60b2e55938f8473ef94b5
+size 3287

xgb_model.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1fde160c9af8c40cb0047bcfef31e224a84a212d6164bea1639969a47077685e
+size 8870203