Upload folder using huggingface_hub
Browse files
- README.md +89 -0
- ensemble_model.pkl +3 -0
- lr_model.pkl +3 -0
- metadata.json +117 -0
- nn_model.pkl +3 -0
- nn_pytorch_state.pth +3 -0
- rf_model.pkl +3 -0
- scaler.pkl +3 -0
- xgb_model.pkl +3 -0
README.md
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
tags:
|
| 3 |
+
- fraud-detection
|
| 4 |
+
- ensemble-learning
|
| 5 |
+
- e-commerce
|
| 6 |
+
- imbalanced-data
|
| 7 |
+
license: mit
|
| 8 |
+
metrics:
|
| 9 |
+
- accuracy
|
| 10 |
+
- precision
|
| 11 |
+
- recall
|
| 12 |
+
- f1
|
| 13 |
+
- auc
|
| 14 |
+
---
|
| 15 |
+
|
| 16 |
+
# E-Commerce Fraud Detection Model
|
| 17 |
+
|
| 18 |
+
## Model Description
|
| 19 |
+
|
| 20 |
+
This is an ensemble fraud detection system trained on 1.47M e-commerce transactions with a 5.01% fraud rate.
|
| 21 |
+
|
| 22 |
+
### Architecture
|
| 23 |
+
|
| 24 |
+
**Weighted Ensemble Strategy (70%-30%)**
|
| 25 |
+
- **Stage 1 - Recall Specialists (70% weight):** Logistic Regression + Random Forest
|
| 26 |
+
- **Stage 2 - Precision Specialists (30% weight):** Neural Network + XGBoost
|
| 27 |
+
|
| 28 |
+
### Performance Metrics
|
| 29 |
+
|
| 30 |
+
| Model | Accuracy | Precision | Recall | F1-Score | AUC-ROC |
|
| 31 |
+
|-------|----------|-----------|--------|----------|---------|
|
| 32 |
+
| Logistic Regression | 0.5723 | 0.0988 | 0.9273 | 0.1786 | 0.8619 |
|
| 33 |
+
| Random Forest | 0.6203 | 0.1075 | 0.8999 | 0.1920 | 0.8712 |
|
| 34 |
+
| Neural Network | 0.9569 | 0.7013 | 0.2442 | 0.3623 | 0.8748 |
|
| 35 |
+
| XGBoost | 0.9558 | 0.6632 | 0.2389 | 0.3513 | 0.8459 |
|
| 36 |
+
| Stacking Ensemble | 0.8973 | 0.2640 | 0.5868 | 0.3642 | 0.8731 |
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
### Key Features
|
| 40 |
+
|
| 41 |
+
- **52 input features** (raw, encoded, and engineered) including:
|
| 42 |
+
- Transaction patterns (amount, quantity, frequency)
|
| 43 |
+
- Customer behavior (account age, transaction history)
|
| 44 |
+
- Temporal features (time-based patterns)
|
| 45 |
+
- Risk indicators (unusual patterns, high-value flags)
|
| 46 |
+
- Interaction features (multi-dimensional risk signals)
|
| 47 |
+
|
| 48 |
+
### Training
|
| 49 |
+
|
| 50 |
+
- **Resampling:** ADASYN (1:1 balance)
|
| 51 |
+
- **GPU Acceleration:** RAPIDS cuML, PyTorch, XGBoost
|
| 52 |
+
- **Threshold Optimization:** Decision threshold selected by maximizing the F-beta score
|
| 53 |
+
- **Validation:** Stratified K-Fold Cross-Validation
|
| 54 |
+
|
| 55 |
+
### Usage
|
| 56 |
+
|
| 57 |
+
```python
|
| 58 |
+
import joblib
|
| 59 |
+
import numpy as np
|
| 60 |
+
|
| 61 |
+
# Load models
|
| 62 |
+
lr_model = joblib.load("lr_model.pkl")
|
| 63 |
+
rf_model = joblib.load("rf_model.pkl")
|
| 64 |
+
nn_model = joblib.load("nn_model.pkl")
|
| 65 |
+
xgb_model = joblib.load("xgb_model.pkl")
|
| 66 |
+
ensemble_model = joblib.load("ensemble_model.pkl")
|
| 67 |
+
scaler = joblib.load("scaler.pkl")
|
| 68 |
+
|
| 69 |
+
# Prepare your data (the same 52 features used in training; see "features" in metadata.json)
|
| 70 |
+
X = ... # Your transaction data
|
| 71 |
+
|
| 72 |
+
# Scale
|
| 73 |
+
X_scaled = scaler.transform(X)
|
| 74 |
+
|
| 75 |
+
# Predict with ensemble
|
| 76 |
+
fraud_proba = ensemble_model.predict_proba(X_scaled)[:, 1]
|
| 77 |
+
fraud_pred = ensemble_model.predict(X_scaled)
|
| 78 |
+
```
|
| 79 |
+
|
| 80 |
+
### License
|
| 81 |
+
|
| 82 |
+
MIT License
|
| 83 |
+
|
| 84 |
+
### Contact
|
| 85 |
+
|
| 86 |
+
COMPSCI 4AL3 - Group 34
|
| 87 |
+
|
| 88 |
+
Viransh Shah (shahv47@mcmaster.ca)
|
| 89 |
+
Ellen Xiong (xionge1@mcmaster.ca)
|
ensemble_model.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9dcd960fce8d4953dae330162a854d23c24b10bb6c8f5f6e8d5eced636ac1dbe
|
| 3 |
+
size 12840038323
|
lr_model.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2ebdb69fcc3b77ade500f4257e4fe6a0af37dcbadc72e869e41c7e2c05628574
|
| 3 |
+
size 3012
|
metadata.json
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"version": "v1",
|
| 3 |
+
"dataset_size": "1.47M transactions",
|
| 4 |
+
"fraud_rate": "5.01%",
|
| 5 |
+
"features": [
|
| 6 |
+
"Transaction Amount",
|
| 7 |
+
"Quantity",
|
| 8 |
+
"Customer Age",
|
| 9 |
+
"Account Age Days",
|
| 10 |
+
"Transaction Hour",
|
| 11 |
+
"Total Customer Transactions",
|
| 12 |
+
"Address Mismatch",
|
| 13 |
+
"Day of Week",
|
| 14 |
+
"Month",
|
| 15 |
+
"Is Weekend",
|
| 16 |
+
"New Account",
|
| 17 |
+
"Transaction Amount Ratio",
|
| 18 |
+
"Avg Daily Transaction Velocity",
|
| 19 |
+
"Time Since Last Transaction",
|
| 20 |
+
"Amount Deviation From History",
|
| 21 |
+
"Product Category Diversity",
|
| 22 |
+
"Amount Log",
|
| 23 |
+
"Amount per Quantity",
|
| 24 |
+
"Amount zscore",
|
| 25 |
+
"Account Age Weeks",
|
| 26 |
+
"Quantity Log",
|
| 27 |
+
"High Amount Flag",
|
| 28 |
+
"High Quantity Flag",
|
| 29 |
+
"Unusual Hour Flag",
|
| 30 |
+
"Amount Age Interaction",
|
| 31 |
+
"Amount Velocity Interaction",
|
| 32 |
+
"New Account High Value",
|
| 33 |
+
"Weekend High Value",
|
| 34 |
+
"High Risk Profile",
|
| 35 |
+
"Velocity Deviation",
|
| 36 |
+
"Suspicious Pattern",
|
| 37 |
+
"Customer Location",
|
| 38 |
+
"Payment Method_bank transfer",
|
| 39 |
+
"Payment Method_credit card",
|
| 40 |
+
"Payment Method_debit card",
|
| 41 |
+
"Product Category_electronics",
|
| 42 |
+
"Product Category_health & beauty",
|
| 43 |
+
"Product Category_home & garden",
|
| 44 |
+
"Product Category_toys & games",
|
| 45 |
+
"Device Used_mobile",
|
| 46 |
+
"Device Used_tablet",
|
| 47 |
+
"Hour Bin_Evening",
|
| 48 |
+
"Hour Bin_Morning",
|
| 49 |
+
"Hour Bin_Night",
|
| 50 |
+
"Age Category_Elder",
|
| 51 |
+
"Age Category_Senior",
|
| 52 |
+
"Age Category_Young",
|
| 53 |
+
"Age Category_Young_Adult",
|
| 54 |
+
"Transaction Size_Medium",
|
| 55 |
+
"Transaction Size_Small",
|
| 56 |
+
"Transaction Size_Very_Large",
|
| 57 |
+
"Transaction Size_Very_Small"
|
| 58 |
+
],
|
| 59 |
+
"num_features": 52,
|
| 60 |
+
"models": {
|
| 61 |
+
"Logistic Regression": {
|
| 62 |
+
"accuracy": 0.5723150314787072,
|
| 63 |
+
"precision": 0.09879761446710274,
|
| 64 |
+
"recall": 0.9273203322499097,
|
| 65 |
+
"f1": 0.17857018672415592,
|
| 66 |
+
"f2": 0.3463753844493606,
|
| 67 |
+
"auc": 0.861894376946222
|
| 68 |
+
},
|
| 69 |
+
"Random Forest": {
|
| 70 |
+
"accuracy": 0.6203364668715461,
|
| 71 |
+
"precision": 0.10747018610769662,
|
| 72 |
+
"recall": 0.899873600577826,
|
| 73 |
+
"f1": 0.19200909283553913,
|
| 74 |
+
"f2": 0.36363702698364053,
|
| 75 |
+
"auc": 0.8712141180359901
|
| 76 |
+
},
|
| 77 |
+
"Neural Network": {
|
| 78 |
+
"accuracy": 0.9568983855564558,
|
| 79 |
+
"precision": 0.7013222711952295,
|
| 80 |
+
"recall": 0.24422174070061395,
|
| 81 |
+
"f1": 0.3622848724301882,
|
| 82 |
+
"f2": 0.2808288864433878,
|
| 83 |
+
"auc": 0.8747753829020086
|
| 84 |
+
},
|
| 85 |
+
"XGBoost": {
|
| 86 |
+
"accuracy": 0.9557623459444291,
|
| 87 |
+
"precision": 0.6631578947368421,
|
| 88 |
+
"recall": 0.23889490790899243,
|
| 89 |
+
"f1": 0.35125448028673834,
|
| 90 |
+
"f2": 0.2739470741707044,
|
| 91 |
+
"auc": 0.8459368559003257
|
| 92 |
+
},
|
| 93 |
+
"Stacking Ensemble": {
|
| 94 |
+
"accuracy": 0.8972857252775602,
|
| 95 |
+
"precision": 0.26401527461813457,
|
| 96 |
+
"recall": 0.5867641747923438,
|
| 97 |
+
"f1": 0.36417124285554187,
|
| 98 |
+
"f2": 0.4714886825304701,
|
| 99 |
+
"auc": 0.873078448402797
|
| 100 |
+
}
|
| 101 |
+
},
|
| 102 |
+
"ensemble_strategy": {
|
| 103 |
+
"type": "weighted_combination",
|
| 104 |
+
"recall_models": [
|
| 105 |
+
"lr",
|
| 106 |
+
"rf"
|
| 107 |
+
],
|
| 108 |
+
"precision_models": [
|
| 109 |
+
"nn",
|
| 110 |
+
"xgb"
|
| 111 |
+
],
|
| 112 |
+
"weights": {
|
| 113 |
+
"recall": 0.7,
|
| 114 |
+
"precision": 0.3
|
| 115 |
+
}
|
| 116 |
+
}
|
| 117 |
+
}
|
nn_model.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:65ebd33b46bd12783184d6d642a7631dfc9f61cb303414e2c8ad309674b7f60d
|
| 3 |
+
size 386
|
nn_pytorch_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:572f83d79864361ca3ac5b3b439cb0e481a205f7ae3055b0dda59d38257ab1d3
|
| 3 |
+
size 3030263
|
rf_model.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:de9195317c46710a57f404819c8a78b8f44480c000ce14b65889c13497725cc8
|
| 3 |
+
size 12828131151
|
scaler.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:df66bdc67d7431549861e43a489f428080486978f2c60b2e55938f8473ef94b5
|
| 3 |
+
size 3287
|
xgb_model.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1fde160c9af8c40cb0047bcfef31e224a84a212d6164bea1639969a47077685e
|
| 3 |
+
size 8870203
|