shahviransh commited on
Commit
719445e
·
verified ·
1 Parent(s): 383ef4d

Upload folder using huggingface_hub

Browse files
Files changed (9) hide show
  1. README.md +89 -0
  2. ensemble_model.pkl +3 -0
  3. lr_model.pkl +3 -0
  4. metadata.json +117 -0
  5. nn_model.pkl +3 -0
  6. nn_pytorch_state.pth +3 -0
  7. rf_model.pkl +3 -0
  8. scaler.pkl +3 -0
  9. xgb_model.pkl +3 -0
README.md ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - fraud-detection
4
+ - ensemble-learning
5
+ - e-commerce
6
+ - imbalanced-data
7
+ license: mit
8
+ metrics:
9
+ - accuracy
10
+ - precision
11
+ - recall
12
+ - f1
13
+ - auc
14
+ ---
15
+
16
+ # E-Commerce Fraud Detection Model
17
+
18
+ ## Model Description
19
+
20
+ This is an ensemble fraud detection system trained on 1.47M e-commerce transactions with a 5.01% fraud rate.
21
+
22
+ ### Architecture
23
+
24
+ **Weighted Ensemble Strategy (70%-30%)**
25
+ - **Stage 1 - Recall Specialists (70% weight):** Logistic Regression + Random Forest
26
+ - **Stage 2 - Precision Specialists (30% weight):** Neural Network + XGBoost
27
+
28
+ ### Performance Metrics
29
+
30
+ | Model | Accuracy | Precision | Recall | F1-Score | AUC-ROC |
31
+ |-------|----------|-----------|--------|----------|---------|
32
+ | Logistic Regression | 0.5723 | 0.0988 | 0.9273 | 0.1786 | 0.8619 |
33
+ | Random Forest | 0.6203 | 0.1075 | 0.8999 | 0.1920 | 0.8712 |
34
+ | Neural Network | 0.9569 | 0.7013 | 0.2442 | 0.3623 | 0.8748 |
35
+ | XGBoost | 0.9558 | 0.6632 | 0.2389 | 0.3513 | 0.8459 |
36
+ | Stacking Ensemble | 0.8973 | 0.2640 | 0.5868 | 0.3642 | 0.8731 |
37
+
38
+
39
+ ### Key Features
40
+
41
+ - **52 engineered features** including:
42
+ - Transaction patterns (amount, quantity, frequency)
43
+ - Customer behavior (account age, transaction history)
44
+ - Temporal features (time-based patterns)
45
+ - Risk indicators (unusual patterns, high-value flags)
46
+ - Interaction features (multi-dimensional risk signals)
47
+
48
+ ### Training
49
+
50
+ - **Resampling:** ADASYN (1:1 balance)
51
+ - **GPU Acceleration:** RAPIDS cuML, PyTorch, XGBoost
52
+ - **Threshold Optimization:** F-beta score optimization
53
+ - **Validation:** Stratified K-Fold Cross-Validation
54
+
55
+ ### Usage
56
+
57
+ ```python
58
+ import joblib
59
+ import numpy as np
60
+
61
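+ # Load models (joblib.load unpickles the file, which can execute arbitrary code; only load files from a source you trust)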
62
+ lr_model = joblib.load("lr_model.pkl")
63
+ rf_model = joblib.load("rf_model.pkl")
64
+ nn_model = joblib.load("nn_model.pkl")
65
+ xgb_model = joblib.load("xgb_model.pkl")
66
+ ensemble_model = joblib.load("ensemble_model.pkl")
67
+ scaler = joblib.load("scaler.pkl")
68
+
69
+ # Prepare your data with the same 52 features used in training (see the "features" list in metadata.json)
70
+ X = ... # Your transaction data
71
+
72
+ # Scale
73
+ X_scaled = scaler.transform(X)
74
+
75
+ # Predict with ensemble
76
+ fraud_proba = ensemble_model.predict_proba(X_scaled)[:, 1]
77
+ fraud_pred = ensemble_model.predict(X_scaled)
78
+ ```
79
+
80
+ ### License
81
+
82
+ MIT License
83
+
84
+ ### Contact
85
+
86
+ COMPSCI 4AL3 - Group 34
87
+
88
+ - Viransh Shah (shahv47@mcmaster.ca)
89
+ - Ellen Xiong (xionge1@mcmaster.ca)
ensemble_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9dcd960fce8d4953dae330162a854d23c24b10bb6c8f5f6e8d5eced636ac1dbe
3
+ size 12840038323
lr_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ebdb69fcc3b77ade500f4257e4fe6a0af37dcbadc72e869e41c7e2c05628574
3
+ size 3012
metadata.json ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "v1",
3
+ "dataset_size": "1.47M transactions",
4
+ "fraud_rate": "5.01%",
5
+ "features": [
6
+ "Transaction Amount",
7
+ "Quantity",
8
+ "Customer Age",
9
+ "Account Age Days",
10
+ "Transaction Hour",
11
+ "Total Customer Transactions",
12
+ "Address Mismatch",
13
+ "Day of Week",
14
+ "Month",
15
+ "Is Weekend",
16
+ "New Account",
17
+ "Transaction Amount Ratio",
18
+ "Avg Daily Transaction Velocity",
19
+ "Time Since Last Transaction",
20
+ "Amount Deviation From History",
21
+ "Product Category Diversity",
22
+ "Amount Log",
23
+ "Amount per Quantity",
24
+ "Amount zscore",
25
+ "Account Age Weeks",
26
+ "Quantity Log",
27
+ "High Amount Flag",
28
+ "High Quantity Flag",
29
+ "Unusual Hour Flag",
30
+ "Amount Age Interaction",
31
+ "Amount Velocity Interaction",
32
+ "New Account High Value",
33
+ "Weekend High Value",
34
+ "High Risk Profile",
35
+ "Velocity Deviation",
36
+ "Suspicious Pattern",
37
+ "Customer Location",
38
+ "Payment Method_bank transfer",
39
+ "Payment Method_credit card",
40
+ "Payment Method_debit card",
41
+ "Product Category_electronics",
42
+ "Product Category_health & beauty",
43
+ "Product Category_home & garden",
44
+ "Product Category_toys & games",
45
+ "Device Used_mobile",
46
+ "Device Used_tablet",
47
+ "Hour Bin_Evening",
48
+ "Hour Bin_Morning",
49
+ "Hour Bin_Night",
50
+ "Age Category_Elder",
51
+ "Age Category_Senior",
52
+ "Age Category_Young",
53
+ "Age Category_Young_Adult",
54
+ "Transaction Size_Medium",
55
+ "Transaction Size_Small",
56
+ "Transaction Size_Very_Large",
57
+ "Transaction Size_Very_Small"
58
+ ],
59
+ "num_features": 52,
60
+ "models": {
61
+ "Logistic Regression": {
62
+ "accuracy": 0.5723150314787072,
63
+ "precision": 0.09879761446710274,
64
+ "recall": 0.9273203322499097,
65
+ "f1": 0.17857018672415592,
66
+ "f2": 0.3463753844493606,
67
+ "auc": 0.861894376946222
68
+ },
69
+ "Random Forest": {
70
+ "accuracy": 0.6203364668715461,
71
+ "precision": 0.10747018610769662,
72
+ "recall": 0.899873600577826,
73
+ "f1": 0.19200909283553913,
74
+ "f2": 0.36363702698364053,
75
+ "auc": 0.8712141180359901
76
+ },
77
+ "Neural Network": {
78
+ "accuracy": 0.9568983855564558,
79
+ "precision": 0.7013222711952295,
80
+ "recall": 0.24422174070061395,
81
+ "f1": 0.3622848724301882,
82
+ "f2": 0.2808288864433878,
83
+ "auc": 0.8747753829020086
84
+ },
85
+ "XGBoost": {
86
+ "accuracy": 0.9557623459444291,
87
+ "precision": 0.6631578947368421,
88
+ "recall": 0.23889490790899243,
89
+ "f1": 0.35125448028673834,
90
+ "f2": 0.2739470741707044,
91
+ "auc": 0.8459368559003257
92
+ },
93
+ "Stacking Ensemble": {
94
+ "accuracy": 0.8972857252775602,
95
+ "precision": 0.26401527461813457,
96
+ "recall": 0.5867641747923438,
97
+ "f1": 0.36417124285554187,
98
+ "f2": 0.4714886825304701,
99
+ "auc": 0.873078448402797
100
+ }
101
+ },
102
+ "ensemble_strategy": {
103
+ "type": "weighted_combination",
104
+ "recall_models": [
105
+ "lr",
106
+ "rf"
107
+ ],
108
+ "precision_models": [
109
+ "nn",
110
+ "xgb"
111
+ ],
112
+ "weights": {
113
+ "recall": 0.7,
114
+ "precision": 0.3
115
+ }
116
+ }
117
+ }
nn_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65ebd33b46bd12783184d6d642a7631dfc9f61cb303414e2c8ad309674b7f60d
3
+ size 386
nn_pytorch_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:572f83d79864361ca3ac5b3b439cb0e481a205f7ae3055b0dda59d38257ab1d3
3
+ size 3030263
rf_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de9195317c46710a57f404819c8a78b8f44480c000ce14b65889c13497725cc8
3
+ size 12828131151
scaler.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df66bdc67d7431549861e43a489f428080486978f2c60b2e55938f8473ef94b5
3
+ size 3287
xgb_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fde160c9af8c40cb0047bcfef31e224a84a212d6164bea1639969a47077685e
3
+ size 8870203