Commit ·
c302dd6
0
Parent(s):
Initial commit: AI Powered Transaction Fraud Detection System
Browse files. This view is limited to 50 files because the commit contains too many changes; see the raw diff for the full change set.
- .gitignore +22 -0
- README.md +269 -0
- app.py +306 -0
- docker-compose.yml +27 -0
- drift/adapter.py +0 -0
- drift/detector.py +61 -0
- fraud_detection.ipynb +0 -0
- graph_models/data_loader.py +38 -0
- graph_models/gnn_model.py +45 -0
- graph_models/train_gnn.py +35 -0
- mlruns/0/meta.yaml +6 -0
- mlruns/588265755531758591/0311fe8dff3e46ee9d5c6c013e0580bc/meta.yaml +15 -0
- mlruns/588265755531758591/0311fe8dff3e46ee9d5c6c013e0580bc/metrics/roc_auc +1 -0
- mlruns/588265755531758591/0311fe8dff3e46ee9d5c6c013e0580bc/tags/mlflow.log-model.history +1 -0
- mlruns/588265755531758591/0311fe8dff3e46ee9d5c6c013e0580bc/tags/mlflow.runName +1 -0
- mlruns/588265755531758591/0311fe8dff3e46ee9d5c6c013e0580bc/tags/mlflow.source.name +1 -0
- mlruns/588265755531758591/0311fe8dff3e46ee9d5c6c013e0580bc/tags/mlflow.source.type +1 -0
- mlruns/588265755531758591/0311fe8dff3e46ee9d5c6c013e0580bc/tags/mlflow.user +1 -0
- mlruns/588265755531758591/04c33c5f043e4977b3b7a930580a2dcb/meta.yaml +15 -0
- mlruns/588265755531758591/04c33c5f043e4977b3b7a930580a2dcb/metrics/roc_auc +1 -0
- mlruns/588265755531758591/04c33c5f043e4977b3b7a930580a2dcb/tags/mlflow.log-model.history +1 -0
- mlruns/588265755531758591/04c33c5f043e4977b3b7a930580a2dcb/tags/mlflow.runName +1 -0
- mlruns/588265755531758591/04c33c5f043e4977b3b7a930580a2dcb/tags/mlflow.source.name +1 -0
- mlruns/588265755531758591/04c33c5f043e4977b3b7a930580a2dcb/tags/mlflow.source.type +1 -0
- mlruns/588265755531758591/04c33c5f043e4977b3b7a930580a2dcb/tags/mlflow.user +1 -0
- mlruns/588265755531758591/0ceb1e53b25f47d0b62d41bdb664e954/meta.yaml +15 -0
- mlruns/588265755531758591/0ceb1e53b25f47d0b62d41bdb664e954/metrics/roc_auc +1 -0
- mlruns/588265755531758591/0ceb1e53b25f47d0b62d41bdb664e954/tags/mlflow.log-model.history +1 -0
- mlruns/588265755531758591/0ceb1e53b25f47d0b62d41bdb664e954/tags/mlflow.runName +1 -0
- mlruns/588265755531758591/0ceb1e53b25f47d0b62d41bdb664e954/tags/mlflow.source.name +1 -0
- mlruns/588265755531758591/0ceb1e53b25f47d0b62d41bdb664e954/tags/mlflow.source.type +1 -0
- mlruns/588265755531758591/0ceb1e53b25f47d0b62d41bdb664e954/tags/mlflow.user +1 -0
- mlruns/588265755531758591/0d42202ebbf14102b9771574825528e2/meta.yaml +15 -0
- mlruns/588265755531758591/0d42202ebbf14102b9771574825528e2/metrics/roc_auc +1 -0
- mlruns/588265755531758591/0d42202ebbf14102b9771574825528e2/tags/mlflow.log-model.history +1 -0
- mlruns/588265755531758591/0d42202ebbf14102b9771574825528e2/tags/mlflow.runName +1 -0
- mlruns/588265755531758591/0d42202ebbf14102b9771574825528e2/tags/mlflow.source.name +1 -0
- mlruns/588265755531758591/0d42202ebbf14102b9771574825528e2/tags/mlflow.source.type +1 -0
- mlruns/588265755531758591/0d42202ebbf14102b9771574825528e2/tags/mlflow.user +1 -0
- mlruns/588265755531758591/12c38a58707142b49abf42946712e666/meta.yaml +15 -0
- mlruns/588265755531758591/12c38a58707142b49abf42946712e666/metrics/roc_auc +1 -0
- mlruns/588265755531758591/12c38a58707142b49abf42946712e666/tags/mlflow.log-model.history +1 -0
- mlruns/588265755531758591/12c38a58707142b49abf42946712e666/tags/mlflow.runName +1 -0
- mlruns/588265755531758591/12c38a58707142b49abf42946712e666/tags/mlflow.source.name +1 -0
- mlruns/588265755531758591/12c38a58707142b49abf42946712e666/tags/mlflow.source.type +1 -0
- mlruns/588265755531758591/12c38a58707142b49abf42946712e666/tags/mlflow.user +1 -0
- mlruns/588265755531758591/159a3dd3b1204e7fa3008a0eb85f5678/meta.yaml +15 -0
- mlruns/588265755531758591/159a3dd3b1204e7fa3008a0eb85f5678/metrics/roc_auc +1 -0
- mlruns/588265755531758591/159a3dd3b1204e7fa3008a0eb85f5678/tags/mlflow.log-model.history +1 -0
- mlruns/588265755531758591/159a3dd3b1204e7fa3008a0eb85f5678/tags/mlflow.runName +1 -0
.gitignore
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.pyc
|
| 4 |
+
*.pyo
|
| 5 |
+
*.pyd
|
| 6 |
+
.env
|
| 7 |
+
venv/
|
| 8 |
+
.env/
|
| 9 |
+
|
| 10 |
+
# ML / Data
|
| 11 |
+
data/
|
| 12 |
+
reports/
|
| 13 |
+
*.csv
|
| 14 |
+
*.log
|
| 15 |
+
|
| 16 |
+
# Model artifacts (optional – keep if asked by reviewer)
|
| 17 |
+
trained_models/*.pkl
|
| 18 |
+
models/*.pt
|
| 19 |
+
|
| 20 |
+
# OS
|
| 21 |
+
.DS_Store
|
| 22 |
+
Thumbs.db
|
README.md
ADDED
|
@@ -0,0 +1,269 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
🛡️ AI-Powered Transaction Fraud Detection System
|
| 2 |
+
📌 Project Overview
|
| 3 |
+
|
| 4 |
+
The AI-Powered Transaction Fraud Detection System is a real-time financial fraud monitoring platform designed to detect, analyze, and report suspicious transactions using Machine Learning, Graph Neural Networks (GNNs), and Explainable AI (SHAP).
|
| 5 |
+
|
| 6 |
+
The system continuously ingests transactions, evaluates fraud risk using multiple models, visualizes insights through an interactive dashboard, and generates Suspicious Activity Reports (SAR) in PDF format.
|
| 7 |
+
|
| 8 |
+
This project follows industry-grade architecture and demonstrates concepts from:
|
| 9 |
+
|
| 10 |
+
Cybersecurity
|
| 11 |
+
|
| 12 |
+
Machine Learning
|
| 13 |
+
|
| 14 |
+
Data Science
|
| 15 |
+
|
| 16 |
+
Web Application Development
|
| 17 |
+
|
| 18 |
+
Model Monitoring & Drift Detection
|
| 19 |
+
|
| 20 |
+
🎯 Key Objectives
|
| 21 |
+
|
| 22 |
+
Detect fraudulent financial transactions in real time
|
| 23 |
+
|
| 24 |
+
Combine multiple ML models for higher accuracy
|
| 25 |
+
|
| 26 |
+
Provide explainability for fraud predictions
|
| 27 |
+
|
| 28 |
+
Visualize risk trends and transaction networks
|
| 29 |
+
|
| 30 |
+
Generate regulatory-ready SAR reports
|
| 31 |
+
|
| 32 |
+
Support continuous model monitoring and improvement
|
| 33 |
+
|
| 34 |
+
🧠 System Architecture
|
| 35 |
+
|
| 36 |
+
Frontend
|
| 37 |
+
|
| 38 |
+
HTML5, CSS3, Bootstrap 5
|
| 39 |
+
|
| 40 |
+
Chart.js (Risk charts & trends)
|
| 41 |
+
|
| 42 |
+
Vis.js (Transaction network graph)
|
| 43 |
+
|
| 44 |
+
JavaScript (Real-time updates)
|
| 45 |
+
|
| 46 |
+
Backend
|
| 47 |
+
|
| 48 |
+
Flask (Python web framework)
|
| 49 |
+
|
| 50 |
+
REST APIs for data exchange
|
| 51 |
+
|
| 52 |
+
Background threads for live transaction simulation
|
| 53 |
+
|
| 54 |
+
Machine Learning
|
| 55 |
+
|
| 56 |
+
Isolation Forest (Anomaly Detection)
|
| 57 |
+
|
| 58 |
+
XGBoost (Supervised Fraud Classification)
|
| 59 |
+
|
| 60 |
+
Graph Neural Network (Relationship-based fraud detection)
|
| 61 |
+
|
| 62 |
+
SHAP (Explainable AI)
|
| 63 |
+
|
| 64 |
+
Other Components
|
| 65 |
+
|
| 66 |
+
Concept Drift Detection
|
| 67 |
+
|
| 68 |
+
AutoML-based retraining
|
| 69 |
+
|
| 70 |
+
SAR PDF generation using ReportLab
|
| 71 |
+
|
| 72 |
+
🧩 Core Features
|
| 73 |
+
🔹 Real-Time Transaction Monitoring
|
| 74 |
+
|
| 75 |
+
Live transaction feed
|
| 76 |
+
|
| 77 |
+
Automatic refresh every few seconds
|
| 78 |
+
|
| 79 |
+
Risk-based color coding
|
| 80 |
+
|
| 81 |
+
🔹 Fraud Detection Models
|
| 82 |
+
|
| 83 |
+
Isolation Forest – Detects anomalies
|
| 84 |
+
|
| 85 |
+
XGBoost – Predicts fraud probability
|
| 86 |
+
|
| 87 |
+
GNN – Detects suspicious account-merchant-device relationships
|
| 88 |
+
|
| 89 |
+
🔹 Composite Risk Scoring
|
| 90 |
+
|
| 91 |
+
A weighted risk score combining:
|
| 92 |
+
|
| 93 |
+
Isolation Forest score
|
| 94 |
+
|
| 95 |
+
XGBoost probability
|
| 96 |
+
|
| 97 |
+
GNN probability
|
| 98 |
+
|
| 99 |
+
Customer risk profile
|
| 100 |
+
|
| 101 |
+
🔹 Explainable AI (SHAP)
|
| 102 |
+
|
| 103 |
+
Displays top contributing risk features
|
| 104 |
+
|
| 105 |
+
Improves transparency and trust
|
| 106 |
+
|
| 107 |
+
Helps analysts understand why a transaction is flagged
|
| 108 |
+
|
| 109 |
+
🔹 Risk Visualization Dashboard
|
| 110 |
+
|
| 111 |
+
Risk distribution (Low / Medium / High)
|
| 112 |
+
|
| 113 |
+
Average risk trends
|
| 114 |
+
|
| 115 |
+
Top risk indicators
|
| 116 |
+
|
| 117 |
+
Interactive transaction table
|
| 118 |
+
|
| 119 |
+
🔹 Transaction Network Graph
|
| 120 |
+
|
| 121 |
+
Visualizes relationships between:
|
| 122 |
+
|
| 123 |
+
Accounts
|
| 124 |
+
|
| 125 |
+
Merchants
|
| 126 |
+
|
| 127 |
+
Devices
|
| 128 |
+
|
| 129 |
+
Helps identify fraud rings and suspicious behavior
|
| 130 |
+
|
| 131 |
+
🔹 Suspicious Activity Report (SAR)
|
| 132 |
+
|
| 133 |
+
One-click SAR generation
|
| 134 |
+
|
| 135 |
+
Automatically includes high-risk transactions
|
| 136 |
+
|
| 137 |
+
Downloadable PDF report
|
| 138 |
+
|
| 139 |
+
🔹 Concept Drift Detection
|
| 140 |
+
|
| 141 |
+
Monitors data distribution changes
|
| 142 |
+
|
| 143 |
+
Flags model drift risks
|
| 144 |
+
|
| 145 |
+
Supports long-term model reliability
|
| 146 |
+
|
| 147 |
+
📁 Project Directory Structure
|
| 148 |
+
|
| 149 |
+
AI-Powered-Transaction-Fraud-Detection-System/
|
| 150 |
+
│
|
| 151 |
+
├── app.py # Flask backend
|
| 152 |
+
├── templates/
|
| 153 |
+
│ └── dashboard.html # Frontend dashboard
|
| 154 |
+
│
|
| 155 |
+
├── trained_models/
|
| 156 |
+
│ ├── isolation_forest.pkl
|
| 157 |
+
│ ├── xgboost.pkl
|
| 158 |
+
│ └── shap_explainer.pkl
|
| 159 |
+
│
|
| 160 |
+
├── graph_models/
|
| 161 |
+
│ ├── gnn_model.py
|
| 162 |
+
│ └── data_loader.py
|
| 163 |
+
│
|
| 164 |
+
├── models/
|
| 165 |
+
│ └── automl/
|
| 166 |
+
│ └── trainer.py
|
| 167 |
+
│
|
| 168 |
+
├── drift/
|
| 169 |
+
│ └── detector.py
|
| 170 |
+
│
|
| 171 |
+
├── profiling/
|
| 172 |
+
│ └── builder.py
|
| 173 |
+
│
|
| 174 |
+
├── reporting/
|
| 175 |
+
│ └── generator.py
|
| 176 |
+
│
|
| 177 |
+
├── data/
|
| 178 |
+
│ └── bank_transactions_data_2.csv
|
| 179 |
+
│
|
| 180 |
+
└── README.md
|
| 181 |
+
⚙️ Installation & Setup (Local Execution)
|
| 182 |
+
|
| 183 |
+
1️⃣ Create Virtual Environment
|
| 184 |
+
python -m venv venv
|
| 185 |
+
venv\Scripts\activate      # Windows
source venv/bin/activate   # macOS / Linux
|
| 186 |
+
|
| 187 |
+
2️⃣ Install Dependencies
|
| 188 |
+
pip install -r requirements.txt
|
| 189 |
+
|
| 190 |
+
3️⃣ Run the Application
|
| 191 |
+
python app.py
|
| 192 |
+
|
| 193 |
+
4️⃣ Access the Dashboard
|
| 194 |
+
|
| 195 |
+
Open your browser and visit:
|
| 196 |
+
|
| 197 |
+
http://127.0.0.1:5000
|
| 198 |
+
|
| 199 |
+
🧪 How the System Works (Execution Flow)
|
| 200 |
+
|
| 201 |
+
Dummy or real transactions are generated
|
| 202 |
+
|
| 203 |
+
Data is sent to backend APIs
|
| 204 |
+
|
| 205 |
+
ML models compute fraud risk
|
| 206 |
+
|
| 207 |
+
SHAP explains model decisions
|
| 208 |
+
|
| 209 |
+
Dashboard updates in real time
|
| 210 |
+
|
| 211 |
+
High-risk transactions trigger SAR reports
|
| 212 |
+
|
| 213 |
+
📊 APIs Overview
|
| 214 |
+
Endpoint Method Description
|
| 215 |
+
/api/transactions GET Fetch recent transactions
|
| 216 |
+
/api/analyze POST Analyze a transaction
|
| 217 |
+
/api/reports/sar POST Generate SAR PDF
|
| 218 |
+
/api/drift/status GET Concept drift status
|
| 219 |
+
🔒 Security Considerations
|
| 220 |
+
|
| 221 |
+
Backend APIs are modular and extendable
|
| 222 |
+
|
| 223 |
+
Can be integrated with authentication systems
|
| 224 |
+
|
| 225 |
+
Ready for production-grade deployment
|
| 226 |
+
|
| 227 |
+
🚀 Future Enhancements
|
| 228 |
+
|
| 229 |
+
User authentication & role-based access
|
| 230 |
+
|
| 231 |
+
Database integration (PostgreSQL / MongoDB)
|
| 232 |
+
|
| 233 |
+
Real banking transaction feeds
|
| 234 |
+
|
| 235 |
+
Advanced fraud pattern learning
|
| 236 |
+
|
| 237 |
+
Cloud deployment (AWS / Azure)
|
| 238 |
+
|
| 239 |
+
SOC-style alerting system
|
| 240 |
+
|
| 241 |
+
🎓 Academic Relevance
|
| 242 |
+
|
| 243 |
+
This project demonstrates:
|
| 244 |
+
|
| 245 |
+
Applied Machine Learning
|
| 246 |
+
|
| 247 |
+
Cybersecurity analytics
|
| 248 |
+
|
| 249 |
+
Explainable AI
|
| 250 |
+
|
| 251 |
+
Full-stack development
|
| 252 |
+
|
| 253 |
+
Real-time monitoring systems
|
| 254 |
+
|
| 255 |
+
Suitable for:
|
| 256 |
+
|
| 257 |
+
Major Project
|
| 258 |
+
|
| 259 |
+
Final Year Project
|
| 260 |
+
|
| 261 |
+
Capstone Project
|
| 262 |
+
|
| 263 |
+
Research-oriented submissions
|
| 264 |
+
|
| 265 |
+
👤 Author
|
| 266 |
+
|
| 267 |
+
Saheel Yadav
|
| 268 |
+
B.Tech – Computer Science Engineering
|
| 269 |
+
Specialization: Cybersecurity & AI
|
app.py
ADDED
|
@@ -0,0 +1,306 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from flask import Flask, render_template, request, jsonify, send_file
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import joblib
|
| 4 |
+
import numpy as np
|
| 5 |
+
import io
|
| 6 |
+
from reportlab.lib.pagesizes import A4
|
| 7 |
+
from reportlab.pdfgen import canvas
|
| 8 |
+
from datetime import datetime
|
| 9 |
+
import torch
|
| 10 |
+
import mlflow
|
| 11 |
+
import threading
|
| 12 |
+
import time
|
| 13 |
+
from graph_models.gnn_model import load_gnn_model
|
| 14 |
+
from graph_models.data_loader import TransactionGraphBuilder
|
| 15 |
+
from reporting.generator import ReportGenerator
|
| 16 |
+
from profiling.builder import CustomerRiskProfiler
|
| 17 |
+
from drift.detector import ConceptDriftDetector
|
| 18 |
+
from models.automl.trainer import AutoMLTrainer
|
| 19 |
+
import os
|
| 20 |
+
import logging
|
| 21 |
+
import random
|
| 22 |
+
|
| 23 |
+
# In-memory feed of recent transactions, newest first (capped at 20 entries by
# the generator loop). The duplicate `import random` that used to sit here has
# been dropped — `random` is already imported at the top of the file.
TRANSACTIONS = []
|
| 25 |
+
|
| 26 |
+
def generate_dummy_transaction():
    """Fabricate one random transaction record for the live demo feed."""
    locations = ["New York, NY", "Chicago, IL", "Miami, FL"]
    record = {}
    record["TransactionID"] = "TX" + str(random.randint(100000, 999999))
    record["AccountID"] = "AC" + str(random.randint(10000, 99999))
    record["TransactionAmount"] = round(random.uniform(10, 5000), 2)
    record["TransactionDate"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    record["TransactionType"] = random.choice(["Debit", "Credit"])
    record["Location"] = random.choice(locations)
    record["RiskScore"] = round(random.uniform(0, 1), 2)
    record["Status"] = random.choice(["Approved", "Flagged", "Pending Review"])
    return record
|
| 39 |
+
def transaction_generator_loop():
    """Endlessly prepend fresh dummy transactions, trimming the feed to 20."""
    while True:
        TRANSACTIONS.insert(0, generate_dummy_transaction())
        # keep only latest 20 transactions
        if len(TRANSACTIONS) > 20:
            TRANSACTIONS.pop()
        # Pause a random 2-5 minutes before the next synthetic transaction.
        time.sleep(random.randint(120, 300))
|
| 49 |
+
|
| 50 |
+
# Start the dummy-transaction producer in the background; daemon=True so the
# thread never blocks interpreter shutdown.
threading.Thread(
    target=transaction_generator_loop,
    daemon=True
).start()
# Preload initial transactions for better UX
for _ in range(5):
    TRANSACTIONS.append(generate_dummy_transaction())
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


app = Flask(__name__)

# Initialize components
# NOTE(review): these loads run at import time; a missing isolation-forest or
# XGBoost artifact aborts startup (only the SHAP explainer is optional below).
iso_forest = joblib.load('trained_models/isolation_forest.pkl')
xgb = joblib.load('trained_models/xgboost.pkl')
# Load SHAP explainer if available (optional)
try:
    shap_explainer = joblib.load('trained_models/shap_explainer.pkl')
except FileNotFoundError:
    shap_explainer = None
    print("SHAP explainer not found. Continuing without explainability.")
gnn_model = load_gnn_model('models/gnn_model.pt')
graph_builder = TransactionGraphBuilder()
report_generator = ReportGenerator()
profiler = CustomerRiskProfiler()
drift_detector = ConceptDriftDetector()

# Feature names
# Used as the DataFrame column order in /api/analyze; presumably must match
# the order the models were trained with — verify against the training code.
features = ['TransactionAmount', 'TransactionDuration', 'LoginAttempts',
            'AccountBalance', 'DaysSinceLastTransaction', 'TransactionSpeed',
            'AvgAmount', 'StdAmount', 'MaxAmount', 'AvgDuration', 'UniqueLocations',
            'AmountDeviation', 'DurationDeviation', 'TransactionType',
            'Location', 'DeviceID', 'MerchantID', 'Channel', 'CustomerOccupation']
|
| 86 |
+
|
| 87 |
+
# Background tasks
|
| 88 |
+
def auto_retrain():
    """Background loop: retrain the AutoML models once per week.

    Sleeps first so startup does not trigger an extra training run on top of
    the initialization-time training performed at import; failures are logged
    and the loop keeps running.
    """
    while True:
        time.sleep(7 * 24 * 60 * 60)  # Run weekly
        try:
            trainer = AutoMLTrainer("data/bank_transactions_data_2.csv")
            best_model, score = trainer.train_models()
            app.logger.info(f"AutoML retraining completed. Best model: {type(best_model).__name__} with score: {score:.4f}")
        except Exception as e:
            app.logger.error(f"AutoML retraining failed: {str(e)}")
|
| 97 |
+
|
| 98 |
+
# Start background thread
retrain_thread = threading.Thread(target=auto_retrain, daemon=True)
retrain_thread.start()


# Initialize AutoML Trainer with proper error handling
# NOTE(review): iso_forest/xgb are joblib.load-ed earlier in this module, so
# if those artifacts are missing the process has already crashed before this
# "train if missing" fallback can run — confirm the intended startup order.
try:
    automl_trainer = AutoMLTrainer("data/bank_transactions_data_2.csv")

    # Check if models exist, if not train initial models
    required_models = ['isolation_forest.pkl', 'xgboost.pkl', 'shap_explainer.pkl']
    if not all(os.path.exists(f"trained_models/{model}") for model in required_models):
        logger.info("Initial models not found, training initial models...")
        automl_trainer.train_models()
except Exception as e:
    # The app cannot serve predictions without a trainer/models, so re-raise.
    logger.error(f"Failed to initialize AutoML trainer: {str(e)}")
    raise
|
| 115 |
+
|
| 116 |
+
@app.route('/')
def dashboard():
    """Serve the main dashboard page (templates/dashboard.html)."""
    return render_template('dashboard.html')
|
| 119 |
+
|
| 120 |
+
@app.route('/api/analyze', methods=['POST'])
def analyze_transaction():
    """Score one transaction with every model and return the risk breakdown.

    Expects a JSON body containing at least: AccountID, TransactionAmount,
    TransactionType, TransactionDate, PreviousTransactionDate,
    TransactionDuration, LoginAttempts, AccountBalance, Location, DeviceID,
    MerchantID, Channel, CustomerOccupation.

    Returns JSON with the isolation-forest score, XGBoost and GNN
    probabilities, a profile-weighted composite score, the top-5 SHAP
    feature contributions (empty when no explainer is loaded), and the
    current drift flag.
    """
    data = request.json

    # Update customer profile
    profiler.update_profile(data['AccountID'], {
        'amount': float(data['TransactionAmount']),
        'type': data['TransactionType'],
        'date': data['TransactionDate']
    })

    # Get customer stats (defaults cover accounts with no history yet)
    cust_profile = profiler.get_risk_profile(data['AccountID'])
    cust_stats = {
        'AvgAmount': cust_profile.get('avg_amount', 150.0),
        'StdAmount': cust_profile.get('std_amount', 75.0),
        'MaxAmount': cust_profile.get('max_amount', 1000.0),
        'AvgDuration': cust_profile.get('avg_duration', 120.0),
        'UniqueLocations': cust_profile.get('unique_locations', 3)
    }

    # Parse the previous-transaction timestamp for the recency feature.
    prev_date = datetime.strptime(data['PreviousTransactionDate'], '%Y-%m-%d %H:%M:%S')

    amount = float(data['TransactionAmount'])
    duration = float(data['TransactionDuration'])
    # Guard the ratio features against division by zero: a zero duration or a
    # degenerate std/avg from a brand-new profile would crash this endpoint.
    safe_duration = duration if duration else 1.0
    safe_std = cust_stats['StdAmount'] if cust_stats['StdAmount'] else 1.0
    safe_avg_dur = cust_stats['AvgDuration'] if cust_stats['AvgDuration'] else 1.0

    # NOTE(review): hash() is randomized per process (PYTHONHASHSEED), so the
    # Location/DeviceID/MerchantID encodings are NOT stable across restarts;
    # a persisted categorical encoder would be needed for reproducible scores.
    features_dict = {
        'TransactionAmount': amount,
        'TransactionDuration': duration,
        'LoginAttempts': int(data['LoginAttempts']),
        'AccountBalance': float(data['AccountBalance']),
        'DaysSinceLastTransaction': (datetime.now() - prev_date).days,
        'TransactionSpeed': amount / safe_duration,
        'AvgAmount': cust_stats['AvgAmount'],
        'StdAmount': cust_stats['StdAmount'],
        'MaxAmount': cust_stats['MaxAmount'],
        'AvgDuration': cust_stats['AvgDuration'],
        'UniqueLocations': cust_stats['UniqueLocations'],
        'AmountDeviation': (amount - cust_stats['AvgAmount']) / safe_std,
        'DurationDeviation': (duration - cust_stats['AvgDuration']) / safe_avg_dur,
        'TransactionType': 0 if data['TransactionType'] == 'Debit' else 1,
        'Location': hash(data['Location']) % 100,
        'DeviceID': hash(data['DeviceID']) % 100,
        'MerchantID': hash(data['MerchantID']) % 100,
        'Channel': {'ATM': 0, 'Online': 1, 'Branch': 2}.get(data['Channel'], 0),
        'CustomerOccupation': {'Student': 0, 'Doctor': 1, 'Engineer': 2,
                               'Retired': 3}.get(data['CustomerOccupation'], 0)
    }

    # Convert to DataFrame for prediction (column order fixed by `features`)
    X = pd.DataFrame([features_dict], columns=features)

    # Feed the concept-drift detector
    drift_detector.add_data(X.values[0])

    # Model scores: higher isolation-forest score == more anomalous
    iso_score = -iso_forest.decision_function(X)[0]
    xgb_prob = xgb.predict_proba(X)[0, 1]

    # GNN prediction on the incrementally built transaction graph
    graph_data = graph_builder.add_transaction(data)
    with torch.no_grad():
        gnn_prob = gnn_model(graph_data.x, graph_data.edge_index).item()

    # --- SHAP explanations --- (single init; the old duplicate assignment
    # and the unused `transaction_date` local were removed)
    explanation = []
    if shap_explainer is not None:
        shap_values = shap_explainer.shap_values(X)
        for i, feature in enumerate(features):
            explanation.append({
                'feature': feature,
                'value': X.iloc[0, i],
                'shap_value': shap_values[0][i]
            })
        # Most influential features first
        explanation.sort(key=lambda item: abs(item['shap_value']), reverse=True)

    # Composite score weighted by customer risk profile
    cust_risk = cust_profile['risk_score'] if cust_profile else 0.5
    composite_score = (
        iso_score * 0.4 +
        xgb_prob * 0.4 +
        gnn_prob * 0.2
    ) * (0.5 + cust_risk)

    return jsonify({
        'isolation_forest_score': float(iso_score),
        'xgboost_probability': float(xgb_prob),
        'gnn_probability': float(gnn_prob),
        'composite_score': float(composite_score),
        'customer_risk_score': float(cust_risk),
        'explanation': explanation[:5],
        'drift_detected': drift_detector.drift_count > 0
    })
|
| 216 |
+
|
| 217 |
+
|
| 218 |
+
from datetime import timedelta
|
| 219 |
+
|
| 220 |
+
@app.route('/api/transactions')
def get_recent_transactions():
    """Return feed transactions no older than ?days=N (default 1) as JSON."""
    window_days = request.args.get('days', default=1, type=int)
    oldest_allowed = datetime.now() - timedelta(days=window_days)

    recent = []
    for txn in TRANSACTIONS:
        stamp = datetime.strptime(txn['TransactionDate'], "%Y-%m-%d %H:%M:%S")
        if stamp >= oldest_allowed:
            recent.append(txn)

    return jsonify(recent)
|
| 233 |
+
|
| 234 |
+
|
| 235 |
+
|
| 236 |
+
@app.route('/api/reports/sar', methods=['POST'])
def generate_sar():
    """Build a Suspicious Activity Report PDF and return it as a download.

    The JSON payload may supply an explicit "transactions" list; otherwise
    the in-memory feed is used. Tolerates an empty/missing request body
    (the old code crashed on `None.get`).
    """
    payload = request.json or {}
    transactions = payload.get("transactions", TRANSACTIONS)

    buffer = io.BytesIO()
    pdf = canvas.Canvas(buffer, pagesize=A4)
    pdf.setFont("Helvetica", 12)

    pdf.drawString(50, 800, "Suspicious Activity Report (SAR)")
    pdf.drawString(50, 780, f"Generated: {datetime.now()}")

    y = 750
    for tx in transactions:
        pdf.drawString(
            50, y,
            f"{tx['TransactionID']} | {tx['AccountID']} | "
            f"Amount: {tx['TransactionAmount']} | Risk: {tx['RiskScore']}"
        )
        y -= 18
        if y < 50:
            pdf.showPage()
            # Font state resets on a new page; restore it or subsequent
            # drawString calls fall back to the ReportLab default.
            pdf.setFont("Helvetica", 12)
            y = 800

    pdf.save()
    buffer.seek(0)

    return send_file(
        buffer,
        as_attachment=True,
        download_name="SAR_Report.pdf",
        mimetype="application/pdf"
    )
|
| 269 |
+
|
| 270 |
+
@app.route('/api/customer/<customer_id>/profile')
def get_customer_profile(customer_id):
    """Return the stored risk profile for a customer, or 404 if unknown."""
    found = profiler.get_risk_profile(customer_id)
    if not found:
        return jsonify({"error": "Customer not found"}), 404
    return jsonify(found)
|
| 276 |
+
|
| 277 |
+
@app.route('/api/models/retrain', methods=['POST'])
def trigger_retraining():
    """Run a synchronous AutoML retraining pass and report the outcome."""
    try:
        trainer = AutoMLTrainer("data/bank_transactions_data_2.csv")
        winner, winner_score = trainer.train_models()
        return jsonify({
            "status": "success",
            "best_model": type(winner).__name__,
            "score": winner_score
        })
    except Exception as err:
        # Surface the failure to the caller rather than a bare 500 page.
        return jsonify({"status": "error", "message": str(err)}), 500
|
| 289 |
+
|
| 290 |
+
@app.route('/api/drift/status')
def get_drift_status():
    """Expose the concept-drift detector's current state as JSON."""
    count = drift_detector.drift_count
    return jsonify({"drift_detected": count > 0, "drift_count": count})
|
| 296 |
+
|
| 297 |
+
if __name__ == '__main__':
    # Create required directories (os is already imported at the top of the
    # file; the redundant local `import os` was removed).
    os.makedirs("reports", exist_ok=True)
    os.makedirs("data", exist_ok=True)

    # Point MLflow at the tracking server; let the environment (e.g. the
    # docker-compose MLFLOW_TRACKING_URI variable) override the local default.
    mlflow.set_tracking_uri(
        os.environ.get("MLFLOW_TRACKING_URI", "http://localhost:5001")
    )

    # NOTE(review): debug=True on 0.0.0.0 exposes the Werkzeug debugger to
    # the network — disable outside local development.
    app.run(debug=True, host='0.0.0.0')
|
docker-compose.yml
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# NOTE(review): the top-level `version` key is obsolete in Compose v2+ but harmless.
version: '3'
services:
  # MLflow tracking server backed by a local SQLite DB and ./mlruns artifacts.
  mlflow:
    image: python:3.8
    command: >
      sh -c "pip install mlflow &&
             mlflow server --backend-store-uri sqlite:///mlflow.db
             --default-artifact-root ./mlruns
             --host 0.0.0.0
             --port 5000"
    ports:
      - "5000:5000"
    volumes:
      - ./mlruns:/mlruns
      - ./mlflow.db:/mlflow.db

  # Flask fraud-detection dashboard, built from the repo's Dockerfile.
  dashboard:
    build: .
    ports:
      - "5001:5001"
    depends_on:
      - mlflow
    environment:
      # NOTE(review): app.py's __main__ block uses http://localhost:5001 as its
      # fallback tracking URI — confirm it honors this variable in-container.
      - MLFLOW_TRACKING_URI=http://mlflow:5000
    volumes:
      - ./models:/app/models
      - ./data:/app/data
|
drift/adapter.py
ADDED
|
File without changes
|
drift/detector.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
from scipy.stats import ks_2samp
|
| 3 |
+
from sklearn.covariance import MinCovDet
|
| 4 |
+
import warnings
|
| 5 |
+
|
| 6 |
+
class ConceptDriftDetector:
    """Sliding-window concept-drift detector.

    The first `window_size` samples become a fixed reference distribution.
    Each subsequent full window is compared against it with per-feature
    Kolmogorov-Smirnov tests plus a robust-covariance (Mahalanobis) shift
    check; three consecutive drifted windows raise an alert.
    """

    def __init__(self, window_size=1000):
        self.window_size = window_size
        self.reference_window = None   # np.ndarray once the first window fills
        self.current_window = []       # samples accumulated for the next test
        self.drift_count = 0           # consecutive windows flagged as drifted

    def add_data(self, features):
        """Accumulate one feature vector; evaluate drift on each full window."""
        if len(self.current_window) < self.window_size:
            self.current_window.append(features)
            return
        if self.reference_window is None:
            # First full window becomes the fixed reference distribution.
            self.reference_window = np.array(self.current_window)
        else:
            self._test_for_drift()
        # Start a new window and KEEP the incoming sample — the old code
        # silently dropped the vector that arrived on the window boundary.
        self.current_window = [features]

    def _test_for_drift(self):
        """Compare the current window against the reference; count drift."""
        current_data = np.array(self.current_window)

        # 1. Kolmogorov-Smirnov test for each feature
        p_values = []
        for i in range(self.reference_window.shape[1]):
            try:
                _, p_value = ks_2samp(self.reference_window[:, i], current_data[:, i])
                p_values.append(p_value)
            except Exception:
                # Degenerate column (e.g. constant): treat as "no drift".
                p_values.append(1.0)

        # 2. Covariance shift detection — fit moved INSIDE the try; MinCovDet
        # can raise on singular/degenerate data and previously crashed here.
        try:
            robust_cov = MinCovDet().fit(self.reference_window)
            cov_score = robust_cov.mahalanobis(current_data).mean()
            cov_threshold = robust_cov.mahalanobis(self.reference_window).mean() * 1.5
        except Exception:
            cov_score = 0
            cov_threshold = 0

        # Combined decision
        significant_drift = any(p < 0.01 for p in p_values) or cov_score > cov_threshold

        if significant_drift:
            self.drift_count += 1
            if self.drift_count >= 3:  # Persistent drift
                self._alert_drift()
                self.drift_count = 0

    def _alert_drift(self):
        # In practice, this would trigger model retraining
        print("Warning: Significant concept drift detected!")
        # Could integrate with AutoML retraining
|
fraud_detection.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
graph_models/data_loader.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from torch_geometric.data import Data
|
| 3 |
+
from collections import defaultdict
|
| 4 |
+
|
| 5 |
+
class TransactionGraphBuilder:
    """Incrementally builds a heterogeneous transaction graph.

    Nodes are accounts (type 0), merchants (type 1) and devices (type 2);
    each transaction links its account node to the merchant and device
    nodes involved.  ``add_transaction`` returns the full graph so far as
    a PyTorch Geometric :class:`Data` object.
    """

    def __init__(self):
        self.node_index = defaultdict(int)  # node key -> integer node id
        self.current_id = 0                 # next id to hand out
        self.edges = []                     # (src, dst) node-id pairs
        self.node_features = []             # one-hot type vector per node
        self.node_types = []                # raw type code per node

    def get_node_id(self, node_key, node_type):
        """Return the id for *node_key*, registering it on first sight."""
        if node_key in self.node_index:
            return self.node_index[node_key]
        new_id = self.current_id
        self.node_index[node_key] = new_id
        self.current_id = new_id + 1
        # One-hot encoding of the node type over the three known types.
        self.node_features.append([float(t == node_type) for t in range(3)])
        self.node_types.append(node_type)
        return new_id

    def add_transaction(self, transaction):
        """Register one transaction's nodes/edges; return the PyG graph."""
        account = self.get_node_id(transaction['AccountID'], 0)    # account node
        merchant = self.get_node_id(transaction['MerchantID'], 1)  # merchant node
        device = self.get_node_id(transaction['DeviceID'], 2)      # device node

        self.edges.extend([(account, merchant), (account, device)])

        # Convert the accumulated edge list and features to PyG format.
        edge_index = torch.tensor(list(zip(*self.edges)), dtype=torch.long)
        x = torch.tensor(self.node_features, dtype=torch.float)
        return Data(x=x, edge_index=edge_index)
|
graph_models/gnn_model.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
import torch.nn.functional as F
|
| 4 |
+
from torch_geometric.nn import GCNConv
|
| 5 |
+
import os
|
| 6 |
+
|
| 7 |
+
class FraudGNN(nn.Module):
    """Two-layer GCN that scores an entire transaction graph for fraud."""

    def __init__(self, num_node_features, hidden_channels):
        super().__init__()
        self.conv1 = GCNConv(num_node_features, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.classifier = nn.Linear(hidden_channels, 1)

    def forward(self, x, edge_index):
        """Return a fraud probability in [0, 1] for the whole graph."""
        # Two rounds of message passing build the node embeddings.
        h = F.relu(self.conv1(x, edge_index))
        h = F.dropout(h, training=self.training)
        h = self.conv2(h, edge_index)

        # Global mean pooling collapses all node embeddings into a single
        # graph-level vector, which the linear head turns into one logit.
        graph_embedding = h.mean(dim=0)
        logit = self.classifier(graph_embedding)
        return torch.sigmoid(logit)
|
| 25 |
+
|
| 26 |
+
def load_gnn_model(model_path='trained_models/gnn_model.pt', device='cpu'):
    """Load a FraudGNN checkpoint, creating and saving a fresh one if absent.

    Parameters
    ----------
    model_path : str
        Path of the ``state_dict`` checkpoint to load (or create).
    device : str or torch.device
        Device the model is moved to before being returned.

    Returns
    -------
    FraudGNN
        Model in eval mode on *device*.
    """
    # Bug fix: create the directory the checkpoint actually lives in.
    # The original did os.makedirs('models') while loading/saving under
    # 'trained_models/', so saving a fresh model could fail.
    checkpoint_dir = os.path.dirname(model_path)
    if checkpoint_dir:
        os.makedirs(checkpoint_dir, exist_ok=True)

    # Architecture must match the training script (32 features, 64 hidden).
    model = FraudGNN(num_node_features=32, hidden_channels=64)

    try:
        # map_location lets a GPU-trained checkpoint load on a CPU-only host.
        model.load_state_dict(torch.load(model_path, map_location=device))
        print(f"Loaded GNN model from {model_path}")
    except FileNotFoundError:
        # No checkpoint yet: persist the randomly initialised weights so
        # subsequent calls find a model file.
        print(f"No model found at {model_path}, creating new model")
        torch.save(model.state_dict(), model_path)
        print(f"New model saved to {model_path}")

    model.to(device)
    model.eval()
    return model
|
graph_models/train_gnn.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from torch_geometric.data import Data
|
| 3 |
+
from gnn_model import FraudGNN
|
| 4 |
+
import os
|
| 5 |
+
|
| 6 |
+
def train_and_save_gnn_model():
    """Train FraudGNN on random demo data and save its state_dict.

    This is a smoke-test training loop over synthetic data; it exists only
    so that a checkpoint is available at ``trained_models/gnn_model.pt``.
    """
    # Create sample data for demonstration
    num_nodes = 100
    num_features = 32
    x = torch.randn((num_nodes, num_features))
    edge_index = torch.randint(0, num_nodes, (2, 200))
    y = torch.randint(0, 2, (1,)).float()  # single graph-level label

    # Initialize model
    model = FraudGNN(num_node_features=num_features, hidden_channels=64)

    # Simple training loop (for demonstration)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    # Bug fix: the original referenced `nn.BCELoss()` but this module never
    # imports torch.nn as `nn` (NameError); use the fully qualified name.
    criterion = torch.nn.BCELoss()

    for epoch in range(10):
        optimizer.zero_grad()
        out = model(x, edge_index)
        loss = criterion(out, y)
        loss.backward()
        optimizer.step()
        print(f'Epoch {epoch+1}, Loss: {loss.item():.4f}')

    # Bug fix: the created directory ('models') and the save path
    # ('trained_models/...') disagreed in the original, so the save could
    # fail and the success message reported the wrong path.
    model_path = 'trained_models/gnn_model.pt'
    os.makedirs('trained_models', exist_ok=True)
    torch.save(model.state_dict(), model_path)
    print(f"GNN model saved to {model_path}")
|
| 33 |
+
|
| 34 |
+
# Script entry point: train the demo GNN and write its checkpoint to disk.
if __name__ == '__main__':
    train_and_save_gnn_model()
|
mlruns/0/meta.yaml
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
artifact_location: mlflow-artifacts:/0
|
| 2 |
+
creation_time: 1766465772017
|
| 3 |
+
experiment_id: '0'
|
| 4 |
+
last_update_time: 1766465772017
|
| 5 |
+
lifecycle_stage: active
|
| 6 |
+
name: Default
|
mlruns/588265755531758591/0311fe8dff3e46ee9d5c6c013e0580bc/meta.yaml
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
artifact_uri: mlflow-artifacts:/588265755531758591/0311fe8dff3e46ee9d5c6c013e0580bc/artifacts
|
| 2 |
+
end_time: 1766478502132
|
| 3 |
+
entry_point_name: ''
|
| 4 |
+
experiment_id: '588265755531758591'
|
| 5 |
+
lifecycle_stage: active
|
| 6 |
+
run_id: 0311fe8dff3e46ee9d5c6c013e0580bc
|
| 7 |
+
run_name: isolation_forest_2025-12-23 13:58:14.718726
|
| 8 |
+
run_uuid: 0311fe8dff3e46ee9d5c6c013e0580bc
|
| 9 |
+
source_name: ''
|
| 10 |
+
source_type: 4
|
| 11 |
+
source_version: ''
|
| 12 |
+
start_time: 1766478494726
|
| 13 |
+
status: 3
|
| 14 |
+
tags: []
|
| 15 |
+
user_id: Saheel Yadav
|
mlruns/588265755531758591/0311fe8dff3e46ee9d5c6c013e0580bc/metrics/roc_auc
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
1766478495195 0.8415397408963584 0
|
mlruns/588265755531758591/0311fe8dff3e46ee9d5c6c013e0580bc/tags/mlflow.log-model.history
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
[{"run_id": "0311fe8dff3e46ee9d5c6c013e0580bc", "artifact_path": "isolation_forest", "utc_time_created": "2025-12-23 08:28:15.216022", "model_uuid": "e6224c07956145faa2bc6621cb207dbc", "flavors": {"python_function": {"model_path": "model.pkl", "predict_fn": "predict", "loader_module": "mlflow.sklearn", "python_version": "3.10.6", "env": {"conda": "conda.yaml", "virtualenv": "python_env.yaml"}}, "sklearn": {"pickled_model": "model.pkl", "sklearn_version": "1.6.1", "serialization_format": "cloudpickle", "code": null}}}]
|
mlruns/588265755531758591/0311fe8dff3e46ee9d5c6c013e0580bc/tags/mlflow.runName
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
isolation_forest_2025-12-23 13:58:14.718726
|
mlruns/588265755531758591/0311fe8dff3e46ee9d5c6c013e0580bc/tags/mlflow.source.name
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
app.py
|
mlruns/588265755531758591/0311fe8dff3e46ee9d5c6c013e0580bc/tags/mlflow.source.type
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
LOCAL
|
mlruns/588265755531758591/0311fe8dff3e46ee9d5c6c013e0580bc/tags/mlflow.user
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
Saheel Yadav
|
mlruns/588265755531758591/04c33c5f043e4977b3b7a930580a2dcb/meta.yaml
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
artifact_uri: mlflow-artifacts:/588265755531758591/04c33c5f043e4977b3b7a930580a2dcb/artifacts
|
| 2 |
+
end_time: 1766478455065
|
| 3 |
+
entry_point_name: ''
|
| 4 |
+
experiment_id: '588265755531758591'
|
| 5 |
+
lifecycle_stage: active
|
| 6 |
+
run_id: 04c33c5f043e4977b3b7a930580a2dcb
|
| 7 |
+
run_name: random_forest_2025-12-23 13:57:16.650560
|
| 8 |
+
run_uuid: 04c33c5f043e4977b3b7a930580a2dcb
|
| 9 |
+
source_name: ''
|
| 10 |
+
source_type: 4
|
| 11 |
+
source_version: ''
|
| 12 |
+
start_time: 1766478436929
|
| 13 |
+
status: 3
|
| 14 |
+
tags: []
|
| 15 |
+
user_id: Saheel Yadav
|
mlruns/588265755531758591/04c33c5f043e4977b3b7a930580a2dcb/metrics/roc_auc
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
1766478438200 0.9999781162464987 0
|
mlruns/588265755531758591/04c33c5f043e4977b3b7a930580a2dcb/tags/mlflow.log-model.history
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
[{"run_id": "04c33c5f043e4977b3b7a930580a2dcb", "artifact_path": "random_forest", "utc_time_created": "2025-12-23 08:27:18.238060", "model_uuid": "0673e163efb24bd685ad61bd0ab4966d", "flavors": {"python_function": {"model_path": "model.pkl", "predict_fn": "predict", "loader_module": "mlflow.sklearn", "python_version": "3.10.6", "env": {"conda": "conda.yaml", "virtualenv": "python_env.yaml"}}, "sklearn": {"pickled_model": "model.pkl", "sklearn_version": "1.6.1", "serialization_format": "cloudpickle", "code": null}}}]
|
mlruns/588265755531758591/04c33c5f043e4977b3b7a930580a2dcb/tags/mlflow.runName
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
random_forest_2025-12-23 13:57:16.650560
|
mlruns/588265755531758591/04c33c5f043e4977b3b7a930580a2dcb/tags/mlflow.source.name
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
app.py
|
mlruns/588265755531758591/04c33c5f043e4977b3b7a930580a2dcb/tags/mlflow.source.type
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
LOCAL
|
mlruns/588265755531758591/04c33c5f043e4977b3b7a930580a2dcb/tags/mlflow.user
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
Saheel Yadav
|
mlruns/588265755531758591/0ceb1e53b25f47d0b62d41bdb664e954/meta.yaml
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
artifact_uri: mlflow-artifacts:/588265755531758591/0ceb1e53b25f47d0b62d41bdb664e954/artifacts
|
| 2 |
+
end_time: 1766478488484
|
| 3 |
+
entry_point_name: ''
|
| 4 |
+
experiment_id: '588265755531758591'
|
| 5 |
+
lifecycle_stage: active
|
| 6 |
+
run_id: 0ceb1e53b25f47d0b62d41bdb664e954
|
| 7 |
+
run_name: random_forest_2025-12-23 13:57:57.276290
|
| 8 |
+
run_uuid: 0ceb1e53b25f47d0b62d41bdb664e954
|
| 9 |
+
source_name: ''
|
| 10 |
+
source_type: 4
|
| 11 |
+
source_version: ''
|
| 12 |
+
start_time: 1766478477505
|
| 13 |
+
status: 3
|
| 14 |
+
tags: []
|
| 15 |
+
user_id: Saheel Yadav
|
mlruns/588265755531758591/0ceb1e53b25f47d0b62d41bdb664e954/metrics/roc_auc
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
1766478478238 0.9999781162464986 0
|
mlruns/588265755531758591/0ceb1e53b25f47d0b62d41bdb664e954/tags/mlflow.log-model.history
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
[{"run_id": "0ceb1e53b25f47d0b62d41bdb664e954", "artifact_path": "random_forest", "utc_time_created": "2025-12-23 08:27:58.260159", "model_uuid": "dc3b7db6cd894bf996ba04c2fc45630b", "flavors": {"python_function": {"model_path": "model.pkl", "predict_fn": "predict", "loader_module": "mlflow.sklearn", "python_version": "3.10.6", "env": {"conda": "conda.yaml", "virtualenv": "python_env.yaml"}}, "sklearn": {"pickled_model": "model.pkl", "sklearn_version": "1.6.1", "serialization_format": "cloudpickle", "code": null}}}]
|
mlruns/588265755531758591/0ceb1e53b25f47d0b62d41bdb664e954/tags/mlflow.runName
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
random_forest_2025-12-23 13:57:57.276290
|
mlruns/588265755531758591/0ceb1e53b25f47d0b62d41bdb664e954/tags/mlflow.source.name
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
app.py
|
mlruns/588265755531758591/0ceb1e53b25f47d0b62d41bdb664e954/tags/mlflow.source.type
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
LOCAL
|
mlruns/588265755531758591/0ceb1e53b25f47d0b62d41bdb664e954/tags/mlflow.user
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
Saheel Yadav
|
mlruns/588265755531758591/0d42202ebbf14102b9771574825528e2/meta.yaml
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
artifact_uri: mlflow-artifacts:/588265755531758591/0d42202ebbf14102b9771574825528e2/artifacts
|
| 2 |
+
end_time: 1766473160955
|
| 3 |
+
entry_point_name: ''
|
| 4 |
+
experiment_id: '588265755531758591'
|
| 5 |
+
lifecycle_stage: active
|
| 6 |
+
run_id: 0d42202ebbf14102b9771574825528e2
|
| 7 |
+
run_name: isolation_forest_2025-12-23 12:29:15.631115
|
| 8 |
+
run_uuid: 0d42202ebbf14102b9771574825528e2
|
| 9 |
+
source_name: ''
|
| 10 |
+
source_type: 4
|
| 11 |
+
source_version: ''
|
| 12 |
+
start_time: 1766473155636
|
| 13 |
+
status: 3
|
| 14 |
+
tags: []
|
| 15 |
+
user_id: Saheel Yadav
|
mlruns/588265755531758591/0d42202ebbf14102b9771574825528e2/metrics/roc_auc
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
1766473155979 0.8715204831932772 0
|
mlruns/588265755531758591/0d42202ebbf14102b9771574825528e2/tags/mlflow.log-model.history
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
[{"run_id": "0d42202ebbf14102b9771574825528e2", "artifact_path": "isolation_forest", "utc_time_created": "2025-12-23 06:59:15.990127", "model_uuid": "f4ef2bef9eef4d629e898d4b53ce4f3c", "flavors": {"python_function": {"model_path": "model.pkl", "predict_fn": "predict", "loader_module": "mlflow.sklearn", "python_version": "3.10.6", "env": {"conda": "conda.yaml", "virtualenv": "python_env.yaml"}}, "sklearn": {"pickled_model": "model.pkl", "sklearn_version": "1.6.1", "serialization_format": "cloudpickle", "code": null}}}]
|
mlruns/588265755531758591/0d42202ebbf14102b9771574825528e2/tags/mlflow.runName
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
isolation_forest_2025-12-23 12:29:15.631115
|
mlruns/588265755531758591/0d42202ebbf14102b9771574825528e2/tags/mlflow.source.name
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
app.py
|
mlruns/588265755531758591/0d42202ebbf14102b9771574825528e2/tags/mlflow.source.type
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
LOCAL
|
mlruns/588265755531758591/0d42202ebbf14102b9771574825528e2/tags/mlflow.user
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
Saheel Yadav
|
mlruns/588265755531758591/12c38a58707142b49abf42946712e666/meta.yaml
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
artifact_uri: mlflow-artifacts:/588265755531758591/12c38a58707142b49abf42946712e666/artifacts
|
| 2 |
+
end_time: 1766466362212
|
| 3 |
+
entry_point_name: ''
|
| 4 |
+
experiment_id: '588265755531758591'
|
| 5 |
+
lifecycle_stage: active
|
| 6 |
+
run_id: 12c38a58707142b49abf42946712e666
|
| 7 |
+
run_name: random_forest_2025-12-23 10:35:44.274066
|
| 8 |
+
run_uuid: 12c38a58707142b49abf42946712e666
|
| 9 |
+
source_name: ''
|
| 10 |
+
source_type: 4
|
| 11 |
+
source_version: ''
|
| 12 |
+
start_time: 1766466344647
|
| 13 |
+
status: 3
|
| 14 |
+
tags: []
|
| 15 |
+
user_id: Saheel Yadav
|
mlruns/588265755531758591/12c38a58707142b49abf42946712e666/metrics/roc_auc
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
1766466346831 0.9999781162464987 0
|
mlruns/588265755531758591/12c38a58707142b49abf42946712e666/tags/mlflow.log-model.history
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
[{"run_id": "12c38a58707142b49abf42946712e666", "artifact_path": "random_forest", "utc_time_created": "2025-12-23 05:05:46.868461", "model_uuid": "c2bc64e4725b4427bcfc4308e84c030e", "flavors": {"python_function": {"model_path": "model.pkl", "predict_fn": "predict", "loader_module": "mlflow.sklearn", "python_version": "3.10.6", "env": {"conda": "conda.yaml", "virtualenv": "python_env.yaml"}}, "sklearn": {"pickled_model": "model.pkl", "sklearn_version": "1.6.1", "serialization_format": "cloudpickle", "code": null}}}]
|
mlruns/588265755531758591/12c38a58707142b49abf42946712e666/tags/mlflow.runName
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
random_forest_2025-12-23 10:35:44.274066
|
mlruns/588265755531758591/12c38a58707142b49abf42946712e666/tags/mlflow.source.name
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
app.py
|
mlruns/588265755531758591/12c38a58707142b49abf42946712e666/tags/mlflow.source.type
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
LOCAL
|
mlruns/588265755531758591/12c38a58707142b49abf42946712e666/tags/mlflow.user
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
Saheel Yadav
|
mlruns/588265755531758591/159a3dd3b1204e7fa3008a0eb85f5678/meta.yaml
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
artifact_uri: mlflow-artifacts:/588265755531758591/159a3dd3b1204e7fa3008a0eb85f5678/artifacts
|
| 2 |
+
end_time: 1766473383191
|
| 3 |
+
entry_point_name: ''
|
| 4 |
+
experiment_id: '588265755531758591'
|
| 5 |
+
lifecycle_stage: active
|
| 6 |
+
run_id: 159a3dd3b1204e7fa3008a0eb85f5678
|
| 7 |
+
run_name: random_forest_2025-12-23 12:32:51.249903
|
| 8 |
+
run_uuid: 159a3dd3b1204e7fa3008a0eb85f5678
|
| 9 |
+
source_name: ''
|
| 10 |
+
source_type: 4
|
| 11 |
+
source_version: ''
|
| 12 |
+
start_time: 1766473371623
|
| 13 |
+
status: 3
|
| 14 |
+
tags: []
|
| 15 |
+
user_id: Saheel Yadav
|
mlruns/588265755531758591/159a3dd3b1204e7fa3008a0eb85f5678/metrics/roc_auc
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
1766473373025 0.9999781162464986 0
|
mlruns/588265755531758591/159a3dd3b1204e7fa3008a0eb85f5678/tags/mlflow.log-model.history
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
[{"run_id": "159a3dd3b1204e7fa3008a0eb85f5678", "artifact_path": "random_forest", "utc_time_created": "2025-12-23 07:02:53.046664", "model_uuid": "75d2ea2144cc437c886f9b57ce663873", "flavors": {"python_function": {"model_path": "model.pkl", "predict_fn": "predict", "loader_module": "mlflow.sklearn", "python_version": "3.10.6", "env": {"conda": "conda.yaml", "virtualenv": "python_env.yaml"}}, "sklearn": {"pickled_model": "model.pkl", "sklearn_version": "1.6.1", "serialization_format": "cloudpickle", "code": null}}}]
|
mlruns/588265755531758591/159a3dd3b1204e7fa3008a0eb85f5678/tags/mlflow.runName
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
random_forest_2025-12-23 12:32:51.249903
|