Neeraj Sathish Kumar committed on
Commit
298e633
·
1 Parent(s): bff189f
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ app_test.py
Dockerfile ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Use a slim Python image
2
+ FROM python:3.10-slim
3
+
4
+ # Set working directory inside the container
5
+ WORKDIR /app
6
+
7
+ # Copy dependencies and install them
8
+ COPY ./requirements.txt /app/requirements.txt
9
+ RUN pip install --no-cache-dir --upgrade -r /app/requirements.txt
10
+
11
+ # Copy the application code and model files
12
+ COPY . /app
13
+
14
+ # Expose the standard Hugging Face Space port
15
+ EXPOSE 7860
16
+
17
+ # Command to run the app using Uvicorn
18
+ # 'app:app' means look for the object named 'app' inside the file named 'app.py'
19
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
README.md DELETED
@@ -1,11 +0,0 @@
1
- ---
2
- title: CreditCardFraudDetection
3
- emoji: ⚡
4
- colorFrom: pink
5
- colorTo: indigo
6
- sdk: docker
7
- pinned: false
8
- license: apache-2.0
9
- ---
10
-
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
Readme.md ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Credit Card Fraud Detection API
3
+ emoji: 💳
4
+ colorFrom: purple
5
+ colorTo: pink
6
+ sdk: docker
7
+ python_version: 3.10
8
+ pinned: false
9
+ ---
10
+
11
+ # Credit Card Fraud Detection API
12
+
13
+ This is an ML API deployed on Hugging Face Spaces using **FastAPI + Docker**.
14
+
15
+ **Endpoints:**
16
+ - `/docs` → Interactive Swagger UI
17
+ - `/predict` → Single transaction fraud score
18
+ - `/predict_multiple` → Batch prediction
19
+
20
+ Models available: `xgboost`, `random_forest`, `decision_tree`
absolute/ccfd_1.0_decision-tree.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fbdba46e3c71e148e877b8d61b5f66afad69087e521cdf8b8b694affdeb3374
3
+ size 155243
absolute/ccfd_1.0_random-forest.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d30529b90df0f17fa7396347c4230061093ab45c200307b0ce65fb3f5288e12b
3
+ size 43463794
absolute/ccfd_1.0_xg-boost.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e53f13355a23ff71608e94bbd3af142a30b8535b76fe78e96433c9202e5debd4
3
+ size 5746222
app.py ADDED
@@ -0,0 +1,293 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import joblib
4
+ import pandas as pd
5
+ from typing import Dict, Any, List, Union, Optional
6
+ from fastapi import FastAPI, HTTPException
7
+ from pydantic import BaseModel, Field
8
+ import numpy as np
9
+ import warnings
10
+
11
+ # Suppress sklearn version warnings
12
+ warnings.filterwarnings("ignore", category=UserWarning, module="sklearn.base")
13
+
# --- FIX FOR SKLEARN VERSION COMPATIBILITY ---
# Pickled pipelines saved under a different scikit-learn release may reference
# the private class `_RemainderColsList`; install a shim before any model load.
try:
    import sklearn
    print(f"📦 scikit-learn version: {sklearn.__version__}")

    # Importing the module registers it in sys.modules so we can patch it.
    from sklearn.compose._column_transformer import ColumnTransformer

    ct_module = sys.modules['sklearn.compose._column_transformer']
    if not hasattr(ct_module, '_RemainderColsList'):
        class _RemainderColsList(list):
            """Compatibility shim for older sklearn pickled models"""
            pass

        # Expose the shim on the module so pickle can resolve the name.
        ct_module._RemainderColsList = _RemainderColsList
        print("✅ Applied sklearn compatibility patch for _RemainderColsList")

except Exception as e:
    # Best-effort: a failed patch only matters if a model actually needs it.
    print(f"⚠️ Warning during sklearn compatibility setup: {e}")
# --- MODEL CONFIGURATION & CONSTANTS ---
VERSION = "1.0"
MODELS = {}  # Global dictionary to store loaded pipelines (alias -> fitted pipeline)

# Model alias -> pickle path, resolved relative to the working directory (/app in Docker).
MODEL_MAP = {
    "decision_tree": "classifier/ccfd_1.0_decision-tree.pkl",
    "random_forest": "classifier/ccfd_1.0_random-forest.pkl",
    "xgboost": "classifier/ccfd_1.0_xg-boost.pkl",
}

# Exact feature names (and order) the pipelines expect at predict time.
EXPECTED_FEATURES = [
    "cc_num", "merchant", "category", "amt", "gender", "state", "zip",
    "lat", "long", "city_pop", "job", "unix_time", "merch_lat",
    "merch_long", "age", "trans_hour", "trans_day", "trans_month",
    "trans_weekday", "distance"
]

# --- FASTAPI SETUP ---
app = FastAPI(
    title="Credit Card Fraud Detection API",
    version=VERSION,
    description="Pure API server for fraud detection using ML models. Returns fraud_score (probability 0-100%)."
)
class SingleTransactionPayload(BaseModel):
    """Request body for POST /predict: one transaction plus the model alias."""
    model_name: str = Field(..., description="Model alias (e.g., 'xgboost', 'random_forest', 'decision_tree').")
    features: Dict[str, Any] = Field(..., description="Single transaction record for prediction.")

class MultipleTransactionsPayload(BaseModel):
    """Request body for POST /predict_multiple: a batch of transactions plus the model alias."""
    model_name: str = Field(..., description="Model alias (e.g., 'xgboost', 'random_forest', 'decision_tree').")
    features: List[Dict[str, Any]] = Field(..., description="List of transaction records for prediction.")
# --- LOAD MODELS AT STARTUP ---
def load_pipelines():
    """Load every pipeline in MODEL_MAP into the global MODELS dict.

    Missing or incompatible pickle files are logged and skipped so the API
    can still start with a partial (or empty) model set.
    """
    import sklearn
    print(f"🚀 Loading models for server version: {VERSION}")
    print(f"📦 Using scikit-learn: {sklearn.__version__}")
    print(f"📂 Current working directory: {os.getcwd()}")

    for alias, filename in MODEL_MAP.items():
        try:
            # Check if file exists before handing it to joblib.
            if not os.path.exists(filename):
                abs_path = os.path.abspath(filename)
                # FIX: the diagnostic f-strings lost their interpolations
                # (they printed a literal placeholder instead of the path).
                print(f"❌ Model file not found: {filename}")
                print(f"   Expected at: {abs_path}")
                continue

            # Get file info for the log line.
            file_size = os.path.getsize(filename) / (1024 * 1024)  # MB
            print(f"📥 Loading {alias} from {filename} ({file_size:.2f} MB)...")

            # Load the model
            MODELS[alias] = joblib.load(filename)
            print(f"✅ Successfully loaded {alias}")

        except AttributeError as e:
            # Typical symptom of a pickle saved under a different sklearn version.
            print(f"❌ Compatibility error loading {alias} from {filename}")
            print(f"   Error: {e}")
            print(f"   💡 This usually means the model was saved with a different sklearn version")
            print(f"   💡 Try re-training and saving the model with sklearn {sklearn.__version__}")
        except Exception as e:
            print(f"❌ Failed to load {alias} from {filename}")
            print(f"   Error type: {type(e).__name__}")
            print(f"   Error message: {e}")

    if not MODELS:
        print("⚠️ No models loaded. Predictions will fail.")
        print("   💡 Ensure .pkl files are in the same directory as app.py")
        print("   💡 Check that models were saved with compatible sklearn version")
    else:
        print(f"✅ Successfully loaded {len(MODELS)} model(s): {list(MODELS.keys())}")

# Load models on import
load_pipelines()
+
112
+ # --- HELPER FUNCTION: PREPARE FEATURES ---
113
+ def prepare_features(features_list: List[Dict[str, Any]]) -> pd.DataFrame:
114
+ """Validate and prepare features for prediction"""
115
+ df_features = pd.DataFrame(features_list)
116
+
117
+ # Check for missing features
118
+ missing_features = set(EXPECTED_FEATURES) - set(df_features.columns)
119
+ if missing_features:
120
+ raise ValueError(f"Missing required features: {list(missing_features)}")
121
+
122
+ # Reorder columns to match expected order
123
+ df_features = df_features[EXPECTED_FEATURES]
124
+
125
+ # CRITICAL: Convert object columns to category dtype (as done during training)
126
+ for col in df_features.select_dtypes(include=['object']).columns:
127
+ df_features[col] = df_features[col].astype("category")
128
+
129
+ return df_features
130
+
# --- FASTAPI ENDPOINTS ---
@app.get("/")
async def root():
    """Root endpoint - API information"""
    # Static route catalogue for discoverability.
    endpoint_index = {
        "health": "/health",
        "models": "/models",
        "predict": "/predict (POST) - Single transaction",
        "predict_multiple": "/predict_multiple (POST) - Multiple transactions",
        "docs": "/docs"
    }
    # Documents the JSON shape of the two prediction endpoints.
    response_shape = {
        "description": "Returns fraud_score (probability 0-100%) for fraud class",
        "single": {"fraud_score": "float (0-100)"},
        "multiple": {
            "predictions": "list of {'fraud_score': float}",
            "overall_stats": {
                "total": "int",
                "avg_fraud_score": "float",
                "min_fraud_score": "float",
                "max_fraud_score": "float"
            }
        }
    }
    return {
        "status": "ok",
        "message": "Credit Card Fraud Detection API",
        "version": VERSION,
        "models_loaded": list(MODELS.keys()),
        "endpoints": endpoint_index,
        "response_format": response_shape
    }
@app.get("/health")
async def health_check():
    """Health check endpoint"""
    loaded = list(MODELS.keys())
    # "degraded" signals the process is up but no pipeline could be loaded.
    return {
        "status": "healthy" if MODELS else "degraded",
        "version": VERSION,
        "models_loaded": loaded,
        "model_count": len(loaded)
    }
@app.get("/models")
async def list_models():
    """List all available and loaded models"""
    # "available" = configured aliases; "loaded" = pipelines actually in memory.
    return {
        "available_models": list(MODEL_MAP.keys()),
        "loaded_models": list(MODELS.keys()),
        "model_files": MODEL_MAP,
        "version": VERSION
    }
@app.post("/predict")
async def predict_single(payload: SingleTransactionPayload):
    """
    Predict fraud score for a SINGLE transaction

    Returns fraud_score (probability 0-100% for fraud class)
    """
    requested = payload.model_name

    # Guard: the requested alias must have a loaded pipeline.
    if requested not in MODELS:
        raise HTTPException(
            status_code=404,
            detail=f"Model '{requested}' not loaded. Available: {list(MODELS.keys())}"
        )
    pipeline = MODELS[requested]

    # Validate and shape the single record into a one-row frame.
    try:
        df = prepare_features([payload.features])
    except Exception as e:
        raise HTTPException(
            status_code=422,
            detail=f"Data validation failed: {str(e)}"
        )

    # Run the model; column 1 of predict_proba is the fraud class.
    try:
        # Plain Python float (not numpy) so the JSON encoder accepts it.
        fraud_probability = float(pipeline.predict_proba(df)[:, 1][0] * 100)
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Prediction execution failed: {str(e)}"
        )

    return {
        "success": True,
        "model_used": requested,
        "fraud_score": round(fraud_probability, 2)
    }
@app.post("/predict_multiple")
async def predict_multiple(payload: MultipleTransactionsPayload):
    """
    Predict fraud scores for MULTIPLE transactions

    Returns fraud_score (0-100%) for each transaction, plus overall statistics
    """
    requested = payload.model_name

    # Guard: the requested alias must have a loaded pipeline.
    if requested not in MODELS:
        raise HTTPException(
            status_code=404,
            detail=f"Model '{requested}' not loaded. Available: {list(MODELS.keys())}"
        )
    pipeline = MODELS[requested]

    # Validate and shape the batch into a frame.
    try:
        df = prepare_features(payload.features)
    except Exception as e:
        raise HTTPException(
            status_code=422,
            detail=f"Data validation failed: {str(e)}"
        )

    try:
        # Fraud-class probabilities scaled to 0-100.
        scores = pipeline.predict_proba(df)[:, 1] * 100

        # float(...) converts numpy scalars so JSON serialization works.
        predictions = [{"fraud_score": round(float(s), 2)} for s in scores]
        total = len(predictions)

        return {
            "success": True,
            "model_used": requested,
            "total_transactions": total,
            "predictions": predictions,
            "overall_stats": {
                "total": total,
                "avg_fraud_score": round(float(scores.mean()), 2),
                "max_fraud_score": round(float(scores.max()), 2),
                "min_fraud_score": round(float(scores.min()), 2)
            }
        }

    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Prediction execution failed: {str(e)}"
        )
# For local development: run the ASGI server directly.
# Port 7860 matches the Dockerfile EXPOSE / Hugging Face Spaces convention.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)
classifier/ccfd_1.0_decision-tree.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d5696eff56965f3f70b8bc7159cc1e8270e7031b6c036061c68b0d784d0189c
3
+ size 450366
classifier/ccfd_1.0_random-forest.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e29cef8d74812b9395dd928ec778cdf1f8a7b64982f10b11678791c8dbde4996
3
+ size 213830974
classifier/ccfd_1.0_xg-boost.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a9c0953b2960ce5024984f75f5331458e121ebf3c7e29fafa45af6b2d9cbea4
3
+ size 26586734
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ pandas
4
+ joblib
5
+ numpy
6
+ scikit-learn==1.6.1
7
+ xgboost
stats/metrics.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "Model": "Decision Tree",
4
+ "Accuracy": 0.9993,
5
+ "Precision": 0.9905,
6
+ "Recall": 0.893,
7
+ "F1-Score": 0.9393,
8
+ "ROC-AUC": 0.9929,
9
+ "PR-AUC": 0.9511
10
+ },
11
+ {
12
+ "Model": "Random Forest",
13
+ "Accuracy": 0.9943,
14
+ "Precision": 1.0,
15
+ "Recall": 0.0187,
16
+ "F1-Score": 0.0366,
17
+ "ROC-AUC": 0.9976,
18
+ "PR-AUC": 0.8256
19
+ },
20
+ {
21
+ "Model": "XGBoost",
22
+ "Accuracy": 1.0,
23
+ "Precision": 1.0,
24
+ "Recall": 1.0,
25
+ "F1-Score": 1.0,
26
+ "ROC-AUC": 1.0,
27
+ "PR-AUC": 1.0
28
+ }
29
+ ]
stats/tested_result.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "Model": "Decision Tree",
4
+ "SUCCESS (%)": 99.81,
5
+ "FAIL (%)": 0.18,
6
+ "UNCERTAIN (%)": 0.0,
7
+ "Full Time (s)": 1.87,
8
+ "Per Request (ms)": 11.44,
9
+ "RAM (GB)": 2.37
10
+ },
11
+ {
12
+ "Model": "Random Forest",
13
+ "SUCCESS (%)": 99.61,
14
+ "FAIL (%)": 0.39,
15
+ "UNCERTAIN (%)": 0.0,
16
+ "Full Time (s)": 17.32,
17
+ "Per Request (ms)": 96.42,
18
+ "RAM (GB)": 2.44
19
+ },
20
+ {
21
+ "Model": "XGBoost",
22
+ "SUCCESS (%)": 99.86,
23
+ "FAIL (%)": 0.13,
24
+ "UNCERTAIN (%)": 0.01,
25
+ "Full Time (s)": 36.17,
26
+ "Per Request (ms)": 11.93,
27
+ "RAM (GB)": 2.39
28
+ }
29
+ ]
stats/train.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "Model": "Decision Tree",
4
+ "Train Time (s)": 82.9,
5
+ "RAM \u0394 (GB)": -0.18,
6
+ "Model Size (MB)": 0.15
7
+ },
8
+ {
9
+ "Model": "Random Forest",
10
+ "Train Time (s)": 992.0,
11
+ "RAM \u0394 (GB)": 0.17,
12
+ "Model Size (MB)": 41.45
13
+ },
14
+ {
15
+ "Model": "XGBoost",
16
+ "Train Time (s)": 284.8,
17
+ "RAM \u0394 (GB)": 0.08,
18
+ "Model Size (MB)": 5.48
19
+ }
20
+ ]