Spaces:
Sleeping
Sleeping
v2_app
Browse files- app.py +916 -312
- feature_engineering.py +486 -0
- requirements.txt +57 -9
app.py
CHANGED
|
@@ -1,265 +1,515 @@
|
|
| 1 |
"""
|
| 2 |
-
Fraud Detection API
|
| 3 |
-
|
| 4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
"""
|
| 6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
from fastapi import FastAPI, HTTPException, status
|
| 8 |
from fastapi.responses import JSONResponse
|
| 9 |
-
from pydantic import BaseModel, Field
|
|
|
|
|
|
|
|
|
|
| 10 |
from huggingface_hub import hf_hub_download
|
| 11 |
|
|
|
|
| 12 |
import joblib
|
| 13 |
import pandas as pd
|
|
|
|
|
|
|
|
|
|
| 14 |
import os
|
| 15 |
-
from typing import List, Optional
|
| 16 |
from datetime import datetime
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
-
# ==========================================
|
| 19 |
-
#
|
| 20 |
-
# ==========================================
|
|
|
|
|
|
|
| 21 |
REPO_ID = "Terorra/fd_model_jedha"
|
| 22 |
-
MODEL_FILENAME = "fraud_model.pkl"
|
| 23 |
-
MODEL_VERSION = None # None = latest, or specify "v1", "v2", etc.
|
| 24 |
|
| 25 |
-
#
|
| 26 |
-
#
|
| 27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
app = FastAPI(
|
| 29 |
-
|
|
|
|
|
|
|
|
|
|
| 30 |
description="""
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
""",
|
| 51 |
-
|
|
|
|
|
|
|
| 52 |
contact={
|
| 53 |
"name": "Terorra",
|
| 54 |
"email": "your.email@example.com",
|
| 55 |
},
|
|
|
|
| 56 |
license_info={
|
| 57 |
"name": "MIT",
|
| 58 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
)
|
| 60 |
|
| 61 |
-
# ==========================================
|
| 62 |
-
# Global Model Variable
|
| 63 |
-
# ==========================================
|
| 64 |
-
model = None
|
| 65 |
|
| 66 |
-
# ==========================================
|
| 67 |
-
#
|
| 68 |
-
# ==========================================
|
| 69 |
|
| 70 |
-
class
|
| 71 |
"""
|
| 72 |
-
|
|
|
|
|
|
|
|
|
|
| 73 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
amt: float = Field(
|
| 75 |
...,
|
| 76 |
-
description="
|
| 77 |
example=150.75,
|
| 78 |
-
gt=0
|
| 79 |
-
le=100000
|
| 80 |
)
|
|
|
|
|
|
|
| 81 |
lat: float = Field(
|
| 82 |
...,
|
| 83 |
-
description="
|
| 84 |
example=40.7128,
|
| 85 |
ge=-90,
|
| 86 |
le=90
|
| 87 |
)
|
| 88 |
long: float = Field(
|
| 89 |
...,
|
| 90 |
-
description="
|
| 91 |
example=-74.0060,
|
| 92 |
ge=-180,
|
| 93 |
le=180
|
| 94 |
)
|
|
|
|
|
|
|
| 95 |
city_pop: int = Field(
|
| 96 |
...,
|
| 97 |
-
description="Population
|
| 98 |
example=8000000,
|
| 99 |
gt=0
|
| 100 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
merch_lat: float = Field(
|
| 102 |
...,
|
| 103 |
-
description="
|
| 104 |
example=40.7589,
|
| 105 |
ge=-90,
|
| 106 |
le=90
|
| 107 |
)
|
| 108 |
merch_long: float = Field(
|
| 109 |
...,
|
| 110 |
-
description="
|
| 111 |
example=-73.9851,
|
| 112 |
ge=-180,
|
| 113 |
le=180
|
| 114 |
)
|
| 115 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
class Config:
|
| 117 |
schema_extra = {
|
| 118 |
"example": {
|
|
|
|
| 119 |
"amt": 150.75,
|
| 120 |
"lat": 40.7128,
|
| 121 |
"long": -74.0060,
|
| 122 |
"city_pop": 8000000,
|
|
|
|
| 123 |
"merch_lat": 40.7589,
|
| 124 |
-
"merch_long": -73.9851
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
}
|
| 126 |
}
|
| 127 |
|
| 128 |
|
| 129 |
-
class
|
| 130 |
"""
|
| 131 |
-
|
|
|
|
|
|
|
| 132 |
"""
|
| 133 |
-
|
|
|
|
| 134 |
...,
|
| 135 |
-
description="
|
| 136 |
-
min_items=1,
|
| 137 |
-
max_items=100
|
| 138 |
)
|
| 139 |
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
|
| 164 |
|
| 165 |
class PredictionOutput(BaseModel):
|
| 166 |
"""
|
| 167 |
-
|
| 168 |
"""
|
|
|
|
| 169 |
is_fraud: bool = Field(
|
| 170 |
...,
|
| 171 |
-
description="
|
| 172 |
)
|
|
|
|
| 173 |
fraud_probability: float = Field(
|
| 174 |
...,
|
| 175 |
-
description="
|
| 176 |
ge=0.0,
|
| 177 |
le=1.0
|
| 178 |
)
|
|
|
|
|
|
|
| 179 |
risk_level: str = Field(
|
| 180 |
...,
|
| 181 |
-
description="
|
| 182 |
)
|
|
|
|
|
|
|
| 183 |
confidence: float = Field(
|
| 184 |
...,
|
| 185 |
-
description="
|
| 186 |
ge=0.0,
|
| 187 |
le=1.0
|
| 188 |
)
|
|
|
|
|
|
|
| 189 |
timestamp: str = Field(
|
| 190 |
...,
|
| 191 |
-
description="
|
| 192 |
)
|
| 193 |
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
"fraud_probability": 0.15,
|
| 199 |
-
"risk_level": "LOW",
|
| 200 |
-
"confidence": 0.85,
|
| 201 |
-
"timestamp": "2026-01-24T15:30:45.123456"
|
| 202 |
-
}
|
| 203 |
-
}
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
class BatchPredictionOutput(BaseModel):
|
| 207 |
-
"""
|
| 208 |
-
Output schema for batch predictions
|
| 209 |
-
"""
|
| 210 |
-
predictions: List[PredictionOutput]
|
| 211 |
-
total_transactions: int
|
| 212 |
-
fraud_count: int
|
| 213 |
-
fraud_rate: float
|
| 214 |
-
processing_time_ms: float
|
| 215 |
-
|
| 216 |
|
| 217 |
-
class HealthResponse(BaseModel):
|
| 218 |
-
"""
|
| 219 |
-
Health check response
|
| 220 |
-
"""
|
| 221 |
-
status: str
|
| 222 |
-
model_loaded: bool
|
| 223 |
-
model_repo: str
|
| 224 |
-
model_type: Optional[str]
|
| 225 |
-
timestamp: str
|
| 226 |
|
|
|
|
|
|
|
|
|
|
| 227 |
|
| 228 |
-
|
| 229 |
"""
|
| 230 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 231 |
"""
|
| 232 |
-
|
| 233 |
-
model_filename: str
|
| 234 |
-
model_type: str
|
| 235 |
-
feature_names: List[str]
|
| 236 |
-
n_features: int
|
| 237 |
-
model_version: Optional[str]
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
# ==========================================
|
| 241 |
-
# Helper Functions
|
| 242 |
-
# ==========================================
|
| 243 |
-
|
| 244 |
-
def load_model_from_hf():
|
| 245 |
-
"""Load model from HuggingFace Hub"""
|
| 246 |
-
global model
|
| 247 |
|
| 248 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 249 |
model_path = hf_hub_download(
|
| 250 |
repo_id=REPO_ID,
|
| 251 |
filename=MODEL_FILENAME,
|
| 252 |
-
revision=MODEL_VERSION
|
|
|
|
| 253 |
)
|
|
|
|
|
|
|
|
|
|
| 254 |
model = joblib.load(model_path)
|
| 255 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 256 |
|
| 257 |
except Exception as e:
|
| 258 |
-
|
|
|
|
|
|
|
| 259 |
|
| 260 |
|
| 261 |
def calculate_risk_level(probability: float) -> str:
|
| 262 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 263 |
if probability < 0.3:
|
| 264 |
return "LOW"
|
| 265 |
elif probability < 0.6:
|
|
@@ -270,279 +520,633 @@ def calculate_risk_level(probability: float) -> str:
|
|
| 270 |
return "CRITICAL"
|
| 271 |
|
| 272 |
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
|
|
|
|
|
|
|
|
|
| 278 |
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
|
|
|
| 282 |
|
| 283 |
-
|
| 284 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 285 |
|
| 286 |
-
#
|
| 287 |
-
|
| 288 |
|
| 289 |
-
|
| 290 |
-
"
|
| 291 |
-
|
| 292 |
-
"
|
| 293 |
-
"
|
| 294 |
-
"timestamp": datetime.utcnow().isoformat()
|
| 295 |
-
}
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
# ==========================================
|
| 299 |
-
# Startup Event
|
| 300 |
-
# ==========================================
|
| 301 |
-
|
| 302 |
-
@app.on_event("startup")
|
| 303 |
-
async def startup_event():
|
| 304 |
-
"""Load model on startup"""
|
| 305 |
-
success = load_model_from_hf()
|
| 306 |
|
| 307 |
|
| 308 |
-
# ==========================================
|
| 309 |
-
#
|
| 310 |
-
# ==========================================
|
| 311 |
|
| 312 |
@app.get(
|
| 313 |
"/",
|
| 314 |
-
|
| 315 |
-
|
|
|
|
| 316 |
)
|
| 317 |
async def root():
|
| 318 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 319 |
return {
|
| 320 |
-
"message": "🚨 Fraud Detection API",
|
| 321 |
-
"version": "
|
| 322 |
"status": "online",
|
| 323 |
-
"
|
| 324 |
-
"
|
| 325 |
"endpoints": {
|
| 326 |
-
"
|
| 327 |
-
|
| 328 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 329 |
}
|
| 330 |
}
|
| 331 |
|
| 332 |
|
| 333 |
@app.get(
|
| 334 |
"/health",
|
| 335 |
-
|
| 336 |
summary="Health check",
|
| 337 |
-
description="
|
| 338 |
)
|
| 339 |
async def health_check():
|
| 340 |
"""
|
| 341 |
-
|
| 342 |
|
| 343 |
-
|
| 344 |
-
-
|
| 345 |
-
-
|
| 346 |
-
-
|
| 347 |
-
-
|
| 348 |
-
- **timestamp**: Current server time
|
| 349 |
"""
|
|
|
|
|
|
|
|
|
|
| 350 |
return {
|
| 351 |
-
"status": "healthy" if
|
| 352 |
"model_loaded": model is not None,
|
|
|
|
| 353 |
"model_repo": REPO_ID,
|
| 354 |
"model_type": type(model).__name__ if model else None,
|
|
|
|
| 355 |
"timestamp": datetime.utcnow().isoformat()
|
| 356 |
}
|
| 357 |
|
| 358 |
|
| 359 |
@app.get(
|
| 360 |
"/model/info",
|
| 361 |
-
|
| 362 |
-
summary="
|
| 363 |
-
description="
|
| 364 |
)
|
| 365 |
async def model_info():
|
| 366 |
"""
|
| 367 |
-
|
| 368 |
|
| 369 |
-
|
| 370 |
-
-
|
| 371 |
-
-
|
| 372 |
-
-
|
| 373 |
-
-
|
| 374 |
-
- **n_features**: Number of features
|
| 375 |
-
- **model_version**: Model version if specified
|
| 376 |
"""
|
| 377 |
-
|
|
|
|
| 378 |
raise HTTPException(
|
| 379 |
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
|
| 380 |
-
detail="
|
| 381 |
)
|
| 382 |
|
| 383 |
-
|
|
|
|
| 384 |
|
| 385 |
return {
|
| 386 |
-
"
|
| 387 |
-
|
| 388 |
-
|
| 389 |
-
|
| 390 |
-
|
| 391 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 392 |
}
|
| 393 |
|
| 394 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 395 |
@app.post(
|
| 396 |
-
"/
|
| 397 |
-
response_model=
|
| 398 |
-
|
| 399 |
-
|
| 400 |
-
|
| 401 |
)
|
| 402 |
-
async def
|
| 403 |
"""
|
| 404 |
-
|
| 405 |
-
|
| 406 |
-
|
| 407 |
-
|
| 408 |
-
|
| 409 |
-
|
| 410 |
-
|
| 411 |
-
|
| 412 |
-
|
| 413 |
-
|
| 414 |
-
|
| 415 |
-
|
| 416 |
-
|
| 417 |
-
|
| 418 |
-
|
| 419 |
-
|
| 420 |
-
|
| 421 |
-
|
| 422 |
-
|
| 423 |
-
|
| 424 |
-
|
| 425 |
-
|
| 426 |
-
|
| 427 |
-
|
| 428 |
-
|
| 429 |
-
|
| 430 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 431 |
"""
|
| 432 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 433 |
raise HTTPException(
|
| 434 |
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
|
| 435 |
-
detail="
|
| 436 |
)
|
| 437 |
|
| 438 |
try:
|
| 439 |
-
|
| 440 |
-
|
|
|
|
| 441 |
|
| 442 |
-
#
|
| 443 |
-
|
|
|
|
| 444 |
|
| 445 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 446 |
|
| 447 |
except Exception as e:
|
| 448 |
raise HTTPException(
|
| 449 |
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
| 450 |
-
detail=f"
|
| 451 |
)
|
| 452 |
|
| 453 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 454 |
@app.post(
|
| 455 |
-
"/predict
|
| 456 |
-
response_model=
|
| 457 |
-
|
| 458 |
-
|
| 459 |
-
|
| 460 |
)
|
| 461 |
-
async def
|
| 462 |
"""
|
| 463 |
-
|
| 464 |
-
|
| 465 |
-
|
| 466 |
-
|
| 467 |
-
|
| 468 |
-
|
| 469 |
-
|
| 470 |
-
|
| 471 |
-
|
| 472 |
-
|
| 473 |
-
|
| 474 |
-
|
| 475 |
-
|
| 476 |
-
|
| 477 |
-
|
| 478 |
-
|
| 479 |
-
|
| 480 |
-
|
| 481 |
-
|
| 482 |
-
|
| 483 |
-
|
| 484 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 485 |
"""
|
| 486 |
-
|
|
|
|
| 487 |
raise HTTPException(
|
| 488 |
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
|
| 489 |
-
detail="
|
| 490 |
)
|
| 491 |
|
| 492 |
try:
|
| 493 |
-
|
|
|
|
| 494 |
|
| 495 |
-
|
| 496 |
-
|
| 497 |
-
|
| 498 |
-
data = transaction.dict()
|
| 499 |
-
result = predict_transaction(data)
|
| 500 |
-
predictions.append(result)
|
| 501 |
|
| 502 |
-
#
|
| 503 |
-
|
| 504 |
-
|
| 505 |
-
fraud_rate = (fraud_count / total) * 100 if total > 0 else 0.0
|
| 506 |
|
| 507 |
-
|
| 508 |
-
|
| 509 |
-
|
| 510 |
|
| 511 |
-
|
| 512 |
-
|
| 513 |
-
|
| 514 |
-
|
| 515 |
-
|
| 516 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 517 |
}
|
| 518 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 519 |
except Exception as e:
|
| 520 |
raise HTTPException(
|
| 521 |
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
| 522 |
-
detail=f"
|
| 523 |
)
|
| 524 |
|
| 525 |
|
| 526 |
-
# ==========================================
|
| 527 |
-
#
|
| 528 |
-
# ==========================================
|
| 529 |
|
| 530 |
@app.exception_handler(ValueError)
|
| 531 |
async def value_error_handler(request, exc):
|
|
|
|
| 532 |
return JSONResponse(
|
| 533 |
status_code=status.HTTP_400_BAD_REQUEST,
|
| 534 |
-
content={
|
|
|
|
|
|
|
|
|
|
|
|
|
| 535 |
)
|
| 536 |
|
| 537 |
|
| 538 |
@app.exception_handler(Exception)
|
| 539 |
async def general_exception_handler(request, exc):
|
|
|
|
| 540 |
return JSONResponse(
|
| 541 |
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
| 542 |
-
content={
|
|
|
|
|
|
|
|
|
|
|
|
|
| 543 |
)
|
| 544 |
|
| 545 |
|
| 546 |
-
# ==========================================
|
| 547 |
-
#
|
| 548 |
-
# ==========================================
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
"""
|
| 2 |
+
🚨 Fraud Detection API - Level UP Edition
|
| 3 |
+
=========================================
|
| 4 |
+
|
| 5 |
+
API FastAPI pour la détection de fraude en temps réel
|
| 6 |
+
avec preprocessing et feature engineering
|
| 7 |
+
|
| 8 |
+
Fonctionnalités:
|
| 9 |
+
- Download automatique du model + preprocessor depuis HuggingFace
|
| 10 |
+
- 3 endpoints: /predict, /preprocess, /feat_eng
|
| 11 |
+
- Feature engineering complet (distance GPS, features temporelles, âge)
|
| 12 |
+
- Documentation interactive sur /docs
|
| 13 |
+
|
| 14 |
+
Author: Terorra
|
| 15 |
+
Date: January 2026
|
| 16 |
+
Version: 2.0.0
|
| 17 |
"""
|
| 18 |
|
| 19 |
+
# =====================================================================
|
| 20 |
+
# IMPORTS
|
| 21 |
+
# =====================================================================
|
| 22 |
+
|
| 23 |
+
# FastAPI et types
|
| 24 |
from fastapi import FastAPI, HTTPException, status
|
| 25 |
from fastapi.responses import JSONResponse
|
| 26 |
+
from pydantic import BaseModel, Field
|
| 27 |
+
from typing import List, Optional, Dict, Any
|
| 28 |
+
|
| 29 |
+
# HuggingFace pour télécharger les modèles
|
| 30 |
from huggingface_hub import hf_hub_download
|
| 31 |
|
| 32 |
+
# ML et data
|
| 33 |
import joblib
|
| 34 |
import pandas as pd
|
| 35 |
+
import numpy as np
|
| 36 |
+
|
| 37 |
+
# Utilitaires
|
| 38 |
import os
|
|
|
|
| 39 |
from datetime import datetime
|
| 40 |
+
import time
|
| 41 |
+
|
| 42 |
+
# Notre module de feature engineering
|
| 43 |
+
from feature_engineering import (
|
| 44 |
+
engineer_features,
|
| 45 |
+
prepare_for_model,
|
| 46 |
+
get_model_features,
|
| 47 |
+
haversine_distance,
|
| 48 |
+
extract_time_features
|
| 49 |
+
)
|
| 50 |
+
|
| 51 |
|
| 52 |
+
# =====================================================================
|
| 53 |
+
# CONFIGURATION GLOBALE
|
| 54 |
+
# =====================================================================
|
| 55 |
+
|
| 56 |
+
# Repository HuggingFace où sont stockés les modèles
|
| 57 |
REPO_ID = "Terorra/fd_model_jedha"
|
|
|
|
|
|
|
| 58 |
|
| 59 |
+
# Noms des fichiers sur HuggingFace
|
| 60 |
+
MODEL_FILENAME = "fraud_model.pkl" # Le modèle RandomForest
|
| 61 |
+
PREPROCESSOR_FILENAME = "preprocessor.plk" # Le preprocessor (ColumnTransformer)
|
| 62 |
+
|
| 63 |
+
# Version du modèle (None = latest, ou "v1", "v2", etc.)
|
| 64 |
+
MODEL_VERSION = None
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
# =====================================================================
|
| 68 |
+
# VARIABLES GLOBALES (modèles chargés en mémoire)
|
| 69 |
+
# =====================================================================
|
| 70 |
+
|
| 71 |
+
# Ces variables seront remplies au démarrage de l'API
|
| 72 |
+
model = None # Le modèle ML (RandomForestClassifier)
|
| 73 |
+
preprocessor = None # Le preprocessor (StandardScaler + OneHotEncoder)
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
# =====================================================================
|
| 77 |
+
# CRÉATION DE L'APPLICATION FASTAPI
|
| 78 |
+
# =====================================================================
|
| 79 |
+
|
| 80 |
app = FastAPI(
|
| 81 |
+
# Titre qui apparaît dans la doc
|
| 82 |
+
title="🚨 Fraud Detection API - Level UP",
|
| 83 |
+
|
| 84 |
+
# Description complète (supporte Markdown)
|
| 85 |
description="""
|
| 86 |
+
# API de Détection de Fraude en Temps Réel
|
| 87 |
+
|
| 88 |
+
Cette API utilise le Machine Learning pour détecter les transactions frauduleuses
|
| 89 |
+
sur les cartes de crédit.
|
| 90 |
+
|
| 91 |
+
## 🚀 Fonctionnalités
|
| 92 |
+
|
| 93 |
+
### Endpoints Principaux
|
| 94 |
+
|
| 95 |
+
1. **`/predict`** - Prédiction complète
|
| 96 |
+
- Prend les données brutes
|
| 97 |
+
- Applique le feature engineering
|
| 98 |
+
- Applique le preprocessing
|
| 99 |
+
- Retourne la prédiction de fraude
|
| 100 |
+
|
| 101 |
+
2. **`/feat_eng`** - Feature Engineering seulement
|
| 102 |
+
- Calcule la distance GPS client-marchand
|
| 103 |
+
- Extrait les features temporelles (heure, jour, weekend, etc.)
|
| 104 |
+
- Calcule l'âge du porteur
|
| 105 |
+
- Retourne les features transformées
|
| 106 |
+
|
| 107 |
+
3. **`/preprocess`** - Preprocessing seulement
|
| 108 |
+
- Prend les features (déjà engineered)
|
| 109 |
+
- Applique StandardScaler (normalisation)
|
| 110 |
+
- Applique OneHotEncoder (encoding catégories)
|
| 111 |
+
- Retourne les features preprocessed (prêtes pour le modèle)
|
| 112 |
+
|
| 113 |
+
### Endpoints Utilitaires
|
| 114 |
+
|
| 115 |
+
- **`/health`** - Vérifier que l'API fonctionne
|
| 116 |
+
- **`/model/info`** - Informations sur le modèle ML
|
| 117 |
+
- **`/features`** - Liste des features nécessaires
|
| 118 |
+
|
| 119 |
+
## 📊 Workflow Complet
|
| 120 |
+
|
| 121 |
+
```
|
| 122 |
+
Données Brutes
|
| 123 |
+
↓
|
| 124 |
+
/feat_eng → Feature Engineering
|
| 125 |
+
↓
|
| 126 |
+
/preprocess → Preprocessing (scaling + encoding)
|
| 127 |
+
↓
|
| 128 |
+
/predict → Prédiction ML
|
| 129 |
+
↓
|
| 130 |
+
Résultat: Fraude ou Non
|
| 131 |
+
```
|
| 132 |
+
|
| 133 |
+
## 🎯 Modèle ML
|
| 134 |
+
|
| 135 |
+
- **Algorithme**: RandomForestClassifier
|
| 136 |
+
- **Recall**: > 90% (optimisé pour détecter les fraudes)
|
| 137 |
+
- **Features**: 21 features (17 numériques + 4 catégorielles)
|
| 138 |
+
- **Preprocessing**: StandardScaler + OneHotEncoder
|
| 139 |
+
- **Hébergement**: HuggingFace Hub
|
| 140 |
+
|
| 141 |
+
## 💡 Cas d'Usage
|
| 142 |
+
|
| 143 |
+
1. **Validation en temps réel**: Valider une transaction au moment du paiement
|
| 144 |
+
2. **Analyse batch**: Analyser des milliers de transactions historiques
|
| 145 |
+
3. **Monitoring**: Surveiller les patterns de fraude
|
| 146 |
+
4. **Reporting**: Générer des rapports de fraude
|
| 147 |
+
|
| 148 |
+
## 🔧 Feature Engineering
|
| 149 |
+
|
| 150 |
+
L'API calcule automatiquement:
|
| 151 |
+
- **distance_km**: Distance GPS entre client et marchand (formule Haversine)
|
| 152 |
+
- **hour**: Heure de la transaction (0-23)
|
| 153 |
+
- **is_night, is_morning, is_afternoon, is_evening**: Période de la journée
|
| 154 |
+
- **is_business_hour**: Transaction pendant heures de bureau (8h-17h)
|
| 155 |
+
- **is_weekend**: Transaction le weekend
|
| 156 |
+
- **age**: Âge du porteur de carte
|
| 157 |
+
- **year, month, day, dayofweek**: Composantes de la date
|
| 158 |
+
|
| 159 |
+
## 📚 Documentation
|
| 160 |
+
|
| 161 |
+
- Cette page: Documentation interactive avec exemples
|
| 162 |
+
- Essayez les endpoints directement depuis cette page!
|
| 163 |
+
- Chaque endpoint a des exemples pré-remplis
|
| 164 |
+
|
| 165 |
+
## 🎓 Pour Commencer
|
| 166 |
+
|
| 167 |
+
1. Testez `/health` pour vérifier que l'API fonctionne
|
| 168 |
+
2. Regardez `/features` pour voir les features nécessaires
|
| 169 |
+
3. Essayez `/feat_eng` avec des données de test
|
| 170 |
+
4. Utilisez `/predict` pour une prédiction complète
|
| 171 |
""",
|
| 172 |
+
|
| 173 |
+
version="2.0.0",
|
| 174 |
+
|
| 175 |
contact={
|
| 176 |
"name": "Terorra",
|
| 177 |
"email": "your.email@example.com",
|
| 178 |
},
|
| 179 |
+
|
| 180 |
license_info={
|
| 181 |
"name": "MIT",
|
| 182 |
+
},
|
| 183 |
+
|
| 184 |
+
# Tags pour organiser les endpoints dans la doc
|
| 185 |
+
openapi_tags=[
|
| 186 |
+
{
|
| 187 |
+
"name": "🎯 Prediction",
|
| 188 |
+
"description": "Endpoints de prédiction de fraude"
|
| 189 |
+
},
|
| 190 |
+
{
|
| 191 |
+
"name": "🔧 Feature Engineering",
|
| 192 |
+
"description": "Transformation des features"
|
| 193 |
+
},
|
| 194 |
+
{
|
| 195 |
+
"name": "⚙️ Preprocessing",
|
| 196 |
+
"description": "Preprocessing des données"
|
| 197 |
+
},
|
| 198 |
+
{
|
| 199 |
+
"name": "📊 Information",
|
| 200 |
+
"description": "Informations sur l'API et le modèle"
|
| 201 |
+
},
|
| 202 |
+
]
|
| 203 |
)
|
| 204 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 205 |
|
| 206 |
+
# =====================================================================
|
| 207 |
+
# SCHEMAS PYDANTIC (Définition des types de données)
|
| 208 |
+
# =====================================================================
|
| 209 |
|
| 210 |
+
class TransactionRawInput(BaseModel):
|
| 211 |
"""
|
| 212 |
+
Données BRUTES d'une transaction (avant feature engineering)
|
| 213 |
+
|
| 214 |
+
Ce sont les données telles qu'elles arrivent de la base de données
|
| 215 |
+
ou du système de paiement, SANS transformation.
|
| 216 |
"""
|
| 217 |
+
# Informations carte
|
| 218 |
+
cc_num: int = Field(
|
| 219 |
+
...,
|
| 220 |
+
description="Numéro de carte de crédit (hashé)",
|
| 221 |
+
example=374125201044065
|
| 222 |
+
)
|
| 223 |
+
|
| 224 |
+
# Montant
|
| 225 |
amt: float = Field(
|
| 226 |
...,
|
| 227 |
+
description="Montant de la transaction en dollars",
|
| 228 |
example=150.75,
|
| 229 |
+
gt=0
|
|
|
|
| 230 |
)
|
| 231 |
+
|
| 232 |
+
# Localisation client
|
| 233 |
lat: float = Field(
|
| 234 |
...,
|
| 235 |
+
description="Latitude du client (coordonnées GPS)",
|
| 236 |
example=40.7128,
|
| 237 |
ge=-90,
|
| 238 |
le=90
|
| 239 |
)
|
| 240 |
long: float = Field(
|
| 241 |
...,
|
| 242 |
+
description="Longitude du client (coordonnées GPS)",
|
| 243 |
example=-74.0060,
|
| 244 |
ge=-180,
|
| 245 |
le=180
|
| 246 |
)
|
| 247 |
+
|
| 248 |
+
# Ville
|
| 249 |
city_pop: int = Field(
|
| 250 |
...,
|
| 251 |
+
description="Population de la ville du client",
|
| 252 |
example=8000000,
|
| 253 |
gt=0
|
| 254 |
)
|
| 255 |
+
zip: int = Field(
|
| 256 |
+
...,
|
| 257 |
+
description="Code postal",
|
| 258 |
+
example=10001
|
| 259 |
+
)
|
| 260 |
+
|
| 261 |
+
# Localisation marchand
|
| 262 |
merch_lat: float = Field(
|
| 263 |
...,
|
| 264 |
+
description="Latitude du marchand (coordonnées GPS)",
|
| 265 |
example=40.7589,
|
| 266 |
ge=-90,
|
| 267 |
le=90
|
| 268 |
)
|
| 269 |
merch_long: float = Field(
|
| 270 |
...,
|
| 271 |
+
description="Longitude du marchand (coordonnées GPS)",
|
| 272 |
example=-73.9851,
|
| 273 |
ge=-180,
|
| 274 |
le=180
|
| 275 |
)
|
| 276 |
|
| 277 |
+
# Marchand
|
| 278 |
+
merchant: str = Field(
|
| 279 |
+
...,
|
| 280 |
+
description="Nom du marchand",
|
| 281 |
+
example="Amazon"
|
| 282 |
+
)
|
| 283 |
+
category: str = Field(
|
| 284 |
+
...,
|
| 285 |
+
description="Catégorie de transaction",
|
| 286 |
+
example="shopping_net"
|
| 287 |
+
)
|
| 288 |
+
|
| 289 |
+
# Client
|
| 290 |
+
gender: str = Field(
|
| 291 |
+
...,
|
| 292 |
+
description="Genre du client (M/F)",
|
| 293 |
+
example="M"
|
| 294 |
+
)
|
| 295 |
+
state: str = Field(
|
| 296 |
+
...,
|
| 297 |
+
description="État (US)",
|
| 298 |
+
example="NY"
|
| 299 |
+
)
|
| 300 |
+
dob: str = Field(
|
| 301 |
+
...,
|
| 302 |
+
description="Date de naissance (YYYY-MM-DD)",
|
| 303 |
+
example="1990-01-15"
|
| 304 |
+
)
|
| 305 |
+
|
| 306 |
+
# Transaction
|
| 307 |
+
transaction_time: str = Field(
|
| 308 |
+
...,
|
| 309 |
+
description="Heure de la transaction (YYYY-MM-DD HH:MM:SS)",
|
| 310 |
+
example="2026-01-29 14:30:00"
|
| 311 |
+
)
|
| 312 |
+
|
| 313 |
class Config:
|
| 314 |
schema_extra = {
|
| 315 |
"example": {
|
| 316 |
+
"cc_num": 374125201044065,
|
| 317 |
"amt": 150.75,
|
| 318 |
"lat": 40.7128,
|
| 319 |
"long": -74.0060,
|
| 320 |
"city_pop": 8000000,
|
| 321 |
+
"zip": 10001,
|
| 322 |
"merch_lat": 40.7589,
|
| 323 |
+
"merch_long": -73.9851,
|
| 324 |
+
"merchant": "Amazon",
|
| 325 |
+
"category": "shopping_net",
|
| 326 |
+
"gender": "M",
|
| 327 |
+
"state": "NY",
|
| 328 |
+
"dob": "1990-01-15",
|
| 329 |
+
"transaction_time": "2026-01-29 14:30:00"
|
| 330 |
}
|
| 331 |
}
|
| 332 |
|
| 333 |
|
| 334 |
+
class FeaturesEngineeredOutput(BaseModel):
|
| 335 |
"""
|
| 336 |
+
Résultat du Feature Engineering
|
| 337 |
+
|
| 338 |
+
Contient les données originales + les features calculées
|
| 339 |
"""
|
| 340 |
+
# Données originales
|
| 341 |
+
original_data: Dict[str, Any] = Field(
|
| 342 |
...,
|
| 343 |
+
description="Données brutes d'entrée"
|
|
|
|
|
|
|
| 344 |
)
|
| 345 |
|
| 346 |
+
# Features engineered
|
| 347 |
+
engineered_features: Dict[str, Any] = Field(
|
| 348 |
+
...,
|
| 349 |
+
description="Nouvelles features calculées"
|
| 350 |
+
)
|
| 351 |
+
|
| 352 |
+
# Toutes les features combinées
|
| 353 |
+
all_features: Dict[str, Any] = Field(
|
| 354 |
+
...,
|
| 355 |
+
description="Données originales + features engineered"
|
| 356 |
+
)
|
| 357 |
+
|
| 358 |
+
|
| 359 |
+
class PreprocessedOutput(BaseModel):
|
| 360 |
+
"""
|
| 361 |
+
Résultat du Preprocessing
|
| 362 |
+
|
| 363 |
+
Features transformées (scaled + encoded) prêtes pour le modèle
|
| 364 |
+
"""
|
| 365 |
+
preprocessed_shape: tuple = Field(
|
| 366 |
+
...,
|
| 367 |
+
description="Dimensions des données preprocessed (lignes, colonnes)"
|
| 368 |
+
)
|
| 369 |
+
|
| 370 |
+
sample_values: List[float] = Field(
|
| 371 |
+
...,
|
| 372 |
+
description="Premières valeurs (pour debug)"
|
| 373 |
+
)
|
| 374 |
+
|
| 375 |
+
message: str = Field(
|
| 376 |
+
...,
|
| 377 |
+
description="Message de confirmation"
|
| 378 |
+
)
|
| 379 |
|
| 380 |
|
| 381 |
class PredictionOutput(BaseModel):
|
| 382 |
"""
|
| 383 |
+
Résultat de la Prédiction de Fraude
|
| 384 |
"""
|
| 385 |
+
# Prédiction
|
| 386 |
is_fraud: bool = Field(
|
| 387 |
...,
|
| 388 |
+
description="True si la transaction est frauduleuse"
|
| 389 |
)
|
| 390 |
+
|
| 391 |
fraud_probability: float = Field(
|
| 392 |
...,
|
| 393 |
+
description="Probabilité de fraude (0.0 à 1.0)",
|
| 394 |
ge=0.0,
|
| 395 |
le=1.0
|
| 396 |
)
|
| 397 |
+
|
| 398 |
+
# Classification du risque
|
| 399 |
risk_level: str = Field(
|
| 400 |
...,
|
| 401 |
+
description="Niveau de risque: LOW, MEDIUM, HIGH, CRITICAL"
|
| 402 |
)
|
| 403 |
+
|
| 404 |
+
# Confiance du modèle
|
| 405 |
confidence: float = Field(
|
| 406 |
...,
|
| 407 |
+
description="Confiance du modèle (0.0 à 1.0)",
|
| 408 |
ge=0.0,
|
| 409 |
le=1.0
|
| 410 |
)
|
| 411 |
+
|
| 412 |
+
# Métadonnées
|
| 413 |
timestamp: str = Field(
|
| 414 |
...,
|
| 415 |
+
description="Heure de la prédiction (ISO format)"
|
| 416 |
)
|
| 417 |
|
| 418 |
+
processing_time_ms: float = Field(
|
| 419 |
+
...,
|
| 420 |
+
description="Temps de traitement en millisecondes"
|
| 421 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 422 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 423 |
|
| 424 |
+
# =====================================================================
|
| 425 |
+
# FONCTIONS HELPER
|
| 426 |
+
# =====================================================================
|
| 427 |
|
| 428 |
+
def load_models_from_hf():
|
| 429 |
"""
|
| 430 |
+
Télécharge et charge les modèles depuis HuggingFace Hub
|
| 431 |
+
|
| 432 |
+
Cette fonction:
|
| 433 |
+
1. Télécharge fraud_model.pkl (le modèle ML)
|
| 434 |
+
2. Télécharge preprocessor.plk (le preprocessor)
|
| 435 |
+
3. Charge les 2 fichiers en mémoire
|
| 436 |
+
4. Met à jour les variables globales model et preprocessor
|
| 437 |
+
|
| 438 |
+
Returns:
|
| 439 |
+
tuple: (success: bool, message: str)
|
| 440 |
+
success = True si tout s'est bien passé
|
| 441 |
+
message = Message d'information ou d'erreur
|
| 442 |
"""
|
| 443 |
+
global model, preprocessor
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 444 |
|
| 445 |
try:
|
| 446 |
+
print("=" * 70)
|
| 447 |
+
print("📥 Téléchargement des modèles depuis HuggingFace...")
|
| 448 |
+
print(f" Repository: {REPO_ID}")
|
| 449 |
+
print("=" * 70)
|
| 450 |
+
|
| 451 |
+
# ========================================
|
| 452 |
+
# 1. TÉLÉCHARGER LE MODÈLE ML
|
| 453 |
+
# ========================================
|
| 454 |
+
|
| 455 |
+
print(f"\n⬇️ Download: {MODEL_FILENAME}...")
|
| 456 |
model_path = hf_hub_download(
|
| 457 |
repo_id=REPO_ID,
|
| 458 |
filename=MODEL_FILENAME,
|
| 459 |
+
revision=MODEL_VERSION, # None = latest
|
| 460 |
+
cache_dir="/tmp" # Dossier de cache
|
| 461 |
)
|
| 462 |
+
print(f"✅ Téléchargé: {model_path}")
|
| 463 |
+
|
| 464 |
+
# Charger le modèle
|
| 465 |
model = joblib.load(model_path)
|
| 466 |
+
print(f"✅ Modèle chargé: {type(model).__name__}")
|
| 467 |
+
|
| 468 |
+
# ========================================
|
| 469 |
+
# 2. TÉLÉCHARGER LE PREPROCESSOR
|
| 470 |
+
# ========================================
|
| 471 |
+
|
| 472 |
+
print(f"\n⬇️ Download: {PREPROCESSOR_FILENAME}...")
|
| 473 |
+
preprocessor_path = hf_hub_download(
|
| 474 |
+
repo_id=REPO_ID,
|
| 475 |
+
filename=PREPROCESSOR_FILENAME,
|
| 476 |
+
revision=MODEL_VERSION,
|
| 477 |
+
cache_dir="/tmp"
|
| 478 |
+
)
|
| 479 |
+
print(f"✅ Téléchargé: {preprocessor_path}")
|
| 480 |
+
|
| 481 |
+
# Charger le preprocessor
|
| 482 |
+
preprocessor = joblib.load(preprocessor_path)
|
| 483 |
+
print(f"✅ Preprocessor chargé: {type(preprocessor).__name__}")
|
| 484 |
+
|
| 485 |
+
print("\n" + "=" * 70)
|
| 486 |
+
print("✅ TOUS LES MODÈLES SONT CHARGÉS ET PRÊTS")
|
| 487 |
+
print("=" * 70)
|
| 488 |
+
|
| 489 |
+
return True, "Models loaded successfully"
|
| 490 |
|
| 491 |
except Exception as e:
|
| 492 |
+
error_msg = f"Erreur lors du chargement des modèles: {str(e)}"
|
| 493 |
+
print(f"\n❌ {error_msg}")
|
| 494 |
+
return False, error_msg
|
| 495 |
|
| 496 |
|
| 497 |
def calculate_risk_level(probability: float) -> str:
    """
    Map a fraud probability to a discrete risk level.

    Args:
        probability (float): Fraud probability in [0.0, 1.0].

    Returns:
        str: "LOW" (< 0.3), "MEDIUM" (< 0.6), "HIGH" (< 0.8)
             or "CRITICAL" (>= 0.8).
    """
    # Ordered thresholds: first one the probability falls under wins.
    for upper_bound, level in ((0.3, "LOW"), (0.6, "MEDIUM"), (0.8, "HIGH")):
        if probability < upper_bound:
            return level
    return "CRITICAL"
|
| 521 |
|
| 522 |
|
| 523 |
+
# =====================================================================
|
| 524 |
+
# ÉVÉNEMENT DE DÉMARRAGE
|
| 525 |
+
# =====================================================================
|
| 526 |
+
|
| 527 |
+
@app.on_event("startup")
async def startup_event():
    """
    Run once when the API process starts.

    Downloads and loads the ML model and the preprocessor into memory;
    they then stay resident for every subsequent request. If loading
    fails, the API still starts but the prediction endpoints will
    answer 503 (see /health).
    """
    banner = "🚀" * 35
    print("\n" + banner)
    print("🚀 DÉMARRAGE DE L'API FRAUD DETECTION")
    print(banner)

    # Fetch model + preprocessor from the HuggingFace Hub.
    success, message = load_models_from_hf()

    if not success:
        print(f"\n⚠️ API démarrée mais modèles non chargés: {message}")
        print("⚠️ Les endpoints de prédiction ne fonctionneront pas.\n")
    else:
        print("\n✅ API prête à recevoir des requêtes!\n")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 552 |
|
| 553 |
|
| 554 |
+
# =====================================================================
|
| 555 |
+
# ENDPOINTS - INFORMATION
|
| 556 |
+
# =====================================================================
|
| 557 |
|
| 558 |
@app.get(
    "/",
    tags=["📊 Information"],
    summary="Page d'accueil",
    description="Informations générales sur l'API"
)
async def root():
    """
    Root endpoint — general information about the API.

    Returns the API name, version and status, links to the docs and
    health check, a map of the available endpoints, and an example
    workflow showing how the three processing endpoints chain together.
    """
    endpoint_map = {
        "prediction": {
            "predict": "/predict - Prédiction complète (feat_eng + preprocess + predict)",
        },
        "feature_engineering": {
            "feat_eng": "/feat_eng - Feature engineering seulement",
        },
        "preprocessing": {
            "preprocess": "/preprocess - Preprocessing seulement",
        },
        "information": {
            "model_info": "/model/info - Informations sur le modèle",
            "features": "/features - Liste des features nécessaires",
        }
    }
    workflow = {
        "1": "Données brutes → /feat_eng → Features engineered",
        "2": "Features engineered → /preprocess → Features preprocessed",
        "3": "Features preprocessed → /predict → Prédiction",
        "shortcut": "Données brutes → /predict → Prédiction directe (recommandé)"
    }
    return {
        "message": "🚨 Fraud Detection API - Level UP",
        "version": "2.0.0",
        "status": "online",
        "documentation": "/docs",
        "health_check": "/health",
        "endpoints": endpoint_map,
        "example_workflow": workflow
    }
|
| 602 |
|
| 603 |
|
| 604 |
@app.get(
    "/health",
    tags=["📊 Information"],
    summary="Health check",
    description="Vérifier que l'API et les modèles sont opérationnels"
)
async def health_check():
    """
    Report the operational state of the API.

    Returns:
        dict: overall status ("healthy" only when both the ML model and
        the preprocessor are loaded), per-artifact loaded flags and
        types, the HuggingFace repository in use, and a UTC timestamp.
    """
    model_ready = model is not None
    preprocessor_ready = preprocessor is not None

    return {
        "status": "healthy" if (model_ready and preprocessor_ready) else "unhealthy",
        "model_loaded": model_ready,
        "preprocessor_loaded": preprocessor_ready,
        "model_repo": REPO_ID,
        "model_type": type(model).__name__ if model else None,
        "preprocessor_type": type(preprocessor).__name__ if preprocessor else None,
        "timestamp": datetime.utcnow().isoformat()
    }
|
| 632 |
|
| 633 |
|
| 634 |
@app.get(
    "/model/info",
    tags=["📊 Information"],
    summary="Informations sur le modèle",
    description="Détails techniques sur le modèle ML et le preprocessor"
)
async def model_info():
    """
    Detailed information about the loaded ML model and preprocessor.

    Returns:
        dict: model metadata (type, repo, filename, version),
        preprocessor metadata, and the expected feature set.

    Raises:
        HTTPException 503: when the model or preprocessor is not loaded.
    """
    if model is None or preprocessor is None:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="Models not loaded. Please check /health endpoint."
        )

    feature_names = get_model_features()

    model_section = {
        "type": type(model).__name__,
        "repo_id": REPO_ID,
        "filename": MODEL_FILENAME,
        "version": MODEL_VERSION or "latest"
    }
    preprocessor_section = {
        "type": type(preprocessor).__name__,
        "filename": PREPROCESSOR_FILENAME
    }
    # NOTE(review): the 17/4 split is hard-coded — assumed to match
    # get_model_features(); confirm if the feature set ever changes.
    features_section = {
        "total": len(feature_names),
        "numerical": 17,
        "categorical": 4,
        "list": feature_names
    }

    return {
        "model": model_section,
        "preprocessor": preprocessor_section,
        "features": features_section
    }
|
| 678 |
+
|
| 679 |
+
|
| 680 |
+
@app.get(
    "/features",
    tags=["📊 Information"],
    summary="Liste des features",
    description="Liste complète des features nécessaires pour une prédiction"
)
async def list_features():
    """
    List every feature expected by the model.

    The ordered feature list is split by convention: the first 17 names
    are numerical, the last 4 are categorical.

    Returns:
        dict: total count, the two groups with their sizes, and the
        full ordered list.
    """
    feature_names = get_model_features()

    # Convention: first 17 = numerical, remaining 4 = categorical.
    numerical_part = feature_names[:17]
    categorical_part = feature_names[17:]

    return {
        "total_features": len(feature_names),
        "numerical_features": {
            "count": len(numerical_part),
            "list": numerical_part
        },
        "categorical_features": {
            "count": len(categorical_part),
            "list": categorical_part
        },
        "all_features_in_order": feature_names
    }
|
| 712 |
|
| 713 |
|
| 714 |
+
# =====================================================================
|
| 715 |
+
# ENDPOINTS - FEATURE ENGINEERING
|
| 716 |
+
# =====================================================================
|
| 717 |
+
|
| 718 |
@app.post(
    "/feat_eng",
    response_model=FeaturesEngineeredOutput,
    tags=["🔧 Feature Engineering"],
    summary="Feature Engineering",
    description="Transforme les données brutes en features pour le modèle ML"
)
async def feature_engineering_endpoint(transaction: TransactionRawInput):
    """
    Apply feature engineering to one raw transaction.

    Delegates to engineer_features() (see feature_engineering.py),
    which computes the client↔merchant GPS distance (Haversine), the
    time-based features (hour, day of week, night/morning/afternoon/
    evening, business hours, weekend, year/month/day) and the
    cardholder age from the date of birth.

    Returns:
        dict with:
        - original_data: the raw input payload
        - engineered_features: only the newly created features
        - all_features: raw + engineered, ready for /preprocess

    Raises:
        HTTPException 500: when feature engineering fails.
    """
    try:
        raw = transaction.dict()

        print("\n" + "=" * 70)
        print("🔧 FEATURE ENGINEERING")
        print("=" * 70)

        # All transformations live in feature_engineering.py.
        enriched = engineer_features(raw)

        # Anything absent from the raw payload was created here.
        added_keys = set(enriched.keys()) - set(raw.keys())

        print(f"\n✅ Feature engineering terminé")
        print(f"   Features ajoutées: {len(added_keys)}")
        print(f"   Total features: {len(enriched)}")

        return {
            "original_data": raw,
            "engineered_features": {key: enriched[key] for key in added_keys},
            "all_features": enriched
        }

    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Feature engineering failed: {str(e)}"
        )
|
| 807 |
+
|
| 808 |
+
|
| 809 |
+
# =====================================================================
|
| 810 |
+
# ENDPOINTS - PREPROCESSING
|
| 811 |
+
# =====================================================================
|
| 812 |
+
|
| 813 |
+
@app.post(
    "/preprocess",
    response_model=PreprocessedOutput,
    tags=["⚙️ Preprocessing"],
    summary="Preprocessing",
    description="Applique le preprocessing (scaling + encoding) sur les features"
)
async def preprocessing_endpoint(features: Dict[str, Any]):
    """
    Apply preprocessing to an already-engineered feature dictionary.

    The fitted preprocessor applies a StandardScaler to the 17 numerical
    features and a OneHotEncoder to the 4 categorical ones (merchant,
    category, gender, state). The transformed matrix is too large to
    return in full, so only its shape and the first 10 values are sent
    back; use /predict for an end-to-end prediction.

    Args:
        features: dict with all 21 expected features (see /features).

    Raises:
        HTTPException 503: preprocessor not loaded.
        HTTPException 400: required features are missing.
        HTTPException 500: preprocessing failed for any other reason.
    """
    if preprocessor is None:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="Preprocessor not loaded"
        )

    try:
        print("\n" + "=" * 70)
        print("⚙️ PREPROCESSING")
        print("=" * 70)

        # Select the expected columns, in the order the model was trained on.
        df = prepare_for_model(features)

        if df is None:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="Missing required features. Use /features to see the full list."
            )

        print(f"\n📊 Features préparées: {df.shape}")

        # StandardScaler (numerical) + OneHotEncoder (categorical).
        X_preprocessed = preprocessor.transform(df)

        print(f"✅ Preprocessing terminé: {X_preprocessed.shape}")
        print(f"   Input: {df.shape[1]} features")
        print(f"   Output: {X_preprocessed.shape[1]} features (après encoding)")

        # Only shape + a sample: the full matrix would be too large to return.
        return {
            "preprocessed_shape": X_preprocessed.shape,
            "sample_values": X_preprocessed[0, :10].tolist(),  # first 10 values
            "message": f"Preprocessing successful. Shape: {X_preprocessed.shape}"
        }

    except HTTPException:
        # BUGFIX: re-raise HTTP errors unchanged. Previously the 400 raised
        # above was caught by the generic handler below and rewrapped as a
        # misleading 500 "Preprocessing failed: 400: Missing required...".
        raise
    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Preprocessing failed: {str(e)}"
        )
|
| 903 |
|
| 904 |
|
| 905 |
+
# =====================================================================
|
| 906 |
+
# ENDPOINTS - PREDICTION
|
| 907 |
+
# =====================================================================
|
| 908 |
+
|
| 909 |
@app.post(
    "/predict",
    response_model=PredictionOutput,
    tags=["🎯 Prediction"],
    summary="Prédiction complète",
    description="Prédiction de fraude complète (feature engineering + preprocessing + ML)"
)
async def predict_fraud(transaction: TransactionRawInput):
    """
    Full fraud-prediction pipeline on one raw transaction.

    Workflow:
        raw transaction → feature engineering (GPS distance, time
        features, age) → preprocessing (StandardScaler + OneHotEncoder)
        → ML prediction (predict + predict_proba).

    Returns:
        dict with:
        - is_fraud (bool): predicted class
        - fraud_probability (float, 0.0–1.0)
        - risk_level (str): LOW (<0.3) / MEDIUM (<0.6) / HIGH (<0.8) /
          CRITICAL (>=0.8)
        - confidence (float, 0.0–1.0): distance from the 0.5 decision
          threshold, rescaled
        - timestamp (str): UTC ISO timestamp of the prediction
        - processing_time_ms (float)

    Raises:
        HTTPException 503: models not loaded.
        HTTPException 400: features could not be prepared for the model.
        HTTPException 500: any other failure in the pipeline.
    """
    # Refuse to predict while the model artifacts are missing.
    if model is None or preprocessor is None:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="Models not loaded. Please check /health endpoint."
        )

    try:
        start_time = time.time()

        print("\n" + "🎯" * 35)
        print("🎯 PRÉDICTION DE FRAUDE - WORKFLOW COMPLET")
        print("🎯" * 35)

        # ---- Step 1/3: feature engineering ----
        print("\n[1/3] 🔧 Feature Engineering...")
        transaction_dict = transaction.dict()
        engineered = engineer_features(transaction_dict)

        # ---- Step 2/3: preprocessing ----
        print("\n[2/3] ⚙️ Preprocessing...")
        df = prepare_for_model(engineered)

        if df is None:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="Failed to prepare features for model"
            )

        X_preprocessed = preprocessor.transform(df)
        print(f"   Shape après preprocessing: {X_preprocessed.shape}")

        # ---- Step 3/3: ML prediction ----
        print("\n[3/3] 🤖 Prédiction ML...")

        prediction = model.predict(X_preprocessed)[0]    # 0 or 1
        proba = model.predict_proba(X_preprocessed)[0]   # [p(class 0), p(class 1)]

        # Probability of the fraud class (class 1).
        fraud_prob = float(proba[1])

        # Confidence = distance from the 0.5 decision threshold,
        # rescaled to [0, 1]; farther from 0.5 → more confident.
        confidence = abs(fraud_prob - 0.5) * 2

        risk = calculate_risk_level(fraud_prob)

        processing_time = (time.time() - start_time) * 1000  # milliseconds

        result = {
            "is_fraud": bool(prediction),
            "fraud_probability": round(fraud_prob, 4),
            "risk_level": risk,
            "confidence": round(confidence, 4),
            "timestamp": datetime.utcnow().isoformat(),
            "processing_time_ms": round(processing_time, 2)
        }

        print("\n" + "=" * 70)
        print(f"✅ RÉSULTAT:")
        print(f"   Fraude: {result['is_fraud']}")
        print(f"   Probabilité: {result['fraud_probability']:.1%}")
        print(f"   Risque: {result['risk_level']}")
        print(f"   Temps: {result['processing_time_ms']:.2f}ms")
        print("=" * 70)

        return result

    except HTTPException:
        # BUGFIX: re-raise HTTP errors (e.g. the 400 above) unchanged.
        # Previously they were swallowed by the generic handler below and
        # rewrapped as a 500 "Prediction failed: ...".
        raise
    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Prediction failed: {str(e)}"
        )
|
| 1095 |
|
| 1096 |
|
| 1097 |
+
# =====================================================================
|
| 1098 |
+
# ERROR HANDLERS (Gestion des erreurs)
|
| 1099 |
+
# =====================================================================
|
| 1100 |
|
| 1101 |
@app.exception_handler(ValueError)
async def value_error_handler(request, exc):
    """Translate data-validation ValueErrors into a 400 JSON response."""
    payload = {
        "error": "Invalid input",
        "detail": str(exc),
        "type": "ValueError"
    }
    return JSONResponse(
        status_code=status.HTTP_400_BAD_REQUEST,
        content=payload
    )
|
| 1112 |
|
| 1113 |
|
| 1114 |
@app.exception_handler(Exception)
async def general_exception_handler(request, exc):
    """Catch-all handler: any unexpected error becomes a generic 500."""
    payload = {
        "error": "Internal server error",
        "detail": "An unexpected error occurred",
        # Only the exception type is exposed, never its message/traceback.
        "type": type(exc).__name__
    }
    return JSONResponse(
        status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
        content=payload
    )
|
| 1125 |
|
| 1126 |
|
| 1127 |
+
# =====================================================================
|
| 1128 |
+
# POINT D'ENTRÉE
|
| 1129 |
+
# =====================================================================
|
| 1130 |
+
|
| 1131 |
+
if __name__ == "__main__":
    # Development entry point.
    #
    # Run with:   python app.py
    # or:         uvicorn app:app --reload --host 0.0.0.0 --port 8000
    # Docs at:    http://localhost:8000/docs
    import uvicorn

    dev_server_options = {
        "host": "0.0.0.0",
        "port": 8000,
        "reload": True,   # auto-reload in dev mode
        "log_level": "info",
    }
    uvicorn.run("app:app", **dev_server_options)
|
feature_engineering.py
ADDED
|
@@ -0,0 +1,486 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Feature Engineering Module
|
| 3 |
+
--------------------------
|
| 4 |
+
Toutes les transformations de features pour la détection de fraude
|
| 5 |
+
|
| 6 |
+
Ce module contient les fonctions pour :
|
| 7 |
+
1. Calculer la distance GPS entre client et marchand
|
| 8 |
+
2. Extraire les features temporelles (heure, jour, weekend, etc.)
|
| 9 |
+
3. Calculer l'âge du porteur de carte
|
| 10 |
+
4. Créer toutes les features nécessaires pour le modèle ML
|
| 11 |
+
|
| 12 |
+
Author: Terorra
|
| 13 |
+
Date: January 2026
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
from datetime import datetime, date
|
| 17 |
+
from math import radians, sin, cos, sqrt, atan2
|
| 18 |
+
import pandas as pd
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
# =====================================================================
|
| 22 |
+
# FONCTION 1 : CALCUL DE DISTANCE GPS
|
| 23 |
+
# =====================================================================
|
| 24 |
+
|
| 25 |
+
def haversine_distance(lat1, lon1, lat2, lon2):
    """
    Great-circle distance between two GPS points (Haversine formula).

    Args:
        lat1 (float): Latitude of point 1 (client).
        lon1 (float): Longitude of point 1 (client).
        lat2 (float): Latitude of point 2 (merchant).
        lon2 (float): Longitude of point 2 (merchant).

    Returns:
        float: Distance in kilometres, rounded to 2 decimals.
        None: If any coordinate is missing or the computation fails.
    """
    # Missing coordinate → no distance can be computed.
    if any(pd.isna([lat1, lon1, lat2, lon2])):
        return None

    try:
        earth_radius_km = 6371

        # Trigonometry needs radians, not degrees.
        phi1, lam1, phi2, lam2 = (radians(v) for v in (lat1, lon1, lat2, lon2))
        dphi = phi2 - phi1
        dlam = lam2 - lam1

        # Haversine: a = sin²(Δφ/2) + cos(φ1)·cos(φ2)·sin²(Δλ/2)
        a = sin(dphi / 2) ** 2 + cos(phi1) * cos(phi2) * sin(dlam / 2) ** 2
        arc = 2 * atan2(sqrt(a), sqrt(1 - a))

        return round(earth_radius_km * arc, 2)

    except Exception as e:
        print(f"⚠️ Erreur calcul distance: {e}")
        return None
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
# =====================================================================
|
| 80 |
+
# FONCTION 2 : CALCUL DE L'ÂGE
|
| 81 |
+
# =====================================================================
|
| 82 |
+
|
| 83 |
+
def calculate_age(born):
    """
    Compute age in full years from a birth date.

    Accounts for whether the birthday has already passed this year.

    Args:
        born (str): Birth date in 'YYYY-MM-DD' format.

    Returns:
        int: Age in years.
        None: If the date is missing or cannot be parsed.
    """
    # Missing value → no age.
    if born is None or born == '' or pd.isna(born):
        return None

    try:
        birthday = datetime.strptime(str(born), '%Y-%m-%d').date()
        now = date.today()

        # Subtract one year if this year's birthday hasn't happened yet.
        had_birthday_this_year = (now.month, now.day) >= (birthday.month, birthday.day)
        return now.year - birthday.year - (0 if had_birthday_this_year else 1)

    except Exception as e:
        print(f"⚠️ Erreur calcul âge pour {born}: {e}")
        return None
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
# =====================================================================
|
| 126 |
+
# FONCTION 3 : FEATURES TEMPORELLES
|
| 127 |
+
# =====================================================================
|
| 128 |
+
|
| 129 |
+
def extract_time_features(transaction_time):
    """
    Extract all temporal features from a transaction timestamp.

    From the transaction time this builds:
    - 'hour' (0-23) and 'day_of_week' (0=Monday, 6=Sunday)
    - binary period-of-day flags: 'is_night' (22h-6h), 'is_morning'
      (6h-12h), 'is_afternoon' (12h-18h), 'is_evening' (18h-22h),
      'is_business_hour' (8h-17h)
    - 'is_we': 1 on weekends (Saturday or Sunday)
    - calendar components: 'year', 'month', 'day'

    Args:
        transaction_time (str or datetime): Transaction timestamp.

    Returns:
        dict: Dictionary with every temporal feature.
        None: When the timestamp is missing or unparseable.

    Example:
        >>> extract_time_features('2026-01-29 14:30:00')
        {'hour': 14, 'day_of_week': 3, 'is_night': 0, 'is_morning': 0,
         'is_afternoon': 1, 'is_evening': 0, 'is_business_hour': 1,
         'is_we': 0, 'year': 2026, 'month': 1, 'day': 29}
    """
    # Missing value -> no features.
    if pd.isna(transaction_time) or transaction_time is None:
        return None

    try:
        # FIX: normalize through pd.to_datetime unconditionally. A plain
        # datetime.datetime has no `.dayofweek` attribute (only a pandas
        # Timestamp does), so the previous pass-through branch silently
        # failed for datetime inputs even though they are documented as
        # supported. pd.to_datetime accepts both strings and datetimes.
        dt = pd.to_datetime(transaction_time)

        # Hour of day (0-23)
        hour = dt.hour

        # Day of week (0=Monday, 6=Sunday)
        day_of_week = dt.dayofweek

        features = {
            # Raw time components
            'hour': hour,
            'day_of_week': day_of_week,

            # Period-of-day flags (binary 0/1)
            'is_night': 1 if 22 <= hour or hour < 6 else 0,   # 22h-6h
            'is_morning': 1 if 6 <= hour < 12 else 0,         # 6h-12h
            'is_afternoon': 1 if 12 <= hour < 18 else 0,      # 12h-18h
            'is_evening': 1 if 18 <= hour < 22 else 0,        # 18h-22h
            'is_business_hour': 1 if 8 <= hour < 17 else 0,   # 8h-17h

            # Weekend flag (Saturday=5, Sunday=6)
            'is_we': 1 if day_of_week in [5, 6] else 0,

            # Date components
            'year': dt.year,
            'month': dt.month,
            'day': dt.day
        }

        return features

    except Exception as e:
        print(f"⚠️ Erreur extraction features temps pour {transaction_time}: {e}")
        return None
|
| 211 |
+
|
| 212 |
+
|
| 213 |
+
# =====================================================================
|
| 214 |
+
# FONCTION 4 : FEATURE ENGINEERING COMPLET
|
| 215 |
+
# =====================================================================
|
| 216 |
+
|
| 217 |
+
def engineer_features(transaction_data):
    """
    Apply every feature transformation to a single raw transaction.

    Pipeline:
    1. Compute the client-merchant GPS distance ('distance_km')
    2. Extract temporal features from 'transaction_time'
    3. Compute the card holder's age from 'dob' when supplied

    Args:
        transaction_data (dict): Raw transaction fields.
            Needed for distance: lat, long, merch_lat, merch_long
            Needed for time features: transaction_time
            Optional: dob (to compute age), amt, cc_num, etc.

    Returns:
        dict: Copy of the input enriched with the engineered features
        ('distance_km', temporal features, 'age'). Features that could
        not be computed are set to None.
    """
    # Work on a copy so the caller's dict is never mutated.
    features = transaction_data.copy()

    # ----------------------------------------
    # 1. GPS distance (client -> merchant)
    # ----------------------------------------
    if all(key in features for key in ['lat', 'long', 'merch_lat', 'merch_long']):
        distance = haversine_distance(
            features['lat'],
            features['long'],
            features['merch_lat'],
            features['merch_long']
        )
        features['distance_km'] = distance
        print(f" ✅ Distance calculée: {distance} km")
    else:
        features['distance_km'] = None
        print(" ⚠️ Coordonnées GPS manquantes, distance non calculée")

    # ----------------------------------------
    # 2. Temporal features
    # ----------------------------------------
    if 'transaction_time' in features:
        time_features = extract_time_features(features['transaction_time'])

        if time_features:
            # Merge every temporal feature into the result.
            features.update(time_features)
            print(f" ✅ Features temporelles extraites (heure: {time_features['hour']})")
        else:
            print(" ⚠️ Impossible d'extraire les features temporelles")
    else:
        print(" ⚠️ Heure de transaction manquante")

    # ----------------------------------------
    # 3. Age (only when a birth date is supplied)
    # ----------------------------------------
    if 'dob' in features:
        age = calculate_age(features['dob'])
        features['age'] = age
        # FIX: compare against None, not truthiness — an age of 0 is a
        # valid value and must not be reported as a computation failure.
        if age is not None:
            print(f" ✅ Âge calculé: {age} ans")
        else:
            print(" ⚠️ Impossible de calculer l'âge")
    else:
        features['age'] = None
        print(" ⚠️ Date de naissance non fournie")

    return features
|
| 318 |
+
|
| 319 |
+
|
| 320 |
+
# =====================================================================
|
| 321 |
+
# FONCTION 5 : LISTE DES FEATURES POUR LE MODÈLE
|
| 322 |
+
# =====================================================================
|
| 323 |
+
|
| 324 |
+
def get_model_features():
    """
    Return the exact, ordered list of features the ML model expects.

    IMPORTANT: the column order MUST match the order used when the
    model was trained.

    Returns:
        list: 17 numerical feature names followed by 4 categorical ones
        (21 names total).
    """
    # 17 numerical features (raw inputs + engineered columns).
    # 'dayofweek' is the training-time name for 'day_of_week'.
    numerical = [
        'cc_num', 'amt', 'zip', 'city_pop',        # raw transaction fields
        'distance_km', 'age',                      # engineered: distance & age
        'hour', 'is_night', 'is_morning',          # engineered: time of day
        'is_afternoon', 'is_evening',
        'is_business_hour',
        'year', 'month', 'day',                    # engineered: date parts
        'dayofweek', 'is_we'                       # engineered: weekday/weekend
    ]

    # 4 categorical features.
    categorical = ['merchant', 'category', 'gender', 'state']

    # Numerical first, categorical last — exactly as at training time.
    return numerical + categorical
|
| 371 |
+
|
| 372 |
+
|
| 373 |
+
def prepare_for_model(features_dict):
    """
    Arrange engineered features into the DataFrame the model expects.

    Steps:
    1. Alias 'day_of_week' to 'dayofweek' (the model was trained with
       the latter column name).
    2. Verify every expected feature is present.
    3. Select the features in the exact training-time order.
    4. Wrap them in a single-row DataFrame.

    Args:
        features_dict (dict): Output of engineer_features().

    Returns:
        pd.DataFrame: One row, columns in model order — ready for
        model.predict().
        None: When at least one expected feature is missing.
    """
    # FIX: operate on a shallow copy. The previous version wrote the
    # 'dayofweek' alias back into the caller's dict — a hidden side
    # effect on the input argument.
    features = dict(features_dict)

    # The model was trained with 'dayofweek', not 'day_of_week'.
    if 'day_of_week' in features and 'dayofweek' not in features:
        features['dayofweek'] = features['day_of_week']

    # Expected columns, in training order.
    expected_features = get_model_features()

    # Abort early when anything the model needs is absent.
    missing_features = [f for f in expected_features if f not in features]
    if missing_features:
        print(f"❌ Features manquantes: {missing_features}")
        return None

    # Select only the expected features, in the exact expected order.
    selected_data = {feature: features[feature] for feature in expected_features}

    # The model expects a DataFrame (one row per transaction).
    df = pd.DataFrame([selected_data])

    print(f"✅ Features préparées: {df.shape[1]} colonnes")

    return df
|
| 420 |
+
|
| 421 |
+
|
| 422 |
+
# =====================================================================
|
| 423 |
+
# MÉTADONNÉES DU MODULE
|
| 424 |
+
# =====================================================================
|
| 425 |
+
|
| 426 |
+
# Module version and authorship metadata.
__version__ = "1.0.0"
__author__ = "Terorra"

# Public API: the names exported by `from feature_engineering import *`.
__all__ = [
    'haversine_distance',
    'calculate_age',
    'extract_time_features',
    'engineer_features',
    'get_model_features',
    'prepare_for_model'
]
|
| 438 |
+
|
| 439 |
+
|
| 440 |
+
# =====================================================================
|
| 441 |
+
# TEST DU MODULE (si exécuté directement)
|
| 442 |
+
# =====================================================================
|
| 443 |
+
|
| 444 |
+
if __name__ == "__main__":
    # Smoke test: run the full feature-engineering pipeline on one
    # hand-crafted transaction and display the intermediate results.
    banner = "=" * 70
    print(banner)
    print("🧪 Test du module Feature Engineering")
    print(banner)

    # One synthetic transaction covering every input field.
    sample_txn = {
        'cc_num': 374125201044065,
        'amt': 150.75,
        'lat': 40.7128,
        'long': -74.0060,
        'city_pop': 8000000,
        'merch_lat': 40.7589,
        'merch_long': -73.9851,
        'transaction_time': '2026-01-29 14:30:00',
        'dob': '1990-01-15',
        'merchant': 'Amazon',
        'category': 'shopping_net',
        'gender': 'M',
        'state': 'NY',
        'zip': 10001
    }

    print("\n📊 Données de test:")
    for field, value in sample_txn.items():
        print(f" {field}: {value}")

    print("\n🔧 Application du feature engineering...")
    engineered = engineer_features(sample_txn)

    print("\n📊 Features créées:")
    for field in ('distance_km', 'hour', 'is_afternoon', 'age'):
        if field in engineered:
            print(f" {field}: {engineered[field]}")

    print("\n📦 Préparation pour le modèle...")
    df_ready = prepare_for_model(engineered)

    if df_ready is not None:
        print(f"✅ Prêt pour prédiction: {df_ready.shape}")
        print(f" Colonnes: {list(df_ready.columns)}")

    print("\n" + banner)
|
requirements.txt
CHANGED
|
@@ -1,9 +1,57 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# =====================================================================
|
| 2 |
+
# Requirements pour l'API Fraud Detection - Level UP
|
| 3 |
+
# =====================================================================
|
| 4 |
+
#
|
| 5 |
+
# Installation:
|
| 6 |
+
# pip install -r requirements.txt
|
| 7 |
+
#
|
| 8 |
+
# Author: Terorra
|
| 9 |
+
# Date: January 2026
|
| 10 |
+
# =====================================================================
|
| 11 |
+
|
| 12 |
+
# =====================================================================
|
| 13 |
+
# FASTAPI ET SERVEUR WEB
|
| 14 |
+
# =====================================================================
|
| 15 |
+
fastapi[standard]
|
| 16 |
+
gunicorn
|
| 17 |
+
pydantic
|
| 18 |
+
|
| 19 |
+
# =====================================================================
|
| 20 |
+
# MACHINE LEARNING
|
| 21 |
+
# =====================================================================
|
| 22 |
+
scikit-learn
|
| 23 |
+
joblib
|
| 24 |
+
|
| 25 |
+
# =====================================================================
|
| 26 |
+
# DATA PROCESSING
|
| 27 |
+
# =====================================================================
|
| 28 |
+
pandas
|
| 29 |
+
numpy
|
| 30 |
+
# NOTE: "maths" removed — Python's math module is standard library and
# needs no install; the PyPI package named "maths" is unrelated.
|
| 31 |
+
|
| 32 |
+
# geopy — used to cross-check the hand-rolled haversine distance
|
| 33 |
+
geopy
|
| 34 |
+
|
| 35 |
+
# =====================================================================
|
| 36 |
+
# HUGGINGFACE
|
| 37 |
+
# =====================================================================
|
| 38 |
+
huggingface-hub
|
| 39 |
+
|
| 40 |
+
# =====================================================================
|
| 41 |
+
# UTILITAIRES
|
| 42 |
+
# =====================================================================
|
| 43 |
+
python-dotenv
|
| 44 |
+
requests
|
| 45 |
+
|
| 46 |
+
# =====================================================================
|
| 47 |
+
# DÉVELOPPEMENT (optionnel)
|
| 48 |
+
# =====================================================================
|
| 49 |
+
|
| 50 |
+
# Pytest - Pour les tests unitaires (optionnel)
|
| 51 |
+
# pytest==7.4.3
|
| 52 |
+
|
| 53 |
+
# Black - Formatage de code (optionnel)
|
| 54 |
+
# black==23.12.1
|
| 55 |
+
|
| 56 |
+
# Flake8 - Linting (optionnel)
|
| 57 |
+
# flake8==7.0.0
|