import logging
import os
import time

import joblib
import numpy as np
import pandas as pd
from fastapi import FastAPI, HTTPException
from fastapi.responses import JSONResponse

from src.api.schemas import TransactionInput, PredictionOutput
|
|
# Serialized artifacts are read from a "models" directory; the path is
# relative, so it resolves against the process's working directory.
MODELS_DIR = 'models'
MODEL_PATH, SCALER_PATH = (
    os.path.join(MODELS_DIR, artifact_name)
    for artifact_name in ('xgboost_tuned.pkl', 'scaler.pkl')
)

# Column order of the single-row frame handed to the model:
# Time, then V1 through V28, then Amount (30 columns in total).
FEATURE_ORDER = ['Time', *(f'V{i}' for i in range(1, 29)), 'Amount']
|
|
# Fail fast at import time: both artifacts must exist before either one is
# deserialized, so the service never starts half-configured.
for _artifact_path, _missing_message in (
    (MODEL_PATH, f"Model not found at {MODEL_PATH}. Run tune_xgboost.py first."),
    (SCALER_PATH, f"Scaler not found at {SCALER_PATH}. Run preprocess.py first."),
):
    if not os.path.exists(_artifact_path):
        raise RuntimeError(_missing_message)
# Drop the loop temporaries so the module namespace matches the original.
del _artifact_path, _missing_message

# Loaded once per process and shared by every request handler below.
model, scaler = joblib.load(MODEL_PATH), joblib.load(SCALER_PATH)
|
|
# Application object; the metadata below is passed straight to FastAPI.
app = FastAPI(
    version="1.0.0",
    title="Fraud Detection API",
    description=(
        "Predicts whether a credit card transaction is fraudulent. Powered by "
        "a tuned XGBoost model trained on 284,807 real transactions."
    ),
)
|
|
|
|
| |
| |
| |
# Landing route: returns static service metadata plus a short endpoint index.
# Documented with comments rather than a docstring so the description FastAPI
# generates for this route stays exactly as it was.
@app.get("/", tags=["Info"])
def root():
    endpoint_index = {
        "POST /predict": "Send a transaction, get fraud prediction",
        "GET /health": "Health check",
        "GET /docs": "Interactive API documentation (Swagger UI)",
    }
    return {
        "name": "Fraud Detection API",
        "version": "1.0.0",
        "model": "XGBoost (tuned)",
        "endpoints": endpoint_index,
    }
|
|
|
|
# Health probe for GET and HEAD: reports a fixed "ok" status and whether the
# module-level model object is set. Documented with comments rather than a
# docstring so the generated route description stays unchanged.
@app.api_route("/health", methods=["GET", "HEAD"], tags=["Info"])
def health():
    model_loaded = model is not None
    return {"status": "ok", "model_loaded": model_loaded}
|
|
|
|
@app.post("/predict", response_model=PredictionOutput, tags=["Prediction"])
def predict(transaction: TransactionInput):
    """
    Send a transaction's 30 features and get back:
    - is_fraud: true or false
    - fraud_probability: confidence score between 0 and 1
    - inference_ms: how long the prediction took

    Note on V1–V28: These are PCA-transformed features produced by the bank's
    internal systems to protect cardholder privacy. You don't enter them manually —
    the bank's transaction pipeline generates them automatically. For testing,
    copy any row from creditcard.csv and remove the Class column.
    """
    # NOTE: the docstring above is user-facing (FastAPI publishes it as the
    # endpoint description), so maintainer notes live in comments instead.
    #
    # Raises HTTPException(500) carrying the original error text whenever any
    # step below fails; the traceback is logged server-side and the original
    # exception is chained as the cause.
    try:
        # One-row frame with columns in FEATURE_ORDER.
        # NOTE(review): presumably TransactionInput's field names match
        # FEATURE_ORDER exactly; verify against src.api.schemas.
        features = pd.DataFrame([transaction.model_dump()], columns=FEATURE_ORDER)

        # Only Amount and Time go through the fitted scaler; V1..V28 are
        # handed to the model as received.
        features[['Amount', 'Time']] = scaler.transform(features[['Amount', 'Time']])

        # Time the model call alone, so inference_ms excludes preprocessing.
        started = time.perf_counter()
        fraud_proba = float(model.predict_proba(features)[0][1])
        inference_ms = round((time.perf_counter() - started) * 1000, 2)

        # Class-1 probability of at least 0.5 is reported as fraud.
        is_fraud = fraud_proba >= 0.5

        return PredictionOutput(
            is_fraud=is_fraud,
            fraud_probability=round(fraud_proba, 4),
            inference_ms=inference_ms,
        )
    except Exception as exc:
        # Previously the failure was only echoed to the client; record the
        # full traceback for operators and keep the cause attached.
        logging.getLogger(__name__).exception("Prediction failed")
        raise HTTPException(status_code=500, detail=str(exc)) from exc
|
|