Neeraj Sathish Kumar committed on
Commit
298e633
·
1 Parent(s): bff189f
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ app_test.py
Dockerfile ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Use a slim Python image
2
+ FROM python:3.10-slim
3
+
4
+ # Set working directory inside the container
5
+ WORKDIR /app
6
+
7
+ # Copy dependencies and install them
8
+ COPY ./requirements.txt /app/requirements.txt
9
+ RUN pip install --no-cache-dir --upgrade -r /app/requirements.txt
10
+
11
+ # Copy the application code and model files
12
+ COPY . /app
13
+
14
+ # Expose the standard Hugging Face Space port
15
+ EXPOSE 7860
16
+
17
+ # Command to run the app using Uvicorn
18
+ # 'app:app' means look for the object named 'app' inside the file named 'app.py'
19
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
README.md DELETED
@@ -1,11 +0,0 @@
1
- ---
2
- title: CreditCardFraudDetection
3
- emoji: ⚡
4
- colorFrom: pink
5
- colorTo: indigo
6
- sdk: docker
7
- pinned: false
8
- license: apache-2.0
9
- ---
10
-
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
Readme.md ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Credit Card Fraud Detection API
3
+ emoji: 💳
4
+ colorFrom: purple
5
+ colorTo: pink
6
+ sdk: docker
7
+ python_version: 3.10
8
+ pinned: false
9
+ ---
10
+
11
+ # Credit Card Fraud Detection API
12
+
13
+ This is an ML API deployed on Hugging Face Spaces using **FastAPI + Docker**.
14
+
15
+ **Endpoints:**
16
+ - `/docs` → Interactive Swagger UI
17
+ - `/predict` → Single transaction fraud score
18
+ - `/predict_multiple` → Batch prediction
19
+
20
+ Models available: `xgboost`, `random_forest`, `decision_tree`
absolute/ccfd_1.0_decision-tree.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fbdba46e3c71e148e877b8d61b5f66afad69087e521cdf8b8b694affdeb3374
3
+ size 155243
absolute/ccfd_1.0_random-forest.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d30529b90df0f17fa7396347c4230061093ab45c200307b0ce65fb3f5288e12b
3
+ size 43463794
absolute/ccfd_1.0_xg-boost.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e53f13355a23ff71608e94bbd3af142a30b8535b76fe78e96433c9202e5debd4
3
+ size 5746222
app.py ADDED
@@ -0,0 +1,293 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import joblib
4
+ import pandas as pd
5
+ from typing import Dict, Any, List, Union, Optional
6
+ from fastapi import FastAPI, HTTPException
7
+ from pydantic import BaseModel, Field
8
+ import numpy as np
9
+ import warnings
10
+
11
+ # Suppress sklearn version warnings
12
+ warnings.filterwarnings("ignore", category=UserWarning, module="sklearn.base")
13
+
# --- FIX FOR SKLEARN VERSION COMPATIBILITY ---
# Pickled pipelines saved under a different scikit-learn release may reference
# the private class `_RemainderColsList`; install a shim before any model load.
try:
    import sklearn
    print(f"📦 scikit-learn version: {sklearn.__version__}")

    # Importing the module registers it in sys.modules so we can patch it.
    from sklearn.compose._column_transformer import ColumnTransformer

    ct_module = sys.modules['sklearn.compose._column_transformer']
    if not hasattr(ct_module, '_RemainderColsList'):
        class _RemainderColsList(list):
            """Compatibility shim for older sklearn pickled models"""
            pass

        # Expose the shim on the module so pickle can resolve the name.
        ct_module._RemainderColsList = _RemainderColsList
        print("✅ Applied sklearn compatibility patch for _RemainderColsList")

except Exception as e:
    # Best-effort: a failed patch only matters if a model actually needs it.
    print(f"⚠️ Warning during sklearn compatibility setup: {e}")
# --- MODEL CONFIGURATION & CONSTANTS ---
VERSION = "1.0"
MODELS = {}  # Global dictionary to store loaded pipelines (alias -> fitted pipeline)

# Model alias -> pickle path, resolved relative to the working directory (/app in Docker).
MODEL_MAP = {
    "decision_tree": "classifier/ccfd_1.0_decision-tree.pkl",
    "random_forest": "classifier/ccfd_1.0_random-forest.pkl",
    "xgboost": "classifier/ccfd_1.0_xg-boost.pkl",
}

# Exact feature names (and order) the pipelines expect at predict time.
EXPECTED_FEATURES = [
    "cc_num", "merchant", "category", "amt", "gender", "state", "zip",
    "lat", "long", "city_pop", "job", "unix_time", "merch_lat",
    "merch_long", "age", "trans_hour", "trans_day", "trans_month",
    "trans_weekday", "distance"
]

# --- FASTAPI SETUP ---
app = FastAPI(
    title="Credit Card Fraud Detection API",
    version=VERSION,
    description="Pure API server for fraud detection using ML models. Returns fraud_score (probability 0-100%)."
)
class SingleTransactionPayload(BaseModel):
    """Request body for POST /predict: one transaction plus the model alias."""
    model_name: str = Field(..., description="Model alias (e.g., 'xgboost', 'random_forest', 'decision_tree').")
    features: Dict[str, Any] = Field(..., description="Single transaction record for prediction.")

class MultipleTransactionsPayload(BaseModel):
    """Request body for POST /predict_multiple: a batch of transactions plus the model alias."""
    model_name: str = Field(..., description="Model alias (e.g., 'xgboost', 'random_forest', 'decision_tree').")
    features: List[Dict[str, Any]] = Field(..., description="List of transaction records for prediction.")
# --- LOAD MODELS AT STARTUP ---
def load_pipelines():
    """Load every pipeline in MODEL_MAP into the global MODELS dict.

    Missing or incompatible pickle files are logged and skipped so the API
    can still start with a partial (or empty) model set.
    """
    import sklearn
    print(f"🚀 Loading models for server version: {VERSION}")
    print(f"📦 Using scikit-learn: {sklearn.__version__}")
    print(f"📂 Current working directory: {os.getcwd()}")

    for alias, filename in MODEL_MAP.items():
        try:
            # Check if file exists before handing it to joblib.
            if not os.path.exists(filename):
                abs_path = os.path.abspath(filename)
                # FIX: the diagnostic f-strings lost their interpolations
                # (they printed a literal placeholder instead of the path).
                print(f"❌ Model file not found: {filename}")
                print(f"   Expected at: {abs_path}")
                continue

            # Get file info for the log line.
            file_size = os.path.getsize(filename) / (1024 * 1024)  # MB
            print(f"📥 Loading {alias} from {filename} ({file_size:.2f} MB)...")

            # Load the model
            MODELS[alias] = joblib.load(filename)
            print(f"✅ Successfully loaded {alias}")

        except AttributeError as e:
            # Typical symptom of a pickle saved under a different sklearn version.
            print(f"❌ Compatibility error loading {alias} from {filename}")
            print(f"   Error: {e}")
            print(f"   💡 This usually means the model was saved with a different sklearn version")
            print(f"   💡 Try re-training and saving the model with sklearn {sklearn.__version__}")
        except Exception as e:
            print(f"❌ Failed to load {alias} from {filename}")
            print(f"   Error type: {type(e).__name__}")
            print(f"   Error message: {e}")

    if not MODELS:
        print("⚠️ No models loaded. Predictions will fail.")
        print("   💡 Ensure .pkl files are in the same directory as app.py")
        print("   💡 Check that models were saved with compatible sklearn version")
    else:
        print(f"✅ Successfully loaded {len(MODELS)} model(s): {list(MODELS.keys())}")

# Load models on import
load_pipelines()
+
112
+ # --- HELPER FUNCTION: PREPARE FEATURES ---
113
+ def prepare_features(features_list: List[Dict[str, Any]]) -> pd.DataFrame:
114
+ """Validate and prepare features for prediction"""
115
+ df_features = pd.DataFrame(features_list)
116
+
117
+ # Check for missing features
118
+ missing_features = set(EXPECTED_FEATURES) - set(df_features.columns)
119
+ if missing_features:
120
+ raise ValueError(f"Missing required features: {list(missing_features)}")
121
+
122
+ # Reorder columns to match expected order
123
+ df_features = df_features[EXPECTED_FEATURES]
124
+
125
+ # CRITICAL: Convert object columns to category dtype (as done during training)
126
+ for col in df_features.select_dtypes(include=['object']).columns:
127
+ df_features[col] = df_features[col].astype("category")
128
+
129
+ return df_features
130
+
# --- FASTAPI ENDPOINTS ---
@app.get("/")
async def root():
    """Root endpoint - API information"""
    # Static route catalogue for discoverability.
    endpoint_index = {
        "health": "/health",
        "models": "/models",
        "predict": "/predict (POST) - Single transaction",
        "predict_multiple": "/predict_multiple (POST) - Multiple transactions",
        "docs": "/docs"
    }
    # Documents the JSON shape of the two prediction endpoints.
    response_shape = {
        "description": "Returns fraud_score (probability 0-100%) for fraud class",
        "single": {"fraud_score": "float (0-100)"},
        "multiple": {
            "predictions": "list of {'fraud_score': float}",
            "overall_stats": {
                "total": "int",
                "avg_fraud_score": "float",
                "min_fraud_score": "float",
                "max_fraud_score": "float"
            }
        }
    }
    return {
        "status": "ok",
        "message": "Credit Card Fraud Detection API",
        "version": VERSION,
        "models_loaded": list(MODELS.keys()),
        "endpoints": endpoint_index,
        "response_format": response_shape
    }
@app.get("/health")
async def health_check():
    """Health check endpoint"""
    loaded = list(MODELS.keys())
    # "degraded" signals the process is up but no pipeline could be loaded.
    return {
        "status": "healthy" if MODELS else "degraded",
        "version": VERSION,
        "models_loaded": loaded,
        "model_count": len(loaded)
    }
@app.get("/models")
async def list_models():
    """List all available and loaded models"""
    # "available" = configured aliases; "loaded" = pipelines actually in memory.
    return {
        "available_models": list(MODEL_MAP.keys()),
        "loaded_models": list(MODELS.keys()),
        "model_files": MODEL_MAP,
        "version": VERSION
    }
@app.post("/predict")
async def predict_single(payload: SingleTransactionPayload):
    """
    Predict fraud score for a SINGLE transaction

    Returns fraud_score (probability 0-100% for fraud class)
    """
    requested = payload.model_name

    # Guard: the requested alias must have a loaded pipeline.
    if requested not in MODELS:
        raise HTTPException(
            status_code=404,
            detail=f"Model '{requested}' not loaded. Available: {list(MODELS.keys())}"
        )
    pipeline = MODELS[requested]

    # Validate and shape the single record into a one-row frame.
    try:
        df = prepare_features([payload.features])
    except Exception as e:
        raise HTTPException(
            status_code=422,
            detail=f"Data validation failed: {str(e)}"
        )

    # Run the model; column 1 of predict_proba is the fraud class.
    try:
        # Plain Python float (not numpy) so the JSON encoder accepts it.
        fraud_probability = float(pipeline.predict_proba(df)[:, 1][0] * 100)
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Prediction execution failed: {str(e)}"
        )

    return {
        "success": True,
        "model_used": requested,
        "fraud_score": round(fraud_probability, 2)
    }
@app.post("/predict_multiple")
async def predict_multiple(payload: MultipleTransactionsPayload):
    """
    Predict fraud scores for MULTIPLE transactions

    Returns fraud_score (0-100%) for each transaction, plus overall statistics
    """
    requested = payload.model_name

    # Guard: the requested alias must have a loaded pipeline.
    if requested not in MODELS:
        raise HTTPException(
            status_code=404,
            detail=f"Model '{requested}' not loaded. Available: {list(MODELS.keys())}"
        )
    pipeline = MODELS[requested]

    # Validate and shape the batch into a frame.
    try:
        df = prepare_features(payload.features)
    except Exception as e:
        raise HTTPException(
            status_code=422,
            detail=f"Data validation failed: {str(e)}"
        )

    try:
        # Fraud-class probabilities scaled to 0-100.
        scores = pipeline.predict_proba(df)[:, 1] * 100

        # float(...) converts numpy scalars so JSON serialization works.
        predictions = [{"fraud_score": round(float(s), 2)} for s in scores]
        total = len(predictions)

        return {
            "success": True,
            "model_used": requested,
            "total_transactions": total,
            "predictions": predictions,
            "overall_stats": {
                "total": total,
                "avg_fraud_score": round(float(scores.mean()), 2),
                "max_fraud_score": round(float(scores.max()), 2),
                "min_fraud_score": round(float(scores.min()), 2)
            }
        }

    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Prediction execution failed: {str(e)}"
        )
# For local development: run the ASGI server directly.
# Port 7860 matches the Dockerfile EXPOSE / Hugging Face Spaces convention.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)
classifier/ccfd_1.0_decision-tree.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d5696eff56965f3f70b8bc7159cc1e8270e7031b6c036061c68b0d784d0189c
3
+ size 450366
classifier/ccfd_1.0_random-forest.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e29cef8d74812b9395dd928ec778cdf1f8a7b64982f10b11678791c8dbde4996
3
+ size 213830974
classifier/ccfd_1.0_xg-boost.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a9c0953b2960ce5024984f75f5331458e121ebf3c7e29fafa45af6b2d9cbea4
3
+ size 26586734
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ pandas
4
+ joblib
5
+ numpy
6
+ scikit-learn==1.6.1
7
+ xgboost
stats/metrics.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "Model": "Decision Tree",
4
+ "Accuracy": 0.9993,
5
+ "Precision": 0.9905,
6
+ "Recall": 0.893,
7
+ "F1-Score": 0.9393,
8
+ "ROC-AUC": 0.9929,
9
+ "PR-AUC": 0.9511
10
+ },
11
+ {
12
+ "Model": "Random Forest",
13
+ "Accuracy": 0.9943,
14
+ "Precision": 1.0,
15
+ "Recall": 0.0187,
16
+ "F1-Score": 0.0366,
17
+ "ROC-AUC": 0.9976,
18
+ "PR-AUC": 0.8256
19
+ },
20
+ {
21
+ "Model": "XGBoost",
22
+ "Accuracy": 1.0,
23
+ "Precision": 1.0,
24
+ "Recall": 1.0,
25
+ "F1-Score": 1.0,
26
+ "ROC-AUC": 1.0,
27
+ "PR-AUC": 1.0
28
+ }
29
+ ]
stats/tested_result.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "Model": "Decision Tree",
4
+ "SUCCESS (%)": 99.81,
5
+ "FAIL (%)": 0.18,
6
+ "UNCERTAIN (%)": 0.0,
7
+ "Full Time (s)": 1.87,
8
+ "Per Request (ms)": 11.44,
9
+ "RAM (GB)": 2.37
10
+ },
11
+ {
12
+ "Model": "Random Forest",
13
+ "SUCCESS (%)": 99.61,
14
+ "FAIL (%)": 0.39,
15
+ "UNCERTAIN (%)": 0.0,
16
+ "Full Time (s)": 17.32,
17
+ "Per Request (ms)": 96.42,
18
+ "RAM (GB)": 2.44
19
+ },
20
+ {
21
+ "Model": "XGBoost",
22
+ "SUCCESS (%)": 99.86,
23
+ "FAIL (%)": 0.13,
24
+ "UNCERTAIN (%)": 0.01,
25
+ "Full Time (s)": 36.17,
26
+ "Per Request (ms)": 11.93,
27
+ "RAM (GB)": 2.39
28
+ }
29
+ ]
stats/train.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "Model": "Decision Tree",
4
+ "Train Time (s)": 82.9,
5
+ "RAM \u0394 (GB)": -0.18,
6
+ "Model Size (MB)": 0.15
7
+ },
8
+ {
9
+ "Model": "Random Forest",
10
+ "Train Time (s)": 992.0,
11
+ "RAM \u0394 (GB)": 0.17,
12
+ "Model Size (MB)": 41.45
13
+ },
14
+ {
15
+ "Model": "XGBoost",
16
+ "Train Time (s)": 284.8,
17
+ "RAM \u0394 (GB)": 0.08,
18
+ "Model Size (MB)": 5.48
19
+ }
20
+ ]