Spaces:

Nav772
/

fraud-detector-cicd

Sleeping

App Files Files Community

Nav772 committed on Feb 4

Commit

7537ea4

verified ·

1 Parent(s): c20d6a0

Upload folder using huggingface_hub

Browse files

Files changed (6) hide show

README.md +22 -6
app.py +89 -0
metrics.json +30 -0
model.pkl +3 -0
requirements.txt +3 -0
retrain_report.json +47 -0

README.md CHANGED Viewed

@@ -1,12 +1,28 @@
 ---
-title: Fraud Detector Cicd
-emoji: 🏃
-colorFrom: green
-colorTo: indigo
 sdk: gradio
-sdk_version: 6.5.1
 app_file: app.py
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Fraud Detector CI/CD
+emoji: 🛡
+colorFrom: red
+colorTo: red
 sdk: gradio
+sdk_version: 5.29.0
 app_file: app.py
 pinned: false
+license: mit
 ---
+# Fraud Detection System (Auto-Retrained)
+Credit card fraud detector with an automated retraining pipeline.
+## Pipeline
+1. New data batch generated (simulating production)
+2. Drift detection (Wasserstein distance)
+3. Challenger model trained on combined data
+4. Champion vs Challenger comparison
+5. Deploy if the challenger wins, or if drift is detected and the challenger meets the minimum F1 threshold
+## Links
+- [GitHub Repo](https://github.com/Algo-nav/ml-pipeline-demo)
+- [Author](https://huggingface.co/Nav772)

app.py ADDED Viewed

	@@ -0,0 +1,89 @@

+import gradio as gr
+import pickle
+import numpy as np
+import json
+with open('model.pkl', 'rb') as f:
+    model = pickle.load(f)
+with open('metrics.json', 'r') as f:
+    metrics = json.load(f)
+with open('retrain_report.json', 'r') as f:
+    retrain_report = json.load(f)
+def predict(amount, hour, day_of_week, distance_from_home,
+            distance_from_last_txn, ratio_to_median, num_txn_last_24h,
+            is_foreign, merchant_risk_score, card_age_months):
+    features = np.array([[amount, hour, day_of_week, distance_from_home,
+                          distance_from_last_txn, ratio_to_median,
+                          num_txn_last_24h, int(is_foreign),
+                          merchant_risk_score, card_age_months]])
+    prob = model.predict_proba(features)[0]
+    return {'Legitimate': float(prob[0]), 'Fraud': float(prob[1])}
+def get_model_info():
+    m = metrics.get('metrics', {})
+    r = retrain_report
+    lines = []
+    lines.append('Last updated: ' + metrics.get('timestamp', 'N/A'))
+    lines.append('F1 Score: ' + str(round(m.get('f1', 0), 4)))
+    lines.append('ROC AUC: ' + str(round(m.get('roc_auc', 0), 4)))
+    lines.append('Precision: ' + str(round(m.get('precision', 0), 4)))
+    lines.append('Recall: ' + str(round(m.get('recall', 0), 4)))
+    lines.append('')
+    lines.append('Retrain Decision: ' + str(r.get('decision', 'N/A')))
+    lines.append('Reason: ' + str(r.get('reason', 'N/A')))
+    drift = r.get('drift', {})
+    lines.append('Drift Detected: ' + str(drift.get('dataset_drift', 'N/A')))
+    lines.append('Drifted Features: ' + str(drift.get('drifted_features', [])))
+    return chr(10).join(lines)
+# Build the two-tab Gradio interface. Components are created inside nested
+# context managers, so statement order here is significant.
+with gr.Blocks() as demo:
+    gr.Markdown('# Fraud Detection System (Auto-Retrained)')
+    gr.Markdown('This model is automatically retrained when data drift is detected.')
+    # Tab 1: transaction form wired to predict().
+    with gr.Tab('Predict'):
+        with gr.Row():
+            with gr.Column():
+                amount = gr.Number(value=50.0, label='Transaction Amount ($)')
+                hour = gr.Slider(0, 23, value=14, step=1, label='Hour of Day')
+                day = gr.Slider(0, 6, value=3, step=1, label='Day of Week (0=Mon)')
+                dist_home = gr.Number(value=10.0, label='Distance from Home (km)')
+                dist_last = gr.Number(value=5.0, label='Distance from Last Txn (km)')
+            with gr.Column():
+                ratio = gr.Number(value=1.0, label='Ratio to Median Spending')
+                n_txn = gr.Slider(0, 20, value=3, step=1, label='Txns in Last 24h')
+                foreign = gr.Checkbox(value=False, label='Foreign Transaction')
+                merchant = gr.Slider(0, 1, value=0.2, step=0.05, label='Merchant Risk Score')
+                card_age = gr.Slider(1, 120, value=36, step=1, label='Card Age (months)')
+        predict_btn = gr.Button('Analyze Transaction', variant='primary')
+        output = gr.Label(num_top_classes=2, label='Result')
+        # The inputs list follows the positional parameter order of predict().
+        predict_btn.click(
+            fn=predict,
+            inputs=[amount, hour, day, dist_home, dist_last,
+                    ratio, n_txn, foreign, merchant, card_age],
+            outputs=output
+        )
+        # Each example row lists values in the same order as the inputs list.
+        gr.Examples(
+            examples=[
+                [25.0, 14, 2, 5.0, 3.0, 0.8, 2, False, 0.1, 48],
+                [500.0, 3, 5, 100.0, 80.0, 5.0, 10, True, 0.7, 6],
+                [1200.0, 2, 6, 200.0, 150.0, 8.0, 15, True, 0.9, 3],
+            ],
+            inputs=[amount, hour, day, dist_home, dist_last,
+                    ratio, n_txn, foreign, merchant, card_age],
+        )
+    # Tab 2: on-demand text summary produced by get_model_info().
+    with gr.Tab('Model Info'):
+        info_btn = gr.Button('Show Model Info')
+        info_output = gr.Textbox(label='Model Details', lines=12)
+        info_btn.click(fn=get_model_info, outputs=info_output)
+# Launched at module level, so the app starts whenever this file is executed
+# or imported.
+demo.launch()

metrics.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "metrics": {
+    "accuracy": 0.9975,
+    "precision": 0.9873417721518988,
+    "recall": 0.975,
+    "f1": 0.9811320754716981,
+    "roc_auc": 0.9999553571428571
+  },
+  "data_stats": {
+    "n_samples": 6000,
+    "fraud_rate": 0.06666666666666667,
+    "train_size": 4800,
+    "test_size": 1200
+  },
+  "timestamp": "2026-02-04T14:29:29.917435",
+  "model_type": "HistGradientBoostingClassifier",
+  "features": [
+    "amount",
+    "hour",
+    "day_of_week",
+    "distance_from_home",
+    "distance_from_last_txn",
+    "ratio_to_median",
+    "num_txn_last_24h",
+    "is_foreign",
+    "merchant_risk_score",
+    "card_age_months"
+  ],
+  "retrain_reason": "Drift detected (60% features). Challenger F1 0.9811 meets minimum threshold. Retraining on current distribution is recommended."
+}

model.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fe2b153c3bd79ea33da32ead8eac2a9f85021da201917bbd864e952545a57581
+size 256566

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+gradio
+numpy
+scikit-learn

retrain_report.json ADDED Viewed

	@@ -0,0 +1,47 @@

+{
+  "timestamp": "2026-02-04T14:29:29.649381",
+  "drift_level": "severe",
+  "decision": "deploy",
+  "reason": "Drift detected (60% features). Challenger F1 0.9811 meets minimum threshold. Retraining on current distribution is recommended.",
+  "drift": {
+    "dataset_drift": true,
+    "drift_share": 0.6,
+    "drifted_features": [
+      "amount",
+      "distance_from_last_txn",
+      "ratio_to_median",
+      "num_txn_last_24h",
+      "is_foreign",
+      "merchant_risk_score"
+    ],
+    "feature_scores": {
+      "amount": 0.7076,
+      "hour": 0.067,
+      "day_of_week": 0.0239,
+      "distance_from_home": 0.0745,
+      "distance_from_last_txn": 0.2056,
+      "ratio_to_median": 0.216,
+      "num_txn_last_24h": 0.2618,
+      "is_foreign": 0.2407,
+      "merchant_risk_score": 0.3282,
+      "card_age_months": 0.0479
+    },
+    "threshold": 0.1
+  },
+  "challenger_metrics": {
+    "accuracy": 0.9975,
+    "precision": 0.9873417721518988,
+    "recall": 0.975,
+    "f1": 0.9811320754716981,
+    "roc_auc": 0.9999553571428571
+  },
+  "champion_metrics_on_new_data": {
+    "accuracy": 0.9991666666666666,
+    "precision": 0.9876543209876543,
+    "recall": 1.0,
+    "f1": 0.9937888198757764,
+    "roc_auc": 1.0
+  },
+  "f1_improvement": -0.0127,
+  "auc_improvement": -0.0
+}