Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files- README.md +22 -6
- app.py +89 -0
- metrics.json +30 -0
- model.pkl +3 -0
- requirements.txt +3 -0
- retrain_report.json +47 -0
README.md
CHANGED
|
@@ -1,12 +1,28 @@
|
|
| 1 |
---
|
| 2 |
-
title: Fraud Detector
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: gradio
|
| 7 |
-
sdk_version:
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
|
|
|
| 10 |
---
|
| 11 |
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Fraud Detector CI/CD
|
| 3 |
+
emoji: 🛡
|
| 4 |
+
colorFrom: red
|
| 5 |
+
colorTo: red
|
| 6 |
sdk: gradio
|
| 7 |
+
sdk_version: 5.29.0
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
+
license: mit
|
| 11 |
---
|
| 12 |
|
| 13 |
+
# Fraud Detection System (Auto-Retrained)
|
| 14 |
+
|
| 15 |
+
Credit card fraud detector with an automated retraining pipeline.
|
| 16 |
+
|
| 17 |
+
## Pipeline
|
| 18 |
+
|
| 19 |
+
1. New data batch generated (simulating production)
|
| 20 |
+
2. Drift detection (Wasserstein distance)
|
| 21 |
+
3. Challenger model trained on combined data
|
| 22 |
+
4. Champion vs Challenger comparison
|
| 23 |
+
5. Deploy only if challenger wins (when drift is detected, a challenger that meets the minimum F1 threshold is also deployed)
|
| 24 |
+
|
| 25 |
+
## Links
|
| 26 |
+
|
| 27 |
+
- [GitHub Repo](https://github.com/Algo-nav/ml-pipeline-demo)
|
| 28 |
+
- [Author](https://huggingface.co/Nav772)
|
app.py
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import pickle
|
| 3 |
+
import numpy as np
|
| 4 |
+
import json
|
| 5 |
+
|
| 6 |
+
# Load the serialized classifier once, at import time.
# NOTE(security): pickle.load can execute arbitrary code embedded in the file.
# Only ship a model.pkl produced by the trusted retraining pipeline.
with open('model.pkl', 'rb') as f:
    model = pickle.load(f)

# Evaluation metrics for the deployed model. The encoding is explicit so that
# parsing does not depend on the host's locale default.
with open('metrics.json', 'r', encoding='utf-8') as f:
    metrics = json.load(f)

# Report written by the most recent retraining run (decision, drift details).
with open('retrain_report.json', 'r', encoding='utf-8') as f:
    retrain_report = json.load(f)
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def predict(amount, hour, day_of_week, distance_from_home,
            distance_from_last_txn, ratio_to_median, num_txn_last_24h,
            is_foreign, merchant_risk_score, card_age_months):
    """Score a single transaction with the loaded model.

    The ten arguments are packed, in signature order, into a one-row array
    (``is_foreign`` is cast to ``int``) and passed to the module-level
    ``model.predict_proba``.

    Returns:
        A dict with keys ``'Legitimate'`` and ``'Fraud'`` holding the first
        and second entries of the returned probability row as floats.
    """
    row = [
        amount,
        hour,
        day_of_week,
        distance_from_home,
        distance_from_last_txn,
        ratio_to_median,
        num_txn_last_24h,
        int(is_foreign),
        merchant_risk_score,
        card_age_months,
    ]
    # predict_proba returns one row per sample; only one sample is scored.
    scores = model.predict_proba(np.array([row]))[0]
    # NOTE(review): assumes the model's class order is [legitimate, fraud];
    # confirm against the training code.
    return {'Legitimate': float(scores[0]), 'Fraud': float(scores[1])}
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def get_model_info():
    """Build a plain-text summary of the deployed model and last retrain run.

    Reads the module-level ``metrics`` and ``retrain_report`` dicts.

    Returns:
        A newline-joined string listing the metrics timestamp, four scores
        rounded to 4 decimals, the retrain decision and reason, and the
        drift flag and drifted feature list. Missing scores are shown as
        ``0``; missing text fields are shown as ``'N/A'``.
    """
    scores = metrics.get('metrics', {})
    drift = retrain_report.get('drift', {})
    lines = [
        # f-string formatting stringifies every value, so a non-string
        # timestamp (e.g. null in the JSON) cannot raise TypeError the way
        # bare string concatenation did.
        f"Last updated: {metrics.get('timestamp', 'N/A')}",
        f"F1 Score: {round(scores.get('f1', 0), 4)}",
        f"ROC AUC: {round(scores.get('roc_auc', 0), 4)}",
        f"Precision: {round(scores.get('precision', 0), 4)}",
        f"Recall: {round(scores.get('recall', 0), 4)}",
        '',
        f"Retrain Decision: {retrain_report.get('decision', 'N/A')}",
        f"Reason: {retrain_report.get('reason', 'N/A')}",
        f"Drift Detected: {drift.get('dataset_drift', 'N/A')}",
        f"Drifted Features: {drift.get('drifted_features', [])}",
    ]
    return '\n'.join(lines)
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
# Gradio UI: a prediction tab wired to predict() and an info tab wired to
# get_model_info().
with gr.Blocks() as demo:
    gr.Markdown('# Fraud Detection System (Auto-Retrained)')
    gr.Markdown('This model is automatically retrained when data drift is detected.')

    with gr.Tab('Predict'):
        with gr.Row():
            # Left column: amount, timing and distance inputs.
            with gr.Column():
                amount = gr.Number(value=50.0, label='Transaction Amount ($)')
                hour = gr.Slider(minimum=0, maximum=23, value=14, step=1,
                                 label='Hour of Day')
                day = gr.Slider(minimum=0, maximum=6, value=3, step=1,
                                label='Day of Week (0=Mon)')
                dist_home = gr.Number(value=10.0, label='Distance from Home (km)')
                dist_last = gr.Number(value=5.0, label='Distance from Last Txn (km)')
            # Right column: spending ratio, activity, merchant and card inputs.
            with gr.Column():
                ratio = gr.Number(value=1.0, label='Ratio to Median Spending')
                n_txn = gr.Slider(minimum=0, maximum=20, value=3, step=1,
                                  label='Txns in Last 24h')
                foreign = gr.Checkbox(value=False, label='Foreign Transaction')
                merchant = gr.Slider(minimum=0, maximum=1, value=0.2, step=0.05,
                                     label='Merchant Risk Score')
                card_age = gr.Slider(minimum=1, maximum=120, value=36, step=1,
                                     label='Card Age (months)')

        # Component order must match predict()'s positional parameters; the
        # same list feeds both the button handler and the example rows.
        txn_inputs = [
            amount, hour, day, dist_home, dist_last,
            ratio, n_txn, foreign, merchant, card_age,
        ]

        predict_btn = gr.Button('Analyze Transaction', variant='primary')
        output = gr.Label(num_top_classes=2, label='Result')

        predict_btn.click(fn=predict, inputs=txn_inputs, outputs=output)

        gr.Examples(
            examples=[
                [25.0, 14, 2, 5.0, 3.0, 0.8, 2, False, 0.1, 48],
                [500.0, 3, 5, 100.0, 80.0, 5.0, 10, True, 0.7, 6],
                [1200.0, 2, 6, 200.0, 150.0, 8.0, 15, True, 0.9, 3],
            ],
            inputs=txn_inputs,
        )

    with gr.Tab('Model Info'):
        info_btn = gr.Button('Show Model Info')
        info_output = gr.Textbox(label='Model Details', lines=12)
        info_btn.click(fn=get_model_info, outputs=info_output)

demo.launch()
|
metrics.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"metrics": {
|
| 3 |
+
"accuracy": 0.9975,
|
| 4 |
+
"precision": 0.9873417721518988,
|
| 5 |
+
"recall": 0.975,
|
| 6 |
+
"f1": 0.9811320754716981,
|
| 7 |
+
"roc_auc": 0.9999553571428571
|
| 8 |
+
},
|
| 9 |
+
"data_stats": {
|
| 10 |
+
"n_samples": 6000,
|
| 11 |
+
"fraud_rate": 0.06666666666666667,
|
| 12 |
+
"train_size": 4800,
|
| 13 |
+
"test_size": 1200
|
| 14 |
+
},
|
| 15 |
+
"timestamp": "2026-02-04T14:29:29.917435",
|
| 16 |
+
"model_type": "HistGradientBoostingClassifier",
|
| 17 |
+
"features": [
|
| 18 |
+
"amount",
|
| 19 |
+
"hour",
|
| 20 |
+
"day_of_week",
|
| 21 |
+
"distance_from_home",
|
| 22 |
+
"distance_from_last_txn",
|
| 23 |
+
"ratio_to_median",
|
| 24 |
+
"num_txn_last_24h",
|
| 25 |
+
"is_foreign",
|
| 26 |
+
"merchant_risk_score",
|
| 27 |
+
"card_age_months"
|
| 28 |
+
],
|
| 29 |
+
"retrain_reason": "Drift detected (60% features). Challenger F1 0.9811 meets minimum threshold. Retraining on current distribution is recommended."
|
| 30 |
+
}
|
model.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fe2b153c3bd79ea33da32ead8eac2a9f85021da201917bbd864e952545a57581
|
| 3 |
+
size 256566
|
requirements.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio
|
| 2 |
+
numpy
|
| 3 |
+
scikit-learn
|
retrain_report.json
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"timestamp": "2026-02-04T14:29:29.649381",
|
| 3 |
+
"drift_level": "severe",
|
| 4 |
+
"decision": "deploy",
|
| 5 |
+
"reason": "Drift detected (60% features). Challenger F1 0.9811 meets minimum threshold. Retraining on current distribution is recommended.",
|
| 6 |
+
"drift": {
|
| 7 |
+
"dataset_drift": true,
|
| 8 |
+
"drift_share": 0.6,
|
| 9 |
+
"drifted_features": [
|
| 10 |
+
"amount",
|
| 11 |
+
"distance_from_last_txn",
|
| 12 |
+
"ratio_to_median",
|
| 13 |
+
"num_txn_last_24h",
|
| 14 |
+
"is_foreign",
|
| 15 |
+
"merchant_risk_score"
|
| 16 |
+
],
|
| 17 |
+
"feature_scores": {
|
| 18 |
+
"amount": 0.7076,
|
| 19 |
+
"hour": 0.067,
|
| 20 |
+
"day_of_week": 0.0239,
|
| 21 |
+
"distance_from_home": 0.0745,
|
| 22 |
+
"distance_from_last_txn": 0.2056,
|
| 23 |
+
"ratio_to_median": 0.216,
|
| 24 |
+
"num_txn_last_24h": 0.2618,
|
| 25 |
+
"is_foreign": 0.2407,
|
| 26 |
+
"merchant_risk_score": 0.3282,
|
| 27 |
+
"card_age_months": 0.0479
|
| 28 |
+
},
|
| 29 |
+
"threshold": 0.1
|
| 30 |
+
},
|
| 31 |
+
"challenger_metrics": {
|
| 32 |
+
"accuracy": 0.9975,
|
| 33 |
+
"precision": 0.9873417721518988,
|
| 34 |
+
"recall": 0.975,
|
| 35 |
+
"f1": 0.9811320754716981,
|
| 36 |
+
"roc_auc": 0.9999553571428571
|
| 37 |
+
},
|
| 38 |
+
"champion_metrics_on_new_data": {
|
| 39 |
+
"accuracy": 0.9991666666666666,
|
| 40 |
+
"precision": 0.9876543209876543,
|
| 41 |
+
"recall": 1.0,
|
| 42 |
+
"f1": 0.9937888198757764,
|
| 43 |
+
"roc_auc": 1.0
|
| 44 |
+
},
|
| 45 |
+
"f1_improvement": -0.0127,
|
| 46 |
+
"auc_improvement": -0.0
|
| 47 |
+
}
|