Nav772 committed on
Commit
7537ea4
·
verified ·
1 Parent(s): c20d6a0

Upload folder using huggingface_hub

Browse files
Files changed (6) hide show
  1. README.md +22 -6
  2. app.py +89 -0
  3. metrics.json +30 -0
  4. model.pkl +3 -0
  5. requirements.txt +3 -0
  6. retrain_report.json +47 -0
README.md CHANGED
@@ -1,12 +1,28 @@
1
  ---
2
- title: Fraud Detector Cicd
3
- emoji: 🏃
4
- colorFrom: green
5
- colorTo: indigo
6
  sdk: gradio
7
- sdk_version: 6.5.1
8
  app_file: app.py
9
  pinned: false
 
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Fraud Detector CI/CD
3
+ emoji: 🛡
4
+ colorFrom: red
5
+ colorTo: red
6
  sdk: gradio
7
+ sdk_version: 5.29.0
8
  app_file: app.py
9
  pinned: false
10
+ license: mit
11
  ---
12
 
13
+ # Fraud Detection System (Auto-Retrained)
14
+
15
+ Credit card fraud detector with automated retraining pipeline.
16
+
17
+ ## Pipeline
18
+
19
+ 1. New data batch generated (simulating production)
20
+ 2. Drift detection (Wasserstein distance)
21
+ 3. Challenger model trained on combined data
22
+ 4. Champion vs Challenger comparison
23
+ 5. Deploy if the challenger wins, or (when drift is detected) if the challenger meets the minimum F1 threshold
24
+
25
+ ## Links
26
+
27
+ - [GitHub Repo](https://github.com/Algo-nav/ml-pipeline-demo)
28
+ - [Author](https://huggingface.co/Nav772)
app.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pickle
3
+ import numpy as np
4
+ import json
5
+
6
# Load the serialized classifier that the prediction tab scores with.
# SECURITY: pickle.load can execute arbitrary code embedded in the file.
# model.pkl is expected to be the artifact shipped with this app; never point
# this at a user-supplied or otherwise untrusted file.
with open('model.pkl', 'rb') as f:
    model = pickle.load(f)

# Evaluation metrics for the deployed model (shown on the "Model Info" tab).
# JSON is read explicitly as UTF-8 so parsing does not depend on the host
# locale's default text encoding.
with open('metrics.json', 'r', encoding='utf-8') as f:
    metrics = json.load(f)

# Outcome of the most recent retraining run (decision, reason, drift details).
with open('retrain_report.json', 'r', encoding='utf-8') as f:
    retrain_report = json.load(f)
14
+
15
+
16
def predict(amount, hour, day_of_week, distance_from_home,
            distance_from_last_txn, ratio_to_median, num_txn_last_24h,
            is_foreign, merchant_risk_score, card_age_months):
    """Score a single transaction with the module-level ``model``.

    The ten arguments are packed, in signature order, into a one-row array
    (``is_foreign`` is cast to ``int``) and passed to
    ``model.predict_proba``.

    Returns:
        A dict with keys ``'Legitimate'`` and ``'Fraud'`` holding the first
        and second entries of the predicted probability row as floats.
    """
    # NOTE(review): column order presumably has to match the order the model
    # was trained with (see the "features" list in metrics.json); confirm.
    row = [
        amount,
        hour,
        day_of_week,
        distance_from_home,
        distance_from_last_txn,
        ratio_to_median,
        num_txn_last_24h,
        int(is_foreign),
        merchant_risk_score,
        card_age_months,
    ]
    class_probs = model.predict_proba(np.array([row]))[0]
    return {
        'Legitimate': float(class_probs[0]),
        'Fraud': float(class_probs[1]),
    }
25
+
26
+
27
def get_model_info():
    """Build the plain-text summary shown on the "Model Info" tab.

    Reads the module-level ``metrics`` and ``retrain_report`` dicts and
    returns one newline-joined string: the timestamp, four scores rounded to
    four decimals, a blank line, then the retrain decision, its reason and
    the drift flags. Missing scores fall back to 0, missing text fields to
    'N/A', and a missing drifted-features entry to an empty list.
    """
    scores = metrics.get('metrics', {})
    drift_info = retrain_report.get('drift', {})

    report = ['Last updated: ' + metrics.get('timestamp', 'N/A')]

    # The four score lines share one format, so generate them from a table.
    score_rows = (
        ('F1 Score', 'f1'),
        ('ROC AUC', 'roc_auc'),
        ('Precision', 'precision'),
        ('Recall', 'recall'),
    )
    for label, key in score_rows:
        report.append(label + ': ' + str(round(scores.get(key, 0), 4)))

    report.extend([
        '',
        'Retrain Decision: ' + str(retrain_report.get('decision', 'N/A')),
        'Reason: ' + str(retrain_report.get('reason', 'N/A')),
        'Drift Detected: ' + str(drift_info.get('dataset_drift', 'N/A')),
        'Drifted Features: ' + str(drift_info.get('drifted_features', [])),
    ])
    return '\n'.join(report)
43
+
44
+
45
# Gradio UI: a "Predict" tab that scores one transaction via predict(), and a
# "Model Info" tab that shows the text produced by get_model_info().
with gr.Blocks() as demo:
    gr.Markdown('# Fraud Detection System (Auto-Retrained)')
    gr.Markdown('This model is automatically retrained when data drift is detected.')

    with gr.Tab('Predict'):
        with gr.Row():
            with gr.Column():
                amount = gr.Number(label='Transaction Amount ($)', value=50.0)
                hour = gr.Slider(0, 23, label='Hour of Day', value=14, step=1)
                day = gr.Slider(0, 6, label='Day of Week (0=Mon)', value=3, step=1)
                dist_home = gr.Number(label='Distance from Home (km)', value=10.0)
                dist_last = gr.Number(label='Distance from Last Txn (km)', value=5.0)
            with gr.Column():
                ratio = gr.Number(label='Ratio to Median Spending', value=1.0)
                n_txn = gr.Slider(0, 20, label='Txns in Last 24h', value=3, step=1)
                foreign = gr.Checkbox(label='Foreign Transaction', value=False)
                merchant = gr.Slider(0, 1, label='Merchant Risk Score', value=0.2, step=0.05)
                card_age = gr.Slider(1, 120, label='Card Age (months)', value=36, step=1)

        # Same component order as predict()'s parameters; shared by the
        # button callback and the example rows below.
        txn_inputs = [
            amount, hour, day, dist_home, dist_last,
            ratio, n_txn, foreign, merchant, card_age,
        ]

        predict_btn = gr.Button('Analyze Transaction', variant='primary')
        output = gr.Label(num_top_classes=2, label='Result')

        predict_btn.click(fn=predict, inputs=txn_inputs, outputs=output)

        # Each row supplies one value per component in txn_inputs.
        example_rows = [
            [25.0, 14, 2, 5.0, 3.0, 0.8, 2, False, 0.1, 48],
            [500.0, 3, 5, 100.0, 80.0, 5.0, 10, True, 0.7, 6],
            [1200.0, 2, 6, 200.0, 150.0, 8.0, 15, True, 0.9, 3],
        ]
        gr.Examples(examples=example_rows, inputs=txn_inputs)

    with gr.Tab('Model Info'):
        info_btn = gr.Button('Show Model Info')
        info_output = gr.Textbox(label='Model Details', lines=12)
        info_btn.click(fn=get_model_info, outputs=info_output)

demo.launch()
metrics.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metrics": {
3
+ "accuracy": 0.9975,
4
+ "precision": 0.9873417721518988,
5
+ "recall": 0.975,
6
+ "f1": 0.9811320754716981,
7
+ "roc_auc": 0.9999553571428571
8
+ },
9
+ "data_stats": {
10
+ "n_samples": 6000,
11
+ "fraud_rate": 0.06666666666666667,
12
+ "train_size": 4800,
13
+ "test_size": 1200
14
+ },
15
+ "timestamp": "2026-02-04T14:29:29.917435",
16
+ "model_type": "HistGradientBoostingClassifier",
17
+ "features": [
18
+ "amount",
19
+ "hour",
20
+ "day_of_week",
21
+ "distance_from_home",
22
+ "distance_from_last_txn",
23
+ "ratio_to_median",
24
+ "num_txn_last_24h",
25
+ "is_foreign",
26
+ "merchant_risk_score",
27
+ "card_age_months"
28
+ ],
29
+ "retrain_reason": "Drift detected (60% features). Challenger F1 0.9811 meets minimum threshold. Retraining on current distribution is recommended."
30
+ }
model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe2b153c3bd79ea33da32ead8eac2a9f85021da201917bbd864e952545a57581
3
+ size 256566
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio
2
+ numpy
3
+ scikit-learn
retrain_report.json ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "timestamp": "2026-02-04T14:29:29.649381",
3
+ "drift_level": "severe",
4
+ "decision": "deploy",
5
+ "reason": "Drift detected (60% features). Challenger F1 0.9811 meets minimum threshold. Retraining on current distribution is recommended.",
6
+ "drift": {
7
+ "dataset_drift": true,
8
+ "drift_share": 0.6,
9
+ "drifted_features": [
10
+ "amount",
11
+ "distance_from_last_txn",
12
+ "ratio_to_median",
13
+ "num_txn_last_24h",
14
+ "is_foreign",
15
+ "merchant_risk_score"
16
+ ],
17
+ "feature_scores": {
18
+ "amount": 0.7076,
19
+ "hour": 0.067,
20
+ "day_of_week": 0.0239,
21
+ "distance_from_home": 0.0745,
22
+ "distance_from_last_txn": 0.2056,
23
+ "ratio_to_median": 0.216,
24
+ "num_txn_last_24h": 0.2618,
25
+ "is_foreign": 0.2407,
26
+ "merchant_risk_score": 0.3282,
27
+ "card_age_months": 0.0479
28
+ },
29
+ "threshold": 0.1
30
+ },
31
+ "challenger_metrics": {
32
+ "accuracy": 0.9975,
33
+ "precision": 0.9873417721518988,
34
+ "recall": 0.975,
35
+ "f1": 0.9811320754716981,
36
+ "roc_auc": 0.9999553571428571
37
+ },
38
+ "champion_metrics_on_new_data": {
39
+ "accuracy": 0.9991666666666666,
40
+ "precision": 0.9876543209876543,
41
+ "recall": 1.0,
42
+ "f1": 0.9937888198757764,
43
+ "roc_auc": 1.0
44
+ },
45
+ "f1_improvement": -0.0127,
46
+ "auc_improvement": -0.0
47
+ }