solfedge committed on
Commit
1bb2414
·
verified ·
1 Parent(s): 95f00b5

Upload 9 files

Browse files
Files changed (9) hide show
  1. app.py +93 -0
  2. fraud_model.py +82 -0
  3. iso_forest.pkl +3 -0
  4. parser.py +31 -0
  5. requirements.txt +8 -0
  6. train_columns.pkl +3 -0
  7. xgb_explainer.pkl +3 -0
  8. xgb_fraud.json +0 -0
  9. xgb_fraud.pkl +3 -0
app.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import gradio as gr
3
+ import joblib
4
+ import pandas as pd
5
+ import shap
6
+ import xgboost as xgb
7
+ from xgboost import XGBClassifier
8
+ import numpy as np
9
+ import matplotlib
10
+ matplotlib.use('Agg')
11
+ import matplotlib.pyplot as plt
12
+ import os
13
+
14
# Artifacts written by fraud_model.py that this app needs at import time.
REQUIRED_ARTIFACTS = ("iso_forest.pkl", "xgb_fraud.json", "train_columns.pkl")

# Check for the files up front: a missing XGBoost model file may surface as a
# library-specific error rather than FileNotFoundError, in which case the
# friendly hint below would never be shown for it.
missing_artifacts = [path for path in REQUIRED_ARTIFACTS if not os.path.exists(path)]
if missing_artifacts:
    raise FileNotFoundError(
        f"File missing: {', '.join(missing_artifacts)}. Did you run fraud_model.py?"
    )

try:
    iso_forest = joblib.load("iso_forest.pkl")

    # Load XGBoost from JSON.
    # NOTE: rebinding `xgb` shadows the `import xgboost as xgb` module alias;
    # predict_fraud relies on `xgb` being the fitted classifier, so the name
    # is kept as-is.
    xgb = XGBClassifier()
    xgb.load_model("xgb_fraud.json")

    # Column order the models were trained with.
    train_cols = joblib.load("train_columns.pkl")
except FileNotFoundError as e:
    # Kept for files that disappear between the check above and the load.
    raise FileNotFoundError(f"File missing: {e}. Did you run fraud_model.py?") from e

# A single all-zero row serves as the SHAP background dataset.
explainer = shap.Explainer(
    xgb,
    pd.DataFrame(np.zeros((1, len(train_cols))), columns=train_cols),
)
28
+
29
def predict_fraud(amount, hour, country, merchant_category, is_weekend):
    """Score a single transaction and explain the decision.

    Args:
        amount: Transaction amount; must convert to a finite, non-negative
            float.
        hour: Hour of day; must convert to an int in the range 0-23.
        country: Country name, one-hot encoded against the training columns.
        merchant_category: Merchant category, one-hot encoded likewise.
        is_weekend: Truthy/falsy weekend flag, converted to 0/1.

    Returns:
        A ``(message, figure)`` tuple. ``message`` is the human-readable
        verdict including the Isolation Forest score; ``figure`` is the
        matplotlib figure holding the SHAP waterfall plot. For invalid input
        the tuple is ``(" Invalid input: Please enter valid numbers.", None)``.
    """
    invalid = (" Invalid input: Please enter valid numbers.", None)

    try:
        amount = float(amount)
        hour = int(hour)
        is_weekend = int(is_weekend)
    except (TypeError, ValueError, OverflowError):
        # TypeError covers None (e.g. an emptied number field); OverflowError
        # covers int(inf).
        return invalid

    # log1p is undefined for amount <= -1 and NaN/inf would propagate into
    # both models, so reject such values before building features.
    if not np.isfinite(amount) or amount < 0 or not 0 <= hour <= 23:
        return invalid

    input_data = pd.DataFrame({
        "amount": [amount],
        "hour": [hour],
        "is_weekend": [is_weekend],
        "country": [country],
        "merchant_category": [merchant_category],
    })

    input_data["amount_log"] = np.log1p(input_data["amount"])
    input_data = pd.get_dummies(input_data, columns=["country", "merchant_category"])

    # Mirror the training script, which casts boolean dummy columns to int.
    bool_columns = input_data.select_dtypes(include="bool").columns
    input_data = input_data.astype({col: int for col in bool_columns})

    # Align with the training layout; categories absent from this row become 0.
    input_data = input_data.reindex(columns=train_cols, fill_value=0)

    risk_score = iso_forest.score_samples(input_data)[0]
    prediction = xgb.predict(input_data)[0]

    shap_values = explainer(input_data)
    fig, _ = plt.subplots(figsize=(8, 5))
    shap.plots.waterfall(shap_values[0], max_display=6, show=False)
    plt.tight_layout()
    # Close this specific figure so pyplot does not accumulate one per request;
    # the Figure object itself is still returned for rendering.
    plt.close(fig)

    if prediction == 1:
        return f" FRAUD DETECTED! Anomaly Score: {risk_score:.3f}", fig
    return f" No Fraud. Anomaly Score: {risk_score:.3f}", fig
63
+
64
+
65
# Gradio Interface
# Choices offered in the dropdowns.
COUNTRY_CHOICES = ["US", "Nigeria", "Russia", "China", "UK"]
MERCHANT_CHOICES = ["Retail", "Health", "Crypto", "Gambling", "Travel"]

with gr.Blocks(title="FraudGuard", theme=gr.themes.Soft()) as demo:
    # Page header.
    gr.Markdown("""
    # FraudGuard Real-Time Transaction Fraud Detector
    Enter transaction details below. FraudGuard uses AI to detect and **explain** fraud risk.
    """)

    # Transaction inputs, laid out on a single row.
    with gr.Row():
        amount = gr.Number(label="Transaction Amount ($)", value=100.0)
        hour = gr.Slider(minimum=0, maximum=23, step=1, label="Hour of Day", value=14)
        country = gr.Dropdown(choices=COUNTRY_CHOICES, label="Country", value="US")
        merchant_category = gr.Dropdown(
            choices=MERCHANT_CHOICES,
            label="Merchant Category",
            value="Retail",
        )
        is_weekend = gr.Checkbox(label="Is Weekend?")

    # Outputs: the textual verdict and the SHAP plot.
    output = gr.Textbox(label="Risk Status")
    explanation = gr.Plot(label="Why This Decision? (SHAP Explanation)")

    # Wire the button to the prediction function.
    submit_btn = gr.Button(" Analyze Transaction")
    submit_btn.click(
        fn=predict_fraud,
        inputs=[amount, hour, country, merchant_category, is_weekend],
        outputs=[output, explanation],
    )

if __name__ == "__main__":
    demo.launch(share=True)
92
+
93
+
fraud_model.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import pandas as pd
3
+ import numpy as np
4
+ from sklearn.ensemble import IsolationForest
5
+ from xgboost import XGBClassifier
6
+ from sklearn.model_selection import train_test_split
7
+ from sklearn.metrics import classification_report
8
+ import shap
9
+ import joblib
10
+
11
def generate_fraud_dataset(num_samples=10000, seed=None):
    """Generate a synthetic, labelled transaction dataset.

    Each row gets a fraud probability built from additive rule-based risk
    contributions (capped at 0.95), and the binary ``target`` is sampled from
    that probability.

    Args:
        num_samples: Number of rows to generate.
        seed: Optional seed for the random generator. Pass a value to get a
            reproducible dataset; ``None`` (the default) draws fresh entropy
            on every call.

    Returns:
        A DataFrame with columns ``amount``, ``hour``, ``country``,
        ``merchant_category``, ``is_weekend`` and ``target``.
    """
    rng = np.random.default_rng(seed)

    # Base data
    amount = rng.lognormal(3, 0.5, num_samples)
    hour = rng.integers(0, 24, num_samples)
    country = rng.choice(["US", "Nigeria", "Russia", "China", "UK"], num_samples)
    merchant_category = rng.choice(
        ["Retail", "Health", "Crypto", "Gambling", "Travel"], num_samples
    )
    is_weekend = rng.integers(0, 2, num_samples)

    high_risk_countries = ["Nigeria", "Russia", "China"]
    risky_merchants = ["Crypto", "Gambling"]

    # Additive risk contributions.
    # NOTE(review): with lognormal(3, 0.5) amounts, log(1000) is roughly 7.8
    # standard deviations above the mean in log space, so the amount rule
    # practically never fires -- confirm the intended threshold/distribution.
    fraud_risk = (
        (amount > 1000) * 0.3
        + np.isin(country, high_risk_countries) * 0.3
        + np.isin(merchant_category, risky_merchants) * 0.3
        + ((hour >= 2) & (hour <= 5)) * 0.1
    )

    # Combine and cap at 0.95
    fraud_risk = np.clip(fraud_risk, 0, 0.95)

    # Generate target: higher fraud_risk -> higher chance of fraud
    target = (rng.random(num_samples) < fraud_risk).astype(int)

    return pd.DataFrame({
        "amount": amount,
        "hour": hour,
        "country": country,
        "merchant_category": merchant_category,
        "is_weekend": is_weekend,
        "target": target,
    })
50
+
51
# Train models
df = generate_fraud_dataset()
df["amount_log"] = np.log1p(df["amount"])
df = pd.get_dummies(df, columns=["country", "merchant_category"])

# get_dummies may emit boolean columns; cast them to int so every feature is
# numeric.
bool_columns = df.select_dtypes(include="bool").columns
df = df.astype({col: int for col in bool_columns})

X = df.drop("target", axis=1)
y = df["target"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Isolation Forest
iso_forest = IsolationForest(contamination=0.05, random_state=42)
iso_forest.fit(X_train)
joblib.dump(iso_forest, "iso_forest.pkl")

# Train XGBoost
# `use_label_encoder` is deprecated in recent XGBoost releases, so it is no
# longer passed.
xgb = XGBClassifier(eval_metric="logloss", random_state=42)
xgb.fit(X_train, y_train)

# Save through the scikit-learn wrapper (not the raw booster) because the app
# loads this file with XGBClassifier.load_model.
xgb.save_model("xgb_fraud.json")

# Persist the feature order so inference can align its columns.
joblib.dump(X_train.columns.tolist(), "train_columns.pkl")

# Evaluate
preds = xgb.predict(X_test)
print(classification_report(y_test, preds))
print(" Models saved: iso_forest.pkl, xgb_fraud.json, train_columns.pkl")
81
+
82
+
iso_forest.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98cd90d8667eaefb4df9f0559b162c6f085d8016e92cd656cf1984153ca984a2
3
+ size 1911161
parser.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import pandas as pd
3
+ import numpy as np
4
+ from sklearn.preprocessing import StandardScaler
5
+
6
def prase_transaction_data(file_path):
    """Parse and clean transaction data from a CSV file.

    Rows containing any missing value are dropped, then three derived columns
    are added: ``hour`` (hour of ``timestamp``), ``amount_log``
    (``log1p(amount)``) and ``is_high_risk_country`` (1 when ``country`` is
    Nigeria, Russia or China, else 0).

    Args:
        file_path: Path (or any source accepted by ``pandas.read_csv``) of a
            CSV with at least ``timestamp``, ``amount`` and ``country``
            columns.

    Returns:
        The cleaned DataFrame with the derived columns appended.
    """
    df = pd.read_csv(file_path)

    df = df.dropna()
    df["hour"] = pd.to_datetime(df["timestamp"]).dt.hour
    df["amount_log"] = np.log1p(df["amount"])
    # Vectorized membership test instead of a per-row Python lambda.
    df["is_high_risk_country"] = (
        df["country"].isin(["Nigeria", "Russia", "China"]).astype(int)
    )

    return df


# Correctly spelled alias; the original (misspelled) name is kept so existing
# callers continue to work.
parse_transaction_data = prase_transaction_data
16
+
17
def preprocess_for_model(df, scaler=None):
    """Prepare data for the fraud detection model.

    Selects the model features, one-hot encodes ``merchant_category``
    (dropping the first level) and standardizes the result.

    Args:
        df: DataFrame containing ``amount_log``, ``hour``,
            ``is_high_risk_country`` and ``merchant_category``; an optional
            ``fraud_label`` column supplies the target.
        scaler: Optional, already-fitted scaler exposing ``transform``. When
            given it is applied as-is, so inference data can be scaled with
            the statistics learned at training time. When ``None`` (the
            default) a new ``StandardScaler`` is fitted on ``df`` itself.
            NOTE(review): the encoded columns depend on the categories
            present in ``df`` -- confirm they match what the scaler was
            fitted on.

    Returns:
        A ``(X_scaled, y)`` tuple: the scaled feature matrix and the
        ``fraud_label`` column, or ``None`` if that column is absent.
    """
    features = ['amount_log', 'hour', 'is_high_risk_country', 'merchant_category']
    y = df.get('fraud_label', None)

    # One-hot encode category
    X = pd.get_dummies(df[features], columns=['merchant_category'], drop_first=True)

    # Normalize
    if scaler is None:
        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(X)
    else:
        X_scaled = scaler.transform(X)

    return X_scaled, y
31
+
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ pandas
2
+ numpy
3
+ scikit-learn
4
+ xgboost
5
+ shap
6
+ gradio
7
+ joblib
8
+ matplotlib
train_columns.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81b0dd6b6e09b1fa1e3c073f711c98fadbfd1b0ac866d3c06755025958da8bb2
3
+ size 272
xgb_explainer.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41e0d684af68af6429f119b65cbb79cc30bbe438a996084917a87434ef44c748
3
+ size 1150093
xgb_fraud.json ADDED
The diff for this file is too large to render. See raw diff
 
xgb_fraud.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e1e8aae48dff34de465ecc84cd199306980ae617a4ace286b92febe6a828d4e
3
+ size 350795