Spaces:
Running
Running
Upload 9 files
Browse files- app.py +93 -0
- fraud_model.py +82 -0
- iso_forest.pkl +3 -0
- parser.py +31 -0
- requirements.txt +8 -0
- train_columns.pkl +3 -0
- xgb_explainer.pkl +3 -0
- xgb_fraud.json +0 -0
- xgb_fraud.pkl +3 -0
app.py
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import gradio as gr
|
| 3 |
+
import joblib
|
| 4 |
+
import pandas as pd
|
| 5 |
+
import shap
|
| 6 |
+
import xgboost as xgb
|
| 7 |
+
from xgboost import XGBClassifier
|
| 8 |
+
import numpy as np
|
| 9 |
+
import matplotlib
|
| 10 |
+
matplotlib.use('Agg')
|
| 11 |
+
import matplotlib.pyplot as plt
|
| 12 |
+
import os
|
| 13 |
+
|
| 14 |
+
# Load the artifacts produced by fraud_model.py; fail fast with an actionable
# message if training has not been run yet.
try:
    iso_forest = joblib.load("iso_forest.pkl")
    # Load XGBoost from JSON (portable across xgboost versions, unlike pickle)
    xgb = XGBClassifier()  # NOTE(review): rebinds `xgb`, shadowing the `import xgboost as xgb` module alias above — confirm the module alias is never needed after this point
    xgb.load_model("xgb_fraud.json")

    # Load training columns (one-hot schema used to reindex inference inputs)
    train_cols = joblib.load("train_columns.pkl")

except FileNotFoundError as e:
    raise FileNotFoundError(f"File missing: {e}. Did you run fraud_model.py?")


# SHAP explainer seeded with a zero-filled single-row frame: only the column
# layout matters for establishing the feature schema, not the values.
explainer = shap.Explainer(xgb, pd.DataFrame(np.zeros((1, len(train_cols))), columns=train_cols))
| 29 |
+
def predict_fraud(amount, hour, country, merchant_category, is_weekend):
    """Score one transaction and explain the decision.

    Args:
        amount: transaction amount; coerced to float.
        hour: hour of day (0-23); coerced to int.
        country: country name matching one of the training categories.
        merchant_category: merchant category matching a training category.
        is_weekend: truthy flag; coerced to int.

    Returns:
        (status_message, matplotlib_figure) — the figure is a SHAP waterfall
        plot, or None when the inputs cannot be parsed.
    """
    try:
        amount = float(amount)
        hour = int(hour)
        is_weekend = int(is_weekend)
    except (TypeError, ValueError):
        # TypeError covers a cleared gr.Number field (float(None));
        # ValueError covers non-numeric text. Both mean "bad input".
        return " Invalid input: Please enter valid numbers.", None

    input_data = pd.DataFrame({
        "amount": [amount],
        "hour": [hour],
        "is_weekend": [is_weekend],
        "country": [country],
        "merchant_category": [merchant_category]
    })

    # Mirror the training-time feature engineering, then align the one-hot
    # columns with the training schema (missing categories filled with 0).
    input_data['amount_log'] = np.log1p(input_data['amount'])
    input_data = pd.get_dummies(input_data, columns=["country", "merchant_category"])
    input_data = input_data.reindex(columns=train_cols, fill_value=0)

    # Unsupervised anomaly score (lower = more anomalous) and supervised label.
    risk_score = iso_forest.score_samples(input_data)[0]
    prediction = xgb.predict(input_data)[0]

    # Build the SHAP waterfall explanation for the single row.
    shap_values = explainer(input_data)
    fig, ax = plt.subplots(figsize=(8, 5))
    shap.plots.waterfall(shap_values[0], max_display=6, show=False)
    plt.tight_layout()
    # Close this specific figure in pyplot's registry; the object itself
    # survives and Gradio renders it from the returned reference.
    plt.close(fig)

    if prediction == 1:
        return f" FRAUD DETECTED! Anomaly Score: {risk_score:.3f}", fig
    else:
        return f" No Fraud. Anomaly Score: {risk_score:.3f}", fig
| 65 |
+
# Gradio Interface — component creation order inside the Blocks context
# defines the on-screen layout, so these statements must stay in order.
with gr.Blocks(title="FraudGuard", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
# FraudGuard Real-Time Transaction Fraud Detector
Enter transaction details below. FraudGuard uses AI to detect and **explain** fraud risk.
""")

    # Input widgets, laid out on a single row; defaults form a valid sample.
    with gr.Row():
        amount = gr.Number(label="Transaction Amount ($)", value=100.0)
        hour = gr.Slider(0, 23, step=1, label="Hour of Day", value=14)
        country = gr.Dropdown(["US", "Nigeria", "Russia", "China", "UK"], label="Country", value="US")
        merchant_category = gr.Dropdown(["Retail", "Health", "Crypto", "Gambling", "Travel"],
                                        label="Merchant Category", value="Retail")
        is_weekend = gr.Checkbox(label="Is Weekend?")

    # Outputs: textual verdict plus the SHAP waterfall figure.
    output = gr.Textbox(label="Risk Status")
    explanation = gr.Plot(label="Why This Decision? (SHAP Explanation)")

    # Wire the button to predict_fraud: five inputs in, (text, plot) out.
    submit_btn = gr.Button(" Analyze Transaction")
    submit_btn.click(
        fn=predict_fraud,
        inputs=[amount, hour, country, merchant_category, is_weekend],
        outputs=[output, explanation]
    )

if __name__ == "__main__":
    # share=True requests a public tunnel — appropriate for a demo Space.
    demo.launch(share=True)
fraud_model.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import numpy as np
|
| 4 |
+
from sklearn.ensemble import IsolationForest
|
| 5 |
+
from xgboost import XGBClassifier
|
| 6 |
+
from sklearn.model_selection import train_test_split
|
| 7 |
+
from sklearn.metrics import classification_report
|
| 8 |
+
import shap
|
| 9 |
+
import joblib
|
| 10 |
+
|
| 11 |
+
def generate_fraud_dataset(num_samples=10000, random_state=None):
    """Generate a synthetic transaction dataset with a probabilistic fraud label.

    Risk is accumulated additively from four heuristics (large amount,
    high-risk country, risky merchant, small-hours timing), capped at 0.95,
    and the binary target is sampled from that probability.

    Args:
        num_samples: number of rows to generate.
        random_state: optional seed for reproducibility. Default None keeps
            the previous nondeterministic behavior, so existing callers are
            unaffected.

    Returns:
        DataFrame with columns: amount, hour, country, merchant_category,
        is_weekend, target (0/1).
    """
    rng = np.random.default_rng(random_state)

    # Base data
    amount = rng.lognormal(3, 0.5, num_samples)
    hour = rng.integers(0, 24, num_samples)
    country = rng.choice(["US", "Nigeria", "Russia", "China", "UK"], num_samples)
    merchant_category = rng.choice(["Retail", "Health", "Crypto", "Gambling", "Travel"], num_samples)
    is_weekend = rng.choice([0, 1], num_samples)

    fraud_risk = np.zeros(num_samples)

    # Large transactions are riskier.
    fraud_risk += (amount > 1000).astype(float) * 0.3

    # Synthetic high-risk origin countries.
    high_risk_countries = ["Nigeria", "Russia", "China"]
    fraud_risk += np.isin(country, high_risk_countries).astype(float) * 0.3

    # Merchant categories associated with fraud.
    risky_merchants = ["Crypto", "Gambling"]
    fraud_risk += np.isin(merchant_category, risky_merchants).astype(float) * 0.3

    # Small-hours transactions (02:00-05:00) get a small bump.
    fraud_risk += ((hour >= 2) & (hour <= 5)).astype(float) * 0.1

    # Combine and cap at 0.95
    fraud_risk = np.clip(fraud_risk, 0, 0.95)

    # Generate target: higher fraud_risk → higher chance of fraud
    target = (rng.random(num_samples) < fraud_risk).astype(int)

    return pd.DataFrame({
        "amount": amount,
        "hour": hour,
        "country": country,
        "merchant_category": merchant_category,
        "is_weekend": is_weekend,
        "target": target
    })
| 51 |
+
# Train models — runs the full pipeline at import time and writes the
# artifacts that app.py loads.
df = generate_fraud_dataset()
df['amount_log'] = np.log1p(df['amount'])
df = pd.get_dummies(df, columns=["country", "merchant_category"])
# XGBoost/SHAP want numeric inputs; recent pandas emits bool dummy columns.
bool_cols = df.select_dtypes(bool).columns
df[bool_cols] = df[bool_cols].astype(int)

X = df.drop("target", axis=1)
y = df["target"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Isolation Forest — unsupervised anomaly detector, ~5% contamination.
iso_forest = IsolationForest(contamination=0.05, random_state=42)
iso_forest.fit(X_train)
joblib.dump(iso_forest, "iso_forest.pkl")

# Train XGBoost. `use_label_encoder` was deprecated in xgboost 1.6 and later
# removed, so it must not be passed on current releases.
xgb = XGBClassifier(eval_metric='logloss', random_state=42)
xgb.fit(X_train, y_train)

# Save as JSON — portable across xgboost versions, unlike pickling the object.
xgb.get_booster().save_model("xgb_fraud.json")

# Persist the training column order so inference can reindex one-hot inputs.
joblib.dump(X_train.columns.tolist(), "train_columns.pkl")

# Evaluate
preds = xgb.predict(X_test)
print(classification_report(y_test, preds))
print(" Models saved: iso_forest.pkl, xgb_fraud.json, train_columns.pkl")
|
iso_forest.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:98cd90d8667eaefb4df9f0559b162c6f085d8016e92cd656cf1984153ca984a2
|
| 3 |
+
size 1911161
|
parser.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import numpy as np
|
| 4 |
+
from sklearn.preprocessing import StandardScaler
|
| 5 |
+
|
| 6 |
+
def prase_transaction_data(file_path):
    """Parse and clean raw transaction data.

    (The function name keeps its historical 'prase' typo so existing callers
    keep working.)

    Args:
        file_path: path or file-like object readable by pandas.read_csv;
            the CSV must contain 'timestamp', 'amount' and 'country' columns.

    Returns:
        Cleaned DataFrame (NaN rows dropped) with added feature columns:
        'hour', 'amount_log', and 'is_high_risk_country' (0/1).
    """
    df = pd.read_csv(file_path)

    df = df.dropna()
    df['hour'] = pd.to_datetime(df['timestamp']).dt.hour
    df['amount_log'] = np.log1p(df['amount'])
    # Vectorized membership test instead of a row-wise apply(lambda).
    df['is_high_risk_country'] = df['country'].isin(["Nigeria", "Russia", "China"]).astype(int)

    return df
| 17 |
+
def preprocess_for_model(df):
    """Turn a cleaned transaction frame into a model-ready feature matrix.

    Returns (X_scaled, y) where X_scaled is a standardized numpy array and
    y is the 'fraud_label' column, or None when that column is absent.
    NOTE(review): the scaler is re-fit on every call — confirm callers only
    use this for training-time data, not single-row inference.
    """
    feature_cols = ['amount_log', 'hour', 'is_high_risk_country', 'merchant_category']
    matrix = df[feature_cols]
    labels = df.get('fraud_label', None)

    # One-hot encode the merchant category, dropping the first level.
    matrix = pd.get_dummies(matrix, columns=['merchant_category'], drop_first=True)

    # Standardize every feature column to zero mean / unit variance.
    return StandardScaler().fit_transform(matrix), labels
requirements.txt
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
pandas
|
| 2 |
+
numpy
|
| 3 |
+
scikit-learn
|
| 4 |
+
xgboost
|
| 5 |
+
shap
|
| 6 |
+
gradio
|
| 7 |
+
joblib
|
| 8 |
+
matplotlib
|
train_columns.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:81b0dd6b6e09b1fa1e3c073f711c98fadbfd1b0ac866d3c06755025958da8bb2
|
| 3 |
+
size 272
|
xgb_explainer.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:41e0d684af68af6429f119b65cbb79cc30bbe438a996084917a87434ef44c748
|
| 3 |
+
size 1150093
|
xgb_fraud.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
xgb_fraud.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2e1e8aae48dff34de465ecc84cd199306980ae617a4ace286b92febe6a828d4e
|
| 3 |
+
size 350795
|