# hashirlodhi's picture
# Upload 3 files
# 52765e8 verified
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
import gradio as gr
# Path of the CSV dataset that train_model() reads. It is a relative path, so
# it is resolved against the process's current working directory.
DATA_PATH = "synthetic_fraud_dataset.csv"
# Load and preprocess data, and train the model
def train_model():
    """Load the fraud dataset, fit the preprocessors, and train the classifier.

    Reads the CSV at ``DATA_PATH``, keeps the feature columns plus
    ``Fraud_Label``, binarises the label at a 0.5 threshold, label-encodes
    ``Location`` (reserving an extra ``'Unknown'`` class so unseen locations
    can still be encoded at prediction time), standardises the numerical
    features, and fits a class-balanced random forest on a stratified 70%
    split of the data.

    Returns:
        A 4-tuple ``(rf_model, scaler, le, numerical_cols)``: the fitted
        ``RandomForestClassifier``, the fitted ``StandardScaler``, the fitted
        ``LabelEncoder`` for ``Location``, and the list of column names the
        scaler was fitted on.

    Raises:
        FileNotFoundError: If the CSV at ``DATA_PATH`` does not exist.
        KeyError: If any expected column is missing from the CSV.
    """
    # Numerical features that get standardised. The final feature order seen
    # by the model is these columns followed by 'Location'.
    numerical_cols = ['Transaction_Amount', 'Previous_Fraudulent_Activity', 'Risk_Score', 'Transaction_Distance',
                      'Daily_Transaction_Count', 'Failed_Transaction_Count_7d']
    important_columns = numerical_cols + ['Location', 'Fraud_Label']

    # Load the dataset and keep only the columns we need. The explicit copy
    # matters: assigning into a column-subset selection of the loaded frame
    # otherwise triggers pandas' SettingWithCopyWarning on the writes below.
    df = pd.read_csv(DATA_PATH)
    df = df[important_columns].copy()

    # Convert Fraud_Label to binary (values >= 0.5 become 1, the rest 0).
    df['Fraud_Label'] = (df['Fraud_Label'] >= 0.5).astype(int)

    # Label-encode Location. 'Unknown' is added to the fitted classes so that
    # prediction-time inputs with an unseen location have a valid code.
    le = LabelEncoder()
    unique_locations = list(df['Location'].unique()) + ['Unknown']
    le.fit(unique_locations)
    df['Location'] = le.transform(df['Location'])

    # Scale numerical features. Note the scaler is fitted on the full dataset,
    # before the split below.
    scaler = StandardScaler()
    df[numerical_cols] = scaler.fit_transform(df[numerical_cols])

    # Stratified split; only the 70% training part is used, the held-out 30%
    # is discarded.
    X = df.drop('Fraud_Label', axis=1)
    y = df['Fraud_Label']
    X_train, _, y_train, _ = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

    # Train the random forest; class_weight='balanced' reweights the classes
    # inversely to their frequency.
    rf_model = RandomForestClassifier(n_estimators=100, random_state=42, class_weight='balanced')
    rf_model.fit(X_train, y_train)

    return rf_model, scaler, le, numerical_cols
# Predict function for Gradio
# Lazily populated cache of train_model()'s return value, so the dataset is
# read and the forest is fitted once per process instead of once per request.
# Training uses fixed random seeds, so the cached model gives the same
# predictions a fresh retrain on the same CSV would.
_trained_artifacts = None


def _get_trained_artifacts():
    """Return ``train_model()``'s result, training only on the first call."""
    global _trained_artifacts
    if _trained_artifacts is None:
        _trained_artifacts = train_model()
    return _trained_artifacts


def predict_transaction(transaction_amount, previous_fraudulent_activity, risk_score, transaction_distance,
                        daily_transaction_count, failed_transaction_count_7d, location):
    """Classify a single transaction as fraudulent or not.

    Args:
        transaction_amount: Transaction amount; converted with ``float()``.
        previous_fraudulent_activity: Prior-fraud flag; converted with ``int()``.
        risk_score: Risk score; converted with ``float()``.
        transaction_distance: Transaction distance; converted with ``float()``.
        daily_transaction_count: Daily transaction count; converted with ``int()``.
        failed_transaction_count_7d: Failed transactions in the last 7 days;
            converted with ``int()``.
        location: Location name. Values the encoder was not fitted on are
            mapped to the ``'Unknown'`` class.

    Returns:
        A two-line string with the predicted class and the fraud probability
        (4 decimal places), or a short message asking for the missing inputs
        when any numeric field is empty.
    """
    # An empty numeric field reaches this function as None; float(None) would
    # otherwise fail with an opaque TypeError.
    numeric_inputs = (transaction_amount, previous_fraudulent_activity, risk_score,
                      transaction_distance, daily_transaction_count, failed_transaction_count_7d)
    if any(value is None for value in numeric_inputs):
        return "Please fill in all numeric fields before submitting."

    # Reuse the trained model and preprocessors (trained on first use).
    model, scaler, le, numerical_cols = _get_trained_artifacts()

    # Feature columns, in the same order the model was trained on.
    feature_columns = ['Transaction_Amount', 'Previous_Fraudulent_Activity', 'Risk_Score', 'Transaction_Distance',
                       'Daily_Transaction_Count', 'Failed_Transaction_Count_7d', 'Location']

    # Create a single-row input DataFrame.
    input_data = [
        float(transaction_amount),
        int(previous_fraudulent_activity),
        float(risk_score),
        float(transaction_distance),
        int(daily_transaction_count),
        int(failed_transaction_count_7d),
        location
    ]
    input_df = pd.DataFrame([input_data], columns=feature_columns)

    # Preprocess Location: fall back to 'Unknown' for unseen values, then encode.
    input_df['Location'] = input_df['Location'].apply(lambda x: x if x in le.classes_ else 'Unknown')
    input_df['Location'] = le.transform(input_df['Location'])

    # Scale numerical features with the scaler fitted during training.
    input_df[numerical_cols] = scaler.transform(input_df[numerical_cols])

    # Predict on the DataFrame itself rather than a bare NumPy array: the model
    # was fitted with feature names, and stripping them makes scikit-learn warn
    # on every call.
    prediction = model.predict(input_df)
    probability = model.predict_proba(input_df)[:, 1]

    # Return results
    result = "Fraudulent" if prediction[0] == 1 else "Not Fraudulent"
    return f"Prediction: {result}\nFraud Probability: {probability[0]:.4f}"


# Create Gradio interface
iface = gr.Interface(
    fn=predict_transaction,
    inputs=[
        gr.Number(label="Transaction Amount ($)"),
        gr.Number(label="Previous Fraudulent Activity (0 or 1)", precision=0),
        gr.Number(label="Risk Score (0 to 1)"),
        gr.Number(label="Transaction Distance (miles)"),
        gr.Number(label="Daily Transaction Count", precision=0),
        gr.Number(label="Failed Transaction Count (7 days)", precision=0),
        gr.Textbox(label="Location")
    ],
    outputs="text",
    title="ATM Fraud Detector",
    description="Enter transaction details to predict if it's fraudulent. "
                "The model is trained once on first use and reused for later predictions."
)
# Launch the interface only when this file is executed as a script, so that
# importing the module (e.g. to reuse predict_transaction) does not start the
# Gradio server.
if __name__ == "__main__":
    iface.launch()