Spaces:
Sleeping
Sleeping
from functools import lru_cache

import gradio as gr
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
# CSV file the model is trained on (relative path).
DATA_PATH = "synthetic_fraud_dataset.csv"
# Load and preprocess data, and train the model
def train_model():
    """Load the fraud dataset, preprocess it, and fit a random forest.

    Steps, in order:

    1. Read ``DATA_PATH`` and keep the six numeric features, ``Location``
       and ``Fraud_Label``.
    2. Binarize ``Fraud_Label`` (values >= 0.5 become 1, everything else 0).
    3. Label-encode ``Location``; an extra ``'Unknown'`` class is always
       registered so callers can encode locations not present in the data.
    4. Standardize the numeric features.
    5. Fit a class-balanced ``RandomForestClassifier`` on a stratified 70%
       split of the rows.

    Returns:
        tuple: ``(rf_model, scaler, le, numerical_cols)`` -- the fitted
        classifier, the fitted ``StandardScaler``, the fitted
        ``LabelEncoder`` and the list of column names the scaler was
        fitted on.
    """
    numerical_cols = ['Transaction_Amount', 'Previous_Fraudulent_Activity', 'Risk_Score', 'Transaction_Distance',
                      'Daily_Transaction_Count', 'Failed_Transaction_Count_7d']
    important_columns = numerical_cols + ['Location', 'Fraud_Label']

    # Take an explicit copy: the assignments below must modify an independent
    # frame, not a slice of the frame returned by read_csv.
    df = pd.read_csv(DATA_PATH)[important_columns].copy()

    # Convert Fraud_Label to binary
    df['Fraud_Label'] = (df['Fraud_Label'] >= 0.5).astype(int)

    # Missing locations go into the same 'Unknown' bucket used for unseen
    # values; otherwise NaN would be mixed with strings when fitting the encoder.
    df['Location'] = df['Location'].fillna('Unknown')

    # Label encode Location ('Unknown' is registered even if absent from the data)
    le = LabelEncoder()
    le.fit(list(df['Location'].unique()) + ['Unknown'])
    df['Location'] = le.transform(df['Location'])

    # Scale numerical features
    scaler = StandardScaler()
    df[numerical_cols] = scaler.fit_transform(df[numerical_cols])

    # Train-test split; the 30% hold-out portion is discarded, only the
    # training portion is used to fit the model.
    X = df.drop('Fraud_Label', axis=1)
    y = df['Fraud_Label']
    X_train, _, y_train, _ = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

    # Train Random Forest Model
    rf_model = RandomForestClassifier(n_estimators=100, random_state=42, class_weight='balanced')
    rf_model.fit(X_train, y_train)
    return rf_model, scaler, le, numerical_cols
@lru_cache(maxsize=1)
def _get_trained_artifacts():
    """Train the model on first use and reuse the result afterwards.

    ``train_model`` is seeded (``random_state=42`` for both the split and the
    forest), so reusing one fitted model gives the same predictions as
    retraining on every request, as long as the dataset file is unchanged.
    A failed training run is not cached and is retried on the next call.
    """
    return train_model()


# Predict function for Gradio
def predict_transaction(transaction_amount, previous_fraudulent_activity, risk_score, transaction_distance,
                        daily_transaction_count, failed_transaction_count_7d, location):
    """Classify a single transaction and report its fraud probability.

    Args:
        transaction_amount: Transaction amount; converted with ``float``.
        previous_fraudulent_activity: Converted with ``int``.
        risk_score: Converted with ``float``.
        transaction_distance: Converted with ``float``.
        daily_transaction_count: Converted with ``int``.
        failed_transaction_count_7d: Converted with ``int``.
        location: Location name; values the encoder does not know are
            encoded as ``'Unknown'``.

    Returns:
        str: Two lines, ``"Prediction: <Fraudulent|Not Fraudulent>"`` and
        ``"Fraud Probability: <p>"`` with four decimal places.

    Raises:
        gr.Error: If any numeric field is left empty (arrives as ``None``).
    """
    numeric_inputs = (transaction_amount, previous_fraudulent_activity, risk_score,
                      transaction_distance, daily_transaction_count, failed_transaction_count_7d)
    if any(value is None for value in numeric_inputs):
        # An empty gr.Number arrives as None; fail with a readable message
        # instead of a TypeError from float(None) / int(None).
        raise gr.Error("Please fill in every numeric field.")

    # Reuse the cached model and preprocessors
    model, scaler, le, numerical_cols = _get_trained_artifacts()

    # Define feature columns (same order the model was fitted with)
    feature_columns = ['Transaction_Amount', 'Previous_Fraudulent_Activity', 'Risk_Score', 'Transaction_Distance',
                       'Daily_Transaction_Count', 'Failed_Transaction_Count_7d', 'Location']

    # Numeric values are stored as floats so the frame has a uniform dtype
    # before scaling; the int() casts keep the original truncation.
    numeric_row = {
        'Transaction_Amount': float(transaction_amount),
        'Previous_Fraudulent_Activity': float(int(previous_fraudulent_activity)),
        'Risk_Score': float(risk_score),
        'Transaction_Distance': float(transaction_distance),
        'Daily_Transaction_Count': float(int(daily_transaction_count)),
        'Failed_Transaction_Count_7d': float(int(failed_transaction_count_7d)),
    }
    numeric_df = pd.DataFrame([numeric_row], columns=numerical_cols)

    # Scale numerical features
    input_df = pd.DataFrame(scaler.transform(numeric_df), columns=numerical_cols)

    # Preprocess Location: exact match first, then the whitespace-stripped
    # value, otherwise fall back to the 'Unknown' class.
    known_locations = set(le.classes_)
    raw_location = "" if location is None else location
    if raw_location in known_locations:
        location_label = raw_location
    elif isinstance(raw_location, str) and raw_location.strip() in known_locations:
        location_label = raw_location.strip()
    else:
        location_label = 'Unknown'
    input_df['Location'] = le.transform([location_label])
    input_df = input_df[feature_columns]

    # Make prediction; pass the DataFrame itself so the feature names match
    # the ones the model was fitted with.
    prediction = model.predict(input_df)
    probability = model.predict_proba(input_df)[:, 1]

    # Return results
    result = "Fraudulent" if prediction[0] == 1 else "Not Fraudulent"
    return f"Prediction: {result}\nFraud Probability: {probability[0]:.4f}"


# Create Gradio interface
iface = gr.Interface(
    fn=predict_transaction,
    inputs=[
        gr.Number(label="Transaction Amount ($)"),
        gr.Number(label="Previous Fraudulent Activity (0 or 1)", precision=0),
        gr.Number(label="Risk Score (0 to 1)"),
        gr.Number(label="Transaction Distance (miles)"),
        gr.Number(label="Daily Transaction Count", precision=0),
        gr.Number(label="Failed Transaction Count (7 days)", precision=0),
        gr.Textbox(label="Location"),
    ],
    outputs="text",
    title="ATM Fraud Detector",
    description=(
        "Enter transaction details to predict if it's fraudulent. "
        "The model is trained once on first use and reused for later predictions."
    ),
)
# Start the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()