"""Train a Random Forest fraud classifier on the credit-card transactions CSV.

Pipeline: load the CSV, split into train/test (stratified on ``Class``),
standardize ``Time`` and ``Amount`` with scalers fitted on the training
partition only, oversample the training partition with SMOTE, fit the model,
print evaluation metrics on the untouched test partition, and persist the
model together with both scalers via joblib.
"""

import os

import joblib
import numpy as np
import pandas as pd
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    auc,
    classification_report,
    confusion_matrix,
    precision_recall_curve,
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Input dataset and output artifact locations.
DATA_PATH = 'c:/card/creditcard.csv'
MODEL_PATH = 'c:/card/fraud_model.joblib'
SCALER_AMOUNT_PATH = 'c:/card/scaler_amount.joblib'
SCALER_TIME_PATH = 'c:/card/scaler_time.joblib'


def _with_scaled_columns(features, scaler_amount, scaler_time, *, fit):
    """Return a copy of *features* with Time/Amount replaced by scaled columns.

    Args:
        features: DataFrame containing at least the ``Time`` and ``Amount``
            columns.
        scaler_amount: StandardScaler used for the ``Amount`` column.
        scaler_time: StandardScaler used for the ``Time`` column.
        fit: When True the scalers are fitted on *features* before
            transforming; when False the already-fitted scalers are applied.

    Returns:
        A new DataFrame without ``Time``/``Amount`` and with
        ``scaled_amount`` and ``scaled_time`` appended, in that order.
    """
    # drop() returns a new frame, so the caller's DataFrame is not mutated.
    scaled = features.drop(['Time', 'Amount'], axis=1)
    for source, target, scaler in (
        ('Amount', 'scaled_amount', scaler_amount),
        ('Time', 'scaled_time', scaler_time),
    ):
        # Scalers operate on 2-D input; feed a plain (n, 1) ndarray.
        column = features[source].values.reshape(-1, 1)
        transformed = scaler.fit_transform(column) if fit else scaler.transform(column)
        scaled[target] = transformed.ravel()
    return scaled


def main():
    """Run the full train / evaluate / save pipeline."""
    # Load data
    print("Loading data...")
    df = pd.read_csv(DATA_PATH)

    # Define X and y
    X = df.drop('Class', axis=1)
    y = df['Class']

    # Split BEFORE scaling so the test partition's statistics never
    # influence the fitted scalers (avoids train/test leakage).
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    # Preprocessing
    print("Preprocessing...")
    scaler_amount = StandardScaler()
    scaler_time = StandardScaler()
    X_train = _with_scaled_columns(X_train, scaler_amount, scaler_time, fit=True)
    X_test = _with_scaled_columns(X_test, scaler_amount, scaler_time, fit=False)

    # Handle imbalance with SMOTE (training partition only).
    print("Applying SMOTE to balance training data...")
    sm = SMOTE(random_state=42)
    X_train_res, y_train_res = sm.fit_resample(X_train, y_train)

    print(f"Original training shape: {X_train.shape}")
    print(f"Resampled training shape: {X_train_res.shape}")

    # Train Model
    print("Training Random Forest Classifier (this might take a minute)...")
    model = RandomForestClassifier(
        n_estimators=50, max_depth=10, random_state=42, n_jobs=-1
    )
    model.fit(X_train_res, y_train_res)

    # Evaluate on the original (non-resampled) test partition.
    print("Evaluating model...")
    y_pred = model.predict(X_test)

    print("\nConfusion Matrix:")
    print(confusion_matrix(y_test, y_pred))

    print("\nClassification Report:")
    print(classification_report(y_test, y_pred))

    # Save model and scalers
    print("Saving model and scalers...")
    joblib.dump(model, MODEL_PATH)
    joblib.dump(scaler_amount, SCALER_AMOUNT_PATH)
    joblib.dump(scaler_time, SCALER_TIME_PATH)

    print("Done! Files saved to c:/card/")


if __name__ == "__main__":
    main()