# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# Input data files are available in the read-only "../input/" directory
# You can write up to 20GB to the current directory (/kaggle/working/) that gets
# preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved
# outside of the current session
"""Train and evaluate a logistic-regression classifier on creditcard.csv.

The script loads the CSV, prints a few summaries, plots the distribution of
the ``Class`` and ``Amount`` columns, trains a ``LogisticRegression`` model
to predict ``Class`` from the remaining columns, reports ROC AUC, a confusion
matrix and a classification report, and finally writes ``model.pkl``,
``X_test.npy`` and ``y_test.npy`` to the current directory.
"""

import os

import joblib
import matplotlib.pyplot as plt
import numpy as np  # linear algebra
import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    RocCurveDisplay,
    classification_report,
    confusion_matrix,
    roc_auc_score,
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# List every file available under the Kaggle input directory.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# ---------------------------------------------------------------------------
# Load and inspect the data
# ---------------------------------------------------------------------------
df = pd.read_csv("/kaggle/input/creditcardfraud/creditcard.csv")

# Bare expressions such as `df.head()` only display in a notebook cell; when
# run as a script their results are discarded, so print them explicitly.
print(df.head())
print(df.shape)
df.info()  # info() writes to stdout itself
print(df.columns)
print(df.isnull().sum().sort_values(ascending=False))
print(df["Class"].value_counts())

sns.countplot(x="Class", data=df)
plt.title("Class Distribution")
plt.show()

# Plotted before scaling, so the histogram shows the raw Amount values.
plt.figure(figsize=(8, 4))
sns.histplot(df["Amount"], bins=50)
plt.title("Transaction Amount Distribution")
plt.show()

# ---------------------------------------------------------------------------
# Split, then scale
# ---------------------------------------------------------------------------
X = df.drop("Class", axis=1)
y = df["Class"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Work on explicit copies so the column assignments below modify independent
# frames rather than views derived from `X`.
X_train = X_train.copy()
X_test = X_test.copy()

# Fit the scaler on the training rows only and reuse those statistics for the
# test rows. Fitting on the full frame before the split would leak test-set
# mean/variance into training. StandardScaler standardises each column
# independently, so a single scaler over both columns is equivalent to one
# scaler per column while keeping both sets of fitted statistics available.
scaled_columns = ["Amount", "Time"]
scaler = StandardScaler()
X_train[scaled_columns] = scaler.fit_transform(X_train[scaled_columns])
X_test[scaled_columns] = scaler.transform(X_test[scaled_columns])

# ---------------------------------------------------------------------------
# Train
# ---------------------------------------------------------------------------
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# ---------------------------------------------------------------------------
# Evaluate
# ---------------------------------------------------------------------------
# Column 1 of predict_proba is the probability of the positive class (label 1).
y_pred_proba = model.predict_proba(X_test)[:, 1]
print("ROC AUC:", roc_auc_score(y_test, y_pred_proba))

RocCurveDisplay.from_predictions(y_test, y_pred_proba)
plt.show()

y_pred = model.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues")
plt.title("Confusion Matrix")
plt.show()

print(classification_report(y_test, y_pred))

# ---------------------------------------------------------------------------
# Persist the model and the held-out set
# ---------------------------------------------------------------------------
# NOTE: X_test is saved already scaled, matching the inputs the saved model
# expects; np.save stores the values only, so column names are not kept.
joblib.dump(model, "model.pkl")
np.save("X_test.npy", X_test)
np.save("y_test.npy", y_test)