import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Load the dataset from its fixed local path into a DataFrame.
df = pd.read_csv('c:/card/creditcard.csv')

# Structural overview: (rows, columns) followed by the first five records.
print("Dataset Shape:", df.shape)
print("\nFirst 5 rows:")
print(df.head())

# Print the largest per-column null count; 0 means no column has any
# missing values.
print("\nMissing values:")
print(df.isnull().sum().max())

# Class balance: absolute row counts per label, then the same split
# expressed as percentages of all rows.
class_labels = df['Class']
print("\nClass Distribution (0: Normal, 1: Fraud):")
print(class_labels.value_counts())
print("\nPercentage:")
print(class_labels.value_counts(normalize=True) * 100)

# Descriptive statistics as produced by DataFrame.describe().
print("\nSummary Statistics:")
print(df.describe())

# Bar chart of the number of rows per value of the 'Class' column, written
# to a PNG file. Explicit figure/axes handles are used so the title and the
# save target do not depend on pyplot's implicit "current figure" state.
# NOTE(review): seaborn 0.13+ emits a FutureWarning when `palette` is passed
# without `hue`; the call is kept unchanged so the rendered colours stay the
# same -- confirm against the installed seaborn version before migrating.
class_fig = plt.figure(figsize=(8, 6))
class_ax = sns.countplot(x='Class', data=df, palette='viridis')
class_ax.set_title('Class Distribution (0: Normal, 1: Fraud)')
class_fig.savefig('c:/card/class_distribution.png')

# Side-by-side histograms (50 bins each, with a KDE overlay) of the 'Amount'
# and 'Time' columns, written to a single PNG file. One subplots() call
# creates both panels; each histogram is drawn on an explicitly named axes.
dist_fig, (amount_ax, time_ax) = plt.subplots(1, 2, figsize=(12, 4))

# Left panel: distribution of transaction amounts.
sns.histplot(df['Amount'], bins=50, kde=True, color='blue', ax=amount_ax)
amount_ax.set_title('Transaction Amount Distribution')

# Right panel: distribution of the 'Time' column.
sns.histplot(df['Time'], bins=50, kde=True, color='red', ax=time_ax)
time_ax.set_title('Transaction Time Distribution')

dist_fig.savefig('c:/card/distributions.png')