# -*- coding: utf-8 -*-
"""Final_project_of_Credit_Card_Fraud_Detection(1).ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1PSHcV_bp0wcT0Kl_f2n5QwtlOZj3M5BV
"""
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Load the transactions CSV (Colab upload path) into a DataFrame.
data = pd.read_csv('/content/data4.csv')

# Quick inspection. A bare expression only renders inside a notebook cell;
# when this export runs as a script its value is thrown away, so print it.
print(data.head())
print(data.shape)
print(data.isnull().sum().sum())  # total number of missing cells in the frame
print(data.keys())
data.info()  # info() writes its own report to stdout

# Remove the columns that are not kept for modelling.
# NOTE(review): 'Unnamed: 0' is presumably an index column written by an
# earlier to_csv() call -- confirm against the file.
data = data.drop(columns=['Unnamed: 0', 'nameOrig', 'nameDest'])
print(data.shape)
# Class balance of the target column.
fraud_counts = data['isFraud'].value_counts()
print(fraud_counts)

# Derive each wedge label from the class value it belongs to. value_counts()
# orders its rows by frequency, so a fixed ['Not_Fraud', 'Fraud'] list is only
# correct while class 0 happens to be the more frequent one.
# NOTE(review): assumes isFraud is encoded 0 = legitimate, 1 = fraud -- confirm.
class_names = {0: 'Not_Fraud', 1: 'Fraud'}
pie_labels = [class_names.get(value, str(value)) for value in fraud_counts.index]
plt.pie(fraud_counts, labels=pie_labels, autopct='%0.2f%%')
plt.show()

# Number of rows per transaction type, coloured by the fraud flag.
sns.countplot(data=data, x="type", hue="isFraud")
plt.show()

# Number of rows per fraud flag, coloured by transaction type (taller figure).
plt.figure(figsize=(6, 8))
sns.countplot(data=data, x="isFraud", hue="type")
plt.show()
# Printed explicitly: bare expressions are discarded when run as a script.
print(data.tail())
print(data['type'].value_counts())

# Integer code assigned to each transaction type.
dict1 = {'CASH_OUT': 0, 'TRANSFER': 1, 'PAYMENT': 2, 'CASH_IN': 3, 'DEBIT': 4}

# Series.map() replaces every value that is missing from the mapping with NaN
# and gives no warning, so stop here if the column holds a category the
# mapping does not know (this also catches running the step twice).
unknown_types = set(data['type'].dropna().unique()) - set(dict1)
if unknown_types:
    raise ValueError(
        f"Unmapped transaction type(s): {sorted(unknown_types, key=str)}"
    )

data['type'] = data['type'].map(dict1)
print(data.head())
from sklearn.model_selection import train_test_split

# Features: every remaining column except the target.
X = data.drop(columns='isFraud')
print(X)

# Target: the fraud flag.
y = data['isFraud']
print(y)

# 70/30 hold-out split with a fixed seed. stratify=y keeps the proportion of
# each isFraud class the same in the training and test parts; with a plain
# random split a rare class can end up unevenly represented, which distorts
# the per-class scores reported later.
# NOTE(review): stratify raises ValueError if a class has fewer than 2 rows.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, random_state=0, stratify=y
)
for part in (X_train, X_test, y_train, y_test):
    print(part.shape)
from sklearn.preprocessing import StandardScaler

# Standardise the features. The scaler is fitted on the training split only
# and that same fitted transform is then applied to the test split.
sc = StandardScaler()
X_train_sc = sc.fit_transform(X_train)
X_test_sc = sc.transform(X_test)

# Printed explicitly: a bare name is a no-op when this runs as a script.
print(X_train_sc)
print(X_test_sc)
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# Logistic regression with default settings, trained on the scaled training
# split. fit() returns the estimator, so construction and training chain.
model1 = LogisticRegression().fit(X=X_train_sc, y=y_train)
y_pred1 = model1.predict(X=X_test_sc)

# Per-class report of the predictions on the held-out split.
print(classification_report(y_true=y_test, y_pred=y_pred1))
from sklearn.naive_bayes import GaussianNB

# Gaussian naive Bayes with default settings, trained on the scaled training
# split and evaluated on the scaled test split.
model2 = GaussianNB().fit(X=X_train_sc, y=y_train)
y_pred2 = model2.predict(X=X_test_sc)
print(classification_report(y_true=y_test, y_pred=y_pred2))
from sklearn.neighbors import KNeighborsClassifier

# k-nearest-neighbours classifier with default settings, trained on the
# scaled training split and evaluated on the scaled test split.
model3 = KNeighborsClassifier().fit(X=X_train_sc, y=y_train)
y_pred3 = model3.predict(X=X_test_sc)
print(classification_report(y_true=y_test, y_pred=y_pred3))
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree

# random_state fixes the estimator's internal randomness so the fitted tree,
# its report and its plot come out the same on every run, in line with the
# seeded train/test split.
model4 = DecisionTreeClassifier(random_state=0)
model4.fit(X_train_sc, y_train)
y_pred4 = model4.predict(X_test_sc)
print(classification_report(y_test, y_pred4))

# Draw the tree. The model was fitted on a plain array, so column and class
# names are passed in explicitly; the scaled columns keep the order of X.
# Split thresholds in the plot are in standardised units, not raw values.
# NOTE(review): max_depth=3 shows only the top levels so the boxes stay
# legible on a 10x10 figure -- raise or remove it to draw the full tree.
plt.figure(figsize=(10, 10))
tree.plot_tree(
    model4,
    filled=True,
    feature_names=list(X.columns),
    class_names=[str(label) for label in model4.classes_],
    max_depth=3,
)
plt.show()
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier

# Random forest trained on the scaled training split. random_state fixes the
# bootstrap sampling and feature selection so the scores are reproducible,
# in line with the seeded train/test split.
model5 = RandomForestClassifier(random_state=0)
model5.fit(X_train_sc, y_train)
y_pred5 = model5.predict(X_test_sc)
print(classification_report(y_test, y_pred5))
# AdaBoost trained on the scaled training split. random_state makes the
# fitted ensemble and its scores reproducible, in line with the seeded
# train/test split.
model6 = AdaBoostClassifier(random_state=0)
model6.fit(X_train_sc, y_train)
y_pred6 = model6.predict(X_test_sc)
print(classification_report(y_test, y_pred6))
# Classify two hand-written transactions with the random forest.
# model5 was trained on standardised features (X_train_sc), so raw values
# have to pass through the same fitted scaler before predict(); unscaled
# inputs would be compared against thresholds learned in standardised units.
# Each row lists its values in the column order of X; the frame carries the
# column names because the scaler was fitted on a DataFrame.
samples = pd.DataFrame(
    [
        [239, 2, 5178.72, 400705.00, 395526.28, 0.00, 0.00],
        [369, 0, 89596.79, 89596.79, 0.0, 0.00, 89596.79],
    ],
    columns=X.columns,
)
# One predicted isFraud label per row, printed because a bare call result is
# discarded when this runs as a script.
print(model5.predict(sc.transform(samples)))