# -*- coding: utf-8 -*-
"""Final_project_of_Credit_Card_Fraud_Detection(1).ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1PSHcV_bp0wcT0Kl_f2n5QwtlOZj3M5BV

Script outline:

1. Load the transactions CSV and print a few quick inspections.
2. Drop identifier columns and plot the ``isFraud`` / ``type`` distributions.
3. Encode ``type`` as integers, split 70/30 and standardize the features.
4. Fit six classifiers and print a classification report for each.
5. Classify two hand-written sample transactions with the random forest.

Notebook-style bare expressions (``data.head()``, ``data.shape`` ...) show
nothing when run as a script, so they are wrapped in ``print`` here.
"""

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn import tree
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier

# --------------------------------------------------------------------------
# 1. Load and inspect
# --------------------------------------------------------------------------
data = pd.read_csv('/content/data4.csv')

print(data.head())
print(data.shape)
print(data.isnull().sum().sum())  # total number of missing cells
print(data.keys())
data.info()  # prints its own summary

# --------------------------------------------------------------------------
# 2. Drop identifier columns and explore the target
# --------------------------------------------------------------------------
data = data.drop(['Unnamed: 0', 'nameOrig', 'nameDest'], axis=1)
print(data.shape)

print(data['isFraud'].value_counts())

# NOTE: value_counts() orders by frequency (most frequent first), so these
# hard-coded labels are only correct while non-fraud is the majority class.
plt.pie(
    data['isFraud'].value_counts(),
    labels=['Not_Fraud', 'Fraud'],
    autopct='%0.2f%%',
)
plt.show()

sns.countplot(data=data, x="type", hue="isFraud")
plt.show()

plt.figure(figsize=(6, 8))
sns.countplot(data=data, x="isFraud", hue="type")
plt.show()

print(data.tail())
print(data['type'].value_counts())

# --------------------------------------------------------------------------
# 3. Encode, split, scale
# --------------------------------------------------------------------------
dict1 = {'CASH_OUT': 0, 'TRANSFER': 1, 'PAYMENT': 2, 'CASH_IN': 3, 'DEBIT': 4}

# Series.map() turns every label missing from dict1 into NaN without any
# warning; fail here with a readable message instead of inside an estimator.
unknown_types = set(data['type'].dropna().unique()) - set(dict1)
if unknown_types:
    raise ValueError(
        "Unmapped transaction types in 'type' column: "
        f"{sorted(map(str, unknown_types))}"
    )
data['type'] = data['type'].map(dict1)
print(data.head())

X = data.drop('isFraud', axis=1)
print(X)
y = data['isFraud']
print(y)

# NOTE(review): the split is not stratified on y; with a rare positive class
# consider stratify=y (left unchanged here so the split stays the same).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, random_state=0
)
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

# Fit the scaler on the training split only, then reuse it for the test split.
sc = StandardScaler()
X_train_sc = sc.fit_transform(X_train)
X_test_sc = sc.transform(X_test)
print(X_train_sc)
print(X_test_sc)

# --------------------------------------------------------------------------
# 4. Train and evaluate the classifiers
# --------------------------------------------------------------------------
model1 = LogisticRegression()
model1.fit(X_train_sc, y_train)
y_pred1 = model1.predict(X_test_sc)
print(classification_report(y_test, y_pred1))

model2 = GaussianNB()
model2.fit(X_train_sc, y_train)
y_pred2 = model2.predict(X_test_sc)
print(classification_report(y_test, y_pred2))

model3 = KNeighborsClassifier()
model3.fit(X_train_sc, y_train)
y_pred3 = model3.predict(X_test_sc)
print(classification_report(y_test, y_pred3))

model4 = DecisionTreeClassifier()
model4.fit(X_train_sc, y_train)
y_pred4 = model4.predict(X_test_sc)
print(classification_report(y_test, y_pred4))

plt.figure(figsize=(10, 10))
tree.plot_tree(model4, filled=True)
plt.show()

model5 = RandomForestClassifier()
model5.fit(X_train_sc, y_train)
y_pred5 = model5.predict(X_test_sc)
print(classification_report(y_test, y_pred5))

model6 = AdaBoostClassifier()
model6.fit(X_train_sc, y_train)
y_pred6 = model6.predict(X_test_sc)
print(classification_report(y_test, y_pred6))

# --------------------------------------------------------------------------
# 5. Classify hand-written sample transactions
# --------------------------------------------------------------------------
# model5 was trained on standardized features, so raw values must go through
# the same fitted scaler before predict(); feeding them unscaled (as the
# notebook did) evaluates the forest in the wrong feature space.
# NOTE(review): the values are assumed to be listed in the same order as
# X's columns -- confirm against the CSV.
sample_transactions = pd.DataFrame(
    [
        [239, 2, 5178.72, 400705.00, 395526.28, 0.00, 0.00],
        [369, 0, 89596.79, 89596.79, 0.0, 0.00, 89596.79],
    ],
    columns=X.columns,
)
print(model5.predict(sc.transform(sample_transactions)))