|
|
import pandas as pd |
|
|
import numpy as np |
|
|
import matplotlib.pyplot as plt |
|
|
import scipy.stats as stats |
|
|
import pylab |
|
|
|
|
|
|
|
|
|
|
|
def diagnostic_plots(df, variable):
    """Show a histogram and a normal Q-Q plot of one column side by side.

    Args:
        df: Table-like object indexable by column label (e.g. a pandas
            DataFrame); the selected column must provide a ``hist`` method
            that accepts an ``ax`` keyword.
        variable: Label of the column to plot.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # Explicit Axes objects are used instead of the ``pylab`` module so that
    # neither plot depends on matplotlib's implicit "current axes" state.
    _, (hist_ax, qq_ax) = plt.subplots(1, 2, figsize=(15, 6))

    # Left panel: histogram of the column's values.
    df[variable].hist(ax=hist_ax)

    # Right panel: probability plot of the column against a normal
    # distribution, drawn on the second Axes.
    stats.probplot(df[variable], dist="norm", plot=qq_ax)

    plt.show()
|
|
|
|
|
|
|
|
def log_transform(data, cols=None):
    """Add ``log(x + 1)`` versions of the selected columns and plot each one.

    For every label in ``cols`` a new column named ``<label>_log`` is added
    to a deep copy of ``data`` and passed to ``diagnostic_plots``.

    Args:
        data: Input table; it is deep-copied and never modified in place.
        cols: Iterable of column labels to transform. ``None`` (the default)
            means no columns are transformed.

    Returns:
        The deep copy of ``data`` with the new ``*_log`` columns added.
    """
    # A ``None`` sentinel replaces the former mutable ``[]`` default.
    if cols is None:
        cols = []

    data_copy = data.copy(deep=True)
    for col in cols:
        # ``format`` instead of ``+`` so non-string labels do not raise
        # TypeError when building the new column name.
        new_col = "{}_log".format(col)
        # log1p(x) equals log(1 + x) but stays accurate for x close to zero.
        data_copy[new_col] = np.log1p(data_copy[col])
        print("Variable {} Q-Q plot".format(col))
        diagnostic_plots(data_copy, new_col)
    return data_copy
|
|
|
|
|
|
|
|
def reciprocal_transform(data, cols=None):
    """Add ``1 / x`` versions of the selected columns and plot each one.

    For every label in ``cols`` a new column named ``<label>_reciprocal`` is
    added to a deep copy of ``data`` and passed to ``diagnostic_plots``.

    Args:
        data: Input table; it is deep-copied and never modified in place.
        cols: Iterable of column labels to transform. ``None`` (the default)
            means no columns are transformed.

    Returns:
        The deep copy of ``data`` with the new ``*_reciprocal`` columns added.
    """
    # A ``None`` sentinel replaces the former mutable ``[]`` default.
    if cols is None:
        cols = []

    data_copy = data.copy(deep=True)
    for col in cols:
        # ``format`` instead of ``+`` so non-string labels do not raise
        # TypeError when building the new column name.
        new_col = "{}_reciprocal".format(col)
        # NOTE(review): zero entries are not special-cased here; the result
        # follows the column's own division rules (presumably inf for pandas
        # numeric data) -- confirm this is acceptable for the plots.
        data_copy[new_col] = 1 / data_copy[col]
        print("Variable {} Q-Q plot".format(col))
        diagnostic_plots(data_copy, new_col)
    return data_copy
|
|
|
|
|
|
|
|
def square_root_transform(data, cols=None):
    """Add square-root versions of the selected columns and plot each one.

    For every label in ``cols`` a new column named ``<label>_square_root``
    (computed as ``x ** 0.5``) is added to a deep copy of ``data`` and passed
    to ``diagnostic_plots``.

    Args:
        data: Input table; it is deep-copied and never modified in place.
        cols: Iterable of column labels to transform. ``None`` (the default)
            means no columns are transformed.

    Returns:
        The deep copy of ``data`` with the new ``*_square_root`` columns added.
    """
    # A ``None`` sentinel replaces the former mutable ``[]`` default.
    if cols is None:
        cols = []

    data_copy = data.copy(deep=True)
    for col in cols:
        # ``format`` instead of ``+`` so non-string labels do not raise
        # TypeError when building the new column name.
        new_col = "{}_square_root".format(col)
        data_copy[new_col] = data_copy[col] ** 0.5
        print("Variable {} Q-Q plot".format(col))
        diagnostic_plots(data_copy, new_col)
    return data_copy
|
|
|
|
|
|
|
|
def exp_transform(data, coef, cols=None):
    """Add power-transformed versions of the selected columns and plot them.

    Despite the name, this raises each value to the power ``coef``
    (``x ** coef``); it does not compute ``e ** x``. For every label in
    ``cols`` a new column named ``<label>_exp`` is added to a deep copy of
    ``data`` and passed to ``diagnostic_plots``.

    Args:
        data: Input table; it is deep-copied and never modified in place.
        coef: Exponent applied to every value of the selected columns.
        cols: Iterable of column labels to transform. ``None`` (the default)
            means no columns are transformed.

    Returns:
        The deep copy of ``data`` with the new ``*_exp`` columns added.
    """
    # A ``None`` sentinel replaces the former mutable ``[]`` default.
    if cols is None:
        cols = []

    data_copy = data.copy(deep=True)
    for col in cols:
        # ``format`` instead of ``+`` so non-string labels do not raise
        # TypeError when building the new column name.
        new_col = "{}_exp".format(col)
        data_copy[new_col] = data_copy[col] ** coef
        print("Variable {} Q-Q plot".format(col))
        diagnostic_plots(data_copy, new_col)
    return data_copy
|
|
|
|
|
|