aitek230telu's picture
Upload 52 files
0ab7b0c verified
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
import pylab
# from warnings import warn
# 2018.11.26 Created by Eamon.Zhang
def diagnostic_plots(df, variable):
    """Draw a histogram and a normal Q-Q plot of one column, side by side.

    Args:
        df: Object indexed as ``df[variable]``; the result must provide a
            ``.hist()`` method (e.g. a pandas DataFrame column).
        variable: Label of the column to plot.

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    plt.figure(figsize=(15, 6))

    # Left panel: histogram of the column.
    plt.subplot(121)
    column = df[variable]
    column.hist()

    # Right panel: probability plot against the normal distribution.
    plt.subplot(122)
    stats.probplot(column, dist="norm", plot=pylab)

    plt.show()
def log_transform(data, cols=None):
    """Add ``log(x + 1)`` transformed copies of the selected columns.

    For every label in ``cols`` a new column named ``<label>_log`` is added
    to a deep copy of ``data``, and a histogram / Q-Q plot of the new column
    is shown via ``diagnostic_plots``.

    Args:
        data: Input DataFrame; it is never modified.
        cols: Iterable of column labels to transform. ``None`` (the default)
            or an empty iterable means no column is transformed.

    Returns:
        A deep copy of ``data`` with the additional ``*_log`` columns.

    Note:
        Values ``<= -1`` are not special-cased here.
    """
    # A None sentinel avoids a shared mutable default argument.
    cols = [] if cols is None else cols

    data_copy = data.copy(deep=True)
    for col in cols:
        # Formatting the label (instead of ``col + '_log'``) also supports
        # non-string column labels such as integers.
        new_col = '{}_log'.format(col)
        # log1p(x) == log(x + 1), but stays accurate for x close to zero.
        data_copy[new_col] = np.log1p(data_copy[col])
        print('Variable {} Q-Q plot'.format(col))
        diagnostic_plots(data_copy, new_col)
    return data_copy
def reciprocal_transform(data, cols=None):
    """Add reciprocal (``1 / x``) transformed copies of the selected columns.

    For every label in ``cols`` a new column named ``<label>_reciprocal`` is
    added to a deep copy of ``data``, and a histogram / Q-Q plot of the new
    column is shown via ``diagnostic_plots``.

    Args:
        data: Input DataFrame; it is never modified.
        cols: Iterable of column labels to transform. ``None`` (the default)
            or an empty iterable means no column is transformed.

    Returns:
        A deep copy of ``data`` with the additional ``*_reciprocal`` columns.

    Note:
        Zeros in the input are not special-cased here.
    """
    # A None sentinel avoids a shared mutable default argument.
    cols = [] if cols is None else cols

    data_copy = data.copy(deep=True)
    for col in cols:
        # Formatting the label (instead of ``col + '_reciprocal'``) also
        # supports non-string column labels such as integers.
        new_col = '{}_reciprocal'.format(col)
        data_copy[new_col] = 1 / data_copy[col]
        print('Variable {} Q-Q plot'.format(col))
        diagnostic_plots(data_copy, new_col)
    return data_copy
def square_root_transform(data, cols=None):
    """Add square-root (``x ** 0.5``) transformed copies of the selected columns.

    For every label in ``cols`` a new column named ``<label>_square_root`` is
    added to a deep copy of ``data``, and a histogram / Q-Q plot of the new
    column is shown via ``diagnostic_plots``.

    Args:
        data: Input DataFrame; it is never modified.
        cols: Iterable of column labels to transform. ``None`` (the default)
            or an empty iterable means no column is transformed.

    Returns:
        A deep copy of ``data`` with the additional ``*_square_root`` columns.

    Note:
        Negative values in the input are not special-cased here.
    """
    # A None sentinel avoids a shared mutable default argument.
    cols = [] if cols is None else cols

    data_copy = data.copy(deep=True)
    for col in cols:
        # Formatting the label (instead of ``col + '_square_root'``) also
        # supports non-string column labels such as integers.
        new_col = '{}_square_root'.format(col)
        data_copy[new_col] = data_copy[col] ** 0.5
        print('Variable {} Q-Q plot'.format(col))
        diagnostic_plots(data_copy, new_col)
    return data_copy
def exp_transform(data, coef, cols=None):
    """Add power-transformed (``x ** coef``) copies of the selected columns.

    Despite the function name, the transformation raises each value to the
    power ``coef``; it does not compute ``exp(x)``. The name and the
    ``_exp`` column suffix are kept for compatibility with existing callers.

    For every label in ``cols`` a new column named ``<label>_exp`` is added
    to a deep copy of ``data``, and a histogram / Q-Q plot of the new column
    is shown via ``diagnostic_plots``.

    Args:
        data: Input DataFrame; it is never modified.
        coef: Exponent applied to every selected column.
        cols: Iterable of column labels to transform. ``None`` (the default)
            or an empty iterable means no column is transformed.

    Returns:
        A deep copy of ``data`` with the additional ``*_exp`` columns.
    """
    # A None sentinel avoids a shared mutable default argument.
    cols = [] if cols is None else cols

    data_copy = data.copy(deep=True)
    for col in cols:
        # Formatting the label (instead of ``col + '_exp'``) also supports
        # non-string column labels such as integers.
        new_col = '{}_exp'.format(col)
        data_copy[new_col] = data_copy[col] ** coef
        print('Variable {} Q-Q plot'.format(col))
        diagnostic_plots(data_copy, new_col)
    return data_copy