#import pandas as pd import numpy as np import seaborn as sns import matplotlib.pyplot as plt import os plt.style.use('seaborn-colorblind') # 2018.11.07 Created by Eamon.Zhang def get_dtypes(data,drop_col=[]): """Return the dtypes for each column of a pandas Dataframe Parameters ---------- data : pandas Dataframe drop_col : columns to omit in a list Returns ------- str_var_list, num_var_list, all_var_list """ name_of_col = list(data.columns) num_var_list = [] str_var_list = [] all_var_list = [] str_var_list = name_of_col.copy() for var in name_of_col: # check if column belongs to numeric type if (data[var].dtypes in (np.int, np.int64, np.uint, np.int32, np.float, np.float64, np.float32, np.double)): str_var_list.remove(var) num_var_list.append(var) # drop the omit column from list for var in drop_col: if var in str_var_list: str_var_list.remove(var) if var in num_var_list: num_var_list.remove(var) all_var_list.extend(str_var_list) all_var_list.extend(num_var_list) return str_var_list, num_var_list, all_var_list def describe(data,output_path=None): """output the general description of a pandas Dataframe into a csv file """ result = data.describe(include='all') if output_path is not None: output = os.path.join(output_path,'describe.csv') result.to_csv(output) print('result saved at:', str(output)) return result def discrete_var_barplot(x,y,data,output_path=None): """draw the barplot of a discrete variable x against y(target variable). By default the bar shows the mean value of y. Parameters ---------- Returns ------- figure save as PNG """ plt.figure(figsize=(15,10)) sns.barplot(x=x,y=y,data=data) if output_path is not None: output = os.path.join(output_path,'Barplot_'+str(x)+'_'+str(y)+'.png') plt.savefig(output) print('Image saved at', str(output)) def discrete_var_countplot(x,data,output_path=None): """draw the countplot of a discrete variable x. Parameters ---------- Returns ------- figure save as PNG """ plt.figure(figsize=(15,10)) sns.countplot(x=x,data=data) if output_path is not None: output = os.path.join(output_path,'Countplot_'+str(x)+'.png') plt.savefig(output) print('Image saved at',str(output)) def discrete_var_boxplot(x,y,data,output_path=None): """draw the boxplot of a discrete variable x against y. Parameters ---------- Returns ------- figure save as PNG """ plt.figure(figsize=(15,10)) sns.boxplot(x=x,y=y,data=data) if output_path is not None: output = os.path.join(output_path,'Boxplot_'+str(x)+'_'+str(y)+'.png') plt.savefig(output) print('Image saved at',str(output)) def continuous_var_distplot(x,output_path=None,bins=None): """draw the distplot of a continuous variable x. Parameters ---------- Returns ------- figure save as PNG """ plt.figure(figsize=(15,10)) sns.distplot(a=x,kde=False,bins=bins) if output_path is not None: output=os.path.join(output_path,'Distplot_'+str(x.name)+'.png') plt.savefig(output) print('Image saved at',str(output)) # 2018.11.28 Created by Eamon.Zhang def scatter_plot(x,y,data,output_path=None): """draw the scatter-plot of two variables. Parameters ---------- Returns ------- figure save as PNG """ plt.figure(figsize=(15,10)) sns.scatterplot(x=x,y=y,data=data) if output_path is not None: output = os.path.join(output_path,'Scatter_plot_'+str(x.name)+'_'+str(y.name)+'.png') plt.savefig(output) print('Image saved at',str(output)) def correlation_plot(data,output_path=None): """draw the correlation plot between variables. Parameters ---------- Returns ------- figure save as PNG """ corrmat = data.corr() fig, ax = plt.subplots() fig.set_size_inches(11,11) sns.heatmap(corrmat,cmap="YlGnBu",linewidths=.5,annot=True) if output_path is not None: output = os.path.join(output_path,'Corr_plot'+'.png') plt.savefig(output) print('Image saved at',str(output)) def heatmap(data,output_path=None,fmt='d'): """draw the heatmap between 2 variables. Parameters ---------- Returns ------- figure save as PNG """ fig, ax = plt.subplots() fig.set_size_inches(11,11) sns.heatmap(data,cmap="YlGnBu",linewidths=.5,annot=True,fmt=fmt) if output_path is not None: output = os.path.join(output_path,'Heatmap'+'.png') plt.savefig(output) print('Image saved at',str(output))