""" @author: Edward R Jones @version 1.34 @copyright 2020 - Edward R Jones, all rights reserved. """ import sys import numpy as np import pandas as pd from math import sqrt import matplotlib.pyplot as plt from sklearn.metrics import mean_absolute_error, mean_squared_error from sklearn.metrics import median_absolute_error, r2_score from sklearn.metrics import accuracy_score, precision_score, recall_score from sklearn.metrics import f1_score, confusion_matrix, classification_report from sklearn.neural_network import MLPClassifier class nn_regressor(object): def display_metrics(nn, X, y): predictions = nn.predict(X) #Calculate number of weights n_weights = 0 for i in range(nn.n_layers_ - 1): n_weights += len(nn.intercepts_[i]) n_weights += nn.coefs_[i].shape[0]*nn.coefs_[i].shape[1] print("\nModel Metrics") print("{:.<23s}{:15d}".format('Observations', X.shape[0])) print("{:.<23s}{:15d}".format('Features', X.shape[1])) print("{:.<23s}{:15d}".format('Hidden Layers',\ nn.n_layers_-2)) print("{:.<23s}{:15d}".format('Outputs', \ nn.n_outputs_)) n_neurons = 0 nl = nn.n_layers_-2 if nl>1: for i in range(nl): n_neurons += nn.hidden_layer_sizes[i] else: n_neurons = nn.hidden_layer_sizes print("{:.<23s}{:15d}".format('Neurons',\ n_neurons)) print("{:.<23s}{:15d}".format('Weights', \ n_weights)) print("{:.<23s}{:15d}".format('NIterations', \ nn.n_iter_)) print("{:.<23s}{:>15s}".format('Activation Function', \ nn.activation)) print("{:.<23s}{:15.4f}".format('Loss', nn.loss_)) print("{:.<23s}{:15.4f}".format('R-Squared', \ r2_score(y,predictions))) print("{:.<23s}{:15.4f}".format('Mean Absolute Error', \ mean_absolute_error(y,predictions))) print("{:.<23s}{:15.4f}".format('Median Absolute Error', \ median_absolute_error(y,predictions))) print("{:.<23s}{:15.4f}".format('Avg Squared Error', \ mean_squared_error(y,predictions))) print("{:.<23s}{:15.4f}".format('Square Root ASE', \ sqrt(mean_squared_error(y,predictions)))) def display_split_metrics(nn, Xt, yt, Xv, yv): predict_t = 
nn.predict(Xt) predict_v = nn.predict(Xv) #Calculate number of weights n_weights = 0 for i in range(nn.n_layers_ - 1): n_weights += len(nn.intercepts_[i]) n_weights += nn.coefs_[i].shape[0]*nn.coefs_[i].shape[1] print("\n") print("{:.<23s}{:>15s}{:>15s}".format('Model Metrics', \ 'Training', 'Validation')) print("{:.<23s}{:15d}{:15d}".format('Observations', \ Xt.shape[0], Xv.shape[0])) print("{:.<23s}{:15d}{:15d}".format('Features', \ Xt.shape[1], Xv.shape[1])) print("{:.<23s}{:15d}{:15d}".format('Hidden Layers',\ (nn.n_layers_-2),(nn.n_layers_-2))) n_neurons = 0 nl = nn.n_layers_-2 if nl>1: for i in range(nl): n_neurons += nn.hidden_layer_sizes[i] else: n_neurons = nn.hidden_layer_sizes print("{:.<23s}{:15d}{:15d}".format('Neurons',\ n_neurons, n_neurons)) print("{:.<23s}{:15d}{:15d}".format('Outputs', \ nn.n_outputs_, nn.n_outputs_)) print("{:.<23s}{:15d}{:15d}".format('Weights', \ n_weights, n_weights)) print("{:.<23s}{:15d}{:15d}".format('Iterations', \ nn.n_iter_, nn.n_iter_)) print("{:.<23s}{:>15s}{:>15s}".format('Activation Function', \ nn.activation, nn.activation)) print("{:.<23s}{:15.4f}".format('Loss', nn.loss_)) R2t = r2_score(yt, predict_t) R2v = r2_score(yv, predict_v) print("{:.<23s}{:15.4f}{:15.4f}".format('R-Squared', R2t, R2v)) print("{:.<23s}{:15.4f}{:15.4f}".format('Mean Absolute Error', \ mean_absolute_error(yt,predict_t), \ mean_absolute_error(yv,predict_v))) print("{:.<23s}{:15.4f}{:15.4f}".format('Median Absolute Error', \ median_absolute_error(yt,predict_t), \ median_absolute_error(yv,predict_v))) print("{:.<23s}{:15.4f}{:15.4f}".format('Avg Squared Error', \ mean_squared_error(yt,predict_t), \ mean_squared_error(yv,predict_v))) print("{:.<23s}{:15.4f}{:15.4f}".format('Square Root ASE', \ sqrt(mean_squared_error(yt,predict_t)), \ sqrt(mean_squared_error(yv,predict_v)))) class nn_classifier(object): def display_metrics(nn, X, y): if len(nn.classes_) <= 2: #BINARY METRICS numpy_y = np.ravel(y) if type(numpy_y[0])==str: classes_ = nn.classes_ 
class nn_classifier(object):
    """Display helpers for a fitted sklearn classifier (binary or
    nominal target).

    Methods are written without ``self`` and are intended to be called
    directly on the class, e.g. ``nn_classifier.display_metrics(nn, X, y)``.
    """

    def display_metrics(nn, X, y):
        """Print model description and classification metrics for a
        fitted classifier evaluated on (X, y).

        A two-class model is reported with binary metrics; otherwise
        the target is assumed nominal (one-hot / multi-class).
        """
        if len(nn.classes_) <= 2:
            # ---------------- BINARY METRICS ----------------
            numpy_y = np.ravel(y)
            if type(numpy_y[0]) == str:
                classes_ = nn.classes_
            else:
                classes_ = [str(int(nn.classes_[0])),
                            str(int(nn.classes_[1]))]
            z = np.zeros(len(y))
            predictions = nn.predict(X)  # binary class predictions
            conf_mat = confusion_matrix(y_true=y, y_pred=predictions)
            tmisc = conf_mat[0][1] + conf_mat[1][0]
            misc = 100 * (tmisc) / (len(y))
            # z is the 0/1 indicator of class 1 membership, used as the
            # reference for probability-based error metrics below.
            for i in range(len(y)):
                if numpy_y[i] == 1:
                    z[i] = 1
            probability = nn.predict_proba(X)  # binary probabilities
            # Number of weights = biases + connection weights per layer.
            n_weights = 0
            for i in range(nn.n_layers_ - 1):
                n_weights += len(nn.intercepts_[i])
                n_weights += nn.coefs_[i].shape[0] * nn.coefs_[i].shape[1]
            print("\nModel Metrics")
            print("{:.<27s}{:10d}".format('Observations', X.shape[0]))
            print("{:.<27s}{:10d}".format('Features', X.shape[1]))
            print("{:.<27s}{:10d}".format('Hidden Layers', nn.n_layers_ - 2))
            print("{:.<27s}{:10d}".format('Outputs', nn.n_outputs_))
            # hidden_layer_sizes may be an int or a tuple; the int-only
            # fallback crashed with {:d} on the default (100,) tuple.
            hls = nn.hidden_layer_sizes
            if isinstance(hls, (list, tuple, np.ndarray)):
                n_neurons = int(sum(hls[:nn.n_layers_ - 2]))
            else:
                n_neurons = int(hls)
            print("{:.<27s}{:10d}".format('Neurons', n_neurons))
            print("{:.<27s}{:10d}".format('Weights', n_weights))
            print("{:.<27s}{:10d}".format('Iterations', nn.n_iter_))
            print("{:.<27s}{:>10s}".format('Hidden Layer Activation',
                                           nn.activation))
            print("{:.<27s}{:>10s}".format('Target Activation',
                                           nn.out_activation_))
            print("{:.<27s}{:10.4f}".format('Loss Function', nn.loss_))
            print("{:.<27s}{:10.4f}".format('Mean Absolute Error',
                  mean_absolute_error(z, probability[:, 1])))
            print("{:.<27s}{:10.4f}".format('Avg Squared Error',
                  mean_squared_error(z, probability[:, 1])))
            acc = accuracy_score(y, predictions)
            print("{:.<27s}{:10.4f}".format('Accuracy', acc))
            # pos_label is required for string-labelled targets.
            # (The original computed each of these twice; the duplicate
            # calls were removed.)
            if type(numpy_y[0]) == str:
                pre = precision_score(y, predictions, pos_label=classes_[1])
                tpr = recall_score(y, predictions, pos_label=classes_[1])
                f1 = f1_score(y, predictions, pos_label=classes_[1])
            else:
                pre = precision_score(y, predictions)
                tpr = recall_score(y, predictions)
                f1 = f1_score(y, predictions)
            print("{:.<27s}{:10.4f}".format('Precision', pre))
            print("{:.<27s}{:10.4f}".format('Recall (Sensitivity)', tpr))
            print("{:.<27s}{:10.4f}".format('F1-Score', f1))
            print("{:.<27s}{:10d}".format(
                'Total Misclassifications', tmisc))
            print("{:.<27s}{:9.1f}{:s}".format(
                'MISC (Misclassification)', misc, '%'))
            # Per-class misclassification rates from the confusion matrix.
            n_ = [conf_mat[0][0] + conf_mat[0][1],
                  conf_mat[1][0] + conf_mat[1][1]]
            miscc = [100 * conf_mat[0][1] / n_[0],
                     100 * conf_mat[1][0] / n_[1]]
            for i in range(2):
                print("{:s}{:<16s}{:>9.1f}{:<1s}".format(
                    ' class ', classes_[i], miscc[i], '%'))
            print("\n\n Confusion Class Class")
            print(" Matrix", end="")
            print("{:1s}{:>10s}{:>10s}".format(" ", classes_[0], classes_[1]))
            for i in range(2):
                print("{:s}{:.<6s}".format(' Class ', classes_[i]), end="")
                for j in range(2):
                    print("{:>10d}".format(conf_mat[i][j]), end="")
                print("")
            print("")
        else:
            # ---------------- NOMINAL METRICS ----------------
            n_classes = len(nn.classes_)
            n_obs = y.shape[0]
            if n_classes < 2:
                # (unreachable sys.exit() after raise removed)
                raise RuntimeError("\n Call to display_metrics invalid" +
                                   "\n Target does not appear to be nominal.\n")
            predict_ = nn.predict(X)
            prob_ = nn.predict_proba(X)
            ase_sum = 0
            mase_sum = 0
            misc_ = 0
            misc = []
            n_ = []
            conf_mat = []
            for i in range(n_classes):
                conf_mat.append(np.zeros(n_classes))
                misc.append(0)
                n_.append(0)
            for i in range(n_obs):
                # True class of observation i (y assumed one-hot encoded
                # — TODO confirm against callers).
                if type(y) == pd.DataFrame:
                    ky = y.iloc[i].argmax()
                else:
                    ky = y[i].argmax()
                for j in range(n_classes):
                    if ky == nn.classes_[j]:
                        ase_sum += (1 - prob_[i, j]) * (1 - prob_[i, j])
                        mase_sum += 1 - prob_[i, j]
                        idx = j
                        n_[j] += 1
                    else:
                        ase_sum += prob_[i, j] * prob_[i, j]
                        mase_sum += prob_[i, j]
                kp = predict_[i].argmax()
                for j in range(n_classes):
                    if kp == nn.classes_[j]:
                        conf_mat[idx][j] += 1
                        continue
                if kp != nn.classes_[idx]:
                    misc_ += 1
                    misc[idx] += 1
            tmisc = misc_
            misc_ = 100 * misc_ / n_obs
            ase = ase_sum / (n_classes * n_obs)
            mase = mase_sum / (n_classes * n_obs)
            # Weight count only applies to sklearn MLP models.
            n_weights = 0
            if type(nn) == MLPClassifier:
                for i in range(nn.n_layers_ - 1):
                    n_weights += len(nn.intercepts_[i])
                    n_weights += nn.coefs_[i].shape[0] * nn.coefs_[i].shape[1]
            print("\nModel Metrics")
            print("{:.<27s}{:10d}".format('Observations', X.shape[0]))
            print("{:.<27s}{:10d}".format('Features', X.shape[1]))
            if type(nn) == MLPClassifier:
                print("{:.<27s}{:10d}".format('Hidden Layers',
                                              nn.n_layers_ - 2))
                for i in range(nn.n_layers_ - 2):
                    print("{:<24s}{:.<3d}{:10d}".format(
                        ' Neurons Hidden Layer ', i + 1,
                        nn.coefs_[i].shape[1]))
                print("{:.<27s}{:10d}".format('Outputs', nn.n_outputs_))
                print("{:.<27s}{:10d}".format('Weights', n_weights))
                print("{:.<27s}{:10d}".format('Iterations', nn.n_iter_))
                print("{:.<27s}{:>10s}".format('Hidden Layer Activation',
                                               nn.activation))
                print("{:.<27s}{:10.4f}".format('Loss Function', nn.loss_))
            print("{:.<27s}{:10.4f}".format('Avg Squared Error', ase))
            print("{:.<27s}{:10.4f}".format('Root ASE', sqrt(ase)))
            print("{:.<27s}{:10.4f}".format('Mean Absolute Error', mase))
            # (debug print of y.shape/predict_.shape removed)
            acc = accuracy_score(y, predict_)
            print("{:.<27s}{:10.4f}".format('Accuracy', acc))
            pre = precision_score(y, predict_, average='macro')
            print("{:.<27s}{:10.4f}".format('Precision', pre))
            tpr = recall_score(y, predict_, average='macro')
            print("{:.<27s}{:10.4f}".format('Recall (Sensitivity)', tpr))
            f1 = f1_score(y, predict_, average='macro')
            print("{:.<27s}{:10.4f}".format('F1-Score', f1))
            if type(nn) == MLPClassifier:
                print("{:.<27s}{:10.4f}".format('Loss', nn.loss_))
            print("{:.<27s}{:10d}".format(
                'Total Misclassifications', tmisc))
            print("{:.<27s}{:9.1f}{:s}".format(
                'MISC (Misclassification)', misc_, '%'))
            if type(nn.classes_[0]) == str:
                fstr = "{:s}{:.<16s}{:>9.1f}{:<1s}"
            else:
                fstr = "{:s}{:.<16.0f}{:>9.1f}{:<1s}"
            for i in range(n_classes):
                if n_[i] > 0:
                    misc[i] = 100 * misc[i] / n_[i]
                print(fstr.format(' class ', nn.classes_[i], misc[i], '%'))
            print("\n\n Confusion")
            print(" Matrix ", end="")
            if type(nn.classes_[0]) == str:
                fstr1 = "{:>7s}{:<3s}"
                fstr2 = "{:s}{:.<6s}"
            else:
                fstr1 = "{:>7s}{:<3.0f}"
                fstr2 = "{:s}{:.<6.0f}"
            for i in range(n_classes):
                print(fstr1.format('Class ', nn.classes_[i]), end="")
            print("")
            for i in range(n_classes):
                print(fstr2.format('Class ', nn.classes_[i]), end="")
                for j in range(n_classes):
                    print("{:>10.0f}".format(conf_mat[i][j]), end="")
                print("")
            cr = classification_report(y, predict_, labels=nn.classes_)
            print("\n", cr)

    def display_split_metrics(nn, Xt, yt, Xv, yv, target_names=None):
        """Print side-by-side training/validation classification metrics.

        nn           : fitted classifier (sklearn MLPClassifier gets
                       extra architecture rows)
        Xt, yt       : training features and target
        Xv, yv       : validation features and target
        target_names : optional label list passed to the nominal-case
                       classification reports
        """
        if len(nn.classes_) <= 2:
            # ---------------- BINARY TARGET ----------------
            # The original wrapped this check in a try/except whose
            # handler unconditionally re-raised, followed by unreachable
            # sys.exit(); simplified to a plain guard.
            if len(nn.classes_) != 2:
                raise RuntimeError(" Call to display_split_metrics " +
                                   "invalid.\n Target does not have two classes.\n")
            if type(nn.classes_[0]) == np.str_:
                classes_ = nn.classes_
            else:
                classes_ = [str(int(nn.classes_[0])),
                            str(int(nn.classes_[1]))]
            # Number of weights (sklearn MLP only).
            n_weights = 0
            if type(nn) == MLPClassifier:
                for i in range(nn.n_layers_ - 1):
                    n_weights += len(nn.intercepts_[i])
                    n_weights += nn.coefs_[i].shape[0] * nn.coefs_[i].shape[1]
            numpy_yt = np.ravel(yt)
            numpy_yv = np.ravel(yv)
            # 0/1 indicators of class-1 membership for probability errors.
            zt = np.zeros(len(yt))
            zv = np.zeros(len(yv))
            for i in range(len(yt)):
                if numpy_yt[i] == 1:
                    zt[i] = 1
            for i in range(len(yv)):
                if numpy_yv[i] == 1:
                    zv[i] = 1
            predict_t = nn.predict(Xt)
            predict_v = nn.predict(Xv)
            conf_matt = confusion_matrix(y_true=yt, y_pred=predict_t)
            conf_matv = confusion_matrix(y_true=yv, y_pred=predict_v)
            prob_t = nn.predict_proba(Xt)
            prob_v = nn.predict_proba(Xv)
            print("\n")
            print("{:.<27s}{:>11s}{:>15s}".format('Model Metrics',
                                                  'Training', 'Validation'))
            print("{:.<27s}{:11d}{:15d}".format('Observations',
                                                Xt.shape[0], Xv.shape[0]))
            print("{:.<27s}{:11d}{:15d}".format('Features',
                                                Xt.shape[1], Xv.shape[1]))
            if type(nn) == MLPClassifier:
                print("{:.<27s}{:11d}{:15d}".format('Hidden Layers',
                      nn.n_layers_ - 2, nn.n_layers_ - 2))
                print("{:.<27s}{:11d}{:15d}".format('Outputs',
                      nn.n_outputs_, nn.n_outputs_))
                # Robust neuron count: hidden_layer_sizes may be an int
                # or a tuple (default (100,)).
                hls = nn.hidden_layer_sizes
                if isinstance(hls, (list, tuple, np.ndarray)):
                    n_neurons = int(sum(hls[:nn.n_layers_ - 2]))
                else:
                    n_neurons = int(hls)
                print("{:.<27s}{:11d}{:15d}".format('Neurons',
                                                    n_neurons, n_neurons))
                print("{:.<27s}{:11d}{:15d}".format('Weights',
                                                    n_weights, n_weights))
                print("{:.<27s}{:11d}{:15d}".format('Iterations',
                                                    nn.n_iter_, nn.n_iter_))
                print("{:.<27s}{:>11s}{:>15s}".format(
                    'Hidden Layer Activation', nn.activation, nn.activation))
                print("{:.<27s}{:11.4f}{:15.4f}".format('Loss',
                                                        nn.loss_, nn.loss_))
            print("{:.<27s}{:11.4f}{:15.4f}".format('Mean Absolute Error',
                  mean_absolute_error(zt, prob_t[:, 1]),
                  mean_absolute_error(zv, prob_v[:, 1])))
            print("{:.<27s}{:11.4f}{:15.4f}".format('Avg Squared Error',
                  mean_squared_error(zt, prob_t[:, 1]),
                  mean_squared_error(zv, prob_v[:, 1])))
            acct = accuracy_score(yt, predict_t)
            accv = accuracy_score(yv, predict_v)
            print("{:.<27s}{:11.4f}{:15.4f}".format('Accuracy', acct, accv))
            # pos_label is required for string-labelled targets.
            if type(numpy_yt[0]) == str:
                pre_t = precision_score(yt, predict_t, pos_label=classes_[1])
                tpr_t = recall_score(yt, predict_t, pos_label=classes_[1])
                f1_t = f1_score(yt, predict_t, pos_label=classes_[1])
                pre_v = precision_score(yv, predict_v, pos_label=classes_[1])
                tpr_v = recall_score(yv, predict_v, pos_label=classes_[1])
                f1_v = f1_score(yv, predict_v, pos_label=classes_[1])
            else:
                pre_t = precision_score(yt, predict_t)
                tpr_t = recall_score(yt, predict_t)
                f1_t = f1_score(yt, predict_t)
                pre_v = precision_score(yv, predict_v)
                tpr_v = recall_score(yv, predict_v)
                f1_v = f1_score(yv, predict_v)
            print("{:.<27s}{:11.4f}{:15.4f}".format('Precision',
                                                    pre_t, pre_v))
            print("{:.<27s}{:11.4f}{:15.4f}".format('Recall (Sensitivity)',
                                                    tpr_t, tpr_v))
            print("{:.<27s}{:11.4f}{:15.4f}".format('F1-score', f1_t, f1_v))
            misct_ = conf_matt[0][1] + conf_matt[1][0]
            miscv_ = conf_matv[0][1] + conf_matv[1][0]
            misct = 100 * misct_ / len(yt)
            miscv = 100 * miscv_ / len(yv)
            n_t = [conf_matt[0][0] + conf_matt[0][1],
                   conf_matt[1][0] + conf_matt[1][1]]
            n_v = [conf_matv[0][0] + conf_matv[0][1],
                   conf_matv[1][0] + conf_matv[1][1]]
            # misc_[0] = per-class training rates, misc_[1] = validation.
            misc_ = [[0, 0], [0, 0]]
            misc_[0][0] = 100 * conf_matt[0][1] / n_t[0]
            misc_[0][1] = 100 * conf_matt[1][0] / n_t[1]
            misc_[1][0] = 100 * conf_matv[0][1] / n_v[0]
            misc_[1][1] = 100 * conf_matv[1][0] / n_v[1]
            print("{:.<27s}{:11d}{:15d}".format(
                'Total Misclassifications', misct_, miscv_))
            print("{:.<27s}{:10.1f}{:s}{:14.1f}{:s}".format(
                'MISC (Misclassification)', misct, '%', miscv, '%'))
            for i in range(2):
                print("{:s}{:.<16s}{:>10.1f}{:<1s}{:>14.1f}{:<1s}".format(
                    ' class ', classes_[i], misc_[0][i], '%',
                    misc_[1][i], '%'))
            print("\n\nTraining Class Class")
            print("{:<21s}{:>10s}{:>10s}".format("Confusion Matrix",
                                                 classes_[0], classes_[1]))
            for i in range(2):
                print("{:6s}{:.<15s}".format('Class ', classes_[i]), end="")
                for j in range(2):
                    print("{:>10d}".format(conf_matt[i][j]), end="")
                print("")
            # NOTE(review): as in the original, the report below is for
            # the VALIDATION data even though it follows the training
            # confusion matrix; the original flagged the binary-case
            # report as unreliable.
            cr = classification_report(yv, predict_v, labels=nn.classes_)
            print("\n", cr)
            print("\n\nValidation Class Class")
            print("{:<21s}{:>10s}{:>10s}".format("Confusion Matrix",
                                                 classes_[0], classes_[1]))
            for i in range(2):
                print("{:6s}{:.<15s}".format('Class ', classes_[i]), end="")
                for j in range(2):
                    print("{:>10d}".format(conf_matv[i][j]), end="")
                print("")
        else:
            # ---------------- NOMINAL TARGET ----------------
            # Simplified from two self-defeating try/except blocks whose
            # handlers re-raised unconditionally.
            if len(nn.classes_) == 2:
                raise RuntimeError(" Call to display_split_metrics " +
                                   "invalid.\n Target is Binary.")
            if len(nn.classes_) < 3:
                raise RuntimeError(" Call to display_split_metrics " +
                                   "invalid.\n Target has less than three classes.\n")
            predict_t = nn.predict(Xt)
            predict_v = nn.predict(Xv)
            # 2-D / DataFrame targets are one-hot nominal; 1-D targets
            # fall back to a binomial 2x2 matrix.
            if type(yt) == pd.DataFrame or len(yt.shape) > 1:
                n = yt.shape[0]
                m = yt.shape[1]
            else:
                n = len(yt)
                m = 2
            print("\n******** Confusion Matrix **********")
            print("------------------------------------\n")
            print("******** Training Data **********")
            print("------------------------------------")
            conf_mat_t = np.zeros((m, m), dtype='int32')
            misc = 0
            if type(yt) == pd.DataFrame or len(yt.shape) > 1:
                if type(yt) == pd.DataFrame:
                    for i in range(n):
                        kp = predict_t[i, ].argmax()
                        ky = yt.iloc[i, ].argmax()
                        conf_mat_t[ky, kp] += 1
                        if ky != kp:
                            misc += 1
                else:
                    for i in range(n):
                        kp = predict_t[i, ].argmax()
                        ky = yt[i, ].argmax()
                        conf_mat_t[ky, kp] += 1
                        if ky != kp:
                            misc += 1
                miscp = 100 * misc / n
                for i in range(m):
                    for j in range(m):
                        print("{:>6d} ".format(conf_mat_t[i, j]), end="")
                    print("")
            else:
                for i in range(n):
                    if yt[i] == 0 and predict_t[i] < 0.5:
                        conf_mat_t[0, 0] += 1
                    elif yt[i] == 0 and predict_t[i] >= 0.5:
                        conf_mat_t[0, 1] += 1
                        misc += 1
                    elif yt[i] == 1 and predict_t[i] >= 0.5:
                        conf_mat_t[1, 1] += 1
                    elif yt[i] == 1 and predict_t[i] < 0.5:
                        conf_mat_t[1, 0] += 1
                        misc += 1
                miscp = 100 * misc / n
                for i in range(m):
                    print("{:>5d} {:>5d}".format(conf_mat_t[i, 0],
                                                 conf_mat_t[i, 1]))
            print("------------------------------------")
            print("Training Misclassification: {}/{}={:>5.3f}%".format(
                misc, n, miscp))
            if type(yv) == pd.DataFrame or len(yv.shape) > 1:
                n = yv.shape[0]
                m = yv.shape[1]
            else:
                n = len(yv)
                m = 2
            print("\n------------------------------------")
            print("******** Validation Data **********")
            print("------------------------------------")
            conf_mat_v = np.zeros((m, m), dtype='int32')
            misc = 0
            if type(yv) == pd.DataFrame or len(yv.shape) > 1:
                if type(yv) == pd.DataFrame:
                    for i in range(n):
                        kp = predict_v[i, ].argmax()
                        ky = yv.iloc[i, ].argmax()
                        conf_mat_v[ky, kp] += 1
                        if ky != kp:
                            misc += 1
                else:
                    for i in range(n):
                        kp = predict_v[i, ].argmax()
                        ky = yv[i, ].argmax()
                        conf_mat_v[ky, kp] += 1
                        if ky != kp:
                            misc += 1
                miscp = 100 * misc / n
                for i in range(m):
                    for j in range(m):
                        print("{:>6d} ".format(conf_mat_v[i, j]), end="")
                    print("")
            else:
                for i in range(n):
                    if yv[i] == 0 and predict_v[i] < 0.5:
                        conf_mat_v[0, 0] += 1
                    elif yv[i] == 0 and predict_v[i] >= 0.5:
                        conf_mat_v[0, 1] += 1
                        misc += 1
                    elif yv[i] == 1 and predict_v[i] >= 0.5:
                        conf_mat_v[1, 1] += 1
                    elif yv[i] == 1 and predict_v[i] < 0.5:
                        conf_mat_v[1, 0] += 1
                        misc += 1
                miscp = 100 * misc / n
                for i in range(m):
                    print("{:>5d} {:>5d}".format(conf_mat_v[i, 0],
                                                 conf_mat_v[i, 1]))
            print("------------------------------------")
            print("Validation Misclassification: {}/{}={:>5.3f}%".format(
                misc, n, miscp))
            """************************************************************"""
            prob_t = nn.predict_proba(Xt)
            prob_v = nn.predict_proba(Xv)
            n_classes = len(nn.classes_)
            ase_sumt = 0
            ase_sumv = 0
            mase_sumt = 0
            mase_sumv = 0
            misc_t = 0
            misc_v = 0
            misct = []
            miscv = []
            n_t = []
            n_v = []
            nt_obs = yt.shape[0]
            nv_obs = yv.shape[0]
            y_t = yt
            y_v = yv
            for i in range(n_classes):
                misct.append(0)
                n_t.append(0)
                miscv.append(0)
                n_v.append(0)
            # Accumulate squared / absolute probability errors and
            # per-class misclassification counts over the training data.
            for i in range(nt_obs):
                if type(y_t) == pd.DataFrame:
                    truth_t = y_t.iloc[i].argmax()
                else:
                    truth_t = y_t[i].argmax()
                for j in range(n_classes):
                    if truth_t == nn.classes_[j]:
                        ase_sumt += (1 - prob_t[i, j]) * (1 - prob_t[i, j])
                        mase_sumt += (1 - prob_t[i, j])
                        idx = j
                    else:
                        ase_sumt += prob_t[i, j] * prob_t[i, j]
                        mase_sumt += prob_t[i, j]
                n_t[idx] += 1
                if predict_t[i].argmax() != truth_t:
                    misc_t += 1
                    misct[idx] += 1
            # Same accumulation over the validation data.
            for i in range(nv_obs):
                if type(y_v) == pd.DataFrame:
                    truth_v = y_v.iloc[i].argmax()
                else:
                    truth_v = y_v[i].argmax()
                for j in range(n_classes):
                    if truth_v == nn.classes_[j]:
                        ase_sumv += (1 - prob_v[i, j]) * (1 - prob_v[i, j])
                        mase_sumv += (1 - prob_v[i, j])
                        idx = j
                    else:
                        ase_sumv += prob_v[i, j] * prob_v[i, j]
                        mase_sumv += prob_v[i, j]
                n_v[idx] += 1
                if predict_v[i].argmax() != truth_v:
                    misc_v += 1
                    miscv[idx] += 1
            misct_ = misc_t
            miscv_ = misc_v
            misc_t = 100 * misc_t / nt_obs
            misc_v = 100 * misc_v / nv_obs
            aset = ase_sumt / (n_classes * nt_obs)
            asev = ase_sumv / (n_classes * nv_obs)
            maset = mase_sumt / (n_classes * nt_obs)
            # BUG FIX: validation MAE was divided by the TRAINING
            # observation count (nt_obs) in the original.
            masev = mase_sumv / (n_classes * nv_obs)
            # Number of weights = biases + connection weights per layer.
            n_weights = 0
            for i in range(nn.n_layers_ - 1):
                n_weights += len(nn.intercepts_[i])
                n_weights += nn.coefs_[i].shape[0] * nn.coefs_[i].shape[1]
            print("\n")
            print("{:.<27s}{:>15s}{:>15s}".format('Model Metrics',
                                                  'Training', 'Validation'))
            print("{:.<27s}{:15d}{:15d}".format('Observations',
                                                Xt.shape[0], Xv.shape[0]))
            print("{:.<27s}{:15d}{:15d}".format('Features',
                                                Xt.shape[1], Xv.shape[1]))
            print("{:.<27s}{:15d}{:15d}".format('Hidden Layers',
                                                nn.n_layers_ - 2,
                                                nn.n_layers_ - 2))
            for i in range(nn.n_layers_ - 2):
                neurons = nn.coefs_[i].shape[1]
                print("{:<20s}{:.<3d}{:15d}{:15d}".format(
                    ' Neurons Hidden Layer ', i + 1, neurons, neurons))
            print("{:.<27s}{:15d}{:15d}".format('Outputs',
                                                nn.n_outputs_, nn.n_outputs_))
            print("{:.<27s}{:15d}{:15d}".format('Weights',
                                                n_weights, n_weights))
            print("{:.<27s}{:15d}{:15d}".format('Iterations',
                                                nn.n_iter_, nn.n_iter_))
            print("{:.<27s}{:>15s}{:>15s}".format('Hidden Layer Activation',
                                                  nn.activation,
                                                  nn.activation))
            print("{:.<27s}{:>15s}{:>15s}".format('Target Activation',
                                                  nn.out_activation_,
                                                  nn.out_activation_))
            print("{:.<27s}{:15.4f}{:15.4f}".format('Loss',
                                                    nn.loss_, nn.loss_))
            print("{:.<27s}{:15.4f}{:15.4f}".format('Avg Squared Error',
                                                    aset, asev))
            print("{:.<27s}{:15.4f}{:15.4f}".format('Root ASE',
                                                    sqrt(aset), sqrt(asev)))
            print("{:.<27s}{:15.4f}{:15.4f}".format('Mean Absolute Error',
                                                    maset, masev))
            acct = accuracy_score(yt, predict_t)
            accv = accuracy_score(yv, predict_v)
            print("{:.<27s}{:15.4f}{:15.4f}".format('Accuracy', acct, accv))
            print("{:.<27s}{:15.4f}{:15.4f}".format('Precision',
                  precision_score(yt, predict_t, average='macro'),
                  precision_score(yv, predict_v, average='macro')))
            print("{:.<27s}{:15.4f}{:15.4f}".format('Recall (Sensitivity)',
                  recall_score(yt, predict_t, average='macro'),
                  recall_score(yv, predict_v, average='macro')))
            print("{:.<27s}{:15.4f}{:15.4f}".format('F1-score',
                  f1_score(yt, predict_t, average='macro'),
                  f1_score(yv, predict_v, average='macro')))
            print("{:.<27s}{:15d}{:15d}".format(
                'Total Misclassifications', misct_, miscv_))
            print("{:.<27s}{:14.1f}{:s}{:14.1f}{:s}".format(
                'MISC (Misclassification)', misc_t, '%', misc_v, '%'))
            fstr0 = "{:s}{:.<16s}{:>14.1f}{:<1s}{:>14.1f}{:<1s}"
            fstr1 = "{:>7s}{:<3s}"
            fstr2 = "{:s}{:.<6s}"
            classes_ = []
            if type(nn.classes_[0]) == str:
                classes_ = nn.classes_
            else:
                for i in range(n_classes):
                    classes_.append(str(int(nn.classes_[i])))
            for i in range(n_classes):
                if n_t[i] > 0:
                    misct[i] = 100 * misct[i] / n_t[i]
                else:
                    misct[i] = 0
                if n_v[i] > 0:
                    miscv[i] = 100 * miscv[i] / n_v[i]
                else:
                    miscv[i] = 0
                print(fstr0.format(' class ', classes_[i],
                                   misct[i], '%', miscv[i], '%'))
            print("\n\nTraining")
            print("Confusion Matrix ", end="")
            for i in range(n_classes):
                print(fstr1.format('Class ', classes_[i]), end="")
            print("")
            for i in range(n_classes):
                print(fstr2.format('Class ', classes_[i]), end="")
                for j in range(n_classes):
                    print("{:>10d}".format(conf_mat_t[i][j]), end="")
                print("")
            ct = classification_report(yt, predict_t, labels=target_names)
            print("\nTraining \nMetrics:\n", ct)
            print("\n\nValidation")
            print("Confusion Matrix ", end="")
            for i in range(n_classes):
                print(fstr1.format('Class ', classes_[i]), end="")
            print("")
            for i in range(n_classes):
                print(fstr2.format('Class ', classes_[i]), end="")
                for j in range(n_classes):
                    print("{:>10d}".format(conf_mat_v[i][j]), end="")
                print("")
            cv = classification_report(yv, predict_v, labels=target_names)
            print("\nValidation \nMetrics:\n", cv)
class nn_keras(object):
    """Display helpers for a compiled/fitted Keras classifier.

    Methods are written without ``self`` and are intended to be called
    directly on the class, e.g. ``nn_keras.display_metrics(nn, X, y)``.
    """

    def accuracy_plot(history_dic):
        """Plot training/validation loss (top panel) and accuracy
        (bottom panel) versus epoch from a Keras History.history dict.

        Expects keys 'loss', 'val_loss', 'accuracy', 'val_accuracy'.
        """
        loss_values = history_dic['loss']
        val_loss_values = history_dic["val_loss"]
        acc_values = history_dic['accuracy']
        val_acc_values = history_dic['val_accuracy']
        epochs = range(1, len(val_loss_values) + 1)
        plt.subplot(211)
        plt.plot(epochs, loss_values, 'ro', label='Training Loss')
        plt.plot(epochs, val_loss_values, 'b', label='Validation Loss')
        plt.title("Loss vs. Accuracy")
        plt.ylabel("Loss")
        plt.legend()
        plt.subplot(212)
        plt.plot(epochs, acc_values, 'ro', label='Training Accuracy')
        plt.plot(epochs, val_acc_values, 'b', label='Validation Accuracy')
        plt.xlabel("Epoch")
        plt.ylabel("Accuracy")
        plt.legend()
        plt.show()

    def display_metrics(nn, X, y):
        """Print model description and classification metrics for a
        fitted Keras model evaluated on (X, y).

        A 1-D (or single-column) target with exactly two unique values
        is reported with binary metrics; otherwise y is assumed to be
        one-hot encoded and nominal metrics are printed.
        """
        if (len(y.shape) == 1 or y.shape[1] == 1) \
                and len(np.unique(y)) == 2:
            # ---------------- BINARY METRICS ----------------
            numpy_y = np.ravel(y)
            classes_ = np.unique(y)
            if type(numpy_y[0]) != str:
                classes_ = [str(int(classes_[0])), str(int(classes_[1]))]
            # Sigmoid output thresholded at 0.5 -> 0/1 predictions.
            predictions = (nn.predict(X) > 0.5).astype('int32')
            conf_mat = confusion_matrix(y_true=y, y_pred=predictions)
            tmisc = conf_mat[0][1] + conf_mat[1][0]
            misc = 100 * (tmisc) / (len(y))
            # z is the 0/1 indicator of class 1 membership, used as the
            # reference for probability-based error metrics below.
            z = np.zeros(len(y))
            for i in range(len(y)):
                if numpy_y[i] == 1:
                    z[i] = 1
            probability = nn.predict(X)  # P(class 1) per observation
            n_weights = nn.count_params()
            n_layers_ = len(nn.layers)
            n_outputs_ = len(classes_)
            print("\nModel Metrics")
            print("{:.<27s}{:10d}".format('Observations', X.shape[0]))
            print("{:.<27s}{:10d}".format('Features', X.shape[1]))
            print("{:.<27s}{:10d}".format('Hidden Layers', n_layers_ - 1))
            print("{:.<27s}{:10d}".format('Outputs', n_outputs_ - 1))
            # Walk the layer configs to total Dense units and capture
            # the last-hidden and output activations.  NOTE(review):
            # assumes a Sequential model of Dense layers — confirm for
            # models with non-Dense layers.
            n_neurons = 0
            config_dic = nn.get_config()
            l = 0
            for dic in config_dic['layers']:
                if dic['class_name'] == 'Dense':
                    n_neurons += dic['config']['units']
                    if l == n_layers_ - 2:
                        hl_activation = dic['config']['activation']
                    if l == n_layers_ - 1:
                        out_activation = dic['config']['activation']
                    l += 1
            print("{:.<27s}{:10d}".format('Neurons', n_neurons))
            print("{:.<27s}{:10d}".format('Weights', n_weights))
            print("{:.<27s}{:>10s}".format('Hidden Layer Activation',
                                           hl_activation))
            print("{:.<27s}{:>10s}".format('Output Layer Activation',
                                           out_activation))
            print("{:.<27s}{:10.4f}".format('Mean Absolute Error',
                  mean_absolute_error(z, probability[:, 0])))
            print("{:.<27s}{:10.4f}".format('Avg Squared Error',
                  mean_squared_error(z, probability[:, 0])))
            acc = accuracy_score(y, predictions)
            print("{:.<27s}{:10.4f}".format('Accuracy', acc))
            # pos_label is required for string-labelled targets.  (The
            # original computed each metric twice; duplicates removed.)
            if type(numpy_y[0]) == str:
                pre = precision_score(y, predictions, pos_label=classes_[1])
                tpr = recall_score(y, predictions, pos_label=classes_[1])
                f1 = f1_score(y, predictions, pos_label=classes_[1])
            else:
                pre = precision_score(y, predictions)
                tpr = recall_score(y, predictions)
                f1 = f1_score(y, predictions)
            print("{:.<27s}{:10.4f}".format('Precision', pre))
            print("{:.<27s}{:10.4f}".format('Recall (Sensitivity)', tpr))
            print("{:.<27s}{:10.4f}".format('F1-Score', f1))
            print("{:.<27s}{:10d}".format(
                'Total Misclassifications', tmisc))
            print("{:.<27s}{:9.1f}{:s}".format(
                'MISC (Misclassification)', misc, '%'))
            n_ = [conf_mat[0][0] + conf_mat[0][1],
                  conf_mat[1][0] + conf_mat[1][1]]
            miscc = [100 * conf_mat[0][1] / n_[0],
                     100 * conf_mat[1][0] / n_[1]]
            for i in range(2):
                print("{:s}{:<16s}{:>9.1f}{:<1s}".format(
                    ' class ', classes_[i], miscc[i], '%'))
            print("\n\n Confusion Class Class")
            print(" Matrix", end="")
            print("{:1s}{:>10s}{:>10s}".format(" ",
                                               classes_[0], classes_[1]))
            for i in range(2):
                print("{:s}{:.<6s}".format(' Class ', classes_[i]), end="")
                for j in range(2):
                    print("{:>10d}".format(conf_mat[i][j]), end="")
                print("")
            print("")
        else:
            # ---------------- NOMINAL METRICS ----------------
            # y is assumed one-hot encoded: one column per class.
            n_classes = y.shape[1]
            n_obs = y.shape[0]
            if n_classes < 2:
                raise RuntimeError("\n Call to display_metrics invalid" +
                                   "\n Target does not appear to be nominal.\n")
            prob_ = nn.predict(X)
            predict_ = np.argmax(prob_, axis=-1)
            y_ = np.argmax(y, axis=-1)
            classes_ = np.unique(y_)
            n_weights = nn.count_params()
            n_layers_ = len(nn.layers)
            n_outputs_ = n_classes
            ase_sum = 0
            mase_sum = 0
            misc_ = 0
            misc = [0] * n_classes
            n_ = [0] * n_classes
            conf_mat = []
            for i in range(n_classes):
                conf_mat.append(np.zeros(n_classes))
            # Accumulate squared / absolute probability errors, the
            # confusion matrix and per-class misclassification counts.
            for i in range(n_obs):
                for j in range(n_classes):
                    if y[i][j] == 1:
                        ase_sum += (1 - prob_[i, j]) * (1 - prob_[i, j])
                        mase_sum += 1 - prob_[i, j]
                        idx = j
                        n_[j] += 1
                    else:
                        ase_sum += prob_[i, j] * prob_[i, j]
                        mase_sum += prob_[i, j]
                j = predict_[i]
                conf_mat[idx][j] += 1
                if j != idx:
                    misc_ += 1
                    misc[idx] += 1
            tmisc = misc_
            misc_ = 100 * misc_ / n_obs
            ase = ase_sum / (n_classes * n_obs)
            mase = mase_sum / (n_classes * n_obs)
            print("\nModel Metrics")
            print("{:.<27s}{:10d}".format('Observations', X.shape[0]))
            print("{:.<27s}{:10d}".format('Features', X.shape[1]))
            print("{:.<27s}{:10d}".format('Hidden Layers', n_layers_ - 1))
            # Walk the layer configs: print units per hidden layer and
            # capture the activations (see binary branch note).
            n_neurons = 0
            config_dic = nn.get_config()
            l = 0
            for dic in config_dic['layers']:
                if dic['class_name'] == 'Dense':
                    n_neurons += dic['config']['units']
                    if l <= n_layers_ - 2:
                        hl_activation = dic['config']['activation']
                        print("{:<24s}{:.<3d}{:10d}".format(
                            ' Neurons Hidden Layer ', l,
                            dic['config']['units']))
                    if l == n_layers_ - 1:
                        out_activation = dic['config']['activation']
                    l += 1
            print("{:.<27s}{:10d}".format('Outputs', n_outputs_))
            print("{:.<27s}{:10d}".format('Weights', n_weights))
            print("{:.<27s}{:>10s}".format('Hidden Layer Activation',
                                           hl_activation))
            print("{:.<27s}{:10.4f}".format('Avg Squared Error', ase))
            print("{:.<27s}{:10.4f}".format('Root ASE', sqrt(ase)))
            print("{:.<27s}{:10.4f}".format('Mean Absolute Error', mase))
            acc = accuracy_score(y_, predict_)
            print("{:.<27s}{:10.4f}".format('Accuracy', acc))
            pre = precision_score(y_, predict_, average='macro')
            print("{:.<27s}{:10.4f}".format('Precision', pre))
            tpr = recall_score(y_, predict_, average='macro')
            print("{:.<27s}{:10.4f}".format('Recall (Sensitivity)', tpr))
            f1 = f1_score(y_, predict_, average='macro')
            print("{:.<27s}{:10.4f}".format('F1-Score', f1))
            print("{:.<27s}{:10d}".format(
                'Total Misclassifications', tmisc))
            print("{:.<27s}{:9.1f}{:s}".format(
                'MISC (Misclassification)', misc_, '%'))
            if type(classes_[0]) == str:
                fstr = "{:s}{:.<16s}{:>9.1f}{:<1s}"
            else:
                fstr = "{:s}{:.<16.0f}{:>9.1f}{:<1s}"
            for i in range(n_classes):
                if n_[i] > 0:
                    misc[i] = 100 * misc[i] / n_[i]
                print(fstr.format(' class ', classes_[i], misc[i], '%'))
            print("\n\n Confusion")
            print(" Matrix ", end="")
            if type(classes_[0]) == str:
                fstr1 = "{:>7s}{:<3s}"
                fstr2 = "{:s}{:.<6s}"
            else:
                fstr1 = "{:>7s}{:<3.0f}"
                fstr2 = "{:s}{:.<6.0f}"
            for i in range(n_classes):
                print(fstr1.format('Class ', classes_[i]), end="")
            print("")
            for i in range(n_classes):
                print(fstr2.format('Class ', classes_[i]), end="")
                for j in range(n_classes):
                    print("{:>10.0f}".format(conf_mat[i][j]), end="")
                print("")
            cr = classification_report(y_, predict_, labels=classes_,
                                       digits=4)
            print("\n", cr)