jiehou commited on
Commit
4a45429
·
verified ·
1 Parent(s): 951b39c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +311 -0
app.py ADDED
@@ -0,0 +1,311 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import seaborn as sn
3
+ import matplotlib.pyplot as plt
4
+ from sklearn.metrics import confusion_matrix
5
+ from matplotlib.colors import ListedColormap
6
+
7
+ import numpy as np
8
+
9
+ import gradio as gr
10
+
11
+
12
# --- Gradio widgets -------------------------------------------------------
# Input 1: a 10x3 editable table of (sample index, predicted probability,
# true 0/1 label); delivered to the callback as a numpy array.
set_input = gr.Dataframe(type="numpy", row_count=10, col_count=3, headers=['Sample Index', 'Predicted Prob', 'Label (Y)'], datatype=["number", "number", "number"])
# Input 2: probability cut-off used to binarize the predicted probabilities.
# NOTE(review): the label says "Default = 0.5" but value=0.4 — confirm which
# default is intended.
set_input2 = gr.Slider(0, 1, step = 0.1, value=0.4, label="Set Probability Threshold (Default = 0.5)")

#set_output = gr.Textbox(label ='test')
# Output: predicted-label table (defined but not wired into the Interface
# below, whose outputs are only the three images).
set_output1 = gr.Dataframe(type="pandas", label = 'Predicted Labels',max_rows=10)

# Outputs: the three diagnostic plots saved by visualize_ROC.
set_output2 = gr.Image(label="Confusion Matrix")
set_output3 = gr.Image(label="ROC curve")
set_output4 = gr.Image(label="Threshold Tuning curve")
21
+
22
def perf_measure(y_actual, y_hat):
    """Count confusion-matrix cells for binary (0/1) labels.

    Args:
        y_actual: ground-truth labels (any iterable of 0/1 values).
        y_hat: predicted labels, iterated in lockstep with ``y_actual``.

    Returns:
        Tuple ``(TP, FP, TN, FN)`` of true/false positive/negative counts.
    """
    TP = 0
    FP = 0
    TN = 0
    FN = 0

    # Iterate value pairs directly instead of indexing with range(len(...)).
    # The elif chain reproduces the original four mutually-exclusive tests:
    # a prediction that is neither 0 nor 1 is counted nowhere, as before.
    for actual, pred in zip(y_actual, y_hat):
        if actual == pred == 1:
            TP += 1
        elif pred == 1:          # pred is 1 but actual differs -> false alarm
            FP += 1
        elif actual == pred == 0:
            TN += 1
        elif pred == 0:          # pred is 0 but actual differs -> miss
            FN += 1

    return (TP, FP, TN, FN)
39
+
40
+
41
def _safe_ratio(numerator, denominator):
    """Return numerator/denominator, or 0 when the denominator is zero.

    Replaces the original bare ``try/except`` guards, which swallowed every
    exception instead of only the intended division-by-zero case.
    """
    return numerator / denominator if denominator else 0


def _threshold_metrics(TP, FP, TN, FN):
    """Derive classification metrics from confusion-matrix counts.

    Returns:
        Tuple ``(recall, precision, specificity, fpr, f1, g_mean)``;
        each ratio falls back to 0 when its denominator is zero.
    """
    recall = _safe_ratio(TP, TP + FN)        # sensitivity / true positive rate
    precision = _safe_ratio(TP, TP + FP)
    specificity = _safe_ratio(TN, TN + FP)
    fpr = _safe_ratio(FP, FP + TN)           # fall-out / false positive rate
    f1 = _safe_ratio(2 * recall * precision, precision + recall)
    g_mean = np.sqrt(recall * specificity)   # np.sqrt never raises here
    return recall, precision, specificity, fpr, f1, g_mean


def visualize_ROC(set_threshold, set_input):
    """Render the three diagnostic plots for a binary classifier.

    Args:
        set_threshold: probability cut-off used to turn predicted
            probabilities into hard 0/1 labels.
        set_input: (n, 3) array-like whose columns are
            [sample index, predicted probability, actual 0/1 label].

    Returns:
        Tuple of saved image paths:
        ``('tmp.png', 'tmp2.png', 'tmp3.png')`` — confusion matrix,
        ROC curve, and threshold-tuning curve respectively.
    """
    from sklearn.metrics import roc_curve  # local import, as in the original

    prob = set_input[:, 1]
    actual_label = set_input[:, 2]
    pred_label = (prob >= set_threshold).astype(int)

    df = pd.DataFrame({
        'Predicted Prob': prob,
        'Predicted Label': pred_label,
        'Actual Label': actual_label,
    })

    # --- Plot 1: confusion matrix at the chosen threshold -----------------
    confusion_matrix_results = confusion_matrix(df['Actual Label'], df['Predicted Label'])

    fig, ax = plt.subplots(figsize=(12, 4))
    sn.heatmap(confusion_matrix_results, annot=True, annot_kws={"size": 20}, cbar=False,
               square=False,
               fmt='g',
               cmap=ListedColormap(['white']), linecolor='black',
               linewidths=1.5)

    sn.set(font_scale=2)
    plt.xlabel("Predicted Label")
    plt.ylabel("Actual Label")
    # Label each quadrant with its confusion-matrix role.
    plt.text(0.6, 0.55, '(TN)')
    plt.text(1.6, 0.55, '(FP)')
    plt.text(0.6, 1.55, '(FN)')
    plt.text(1.6, 1.55, '(TP)')

    # Put the predicted-label axis on top of the heatmap.
    ax.xaxis.tick_top()
    ax.xaxis.set_ticks_position('top')
    ax.xaxis.set_label_position('top')
    plt.tight_layout()

    plt.savefig('tmp.png', dpi=100)
    plt.close(fig)  # avoid leaking figures across repeated Gradio calls

    # --- Plot 2: ROC curve with the current operating point ---------------
    fpr_mod, tpr_mod, _ = roc_curve(df['Actual Label'], df['Predicted Prob'])

    TP, FP, TN, FN = perf_measure(df['Actual Label'], df['Predicted Label'])
    recall, precision, specificity, FPR, f1_score_cur, g_mean_cur = \
        _threshold_metrics(TP, FP, TN, FN)
    TPR = recall  # true positive rate == recall

    fig, ax = plt.subplots(figsize=(12, 8))
    plt.rcParams["figure.autolayout"] = True
    plt.rcParams['figure.facecolor'] = 'white'

    plt.plot(fpr_mod, tpr_mod, label='ROC', c='blue', linestyle='-')
    diag = np.linspace(0, 1, 500)
    plt.plot(diag, diag, 'black', linestyle='--')  # chance line y = x
    plt.xlim(0, 1)
    plt.ylim(0, 1)
    # Cross-hairs + marker at the operating point implied by set_threshold.
    plt.axvline(x=FPR, color='gray', linestyle='--')
    plt.axhline(y=TPR, color='gray', linestyle='--')
    plt.scatter(FPR, TPR, color='red', s=300)

    ax.set_facecolor("white")
    ax.tick_params(axis='x', colors='black')
    ax.tick_params(axis='y', colors='black')
    for side in ('left', 'bottom', 'top', 'right'):
        ax.spines[side].set_color('black')
    plt.xlabel('False Positive Rate (1 - specificity)')
    plt.ylabel('True Positive Rate (Recall)')
    plt.text(FPR, TPR, 'FPR:%s, TPR:%s' % (round(FPR, 2), round(TPR, 2)))
    plt.title("ROC curve", fontsize=20)
    plt.tight_layout()

    plt.savefig('tmp2.png', dpi=100)
    plt.close(fig)

    # --- Plot 3: F1-score and G-mean across the threshold grid ------------
    thres_list = []
    f1_score_list = []
    g_mean_list = []
    for thres in np.arange(0, 1, 0.01):
        labels_at_thres = (prob >= thres).astype(int)
        tp, fp, tn, fn = perf_measure(actual_label, labels_at_thres)
        _, _, _, _, f1_at_thres, g_mean_at_thres = _threshold_metrics(tp, fp, tn, fn)
        thres_list.append(thres)
        f1_score_list.append(f1_at_thres)
        g_mean_list.append(g_mean_at_thres)

    fig, ax = plt.subplots(figsize=(12, 8))
    plt.rcParams["figure.autolayout"] = True
    plt.rcParams['figure.facecolor'] = 'white'

    plt.plot(thres_list, f1_score_list, label='F1-score', c='black', linestyle='-')
    plt.plot(thres_list, g_mean_list, label='G-mean', c='red', linestyle='-')
    plt.xlim(0, 1)
    plt.ylim(0, 1)
    # Mark the currently selected threshold and its scores.
    plt.axvline(x=set_threshold, color='gray', linestyle='--')
    plt.axhline(y=f1_score_cur, color='gray', linestyle='--')
    plt.scatter(set_threshold, f1_score_cur, color='red', s=300)
    plt.scatter(set_threshold, g_mean_cur, color='red', s=300)

    ax.set_facecolor("white")
    ax.tick_params(axis='x', colors='black')
    ax.tick_params(axis='y', colors='black')
    for side in ('left', 'bottom', 'top', 'right'):
        ax.spines[side].set_color('black')
    plt.xlabel('Threshold cut-off')
    plt.ylabel('F1-score & G-mean')
    plt.legend(loc='upper left')
    plt.text(set_threshold, f1_score_cur, 'F1-score:%s' % (round(f1_score_cur, 2)))
    plt.text(set_threshold, g_mean_cur, 'G-mean:%s' % (round(g_mean_cur, 2)))
    plt.title("Threshold tuning curves (F1-score & G-mean)", fontsize=20)
    plt.tight_layout()

    plt.savefig('tmp3.png', dpi=100)
    plt.close(fig)

    return 'tmp.png', 'tmp2.png', 'tmp3.png'
276
+
277
def get_example(n=100):
    """Build a deterministic synthetic prediction table for the demo.

    One quarter of the rows are positives (probabilities in [0.4, 0.8)),
    the rest negatives (probabilities in [0, 0.7)), shuffled together.
    Seeded so repeated calls return the same table.

    Args:
        n: total number of rows (default 100, matching the original
            hard-coded size).

    Returns:
        (n, 3) numpy array with columns
        [Sample Index, Predicted Prob, Label (Y)].
    """
    np.random.seed(seed=42)

    n_pos = n // 4
    n_neg = n - n_pos
    pd_class1 = pd.DataFrame({
        'Sample Index': list(range(1, n_pos + 1)),
        'Predicted Prob': np.random.uniform(0.4, 0.8, n_pos),
        'Label (Y)': np.repeat(1, n_pos),
    })
    pd_class2 = pd.DataFrame({
        'Sample Index': list(range(n_pos + 1, n + 1)),
        'Predicted Prob': np.random.uniform(0, 0.7, n_neg),
        'Label (Y)': np.repeat(0, n_neg),
    })

    pd_all = pd.concat([pd_class1, pd_class2]).reset_index(drop=True)
    # Shuffle rows, then renumber the sample index 1..n.
    pd_all = pd_all.sample(frac=1).reset_index(drop=True)
    pd_all['Sample Index'] = list(range(1, n + 1))
    return pd_all.to_numpy()
292
+
293
+
294
+ ### configure Gradio
295
### configure Gradio
# Wire the threshold slider and the data table into visualize_ROC; the three
# file paths it returns feed the three image outputs.
interface = gr.Interface(fn=visualize_ROC,
                         inputs=[set_input2, set_input],
                         outputs=[set_output2, set_output3, set_output4],
                         examples_per_page = 2,
                         # Two canned examples differing only in threshold;
                         # get_example() is seeded, so both share one table.
                         examples=[
                             [0.5, get_example()],
                             [0.7, get_example()],
                         ],
                         title="ML Demo for Receiver Operating Characteristic (ROC) curve",
                         description= "Click examples below for a quick demo",
                         theme = 'huggingface',
                         #layout = 'horizontal',
                         live=True  # re-run visualize_ROC on every input change
                         )


# debug=True blocks and streams errors to the console; the large
# height/width size the embedded app view.
interface.launch(debug=True, height=1400, width=2800)