Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,147 +1,784 @@
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import io
|
| 2 |
-
import
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
import
|
|
|
|
|
|
|
| 6 |
import panel as pn
|
| 7 |
-
|
| 8 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
-
|
|
|
|
|
|
|
| 11 |
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
"
|
| 15 |
-
"
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
return (await resp.json())[0]["url"]
|
| 27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
|
| 44 |
-
def
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
)
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
)
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
|
| 70 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
try:
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
|
| 78 |
-
|
| 79 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
|
| 81 |
-
|
| 82 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
name=class_item.strip(), value=f"{class_likelihood:.2%}", align="center"
|
| 87 |
-
)
|
| 88 |
-
row_bar = pn.indicators.Progress(
|
| 89 |
-
value=int(class_likelihood * 100),
|
| 90 |
-
sizing_mode="stretch_width",
|
| 91 |
-
bar_color="secondary",
|
| 92 |
-
margin=(0, 10),
|
| 93 |
-
design=pn.theme.Material,
|
| 94 |
-
)
|
| 95 |
-
results.append(pn.Column(row_label, row_bar))
|
| 96 |
-
yield results
|
| 97 |
-
finally:
|
| 98 |
-
main.disabled = False
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
# create widgets
|
| 102 |
-
randomize_url = pn.widgets.Button(name="Randomize URL", align="end")
|
| 103 |
-
|
| 104 |
-
image_url = pn.widgets.TextInput(
|
| 105 |
-
name="Image URL to classify",
|
| 106 |
-
value=pn.bind(random_url, randomize_url),
|
| 107 |
-
)
|
| 108 |
-
class_names = pn.widgets.TextInput(
|
| 109 |
-
name="Comma separated class names",
|
| 110 |
-
placeholder="Enter possible class names, e.g. cat, dog",
|
| 111 |
-
value="cat, dog, parrot",
|
| 112 |
-
)
|
| 113 |
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
pn.Row(image_url, randomize_url),
|
| 117 |
-
class_names,
|
| 118 |
-
)
|
| 119 |
|
| 120 |
-
#
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 140 |
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
)
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
# coding: utf-8
|
| 3 |
+
import numpy as np
|
| 4 |
import io
|
| 5 |
+
import sys
|
| 6 |
+
import os
|
| 7 |
+
import pandas as pd
|
| 8 |
+
import datetime
|
| 9 |
+
import gc #garabage collector
|
| 10 |
+
from io import BytesIO
|
| 11 |
import panel as pn
|
| 12 |
+
import holoviews as hv
|
| 13 |
+
import hvplot.pandas
|
| 14 |
+
import xlsxwriter
|
| 15 |
+
from warnings import filterwarnings
|
| 16 |
+
'''
|
| 17 |
+
development env: panel serve script.py --autoreload
|
| 18 |
+
prod prep: panel convert script.py --to pyodide-worker --out pyodide
|
| 19 |
+
'''
|
| 20 |
|
| 21 |
+
filterwarnings("ignore")
|
| 22 |
+
# hv.extension('bokeh')
|
| 23 |
+
pn.extension( "plotly", template="fast")
|
| 24 |
|
| 25 |
+
pn.state.template.param.update(
|
| 26 |
+
# site_url="",
|
| 27 |
+
site="ModelMonitor",
|
| 28 |
+
title="Classification Model Metrics",
|
| 29 |
+
# favicon="https://raw.githubusercontent.com/firobeid/firobeid.github.io/main/docs/compose-plots/Resources/favicon.ico",
|
| 30 |
+
)
|
| 31 |
+
#######################
|
| 32 |
+
###UTILITY FUNCTIONS###
|
| 33 |
+
#######################
|
| 34 |
+
def percentage(df):
    """Append a 'percent' column holding each row's share of the total Count.

    The fraction is rounded to 4 decimal places. Mutates and returns df.
    """
    total = df["Count"].sum()
    df["percent"] = round(df["Count"] / total, 4)
    return df
|
| 39 |
|
| 40 |
+
def AUC(group):
    """Score one group: ROC-AUC of SCORE against TARGET plus the labeled-row count.

    Returns a pd.Series indexed ["AUC", "Count"], where Count is the number of
    rows with a non-null TARGET.
    """
    from sklearn.metrics import roc_auc_score
    area = roc_auc_score(group['TARGET'], group['SCORE'])
    n_labeled = round(len(group.loc[group["TARGET"].notna()]), 0)
    return pd.Series([area, n_labeled], index=["AUC", "Count"])
|
| 48 |
|
| 49 |
+
def ROC(group):
    """Compute ROC curve points for one group of TARGET labels and SCOREs.

    Returns a two-column DataFrame keyed 'TPR'/'FPR'; the thresholds returned
    by sklearn are discarded.
    """
    from sklearn.metrics import roc_curve
    fpr, tpr, _thresholds = roc_curve(group['TARGET'], group['SCORE'])
    curve_cols = ['TPR', 'FPR']
    return pd.concat([pd.Series(tpr), pd.Series(fpr)], keys=curve_cols, axis=1)
|
|
|
|
| 54 |
|
| 55 |
+
def ks(group):
    """Kolmogorov-Smirnov separation between class-0 and class-1 score distributions.

    Returns a pd.Series indexed ["KS", "Count"], where Count is the number of
    rows with a non-null TARGET.
    """
    from scipy.stats import ks_2samp

    scores = pd.DataFrame({'real': group['TARGET'], 'proba': group['SCORE']})

    # Split the predicted probabilities by true label and compare distributions.
    neg = scores.loc[scores['real'] == 0, 'proba']
    pos = scores.loc[scores['real'] == 1, 'proba']
    stat = ks_2samp(neg, pos)

    n_labeled = round(len(group.loc[group["TARGET"].notna()]), 0)
    return pd.Series([stat[0], n_labeled], index=["KS", "Count"])
|
| 74 |
|
| 75 |
+
def psi(df):
    '''
    Population Stability Index of every column against the 'validation' column.

    Each non-'validation' column is compared bucket-by-bucket against the
    bucket percentages in df.validation using PSI = sum((a - v) * ln(a / v)).
    Zero cells are floored at 0.001 so the log stays defined.

    https://mwburke.github.io/data%20science/2018/04/29/population-stability-index.html#:~:text=To%20calculate%20the%20PSI%20we,the%20percents%20in%20each%20bucket.

    :param df: DataFrame of bucket percentages; the LAST column must be named
               'validation' (the baseline distribution).
    :return: Series of PSI values, one per non-validation column.
    '''
    # Work on a copy so the zero-flooring below does not mutate the caller's
    # frame (the original wrote 0.001 straight into the passed-in DataFrame).
    df = df.copy()
    df[df == 0] = 0.001
    sub = df.copy()
    sub = sub.iloc[:, :-1].sub(df.validation, axis=0)
    div = df.copy()
    div = div.iloc[:, :-1].div(df.validation, axis=0)
    div = np.log(div)
    return (sub * div).sum(axis=0)
|
| 86 |
|
| 87 |
+
def add_extremes_OOT(df, name: str, score: str):
    '''
    Mitigate bias in OOT/Serving/baseline set that might not have high confidence scores or low confidence scores.

    Appends two all-NaN rows: one tagged "Extreme_Case_Max" with score 0.0009
    and one tagged "Extreme_Case_Min" with score 0.9999. Mutates and returns df.

    :param: name: str, name of the appid column
    :param: score: str, name of the score column
    '''
    for tag, extreme_score in (("Extreme_Case_Max", 0.0009), ("Extreme_Case_Min", 0.9999)):
        df.loc[len(df.index)] = [np.nan for _ in range(df.shape[1])]
        df.loc[len(df.index) - 1, [name, score]] = [tag, extreme_score]
    return df
|
| 100 |
|
| 101 |
+
# def last_3months(df):
|
| 102 |
+
# from datetime import datetime
|
| 103 |
+
# from dateutil.relativedelta import relativedelta
|
| 104 |
+
# from pandas.tseries.offsets import MonthEnd
|
| 105 |
|
| 106 |
+
# end_of_month = ((pd.Timestamp(datetime.now().strftime('%Y-%m-%d')) - pd.Timedelta(70, unit='D')) + relativedelta(months=-1)) + MonthEnd(0)
|
| 107 |
+
# start_of_month = end_of_month + MonthEnd(-3) + relativedelta(days=1)
|
| 108 |
+
# end_of_month = end_of_month +relativedelta(hours=23, minutes=59, seconds=59)
|
| 109 |
+
# print('Start Month %r --- End Month %r' % (start_of_month, end_of_month))
|
| 110 |
+
# try:
|
| 111 |
+
# date_column = list(filter(lambda x:x.endswith("DATE"),gains_df.columns))[0]
|
| 112 |
+
# except:
|
| 113 |
+
# date_column = 'CREATED_DATE'
|
| 114 |
+
# return df[df[date_column].between(start_of_month, end_of_month)]
|
| 115 |
|
| 116 |
+
def gains_table_proba(data=None, target=None, prob=None):
    """Build a 10-decile gains/KS table from binary labels and predicted probabilities.

    :param data: DataFrame containing the label and probability columns.
    :param target: name of the binary (0/1) label column.
    :param prob: name of the predicted-probability column.
    :return: (kstable, ks_3mnths) where kstable is the decile table (rows
             ordered from lowest to highest score band) and ks_3mnths is a
             human-readable "KS is X% at decile N" summary string.
    """
    data = data.copy()
    data['target0'] = 1 - data[target]
    data['bucket'] = pd.qcut(data[prob], 10)
    grouped = data.groupby('bucket', as_index=False)
    kstable = pd.DataFrame()
    kstable['min_prob'] = grouped.min()[prob]
    kstable['max_prob'] = grouped.max()[prob]
    kstable['count'] = grouped.count()['target0']
    kstable['cum_total'] = (kstable['count'] / kstable['count'].sum()).cumsum()
    kstable['events'] = grouped.sum()[target]
    kstable['nonevents'] = grouped.sum()['target0']
    kstable['interval_rate'] = kstable['events'] / kstable['count']
    # Highest-score bucket first so the cumulative rates read top-down.
    kstable = kstable.sort_values(by="min_prob", ascending=False).reset_index(drop=True)
    kstable['event_rate'] = (kstable.events / data[target].sum()).apply('{0:.2%}'.format)
    kstable['nonevent_rate'] = (kstable.nonevents / data['target0'].sum()).apply('{0:.2%}'.format)
    kstable['cum_eventrate'] = (kstable.events / data[target].sum()).cumsum()
    kstable['cum_noneventrate'] = (kstable.nonevents / data['target0'].sum()).cumsum()
    kstable['mid_point'] = np.nan
    kstable['KS'] = np.round(kstable['cum_eventrate'] - kstable['cum_noneventrate'], 4) * 100

    # Formating
    kstable["cum_total"] = kstable["cum_total"].sort_values().values
    kstable = kstable.rename(columns={"min_prob": "low", "max_prob": "high"})
    kstable['mid_point'] = round((kstable['high'] + kstable['low']) / 2, 4)
    kstable['cum_eventrate'] = kstable['cum_eventrate'].apply('{0:.2%}'.format)
    kstable['cum_noneventrate'] = kstable['cum_noneventrate'].apply('{0:.2%}'.format)
    kstable.index = range(1, 11)
    kstable.index.rename('Decile', inplace=True)
    pd.set_option('display.max_columns', 15)
    # Display KS. (The original's unused `from colorama import Fore` was
    # removed: Fore was never referenced, and colorama is a third-party
    # dependency nothing else in this file needs.)
    ks_3mnths = "KS is " + str(max(kstable['KS'])) + "%" + " at decile " + str((kstable.index[kstable['KS'] == max(kstable['KS'])][0]))
    print(ks_3mnths)
    kstable['cum_eventrate'] = kstable['cum_eventrate'].str.replace("%", "").astype(float)
    kstable['cum_noneventrate'] = kstable['cum_noneventrate'].str.replace("%", "").astype(float)
    kstable.index = list(range(10, 0, -1))
    kstable = kstable.iloc[::-1]
    return (kstable, ks_3mnths)
|
| 156 |
|
| 157 |
+
def calculate_psi(expected, actual, buckettype='bins', buckets=10, axis=0):
    # https://www.kaggle.com/code/podsyp/population-stability-index
    '''Calculate the PSI (population stability index) across all variables
    Args:
       expected: numpy matrix of original values
       actual: numpy matrix of new values, same size as expected
       buckettype: type of strategy for creating buckets, bins splits into even splits, quantiles splits into quantile buckets
       buckets: number of quantiles to use in bucketing variables
       axis: axis by which variables are defined, 0 for vertical, 1 for horizontal
    Returns:
       psi_values: ndarray of psi values for each variable
    Author:
       Matthew Burke
       github.com/mwburke
       worksofchart.com
    '''

    def psi(expected_array, actual_array, buckets):
        '''Calculate the PSI for a single variable
        Args:
           expected_array: numpy array of original values
           actual_array: numpy array of new values, same size as expected
           buckets: number of percentile ranges to bucket the values into
        Returns:
           psi_value: calculated PSI value
        '''

        def scale_range(input, min, max):
            # Affinely rescale `input` (in place) to span [min, max].
            input += -(np.min(input))
            input /= np.max(input) / (max - min)
            input += min
            return input

        breakpoints = np.arange(0, buckets + 1) / (buckets) * 100

        if buckettype == 'bins':
            breakpoints = scale_range(breakpoints, np.min(expected_array), np.max(expected_array))
        elif buckettype == 'quantiles':
            breakpoints = np.stack([np.percentile(expected_array, b) for b in breakpoints])

        expected_percents = np.histogram(expected_array, breakpoints)[0] / len(expected_array)
        actual_percents = np.histogram(actual_array, breakpoints)[0] / len(actual_array)

        def sub_psi(e_perc, a_perc):
            '''Calculate the actual PSI value from comparing the values.
            Update the actual value to a very small number if equal to zero
            '''
            if a_perc == 0:
                a_perc = 0.0001
            if e_perc == 0:
                e_perc = 0.0001

            value = (e_perc - a_perc) * np.log(e_perc / a_perc)
            return (value)

        # Sum over a list, not a generator: np.sum(<generator>) is deprecated.
        psi_value = np.sum([sub_psi(expected_percents[i], actual_percents[i]) for i in range(0, len(expected_percents))])

        return (psi_value)

    if len(expected.shape) == 1:
        psi_values = np.empty(len(expected.shape))
    else:
        psi_values = np.empty(expected.shape[axis])

    for i in range(0, len(psi_values)):
        if len(psi_values) == 1:
            psi_values = psi(expected, actual, buckets)
        elif axis == 0:
            psi_values[i] = psi(expected[:, i], actual[:, i], buckets)
        elif axis == 1:
            psi_values[i] = psi(expected[i, :], actual[i, :], buckets)

    # NOTE(review): the original had an unreachable stray
    # `return round(10 ** ((158.313177 - UW5_Score) / 274.360149), 18)` after
    # this return, referencing the undefined name UW5_Score; that dead
    # leftover has been removed.
    return (psi_values)
|
| 235 |
+
|
| 236 |
+
def lift_init(df:pd.DataFrame, baseline = None, is_baseline = True):
|
| 237 |
+
from tqdm import tqdm
|
| 238 |
+
# global standalone_scores_OOT
|
| 239 |
+
cols = ['SCORE']
|
| 240 |
|
| 241 |
+
lift_chart_data_OOT = pd.DataFrame()
|
| 242 |
+
for q in tqdm([10,20,50,100]):
|
| 243 |
+
# df_new["QUARTER"] = pd.PeriodIndex(df_new.CREATE_DATE, freq='Q')
|
| 244 |
+
# fd = baseline.dropna(subset = period_metrics.value)[cols].apply(lambda col: pd.qcut(col.rank(method='first'),q = q, ), axis = 0).copy()
|
| 245 |
+
# pd.cut(prod['SCORE'], bins = pd.qcut(baseline['SCORE'],10, retbins = True)[1])
|
| 246 |
+
if is_baseline == True:
|
| 247 |
+
# print(df)
|
| 248 |
+
# print(df.dropna(subset = ['MONTHLY']))
|
| 249 |
+
fd = df.dropna(subset = [period_metrics.value])[cols].apply(lambda col: pd.cut(col, bins = pd.qcut(col,q=q, retbins = True)[1]) , axis = 0).copy()
|
| 250 |
+
fd = pd.concat([df.dropna(subset = [period_metrics.value])[period_metrics.value], df.dropna(subset = [period_metrics.value])['TARGET'], fd], axis = 1)
|
| 251 |
+
fd = pd.concat([fd.groupby(x)['TARGET'].mean().fillna(0) for x in fd[cols]], axis = 1, keys = cols)
|
| 252 |
+
fd.index.name = 'SCORE_BAND'
|
| 253 |
+
|
| 254 |
+
else:
|
| 255 |
+
# print(baseline.dropna(subset = [period_metrics.value])[cols].values.ravel().shape)
|
| 256 |
+
# print(pd.qcut(baseline.dropna(subset = [period_metrics.value])[cols].values.ravel(),q=q, retbins = True))
|
| 257 |
+
bins_ = pd.qcut(baseline.dropna(subset = [period_metrics.value])[cols].values.ravel(),q=q, retbins = True)[1]
|
| 258 |
+
fd = df.groupby([period_metrics.value]).apply(lambda col: col[cols].apply(lambda col: pd.cut(col, bins = bins_), axis = 0)).copy()
|
| 259 |
+
# fd = df.groupby(period_metrics.value).apply(lambda col: col[cols].apply(lambda col: pd.cut(col, bins = pd.qcut(col,q=q, retbins = True)[1]), axis = 0)).copy()
|
| 260 |
+
fd = pd.concat([df[period_metrics.value], df['TARGET'], fd], axis = 1)
|
| 261 |
+
fd = fd.groupby(period_metrics.value).apply(lambda col: pd.concat([col.groupby(x)['TARGET'].mean().fillna(0) for x in col[cols]], axis = 1, keys = cols))
|
| 262 |
+
fd.index.names = [period_metrics.value, 'SCORE_BAND']
|
| 263 |
+
# fd['APPLICATION_MONTH'] = fd['APPLICATION_MONTH'].astype(str)
|
| 264 |
+
fd = fd.reset_index()
|
| 265 |
+
fd['BINS'] = q
|
| 266 |
+
lift_chart_data_OOT = lift_chart_data_OOT.append(fd)
|
| 267 |
+
if is_baseline == True:
|
| 268 |
+
lift_chart_data_OOT[period_metrics.value] = 'Baseline'
|
| 269 |
+
|
| 270 |
+
standalone_scores_OOT = lift_chart_data_OOT.melt(id_vars=[period_metrics.value,'BINS','SCORE_BAND'],value_vars=cols,
|
| 271 |
+
var_name='SCORE',
|
| 272 |
+
value_name='BAD_RATE').dropna().reset_index(drop = True).copy()
|
| 273 |
+
standalone_scores_OOT[['BINS', 'SCORE_BAND']] = standalone_scores_OOT[['BINS', 'SCORE_BAND']].astype(str)
|
| 274 |
+
standalone_scores_OOT = pd.concat([standalone_scores_OOT["BINS"] + "-" + standalone_scores_OOT["SCORE_BAND"] + "-" + standalone_scores_OOT["SCORE"],
|
| 275 |
+
standalone_scores_OOT[[period_metrics.value,'BAD_RATE']]], axis = 1).rename(columns = {0:'BINS_SCOREBAND_SCORE'})
|
| 276 |
+
standalone_scores_OOT = standalone_scores_OOT.pivot(index = 'BINS_SCOREBAND_SCORE', columns=period_metrics.value)['BAD_RATE'].reset_index()
|
| 277 |
+
standalone_scores_OOT.index.name = ""
|
| 278 |
+
standalone_scores_OOT.columns.name = ""
|
| 279 |
+
standalone_scores_OOT = pd.concat([standalone_scores_OOT['BINS_SCOREBAND_SCORE'].str.split('-', expand=True),
|
| 280 |
+
standalone_scores_OOT],axis = 1).rename(columns ={0:'BINS', 1: 'SCORE_BAND', 2: 'SCORE'}).drop(columns = 'BINS_SCOREBAND_SCORE')
|
| 281 |
+
# standalone_scores_OOT[['BINS', 'SCORE_BAND']] = standalone_scores_OOT[['BINS', 'SCORE_BAND']]#.astype(int)
|
| 282 |
+
standalone_scores_OOT['BINS'] = standalone_scores_OOT['BINS']
|
| 283 |
+
standalone_scores_OOT.sort_values(['SCORE', 'SCORE_BAND'], inplace = True)
|
| 284 |
+
return standalone_scores_OOT, lift_chart_data_OOT
|
| 285 |
+
|
| 286 |
+
def lift_init_plots(df:pd.DataFrame, is_baseline = True):
|
| 287 |
+
from tqdm import tqdm
|
| 288 |
+
# global standalone_scores_OOT
|
| 289 |
+
cols = ['SCORE']
|
| 290 |
+
|
| 291 |
+
lift_chart_data_OOT = pd.DataFrame()
|
| 292 |
+
for q in tqdm([10,20,50,100]):
|
| 293 |
+
# df_new["QUARTER"] = pd.PeriodIndex(df_new.CREATE_DATE, freq='Q')
|
| 294 |
+
# fd = baseline.dropna(subset = period_metrics.value)[cols].apply(lambda col: pd.qcut(col.rank(method='first'),q = q, ), axis = 0).copy()
|
| 295 |
+
# pd.cut(prod['SCORE'], bins = pd.qcut(baseline['SCORE'],10, retbins = True)[1])
|
| 296 |
+
# fd = df.dropna(subset = period_metrics.value)[cols].apply(lambda col: pd.cut(col, bins = pd.qcut(col,q=q, retbins = True)[1]) , axis = 0).copy()
|
| 297 |
+
if is_baseline == True:
|
| 298 |
+
fd = df.dropna(subset = period_metrics.value)[cols].apply(lambda col: pd.qcut(col.rank(method='first'),q = q, labels=range(1, q + 1)), axis = 0).copy()
|
| 299 |
+
fd = pd.concat([df.dropna(subset = period_metrics.value)[period_metrics.value], df.dropna(subset = period_metrics.value)['TARGET'], fd], axis = 1)
|
| 300 |
+
fd = pd.concat([fd.groupby(x)['TARGET'].mean().fillna(0) for x in fd[cols]], axis = 1, keys = cols)
|
| 301 |
+
fd.index.name = 'SCORE_BAND'
|
| 302 |
+
|
| 303 |
+
else:
|
| 304 |
+
fd = df.groupby(period_metrics.value).apply(lambda col: col[cols].apply(lambda col: pd.qcut(col.rank(method='first'),q = q, labels=range(1,q + 1)), axis = 0)).copy()
|
| 305 |
+
fd = pd.concat([df[period_metrics.value], df['TARGET'], fd], axis = 1)
|
| 306 |
+
fd = fd.groupby(period_metrics.value).apply(lambda col: pd.concat([col.groupby(x)['TARGET'].mean().fillna(0) for x in col[cols]], axis = 1, keys = cols))
|
| 307 |
+
# print(fd.index)
|
| 308 |
+
fd.index.names = [period_metrics.value, 'SCORE_BAND']
|
| 309 |
+
# fd = fd.reset_index(names = ['APPLICATION_MONTH', 'SCORE_BAND'])
|
| 310 |
+
fd = fd.reset_index()
|
| 311 |
+
# fd['APPLICATION_MONTH'] = fd['APPLICATION_MONTH'].astype(str)
|
| 312 |
+
fd['BINS'] = q
|
| 313 |
+
lift_chart_data_OOT = lift_chart_data_OOT.append(fd)
|
| 314 |
+
if is_baseline == True:
|
| 315 |
+
lift_chart_data_OOT[period_metrics.value] = 'Baseline'
|
| 316 |
+
lift_chart_data_OOT.sort_values(['SCORE', 'SCORE_BAND'], inplace = True)
|
| 317 |
+
standalone_scores_OOT = lift_chart_data_OOT.melt(id_vars=[period_metrics.value,'BINS','SCORE_BAND'],value_vars=cols,
|
| 318 |
+
var_name='SCORE',
|
| 319 |
+
value_name='BAD_RATE').dropna().reset_index(drop = True).copy()
|
| 320 |
+
standalone_scores_OOT[['BINS', 'SCORE_BAND']] = standalone_scores_OOT[['BINS', 'SCORE_BAND']].astype(str)
|
| 321 |
+
standalone_scores_OOT = pd.concat([standalone_scores_OOT["BINS"] + "-" + standalone_scores_OOT["SCORE_BAND"] + "-" + standalone_scores_OOT["SCORE"],
|
| 322 |
+
standalone_scores_OOT[[period_metrics.value,'BAD_RATE']]], axis = 1).rename(columns = {0:'BINS_SCOREBAND_SCORE'})
|
| 323 |
+
standalone_scores_OOT = standalone_scores_OOT.pivot(index = 'BINS_SCOREBAND_SCORE', columns=period_metrics.value)['BAD_RATE'].reset_index()
|
| 324 |
+
standalone_scores_OOT.index.name = ""
|
| 325 |
+
standalone_scores_OOT.columns.name = ""
|
| 326 |
+
standalone_scores_OOT = pd.concat([standalone_scores_OOT['BINS_SCOREBAND_SCORE'].str.split('-', expand=True),
|
| 327 |
+
standalone_scores_OOT],axis = 1).rename(columns ={0:'BINS', 1: 'SCORE_BAND', 2: 'SCORE'}).drop(columns = 'BINS_SCOREBAND_SCORE')
|
| 328 |
+
standalone_scores_OOT[['BINS', 'SCORE_BAND']] = standalone_scores_OOT[['BINS', 'SCORE_BAND']].astype(int)
|
| 329 |
+
standalone_scores_OOT['BINS'] = standalone_scores_OOT['BINS']
|
| 330 |
+
standalone_scores_OOT.sort_values(['SCORE', 'SCORE_BAND'], inplace = True)
|
| 331 |
+
return standalone_scores_OOT
|
| 332 |
+
|
| 333 |
+
def save_csv(df, metric):
    """Wrap df as a downloadable CSV named '<metric>.csv'.

    Returns a panel FileDownload widget backed by an in-memory text buffer.
    """
    from io import StringIO
    buffer = StringIO()
    df.to_csv(buffer)
    buffer.seek(0)
    return pn.widgets.FileDownload(buffer, embed=True, filename='%s.csv' % metric)
|
| 339 |
+
|
| 340 |
+
def get_xlsx(df1, df2, df3, df4, df5, df6):
    """Bundle the six metric frames into one in-memory Excel workbook.

    One sheet per frame: PSI, AUC, KS, LABEL_DRIFT, LABEL_Tables, GAINS_Tables.
    Returns a panel FileDownload widget serving the workbook.
    """
    from io import BytesIO
    output = BytesIO()
    writer = pd.ExcelWriter(output, engine='xlsxwriter')
    df1.to_excel(writer, sheet_name="PSI")
    df2.to_excel(writer, sheet_name="AUC")
    df3.to_excel(writer, sheet_name="KS")
    df4.to_excel(writer, sheet_name="LABEL_DRIFT")
    df5.to_excel(writer, sheet_name="LABEL_Tables")
    df6.to_excel(writer, sheet_name="GAINS_Tables")
    # close() finalizes the workbook and works on every pandas version;
    # writer.save() was deprecated and then removed in pandas 2.0.
    writer.close()  # Important!
    output.seek(0)  # Important!
    # The payload is an Excel workbook, so serve it with an .xlsx name
    # (the original misleadingly named the download 'results.csv').
    return pn.widgets.FileDownload(output, embed=True, filename='results.xlsx', button_type="primary")
|
| 353 |
+
|
| 354 |
+
|
| 355 |
+
def expected_calibration_error(y, proba, bins = 'fd'):
    """Expected Calibration Error of probabilistic predictions.

    Histograms the probabilities (default: numpy's Freedman-Diaconis 'fd'
    rule), then compares each bin's mean predicted probability with its
    observed positive rate, weighting the gaps by bin population.

    Returns (ece, bin_probamean, bin_ymean, bin_id, bin_count, bin_edges).
    """
    import numpy as np
    counts, edges = np.histogram(proba, bins=bins)
    n_bins = len(counts)
    edges[0] -= 1e-8  # left edge is exclusive under right=True; include the minimum
    ids = np.digitize(proba, edges, right=True) - 1
    y_sums = np.bincount(ids, weights=y, minlength=n_bins)
    p_sums = np.bincount(ids, weights=proba, minlength=n_bins)
    # Per-bin means, with empty bins mapped to 0 instead of NaN.
    y_means = np.divide(y_sums, counts, out=np.zeros(n_bins), where=counts > 0)
    p_means = np.divide(p_sums, counts, out=np.zeros(n_bins), where=counts > 0)
    ece = np.abs((p_means - y_means) * counts).sum() / len(proba)
    return ece, p_means, y_means, ids, counts, edges
|
| 367 |
+
###############################
|
| 368 |
+
###END OFF UTILITY FUNCTIONS###
|
| 369 |
+
###############################
|
| 370 |
+
|
| 371 |
+
text = """
|
| 372 |
+
#Classification Model Metrics
|
| 373 |
+
## AUTHOR: [`FIRAS ALI OBEID`](https://www.linkedin.com/in/feras-obeid/)
|
| 374 |
+
### GNU General Public License v3.0 (GPL-3.0)
|
| 375 |
+
#### Developed while working at [OppFi Inc.](https://www.oppfi.com/)
|
| 376 |
+
|
| 377 |
+
This tool performs ML model ,in production, monitoring across time,
|
| 378 |
+
where production weeks/months/quarters are compared too a selective baseline.
|
| 379 |
+
|
| 380 |
+
1. Upload a CSV containing:
|
| 381 |
+
|
| 382 |
+
**(Date)** Highly Recommended but **optional**
|
| 383 |
+
**(Score)** Probability Predictions
|
| 384 |
+
**(Target)** Binary Target/True Label
|
| 385 |
+
|
| 386 |
+
2. Check the box if you CSV has a DATE column, otherwise dates are generated based on current timestamp and spanning back by
|
| 387 |
+
timedelta of csv length in hourly frequency.
|
| 388 |
+
|
| 389 |
+
3. Choose & press the right columns in the `Select Boxes` below when you upload a csv
|
| 390 |
+
|
| 391 |
+
4. Select a baseline date slice **mandatory**. If your baseline is from a different time then the production time,
|
| 392 |
+
make sure to append it to the csv before uploading.
|
| 393 |
+
|
| 394 |
+
5. Press Get Metrics
|
| 395 |
+
|
| 396 |
+
6. Wait few seconds and analyze the updated charts
|
| 397 |
+
"""
|
| 398 |
+
|
| 399 |
+
|
| 400 |
+
|
| 401 |
+
# date = str(input('What is the name off the date column: ').upper())
|
| 402 |
+
# id_ = str(input('What is the name off the APP name/ID column: ').upper())
|
| 403 |
+
# score = str(input('What is the name off the score column (i.e UW5,DM_QL...): ').upper())
|
| 404 |
+
# target = str(input('What is the name off the Target column (i.e Real target values such as PD70_RATIO...: ').upper())
|
| 405 |
+
|
| 406 |
+
file_input = pn.widgets.FileInput(align='center')
|
| 407 |
+
date_selector = pn.widgets.Select(name='Select Date Column',)
|
| 408 |
+
check_date = pn.widgets.Checkbox(name = '<--',value = False) # T/F
|
| 409 |
+
target_selector = pn.widgets.Select(name='Select Target Variable(True Label)')
|
| 410 |
+
score_selector = pn.widgets.Select(name='Select Predictions Column(Raw Probaility)')
|
| 411 |
+
period_metrics = pn.widgets.Select(name='Select Period', options = ['MONTHLY','WEEKLY', 'QUARTERLY'])
|
| 412 |
+
|
| 413 |
+
date_range_ = pn.widgets.DateRangeSlider(name='Baseline Period',) #value=(start, end), start=start, end=end
|
| 414 |
+
|
| 415 |
+
random_seed = pn.widgets.IntSlider(name='Random Seed for Random Generated Data (OnSet)', value=42, start=0, end=1000, step=1)
|
| 416 |
+
|
| 417 |
+
button = pn.widgets.Button(name='Get Metrics')
|
| 418 |
+
widgets = pn.WidgetBox(
|
| 419 |
+
pn.panel(text, margin=(0, 20)),
|
| 420 |
+
pn.panel('**Check box if your data has a date column *before uploading the file* \n (otherwise keep it empty)**'),
|
| 421 |
+
check_date,
|
| 422 |
+
file_input,
|
| 423 |
+
random_seed,
|
| 424 |
+
pn.panel('\n'),
|
| 425 |
+
date_selector,
|
| 426 |
+
target_selector,
|
| 427 |
+
score_selector,
|
| 428 |
+
period_metrics,
|
| 429 |
+
date_range_,
|
| 430 |
+
button
|
| 431 |
+
)
|
| 432 |
+
|
| 433 |
+
# start, end = stocks.index.min(), stocks.index.max()
|
| 434 |
+
# year = pn.widgets.DateRangeSlider(name='Year', value=(start, end), start=start, end=end)
|
| 435 |
+
# ,id_:'ID',
|
| 436 |
+
|
| 437 |
+
|
| 438 |
+
def get_data():
    """Return the working DataFrame: the uploaded CSV if present, else synthetic data.

    With no upload, builds a 10,000-row frame of hourly timestamps, sequential
    IDs, uniform scores and a ~10% positive-rate binary target (seeded from
    the random_seed widget). With an upload, parses the CSV bytes, coerces
    columns to numeric where possible, drops categorical columns, replaces
    +/-inf with NaN and upper-cases the column names. Stores the result in
    the module-global df and returns it.
    """
    global df
    if file_input.value is None:
        np.random.seed(random_seed.value)
        # periods= guarantees exactly 10000 stamps so the columns always line
        # up; the original juggled start offsets (9999 / 9999+1 / 9999-1 hours)
        # inside nested try/excepts to dodge length mismatches.
        df = pd.DataFrame({'DATE': pd.date_range(end = datetime.datetime.today(), periods = 10000, tz = "US/Eastern", freq = "H"),
                           'ID': [i for i in range(10000)],
                           'SCORE': np.random.uniform(size = 10000),
                           'TARGET': np.random.choice([0, 1], 10000, p = [0.9, 0.1])})
        # df.to_csv("test_upload.csv")
    else:
        df = BytesIO()
        df.write(file_input.value)
        df.seek(0)
        try:
            # NOTE(review): error_bad_lines was removed in pandas 2.0
            # (on_bad_lines='skip' is its replacement); this file targets the
            # older pandas API throughout — confirm before upgrading pandas.
            df = pd.read_csv(df, error_bad_lines=False).apply(pd.to_numeric, errors='ignore')
        except:
            df = pd.read_csv(df, error_bad_lines=False)

    df = df.select_dtypes(exclude=["category"])
    df = df.replace([np.inf, -np.inf], np.nan)
    df.columns = [i.upper() for i in df.columns]
    return df
|
| 472 |
+
|
| 473 |
+
def update_target(event):
    """Refresh the column-selector widgets and date-range slider for a new dataset.

    Called via ``file_input.param.watch`` whenever a file is uploaded (and once
    at startup with ``event=None``).  Populates the date/target/score selectors
    with the dataset's columns; if ``check_date`` is on and a DATE-like column
    exists, the slider bounds come from that column, otherwise a synthetic
    hourly timeline ending now is fabricated.
    """
    df = get_data()
    cols = list(df.columns)
    date_selector.set_param(options=cols)
    target_selector.set_param(options=cols)
    score_selector.set_param(options=cols)

    def _use_synthetic_dates(df):
        # Fabricate one timestamp per row, hourly, ending now (US/Eastern),
        # and point the slider at the full synthetic range.
        print('Creating synthetic dates')
        synthetic_date = pd.date_range(start = (datetime.datetime.today() - pd.DateOffset(hours = len(df))), end = datetime.datetime.today(), tz = "US/Eastern", freq = "H")
        df['DATE'] = synthetic_date[:len(df)]
        start, end = df.DATE.min(), df.DATE.max()
        date_range_.set_param(value=(start, end), start=start, end=end)

    if check_date.value == True:
        # Positions of all columns whose name contains "DATE".
        # BUGFIX: the previous two-step `str.find` + `list.index` mapping
        # collapsed columns sharing the same find() offset onto the first one;
        # enumerate gives the true column positions directly.
        date_column = [idx for idx, c in enumerate(df.columns) if c.find("DATE") != -1]
        if len(date_column) > 0:
            # Keep only the first DATE-like column and canonicalise its name.
            df = df.iloc[:, date_column].iloc[:, [0]]
            df.columns = ['DATE']
            print(type(df.DATE.min()))
            start, end = pd.Timestamp(df.DATE.min()), pd.Timestamp(df.DATE.max())
            try:
                date_range_.set_param(value=(start, end), start=start, end=end)
            except Exception:
                # Fall back with the bounds swapped if the column was reversed.
                date_range_.set_param(value=(end, start), start=end, end=start)
        else:
            _use_synthetic_dates(df)
    else:
        _use_synthetic_dates(df)
|
| 505 |
+
# Re-run the widget refresh whenever a new file is uploaded.
file_input.param.watch(update_target, 'value')
# Prime the selectors once at startup against the initial (synthetic) dataset.
update_target(None)
|
| 507 |
+
|
| 508 |
+
@pn.depends(button.param.clicks)
def run(_):
    """Build the monitoring dashboard (Metrics + Charts tabs) on button click.

    Loads the current dataset, canonicalises columns to DATE/SCORE/TARGET,
    splits rows into a baseline period (the date-range slider selection) and a
    production period (everything else), then computes PSI, AUC, KS, label
    drift, lift and gains tables plus ROC and calibration charts.

    Returns a ``pn.Tabs`` layout; if preprocessing raises, returns a Markdown
    pane containing the error text instead.
    """
    print(random_seed.value)
    print(score_selector.value)
    df = get_data()
    # --- Canonicalise the user-selected columns to DATE / SCORE / TARGET ---
    try:
        if file_input.value is None:
            pass  # synthetic data already uses the canonical names
        elif check_date.value == True:
            df = df.rename(columns={date_selector.value:'DATE',score_selector.value:'SCORE',target_selector.value:'TARGET'})
        else:
            # No real date column: fabricate an hourly timeline ending now.
            synthetic_date = pd.date_range(start = (datetime.datetime.today() - pd.DateOffset(hours = len(df) - 1)), end = datetime.datetime.today(), tz = "US/Eastern", freq = "H")
            df['DATE'] = synthetic_date[:len(df)]
            df = df.rename(columns={score_selector.value:'SCORE',target_selector.value:'TARGET'})
    except Exception as e:
        return pn.pane.Markdown(f"""{e}""")
    # --- Derive the period buckets used by the per-period metrics below ---
    try:
        df.DATE = pd.to_datetime(df.DATE, format="%Y-%m-%d %H:%M:%S", utc = True)
        df["MONTHLY"] = df["DATE"].dt.strftime('%Y-%m')
        print(f"J - DAYS COUNT: {datetime.datetime.now() - pd.Timestamp('2023-03-06 03:27')}" )
        df['QUARTERLY'] = pd.PeriodIndex(df.DATE, freq='Q').astype(str)
        df['WEEKLY'] = pd.PeriodIndex(df.DATE, freq='W').astype(str)
    except Exception as e:
        return pn.pane.Markdown(f"""{e}""")
    df = df.reset_index().rename(columns={df.index.name:'ID'}) # create synthetic prediction ID for downstream code
    # Drop rows where TARGET or SCORE is missing.
    # BUGFIX: the previous expression `~(df.TARGET.isna()) | (df.SCORE.isna())`
    # applied ~ only to the first term, so rows with a NaN SCORE were KEPT and
    # later fed to AUC/KS (contradicting the dropna intent commented out here).
    # df = df.dropna(subset = 'TARGET', axis = 1)
    df = df[~(df.TARGET.isna() | df.SCORE.isna())]
    if df.TARGET.nunique() > 2:
        # Continuous / multi-class target: binarise (anything positive -> 1).
        df.TARGET = np.where(df.TARGET > 0 , 1 , 0)
    df.SCORE = df.SCORE.astype(np.float64)

    print(date_range_.value[0])
    print(date_range_.value[1])

    # --- Baseline = rows inside the slider's date range ---
    baseline = df.set_index('DATE').sort_index().loc[date_range_.value[0]: date_range_.value[1]].reset_index()
    print(baseline.DATE.min())
    print(baseline.DATE.max())
    print(df.DATE.max())

    # --- Production = all dates NOT in the baseline slice ---
    prod_dates = df.set_index('DATE').sort_index().index.difference(baseline.set_index('DATE').index)
    prod = df.set_index('DATE').loc[prod_dates].reset_index()
    if len(baseline) > len(prod):
        # NOTE(review): when the baseline covers most of the data, production
        # is replaced by the baseline itself; the PSI branch below relies on
        # this equality to show a "choose a baseline" warning instead of PSI.
        prod = baseline
    ##START##
    intiate = pn.pane.Alert('''### Baseline Period: \n%s to %s
'''%(baseline.DATE.min(),baseline.DATE.max()), alert_type="info")
    intiate2 = pn.pane.Alert('''### Production Period: \n%s to %s
'''%(prod.DATE.min(),prod.DATE.max()), alert_type="info")
    if prod.equals(baseline):
        intiate3 = pn.pane.Alert('''### Baseline Set is identical to Production Set \n Please choose a slice to be a baseline set''', alert_type="danger")
    else:
        intiate3 = None
    ##PSI##
    # Decile bands are fixed on the baseline score distribution, then the
    # production scores are bucketed into those same bands per period.
    baseline_psi = baseline.copy()
    prod_psi = prod.copy()

    baseline_psi = add_extremes_OOT(baseline_psi, name = 'ID', score = 'SCORE')
    prod_psi["DEC_BANDS"] = pd.cut(prod_psi['SCORE'], bins = pd.qcut(baseline_psi['SCORE'],10, retbins = True)[1])
    prod_psi = prod_psi.groupby([period_metrics.value,
                                 "DEC_BANDS"]).agg(Count = ("DEC_BANDS",
                                                            "count")).sort_index(level = 0).reset_index()
    prod_psi = prod_psi.groupby(period_metrics.value).apply(percentage).drop("Count",axis = 1)

    baseline_psi["DECILE"] = pd.cut(baseline_psi['SCORE'], bins = pd.qcut(baseline_psi['SCORE'],10, retbins = True)[1])
    baseline_psi = baseline_psi["DECILE"].value_counts()
    baseline_psi = baseline_psi / sum(baseline_psi)
    baseline_psi = baseline_psi.reset_index().rename(columns={"index":"DEC_BANDS", "DECILE": "percent"})
    baseline_psi[period_metrics.value] = "validation"
    baseline_psi = baseline_psi[[period_metrics.value, "DEC_BANDS", "percent"]]

    prod_psi = pd.concat([prod_psi,baseline_psi])

    prod_psi = prod_psi.pivot(index = "DEC_BANDS", columns=period_metrics.value)["percent"]
    if len(baseline) < len(prod):
        psi_ = psi(prod_psi).to_frame("%s_PSI"%period_metrics.value)
        psi_results = pn.widgets.DataFrame(psi_)
    else:
        # Baseline >= production means no real split was chosen; PSI is moot.
        psi_ = pd.DataFrame()
        psi_results = pn.pane.Alert("### Choose a Baseline in the left banner to get PSI results", alert_type="warning")
    #CONFIGS
    # Collapse the baseline's period labels into a single "Baseline: a_b" tag
    # so it plots as one group alongside the production periods.
    baseline['QUARTERLY'] = 'Baseline: '+ baseline['QUARTERLY'].unique()[0] + '_' + baseline['QUARTERLY'].unique()[-1]
    baseline['MONTHLY'] = 'Baseline: '+ baseline['MONTHLY'].unique()[0] + '_' + baseline['MONTHLY'].unique()[-1]
    baseline['WEEKLY'] = 'Baseline: '+ baseline['WEEKLY'].unique()[0] + '_' + baseline['WEEKLY'].unique()[-1]
    #AUC
    auc_b = baseline.groupby([period_metrics.value]).apply(AUC)
    auc_p = prod.groupby([period_metrics.value]).apply(AUC)
    baseline_auc = pn.widgets.DataFrame(auc_b)
    prod_auc = pn.widgets.DataFrame(auc_p,name = 'AUC') #autosize_mode='fit_columns'

    from sklearn.metrics import roc_curve
    from holoviews import Slope
    b_label = baseline.MONTHLY.min()
    # Baseline ROC curve (red) overlaid on per-period production ROC curves.
    FPR,TPR,T = roc_curve(baseline['TARGET'],baseline['SCORE'])
    roc_baseline = pd.concat([pd.Series(TPR), pd.Series(FPR)], keys = ['TPR', 'FPR'], axis = 1)
    roc_baseline_p = roc_baseline.hvplot.line(x ='FPR', y = 'TPR', label = "Baseline", color = 'red')

    roc_plot = prod.groupby([period_metrics.value]).apply(ROC).reset_index(level = 0).hvplot.line(x ='FPR', y = 'TPR', title = "%s ROC (Production VS %s)"%(period_metrics.value, b_label),
                                groupby = period_metrics.value, width = 600, height = 500, label = "Prod",
                                xlim = (0,1), ylim = (0,1), grid = True) * Slope(slope=1, y_intercept=0).opts(color='black', line_dash='dashed') * roc_baseline_p
    #KS
    ks_b = baseline.groupby([period_metrics.value]).apply(ks)
    ks_p = prod.groupby([period_metrics.value]).apply(ks)
    baseline_ks = pn.widgets.DataFrame(ks_b)
    # NOTE(review): widget name 'AUC' looks like a copy-paste label for the KS
    # table — confirm before renaming (the name may be user-visible).
    prod_ks = pn.widgets.DataFrame(ks_p,name = 'AUC') #autosize_mode='fit_columns'

    #LIFT
    baseline_lift_raw, baseline_lift_raw_bins = lift_init(df = baseline)
    baseline_lift_raw = baseline_lift_raw.rename(columns = {'Baseline': b_label})
    prod_lift_raw, prod_lift_raw_bins = lift_init(df = prod, baseline = baseline, is_baseline = False)
    cols_b = baseline_lift_raw.columns.drop(['BINS', 'SCORE'])
    cols = prod_lift_raw.columns.drop(['BINS', 'SCORE'])

    # Keep only the top decile ('10') rows for the summary lift tables.
    baseline_lift = baseline_lift_raw.loc[baseline_lift_raw.BINS =='10',cols_b]
    prod_lift = prod_lift_raw.loc[prod_lift_raw.BINS =='10',cols]
    lift_table = prod_lift_raw.loc[prod_lift_raw.BINS =='10',cols].melt(id_vars="SCORE_BAND",
                        var_name='column',
                        value_name='value').dropna().reset_index(drop = True).rename(columns = {'column':period_metrics.value , 'value': 'Target_PCT'})
    lift_table = lift_table.hvplot.table(groupby = period_metrics.value, title="%s Lift Table"%period_metrics.value, hover = True, responsive=True,
                        shared_axes= False, fit_columns = True,
                        padding=True , index_position = 0, fontscale = 1.5)
    prod_lift_raw['BINS'] = prod_lift_raw['BINS'].astype(int)
    baseline_lift_raw['BINS'] = baseline_lift_raw['BINS'].astype(int)

    # SCORE_BAND intervals must be strings for hvplot axis labelling.
    prod_lift_raw_bins['SCORE_BAND'] = prod_lift_raw_bins['SCORE_BAND'].astype(str)
    baseline_lift_raw_bins['SCORE_BAND'] = baseline_lift_raw_bins['SCORE_BAND'].astype(str)

    # Production lift curves (line + scatter), selectable by BINS and period.
    p1 = prod_lift_raw_bins.set_index('SCORE_BAND'
            ).reset_index().hvplot.line(x = 'SCORE_BAND', groupby = ['BINS', period_metrics.value],
                        grid = True, width = 1200, height = 500,
                        label = 'Production', rot = 45)

    p2 = prod_lift_raw_bins.set_index('SCORE_BAND'
            ).reset_index().hvplot.scatter(x = 'SCORE_BAND', groupby = ['BINS', period_metrics.value], grid = True, color='DarkBlue', label='Production', rot = 45)

    b_label = baseline.MONTHLY.min()
    # Baseline lift curves (dashed black) overlaid for comparison.
    b1 = baseline_lift_raw_bins.hvplot.line(x = 'SCORE_BAND', groupby = ['BINS'],
                        grid = True, width = 1200, height = 500,
                        line_dash='dashed', color = 'black', label = b_label, rot = 45)

    b2 = baseline_lift_raw_bins.hvplot.scatter(x = 'SCORE_BAND', groupby = ['BINS'], grid = True, color='DarkGreen', label = b_label, rot = 45)

    final_lift_plots = (p1*p2*b1*b2).opts(ylabel = '%target_rate_mean', title = "%s Lift Chart " % (period_metrics.value.title()))

    #LABEL_DRIFT
    mean_score_prod = prod.groupby(period_metrics.value).agg(MEAN_SCORE=("SCORE","mean"), MEAN_TARGET=("TARGET","mean"),Count = ("TARGET","count"))
    mean_score_base = baseline.groupby(period_metrics.value).agg(MEAN_SCORE=("SCORE","mean"), MEAN_TARGET=("TARGET","mean"),Count = ("TARGET","count"))
    baseline_label_drift = pn.widgets.DataFrame(mean_score_base)
    prod_label_drift = pn.widgets.DataFrame(mean_score_prod,name = 'DRIFT')

    #Lift Tables
    # Merge baseline + production lift columns into one table, dropping the
    # all-NaN padding columns produced by the concat.
    lift_data = pd.concat([baseline_lift, prod_lift], axis = 0)
    lift_data = pd.concat([lift_data.dropna(subset = [col]).dropna(axis = 1).reset_index(drop = 1) for col in lift_data][1:], axis = 1).dropna(axis = 1, how = 'any')
    lift_data = lift_data.loc[:,~lift_data.columns.duplicated()].set_index('SCORE_BAND')
    if (lift_data.shape[1] > 4) | (lift_data.shape[0] > 10):
        prod_lift = pn.pane.Markdown('### Please download the csv as the lift table will congest the screen')
    else:
        prod_lift = pn.widgets.DataFrame(lift_data,name = 'LIFT')
    #GAINS_TABLE
    gains_final_prod,_ = gains_table_proba(prod,'TARGET', 'SCORE')
    gains_final_base,_ = gains_table_proba(baseline,'TARGET', 'SCORE')
    gains_final_base.index.names = [b_label]
    gains_final_p = pn.widgets.DataFrame(gains_final_prod.set_index(['low','high']),name = 'GAINS',)
    gains_final_b = pn.widgets.DataFrame(gains_final_base.set_index(['low','high']),name = 'GAINS',)

    # Calibration: mean predicted score vs mean observed target per bin.
    ece, bin_probamean, bin_ymean, bin_id, bin_count, bin_edges = expected_calibration_error(prod.TARGET.values, prod.SCORE.values)
    error = pd.DataFrame(np.array([bin_probamean, bin_ymean]).T,columns= ["SCORE_MEAN", "TARGET_MEAN"])
    error_plot = error.hvplot.scatter(x ='SCORE_MEAN', y = 'TARGET_MEAN', width = 800, height = 500, label = "Bin (Score vs Target Mean)", title = 'Model Scores Calibration (--- Perfect Calibration)',
                        xlim = (0,1), ylim = (0,1), grid = True, xlabel = 'Bins Mean of Scores', ylabel = 'Bins Mean of Target') * Slope(slope=1, y_intercept=0,legend = 'Perfect Calibration').opts(color='black', line_dash='dashed')
    variable_ = pn.pane.Alert('''### FJ Day Count: \n%s
'''%(datetime.datetime.now() - pd.Timestamp('2023-03-06 03:27')), alert_type="success")
    return pn.Tabs(
        ('Metrics', pn.Column(
            pn.Row(intiate, intiate2, intiate3, width = 1200),
            '# PSI',
            pn.Row(psi_results, save_csv(psi_, 'PSI')),
            '# AUC',
            pn.Row(prod_auc, baseline_auc, save_csv(pd.concat([auc_b, auc_p], axis = 0), 'AUC')),
            '# KS',
            pn.Row(prod_ks, baseline_ks, save_csv(pd.concat([ks_b, ks_p], axis = 0), 'KS')),
            '# LABEL DRIFT',
            pn.Row(prod_label_drift, baseline_label_drift, save_csv(pd.concat([mean_score_base, mean_score_prod], axis = 0), 'LABEL_DRIFT')),
            '# LIFT TABLES',
            pn.Row(prod_lift, save_csv(lift_data, 'LIFT_TABLES')),
            '# GAINS TABLE',
            pn.Row(gains_final_b, gains_final_p, save_csv(pd.concat([gains_final_base, gains_final_prod], axis = 1), 'GAINS_TABLES')),
            get_xlsx(psi_, pd.concat([auc_b, auc_p], axis = 0), pd.concat([ks_b, ks_p], axis = 0), pd.concat([mean_score_base, mean_score_prod], axis = 0), lift_data, pd.concat([gains_final_base, gains_final_prod], axis = 1)),
            pn.Row(variable_, width = 200),
            )
        ), #sizing_mode='stretch_width'
        ('Charts', pn.Column(pn.Row(roc_plot.opts(legend_position = 'bottom_right'), error_plot.opts(legend_position = 'top_left')) ,
                             lift_table,
                             final_lift_plots.opts(legend_position = 'bottom_right')
                             )
        )
    )
|
| 732 |
+
|
| 733 |
+
|
| 734 |
+
# return pn.Tabs(
|
| 735 |
+
# ('Analysis', pn.Column(
|
| 736 |
+
# pn.Row(vol_ret, pn.layout.Spacer(width=20), pn.Column(div, table), sizing_mode='stretch_width'),
|
| 737 |
+
# pn.Column(pn.Row(year, investment), return_curve, sizing_mode='stretch_width'),
|
| 738 |
+
# sizing_mode='stretch_width')),
|
| 739 |
+
# ('Timeseries', timeseries),
|
| 740 |
+
# ('Log Return', pn.Column(
|
| 741 |
+
# '## Daily normalized log returns',
|
| 742 |
+
# 'Width of distribution indicates volatility and center of distribution the mean daily return.',
|
| 743 |
+
# log_ret_hists,
|
| 744 |
+
# sizing_mode='stretch_width'
|
| 745 |
+
# ))
|
| 746 |
+
# )
|
| 747 |
+
|
| 748 |
+
# Assemble the page: widget sidebar on the left, reactive `run` output on the right.
dashboard = pn.Row(
    pn.Column(widgets),
    pn.layout.Spacer(width=30),
    run,
)
dashboard.servable()
|
| 749 |
+
|
| 750 |
+
|
| 751 |
+
|
| 752 |
+
|
| 753 |
+
|
| 754 |
+
|
| 755 |
+
|
| 756 |
+
|
| 757 |
+
|
| 758 |
+
|
| 759 |
+
|
| 760 |
+
|
| 761 |
+
|
| 762 |
+
# Caveats
|
| 763 |
+
# The maximum sizes set in either Bokeh or Tornado refer to the maximum size of the message that
|
| 764 |
+
# is transferred through the web socket connection, which is going to be larger than the actual
|
| 765 |
+
# size of the uploaded file since the file content is encoded in a base64 string. So if you set a
|
| 766 |
+
# maximum size of 100 MB for your application, you should indicate your users that the upload
|
| 767 |
+
# limit is a value that is less than 100 MB.
|
| 768 |
+
|
| 769 |
+
# When a file whose size is larger than the limits is selected by a user, their browser/tab may
|
| 770 |
+
# just crash. Alternatively the web socket connection can close (sometimes with an error message
|
| 771 |
+
# printed in the browser console such as [bokeh] Lost websocket 0 connection, 1009 (message too
|
| 772 |
+
# big)) which means the application will become unresponsive and needs to be refreshed.
|
| 773 |
+
|
| 774 |
+
# app = ...
|
| 775 |
+
|
| 776 |
+
# MAX_SIZE_MB = 150
|
| 777 |
|
| 778 |
+
# pn.serve(
|
| 779 |
+
# app,
|
| 780 |
+
# # Increase the maximum websocket message size allowed by Bokeh
|
| 781 |
+
# websocket_max_message_size=MAX_SIZE_MB*1024*1014,
|
| 782 |
+
# # Increase the maximum buffer size allowed by Tornado
|
| 783 |
+
# http_server_kwargs={'max_buffer_size': MAX_SIZE_MB*1024*1014}
|
| 784 |
+
# )
|