Spaces:
Sleeping
Sleeping
File size: 1,840 Bytes
5d4981c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
import pandas as pd
import streamlit as st
import plotly.graph_objects as go #type: ignore
def plot_gains(y_true, y_probs):
    """Build a cumulative gains chart and record its peak point in Streamlit.

    Observations are ranked by predicted score, highest first. For every
    prefix of that ranking the function computes the fraction of all
    positives captured ("Cumulative Percent") and the fraction of rows
    consumed ("Percent of Data"). The row where the gains curve sits
    furthest above the diagonal baseline is annotated on the figure.

    Args:
        y_true: Actual outcomes, one per observation. The values are summed
            and cumulatively summed, so they are presumably 0/1 labels --
            confirm against the caller.
        y_probs: Predicted scores, paired with ``y_true`` by position.

    Returns:
        A ``(fig, data_percent, cum_percent)`` tuple: the Plotly figure, the
        fraction of data (0-1) at the peak gap, and the fraction of positives
        captured (0-1) at that same point.

    Raises:
        ValueError: If the inputs are empty, differ in length, or ``y_true``
            sums to zero (the gains curve is undefined without positives).

    Side effects:
        Writes ``ks_value``, ``peak_gains`` and ``percent_data`` to
        ``st.session_state``.
    """
    # Pair the inputs by position. Handing pandas Series straight to the
    # DataFrame constructor aligns them by index label instead, which
    # mis-pairs or NaN-pads rows whenever the two indexes differ.
    actual = pd.Series(y_true).to_numpy()
    predicted = pd.Series(y_probs).to_numpy()

    if len(actual) == 0:
        raise ValueError("plot_gains requires at least one observation")
    if len(actual) != len(predicted):
        raise ValueError(
            "y_true and y_probs must have the same length "
            f"(got {len(actual)} and {len(predicted)})"
        )

    # Build and sort dataframe
    df = (
        pd.DataFrame({'Actual': actual, 'Predicted': predicted})
        .sort_values(by='Predicted', ascending=False)
        .reset_index(drop=True)
    )

    total_positives = df['Actual'].sum()
    if total_positives == 0:
        # Dividing by zero would turn the whole curve into NaN and make the
        # peak lookup below fail with an unrelated error.
        raise ValueError(
            "y_true sums to zero; the gains curve is undefined"
        )

    # Compute cumulative gain
    df['Cumulative Percent'] = df['Actual'].cumsum() / total_positives
    df['Percent of Data'] = (df.index + 1) / len(df)

    # Gap between the gains curve and the diagonal; its maximum is stored
    # as the "ks" value.
    df['ks_stat'] = df['Cumulative Percent'] - df['Percent of Data']
    ks_value = df['ks_stat'].max()
    ks_idx = df['ks_stat'].idxmax()
    cum_percent = df.loc[ks_idx, 'Cumulative Percent']
    data_percent = df.loc[ks_idx, 'Percent of Data']

    # Plotly figure
    fig = go.Figure()

    # Model Gains Curve
    fig.add_trace(go.Scatter(
        x=df['Percent of Data'],
        y=df['Cumulative Percent'],
        mode='lines',
        name='Model Gains Curve',
        line=dict(width=3)
    ))

    # Random baseline
    fig.add_trace(go.Scatter(
        x=[0, 1],
        y=[0, 1],
        mode='lines',
        name='Random Baseline',
        line=dict(width=2, dash='dash', color='gray')
    ))

    # Mark the point of maximum separation from the baseline.
    fig.add_annotation(
        x=data_percent,
        y=cum_percent,
        text=f'Best Returns: {data_percent*100:.2f}%'
    )

    fig.update_layout(
        title="Gains Curve",
        xaxis_title="Percent of Data",
        yaxis_title="Percent of Total Positive Cases Captured",
        template="plotly_white",
        height=450,
        legend=dict(yanchor="bottom", y=0, xanchor="right", x=1)
    )

    st.session_state.ks_value = ks_value
    # NOTE(review): `peak_gains` holds the data fraction and `percent_data`
    # holds the captured-positives fraction, which looks swapped relative to
    # the key names. Kept as-is because the code reading these keys is not
    # visible here -- confirm before renaming.
    st.session_state.peak_gains = round(data_percent * 100, 2)
    st.session_state.percent_data = round(cum_percent * 100, 2)

    return fig, data_percent, cum_percent