ads505-app / utils /plot_gains.py
Taylor Kirk
Fresh deployment after moving datasets to hf datahub
5d4981c
import pandas as pd
import streamlit as st
import plotly.graph_objects as go #type: ignore
def plot_gains(y_true, y_probs):
    """Build a cumulative gains chart and record its KS summary in session state.

    Observations are ranked by predicted score (highest first). The curve
    plots the share of all positive cases captured against the share of data
    reviewed, next to a diagonal random baseline. The point where the curve is
    farthest above the baseline (the KS point) is annotated on the chart.

    Args:
        y_true: One-dimensional sequence of actual labels. The values are
            summed, so positives are expected to be encoded as 1 and
            negatives as 0.
        y_probs: One-dimensional sequence of predicted scores, the same
            length as ``y_true``. Higher scores are ranked first.

    Returns:
        A ``(fig, data_percent, cum_percent)`` tuple: the Plotly figure, the
        fraction of data reviewed at the KS point, and the fraction of
        positive cases captured at that point. Both fractions lie in [0, 1].

    Raises:
        ValueError: If the inputs differ in length, are empty, or ``y_true``
            contains no positive cases (the gains curve is undefined then).

    Side effects:
        Writes ``ks_value``, ``peak_gains`` and ``percent_data`` to
        ``st.session_state``.
    """
    # Pair labels and scores by position. Building the frame straight from
    # pandas Series would align them on their index labels instead, which
    # silently mispairs rows when e.g. y_true keeps a shuffled split index.
    actual = pd.Series(y_true).reset_index(drop=True)
    predicted = pd.Series(y_probs).reset_index(drop=True)

    if len(actual) != len(predicted):
        raise ValueError(
            "y_true and y_probs must have the same length; "
            f"got {len(actual)} and {len(predicted)}."
        )
    if actual.empty:
        raise ValueError("Cannot plot a gains curve from empty input.")

    total_positives = actual.sum()
    if total_positives == 0:
        # Without this guard the division below yields NaN everywhere and the
        # KS lookup fails later with an unrelated-looking error.
        raise ValueError(
            "Cannot plot a gains curve: y_true contains no positive cases."
        )

    # Build and sort dataframe
    df = (
        pd.DataFrame({'Actual': actual, 'Predicted': predicted})
        .sort_values(by='Predicted', ascending=False)
        .reset_index(drop=True)
    )

    # Compute cumulative gain
    df['Cumulative Percent'] = df['Actual'].cumsum() / total_positives
    df['Percent of Data'] = (df.index + 1) / len(df)

    # Compute K-stat (max vertical distance of the curve above the baseline)
    df['ks_stat'] = df['Cumulative Percent'] - df['Percent of Data']
    ks_value = df['ks_stat'].max()
    ks_idx = df['ks_stat'].idxmax()
    cum_percent = df.at[ks_idx, 'Cumulative Percent']
    data_percent = df.at[ks_idx, 'Percent of Data']

    # Plotly figure
    fig = go.Figure()

    # Model Gains Curve
    fig.add_trace(go.Scatter(
        x=df['Percent of Data'],
        y=df['Cumulative Percent'],
        mode='lines',
        name='Model Gains Curve',
        line=dict(width=3)
    ))

    # Random baseline
    fig.add_trace(go.Scatter(
        x=[0, 1],
        y=[0, 1],
        mode='lines',
        name='Random Baseline',
        line=dict(width=2, dash='dash', color='gray')
    ))

    # Mark the KS point on the curve.
    fig.add_annotation(
        x=data_percent,
        y=cum_percent,
        text=f'Best Returns: {data_percent*100:.2f}%'
    )

    fig.update_layout(
        title="Gains Curve",
        xaxis_title="Percent of Data",
        yaxis_title="Percent of Total Positive Cases Captured",
        template="plotly_white",
        height=450,
        legend=dict(yanchor="bottom", y=0, xanchor="right", x=1)
    )

    st.session_state.ks_value = ks_value
    # NOTE(review): the key names look swapped relative to their contents --
    # `peak_gains` holds the percent of data at the KS point and
    # `percent_data` holds the percent of positives captured. Kept as-is
    # because other pages may read these keys; confirm against the callers.
    st.session_state.peak_gains = round(data_percent * 100, 2)
    st.session_state.percent_data = round(cum_percent * 100, 2)

    return fig, data_percent, cum_percent