import pandas as pd
import streamlit as st
import plotly.graph_objects as go  # type: ignore


def plot_gains(y_true, y_probs):
    """Build a cumulative gains chart and record its peak-lift point.

    Observations are ranked by predicted score (highest first). The curve
    plots, for each leading fraction of the ranked data, the share of the
    total of ``y_true`` captured so far. The point where the curve lies
    furthest above the diagonal random baseline is annotated on the chart.

    Args:
        y_true: Actual outcomes, one per observation. The values are summed
            to form the curve, so presumably 0/1 labels - confirm with
            callers.
        y_probs: Predicted scores, paired with ``y_true`` by position.

    Returns:
        A tuple ``(fig, data_percent, cum_percent)``: the Plotly figure, the
        fraction of data at the peak-lift point, and the cumulative captured
        fraction at that point (both in the 0-1 range).

    Raises:
        ValueError: If the inputs are empty, differ in length, or the actual
            outcomes sum to zero (the curve would be 0/0 everywhere).

    Side effects:
        Writes ``ks_value``, ``peak_gains`` and ``percent_data`` to
        ``st.session_state``.
    """
    # Pair the inputs by position. Passing two Series straight into a
    # DataFrame would align them on index labels, silently mis-pairing rows
    # (and injecting NaNs) whenever the indexes differ.
    actual = pd.Series(y_true).reset_index(drop=True)
    predicted = pd.Series(y_probs).reset_index(drop=True)

    if len(actual) != len(predicted):
        raise ValueError(
            "y_true and y_probs must have the same length "
            f"(got {len(actual)} and {len(predicted)})"
        )
    if actual.empty:
        raise ValueError("plot_gains requires at least one observation")

    # Build and sort dataframe: highest predicted score first.
    df = pd.DataFrame({
        'Actual': actual,
        'Predicted': predicted,
    }).sort_values(by='Predicted', ascending=False).reset_index(drop=True)

    total_actual = df['Actual'].sum()
    if total_actual == 0:
        # Without this guard every curve value is 0/0 = NaN and the peak
        # lookup below fails with an unrelated-looking error.
        raise ValueError(
            "plot_gains requires y_true to sum to a non-zero value"
        )

    # Compute cumulative gain.
    df['Cumulative Percent'] = df['Actual'].cumsum() / total_actual
    df['Percent of Data'] = (df.index + 1) / len(df)

    # Vertical gap between the gains curve and the diagonal random baseline;
    # its maximum marks the cut-off with the largest lift over random.
    df['ks_stat'] = df['Cumulative Percent'] - df['Percent of Data']
    ks_value = df['ks_stat'].max()
    ks_idx = df['ks_stat'].idxmax()
    cum_percent = df.at[ks_idx, 'Cumulative Percent']
    data_percent = df.at[ks_idx, 'Percent of Data']

    # Plotly figure
    fig = go.Figure()

    # Model Gains Curve
    fig.add_trace(go.Scatter(
        x=df['Percent of Data'],
        y=df['Cumulative Percent'],
        mode='lines',
        name='Model Gains Curve',
        line=dict(width=3),
    ))

    # Random baseline
    fig.add_trace(go.Scatter(
        x=[0, 1],
        y=[0, 1],
        mode='lines',
        name='Random Baseline',
        line=dict(width=2, dash='dash', color='gray'),
    ))

    # Mark the peak-lift point on the curve.
    fig.add_annotation(
        x=data_percent,
        y=cum_percent,
        text=f'Best Returns: {data_percent*100:.2f}%'
    )

    fig.update_layout(
        title="Gains Curve",
        xaxis_title="Percent of Data",
        yaxis_title="Percent of Total Positive Cases Captured",
        template="plotly_white",
        height=450,
        legend=dict(yanchor="bottom", y=0, xanchor="right", x=1),
    )

    # NOTE(review): `peak_gains` stores the data fraction and `percent_data`
    # stores the captured fraction, which looks swapped relative to the key
    # names. Left as-is because consumers of these keys are not visible
    # here - confirm before renaming.
    st.session_state.ks_value = ks_value
    st.session_state.peak_gains = round(data_percent * 100, 2)
    st.session_state.percent_data = round(cum_percent * 100, 2)

    return fig, data_percent, cum_percent