Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import streamlit as st | |
| import plotly.graph_objects as go #type: ignore | |
def plot_gains(y_true, y_probs):
    """Build a cumulative gains chart and record its peak in session state.

    Observations are ranked by predicted score, highest first. For each rank
    the function computes the fraction of the total of ``y_true`` captured so
    far and compares it with the fraction of observations seen so far (the
    random baseline). The point where the curve is furthest above the
    baseline is annotated on the chart.

    Args:
        y_true: Actual outcomes, one per observation. The values are summed,
            so this presumably holds 0/1 labels -- confirm against callers.
        y_probs: Predicted scores parallel to ``y_true``; used only to rank
            the observations.

    Returns:
        A tuple ``(fig, data_percent, cum_percent)`` where ``fig`` is the
        Plotly figure, ``data_percent`` is the fraction (0-1) of observations
        at the peak, and ``cum_percent`` is the fraction (0-1) of the
        ``y_true`` total captured at that point.

    Raises:
        ValueError: If no observations are supplied, or if ``y_true`` sums to
            zero (the cumulative share would be a division by zero).

    Side effects:
        Writes ``ks_value``, ``peak_gains`` and ``percent_data`` to
        ``st.session_state``.
    """
    df = pd.DataFrame({
        'Actual': y_true,
        'Predicted': y_probs,
    })

    # Fail early with a clear message instead of letting NaNs (0 / 0) or an
    # empty frame surface later as an opaque idxmax / lookup error.
    if df.empty:
        raise ValueError("plot_gains requires at least one observation.")
    total_positives = df['Actual'].sum()
    if total_positives == 0:
        raise ValueError(
            "plot_gains requires at least one positive case in y_true."
        )

    # A stable sort keeps tied scores in input order, so the curve and the
    # reported peak are reproducible from run to run.
    df = df.sort_values(
        by='Predicted', ascending=False, kind='mergesort'
    ).reset_index(drop=True)

    # Compute cumulative gain
    df['Cumulative Percent'] = df['Actual'].cumsum() / total_positives
    df['Percent of Data'] = (df.index + 1) / len(df)

    # Largest vertical gap between the gains curve and the random baseline
    df['ks_stat'] = df['Cumulative Percent'] - df['Percent of Data']
    ks_value = df['ks_stat'].max()
    ks_idx = df['ks_stat'].idxmax()
    cum_percent = df.loc[ks_idx, 'Cumulative Percent']
    data_percent = df.loc[ks_idx, 'Percent of Data']

    # Plotly figure
    fig = go.Figure()

    # Model Gains Curve
    fig.add_trace(go.Scatter(
        x=df['Percent of Data'],
        y=df['Cumulative Percent'],
        mode='lines',
        name='Model Gains Curve',
        line=dict(width=3)
    ))

    # Random baseline
    fig.add_trace(go.Scatter(
        x=[0, 1],
        y=[0, 1],
        mode='lines',
        name='Random Baseline',
        line=dict(width=2, dash='dash', color='gray')
    ))

    # Mark the peak; the label reports the share of data at that point.
    fig.add_annotation(
        x=data_percent,
        y=cum_percent,
        text=f'Best Returns: {data_percent*100:.2f}%'
    )

    fig.update_layout(
        title="Gains Curve",
        xaxis_title="Percent of Data",
        yaxis_title="Percent of Total Positive Cases Captured",
        template="plotly_white",
        height=450,
        legend=dict(yanchor="bottom", y=0, xanchor="right", x=1)
    )

    st.session_state.ks_value = ks_value
    # NOTE(review): `peak_gains` holds the share of data and `percent_data`
    # holds the share of positives captured -- the key names look swapped.
    # Left as-is because readers of these keys are outside this block;
    # confirm with the consuming page before renaming.
    st.session_state.peak_gains = round(data_percent * 100, 2)
    st.session_state.percent_data = round(cum_percent * 100, 2)

    return fig, data_percent, cum_percent