Spaces:

tkbarb10
/

ads505-app

Sleeping

ads505-app / utils /plot_gains.py

Taylor Kirk

Fresh deployment after moving datasets to hf datahub

5d4981c about 1 month ago

1.84 kB

	import pandas as pd
	import streamlit as st
	import plotly.graph_objects as go #type: ignore

	def plot_gains(y_true, y_probs):
	    """Build a cumulative gains chart and record its peak in session state.

	    Observations are ranked by predicted score (highest first). For each
	    rank the function computes the share of all positive outcomes captured
	    so far and the share of the data examined so far, then locates the row
	    where the gains curve sits furthest above the diagonal baseline.

	    Args:
	        y_true: Actual outcomes, one per observation. The values are summed
	            and cumulatively summed, so 0/1 labels are presumably expected
	            (confirm against callers).
	        y_probs: Predicted scores used for ranking; must describe the same
	            observations as ``y_true``.

	    Returns:
	        A tuple ``(fig, data_percent, cum_percent)`` where ``fig`` is the
	        Plotly figure, ``data_percent`` is the fraction of the data at the
	        peak row and ``cum_percent`` is the fraction of positive outcomes
	        captured at that row (both in the range 0-1, not percentages).

	    Raises:
	        ValueError: If there are no observations, or if ``y_true`` sums to
	            zero so the share of positives captured is undefined.

	    Side effects:
	        Writes ``ks_value``, ``peak_gains`` and ``percent_data`` to
	        ``st.session_state``.

	    Note:
	        When both inputs are pandas Series, the DataFrame constructor aligns
	        them on index labels rather than on position.
	    """
	    # Build and sort dataframe: best-scored observations first.
	    df = pd.DataFrame({
	        'Actual': y_true,
	        'Predicted': y_probs,
	    }).sort_values(by='Predicted', ascending=False).reset_index(drop=True)

	    # Fail early with a clear message instead of letting idxmax() raise an
	    # obscure "argmax of an empty sequence" error further down.
	    if df.empty:
	        raise ValueError("plot_gains requires at least one observation")

	    # Without any positive outcome the division below would be 0/0 and fill
	    # the whole curve with NaN, which then breaks the peak lookup.
	    total_positives = df['Actual'].sum()
	    if total_positives == 0:
	        raise ValueError(
	            "plot_gains requires at least one positive case in y_true"
	        )

	    # Compute cumulative gain
	    df['Cumulative Percent'] = df['Actual'].cumsum() / total_positives
	    df['Percent of Data'] = (df.index + 1) / len(df)

	    # Largest vertical gap between the gains curve and the diagonal
	    # baseline; idxmax() returns the first row where that gap occurs.
	    df['ks_stat'] = df['Cumulative Percent'] - df['Percent of Data']
	    ks_value = df['ks_stat'].max()
	    ks_idx = df['ks_stat'].idxmax()
	    # .at does a single label-based scalar lookup (no chained indexing).
	    cum_percent = df.at[ks_idx, 'Cumulative Percent']
	    data_percent = df.at[ks_idx, 'Percent of Data']

	    # Plotly figure
	    fig = go.Figure()

	    # Model Gains Curve
	    fig.add_trace(go.Scatter(
	        x=df['Percent of Data'],
	        y=df['Cumulative Percent'],
	        mode='lines',
	        name='Model Gains Curve',
	        line=dict(width=3),
	    ))

	    # Random baseline
	    fig.add_trace(go.Scatter(
	        x=[0, 1],
	        y=[0, 1],
	        mode='lines',
	        name='Random Baseline',
	        line=dict(width=2, dash='dash', color='gray'),
	    ))

	    # Mark the peak of the curve on the chart.
	    fig.add_annotation(
	        x=data_percent,
	        y=cum_percent,
	        text=f'Best Returns: {data_percent*100:.2f}%',
	    )

	    fig.update_layout(
	        title="Gains Curve",
	        xaxis_title="Percent of Data",
	        yaxis_title="Percent of Total Positive Cases Captured",
	        template="plotly_white",
	        height=450,
	        legend=dict(yanchor="bottom", y=0, xanchor="right", x=1),
	    )

	    # NOTE(review): `peak_gains` stores the percent of data at the peak and
	    # `percent_data` stores the percent of positives captured there. The key
	    # names look swapped relative to their contents; they are kept as-is
	    # because other code may read these keys. Confirm with the callers.
	    st.session_state.ks_value = ks_value
	    st.session_state.peak_gains = round(data_percent * 100, 2)
	    st.session_state.percent_data = round(cum_percent * 100, 2)

	    return fig, data_percent, cum_percent