Spaces:

SRVCP
/

MHICombined

Build error

Rakesh

Update src/streamlit_app.py

79c5988 verified 8 months ago

16.3 kB

	import streamlit as st
	import pandas as pd
	import plotly.express as px
	import plotly.graph_objects as go
	from plotly.subplots import make_subplots
	import numpy as np

	# Set page config
	st.set_page_config(
	page_title="Health Parameter Transition Dashboard",
	page_icon="🏥",
	layout="wide",
	initial_sidebar_state="expanded"
	)

	# Custom CSS for better styling
	st.markdown("""
	<style>
	.main-header {
	font-size: 2.5rem;
	font-weight: bold;
	color: #1f77b4;
	text-align: center;
	margin-bottom: 2rem;
	}

	.metric-card {
	background-color: #f0f2f6;
	padding: 1rem;
	border-radius: 0.5rem;
	border-left: 4px solid #1f77b4;
	}

	.improvement {
	color: #2ca02c;
	font-weight: bold;
	}

	.decline {
	color: #d62728;
	font-weight: bold;
	}

	.stable {
	color: #ff7f0e;
	font-weight: bold;
	}
	</style>
	""", unsafe_allow_html=True)

	@st.cache_data
	def load_data():
	"""Load and preprocess the health data"""
	try:
	df = pd.read_csv("Combines 2,3,7,9,11(Sheet1).csv")
	return df
	except Exception as e:
	st.error(f"Error loading data: {e}")
	return None

	def clean_tag_data(df):
	"""Clean and standardize tag data"""
	# Define health parameters with their old and new tag columns
	health_params = {
	'HbA1c': {'old_tag': 'Hba1c tag old', 'new_tag': 'Hba1c tag'},
	'LDL': {'old_tag': 'LDLtag old', 'new_tag': 'LDLtag'},
	'BMI': {'old_tag': 'BMItag old', 'new_tag': 'BMItag'},
	'BP': {'old_tag': 'Bptag old', 'new_tag': 'Bptag'},
	'Biometrics': {'old_tag': 'biometric tag old', 'new_tag': 'biometric tag'},
	'MHI': {'old_tag': 'MHI old', 'new_tag': 'MHI NEW'}
	}

	# Clean the data
	for param, cols in health_params.items():
	# Fill NaN values with 'Not Available'
	df[cols['old_tag']] = df[cols['old_tag']].fillna('Not Available')
	df[cols['new_tag']] = df[cols['new_tag']].fillna('Not Available')

	# Standardize tag values
	for col in [cols['old_tag'], cols['new_tag']]:
	df[col] = df[col].astype(str).str.strip().str.title()
	# Map common variations
	df[col] = df[col].replace({
	'Alert': 'Red',
	'Sub-Optimal': 'Orange',
	'Optimal': 'Green',
	'Suboptimal': 'Orange',
	'0': 'Not Available',
	'': 'Not Available'
	})

	return df, health_params

	def calculate_transitions(df, health_params, location_filter=None):
	"""Calculate transition matrices for each health parameter"""
	if location_filter and location_filter != "All Locations":
	df_filtered = df[df['Location Shared'] == location_filter].copy()
	else:
	df_filtered = df.copy()

	transitions = {}

	for param, cols in health_params.items():
	old_col = cols['old_tag']
	new_col = cols['new_tag']

	# Create transition matrix
	transition_df = df_filtered[[old_col, new_col]].copy()
	transition_df = transition_df[
	(transition_df[old_col] != 'Not Available') &
	(transition_df[new_col] != 'Not Available')
	]

	if len(transition_df) > 0:
	transition_matrix = pd.crosstab(
	transition_df[old_col],
	transition_df[new_col],
	margins=True
	)

	# Calculate transition summary
	total_users = len(transition_df)

	# Count improvements, declines, and stable
	improved = 0
	declined = 0
	stable = 0

	tag_hierarchy = {'Red': 3, 'Orange': 2, 'Green': 1}

	for _, row in transition_df.iterrows():
	old_val = row[old_col]
	new_val = row[new_col]

	if old_val in tag_hierarchy and new_val in tag_hierarchy:
	old_score = tag_hierarchy[old_val]
	new_score = tag_hierarchy[new_val]

	if new_score < old_score: # Lower score is better
	improved += 1
	elif new_score > old_score:
	declined += 1
	else:
	stable += 1

	transitions[param] = {
	'matrix': transition_matrix,
	'total_users': total_users,
	'improved': improved,
	'declined': declined,
	'stable': stable,
	'improvement_rate': (improved / total_users * 100) if total_users > 0 else 0,
	'decline_rate': (declined / total_users * 100) if total_users > 0 else 0,
	'stable_rate': (stable / total_users * 100) if total_users > 0 else 0
	}

	return transitions

	def create_transition_heatmap(transition_matrix, param_name):
	"""Create a heatmap for transition matrix"""
	# Remove the 'All' row and column for cleaner visualization
	matrix_clean = transition_matrix.drop('All', axis=0).drop('All', axis=1)

	fig = px.imshow(
	matrix_clean.values,
	x=matrix_clean.columns,
	y=matrix_clean.index,
	color_continuous_scale='Blues',
	aspect="auto",
	title=f"{param_name} Transition Matrix"
	)

	# Add text annotations
	for i, row in enumerate(matrix_clean.index):
	for j, col in enumerate(matrix_clean.columns):
	fig.add_annotation(
	x=j, y=i,
	text=str(matrix_clean.loc[row, col]),
	showarrow=False,
	font=dict(color="white" if matrix_clean.loc[row, col] > matrix_clean.values.max()/2 else "black")
	)

	fig.update_layout(
	xaxis_title="New Status",
	yaxis_title="Old Status",
	height=400
	)

	return fig

	def create_summary_chart(transitions):
	"""Create summary chart showing improvement/decline rates"""
	params = list(transitions.keys())
	improvement_rates = [transitions[p]['improvement_rate'] for p in params]
	decline_rates = [transitions[p]['decline_rate'] for p in params]
	stable_rates = [transitions[p]['stable_rate'] for p in params]

	fig = go.Figure()

	fig.add_trace(go.Bar(
	name='Improved',
	x=params,
	y=improvement_rates,
	marker_color='#2ca02c'
	))

	fig.add_trace(go.Bar(
	name='Declined',
	x=params,
	y=decline_rates,
	marker_color='#d62728'
	))

	fig.add_trace(go.Bar(
	name='Stable',
	x=params,
	y=stable_rates,
	marker_color='#ff7f0e'
	))

	fig.update_layout(
	title="Health Parameter Transition Summary",
	xaxis_title="Health Parameters",
	yaxis_title="Percentage of Users",
	barmode='stack',
	height=500
	)

	return fig

	def create_sankey_diagram(df, param, old_col, new_col, location_filter=None):
	"""Create Sankey diagram for parameter transitions"""
	if location_filter and location_filter != "All Locations":
	df_filtered = df[df['Location Shared'] == location_filter].copy()
	else:
	df_filtered = df.copy()

	# Filter out 'Not Available' values
	df_filtered = df_filtered[
	(df_filtered[old_col] != 'Not Available') &
	(df_filtered[new_col] != 'Not Available')
	]

	if len(df_filtered) == 0:
	return None

	# Create transition counts
	transitions = df_filtered.groupby([old_col, new_col]).size().reset_index(name='count')

	# Create unique labels
	all_labels = list(set(transitions[old_col].tolist() + transitions[new_col].tolist()))
	label_map = {label: i for i, label in enumerate(all_labels)}

	# Prepare data for Sankey
	source = [label_map[old] for old in transitions[old_col]]
	target = [label_map[new] + len(set(transitions[old_col])) for new in transitions[new_col]]
	values = transitions['count'].tolist()

	# Create color mapping
	color_map = {'Green': '#2ca02c', 'Orange': '#ff7f0e', 'Red': '#d62728'}
	node_colors = [color_map.get(label, '#1f77b4') for label in all_labels]

	fig = go.Figure(data=[go.Sankey(
	node=dict(
	pad=15,
	thickness=20,
	line=dict(color="black", width=0.5),
	label=[f"{label} (Old)" if i < len(set(transitions[old_col])) else f"{label} (New)"
	for i, label in enumerate(all_labels + all_labels)],
	color=node_colors + node_colors
	),
	link=dict(
	source=source,
	target=target,
	value=values
	)
	)])

	fig.update_layout(
	title_text=f"{param} Parameter Transitions",
	font_size=10,
	height=400
	)

	return fig

	def main():
	st.markdown('<h1 class="main-header">🏥 Health Parameter Transition Dashboard</h1>', unsafe_allow_html=True)

	# Add description
	st.markdown("""
	This dashboard analyzes health parameter transitions between old and new measurements.
	It tracks improvements, declines, and stability across different health metrics with location-based filtering.

	Health Parameters Analyzed:
	- HbA1c: Blood glucose control indicator
	- LDL: Low-density lipoprotein cholesterol
	- BMI: Body Mass Index
	- BP: Blood Pressure
	- Biometrics: Overall biometric assessment
	- MHI: Mental Health Index
	""")

	# Load data
	df = load_data()
	if df is None:
	st.error("Unable to load data. Please check if the data file is available.")
	st.stop()

	# Clean data
	df_clean, health_params = clean_tag_data(df)

	# Sidebar for filters
	st.sidebar.header("📊 Dashboard Filters")

	# Location filter
	locations = ['All Locations'] + sorted(df_clean['Location Shared'].dropna().unique().tolist())
	selected_location = st.sidebar.selectbox("Select Location", locations)

	# Calculate transitions
	transitions = calculate_transitions(df_clean, health_params, selected_location)

	# Display summary metrics
	st.header("📈 Overall Summary")

	if selected_location != "All Locations":
	st.info(f"📍 Showing data for: {selected_location}")

	# Create columns for summary metrics
	col1, col2, col3, col4 = st.columns(4)

	total_users = sum([t['total_users'] for t in transitions.values()]) // len(transitions) if transitions else 0
	avg_improvement = np.mean([t['improvement_rate'] for t in transitions.values()]) if transitions else 0
	avg_decline = np.mean([t['decline_rate'] for t in transitions.values()]) if transitions else 0
	avg_stable = np.mean([t['stable_rate'] for t in transitions.values()]) if transitions else 0

	with col1:
	st.metric("Total Users Analyzed", f"{total_users:,}")

	with col2:
	st.metric("Average Improvement Rate", f"{avg_improvement:.1f}%",
	delta=f"+{avg_improvement:.1f}%" if avg_improvement > 0 else None)

	with col3:
	st.metric("Average Decline Rate", f"{avg_decline:.1f}%",
	delta=f"-{avg_decline:.1f}%" if avg_decline > 0 else None)

	with col4:
	st.metric("Average Stable Rate", f"{avg_stable:.1f}%")

	# Summary chart
	if transitions:
	st.plotly_chart(create_summary_chart(transitions), use_container_width=True)

	# Parameter-wise analysis
	st.header("🔍 Parameter-wise Analysis")

	if transitions:
	tabs = st.tabs(list(health_params.keys()))

	for i, (param, cols) in enumerate(health_params.items()):
	with tabs[i]:
	if param in transitions and transitions[param]['total_users'] > 0:
	col1, col2 = st.columns([1, 1])

	with col1:
	# Display metrics for this parameter
	st.subheader(f"{param} Metrics")

	metrics_col1, metrics_col2, metrics_col3 = st.columns(3)

	with metrics_col1:
	st.metric("Users", transitions[param]['total_users'])

	with metrics_col2:
	improvement_rate = transitions[param]['improvement_rate']
	st.metric("Improved", f"{transitions[param]['improved']}",
	f"{improvement_rate:.1f}%")

	with metrics_col3:
	decline_rate = transitions[param]['decline_rate']
	st.metric("Declined", f"{transitions[param]['declined']}",
	f"{decline_rate:.1f}%")

	# Transition matrix heatmap
	st.plotly_chart(
	create_transition_heatmap(transitions[param]['matrix'], param),
	use_container_width=True
	)

	with col2:
	# Sankey diagram
	sankey_fig = create_sankey_diagram(
	df_clean, param, cols['old_tag'], cols['new_tag'], selected_location
	)
	if sankey_fig:
	st.plotly_chart(sankey_fig, use_container_width=True)
	else:
	st.info("No transition data available for Sankey diagram")

	# Detailed transition table
	st.subheader(f"{param} Detailed Transitions")
	transition_table = transitions[param]['matrix']
	st.dataframe(transition_table, use_container_width=True)

	else:
	st.warning(f"No data available for {param} parameter")
	else:
	st.warning("No transition data available for the selected location.")

	# Data insights
	st.header("💡 Key Insights")

	insights = []

	for param, data in transitions.items():
	if data['total_users'] > 0:
	if data['improvement_rate'] > 50:
	insights.append(f"✅ {param}: Excellent improvement rate of {data['improvement_rate']:.1f}%")
	elif data['improvement_rate'] > 30:
	insights.append(f"🟡 {param}: Good improvement rate of {data['improvement_rate']:.1f}%")

	if data['decline_rate'] > 30:
	insights.append(f"⚠️ {param}: High decline rate of {data['decline_rate']:.1f}% - needs attention")

	if insights:
	for insight in insights:
	st.markdown(insight)
	else:
	st.info("No significant insights to highlight at this time.")

	# Export functionality
	st.header("📥 Export Data")

	if st.button("Generate Summary Report"):
	summary_data = []
	for param, data in transitions.items():
	summary_data.append({
	'Parameter': param,
	'Total Users': data['total_users'],
	'Improved': data['improved'],
	'Declined': data['declined'],
	'Stable': data['stable'],
	'Improvement Rate (%)': round(data['improvement_rate'], 2),
	'Decline Rate (%)': round(data['decline_rate'], 2),
	'Stable Rate (%)': round(data['stable_rate'], 2)
	})

	summary_df = pd.DataFrame(summary_data)

	st.download_button(
	label="Download Summary CSV",
	data=summary_df.to_csv(index=False),
	file_name=f"health_transitions_summary_{selected_location.replace(' ', '_')}.csv",
	mime="text/csv"
	)

	st.dataframe(summary_df, use_container_width=True)

	if __name__ == "__main__":
	main()