# MHI / app.py
# Rakesh
# Upload 3 files
# dee9b3a verified
import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np
# Page configuration handed to Streamlit in a single call
_PAGE_SETTINGS = {
    "page_title": "Health Parameter Transition Dashboard",
    "page_icon": "🏥",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_SETTINGS)
# Custom CSS for better styling: page header, metric cards and the three
# status classes (improvement / decline / stable).
_DASHBOARD_CSS = """
<style>
.main-header {
font-size: 2.5rem;
font-weight: bold;
color: #1f77b4;
text-align: center;
margin-bottom: 2rem;
}
.metric-card {
background-color: #f0f2f6;
padding: 1rem;
border-radius: 0.5rem;
border-left: 4px solid #1f77b4;
}
.improvement {
color: #2ca02c;
font-weight: bold;
}
.decline {
color: #d62728;
font-weight: bold;
}
.stable {
color: #ff7f0e;
font-weight: bold;
}
</style>
"""
# unsafe_allow_html is required for the raw <style> tag to be emitted.
st.markdown(_DASHBOARD_CSS, unsafe_allow_html=True)
@st.cache_data
def _read_health_csv(path):
    """Read the CSV at *path* into a DataFrame (result cached by Streamlit)."""
    return pd.read_csv(path)


def load_data(path="Combines 2,3,7,9,11(Sheet1).csv"):
    """Load the health data CSV.

    Only the successful read is cached. The error handling lives outside the
    cached helper so that a failed read (for example, the file not being
    present yet) is retried on the next run instead of a cached ``None``
    being served until the cache is cleared.

    Args:
        path: Location of the CSV file. Defaults to the dataset shipped with
            the app.

    Returns:
        The loaded DataFrame, or ``None`` if the file could not be read (an
        error message is shown in the app in that case).
    """
    try:
        return _read_health_csv(path)
    except Exception as e:
        # UI boundary: report any read/parse failure to the user rather than
        # crashing the app; the caller decides how to proceed on None.
        st.error(f"Error loading data: {e}")
        return None
def clean_tag_data(df):
    """Clean and standardize the old/new tag columns of every health parameter.

    For each tag column: missing values become 'Not Available', values are
    converted to stripped, title-cased strings, and known aliases are mapped
    onto the canonical tags ('Red', 'Orange', 'Green', 'Not Available').

    The input frame is left untouched; the cleaning is applied to a copy.

    Args:
        df: DataFrame containing every column named in the parameter table
            below. A missing column raises ``KeyError``.

    Returns:
        A ``(cleaned_df, health_params)`` tuple, where ``health_params`` maps
        each parameter name to its ``{'old_tag': ..., 'new_tag': ...}``
        column names.
    """
    # Health parameters with their old and new tag columns
    health_params = {
        'HbA1c': {'old_tag': 'Hba1c tag old', 'new_tag': 'Hba1c tag'},
        'LDL': {'old_tag': 'LDLtag old', 'new_tag': 'LDLtag'},
        'BMI': {'old_tag': 'BMItag old', 'new_tag': 'BMItag'},
        'BP': {'old_tag': 'Bptag old', 'new_tag': 'Bptag'},
        'Biometrics': {'old_tag': 'biometric tag old', 'new_tag': 'biometric tag'},
        'MHI': {'old_tag': 'MHI old', 'new_tag': 'MHI NEW'},
    }
    # Keys are written in title case because the lookup happens after
    # .str.title() has been applied to the values.
    tag_aliases = {
        'Alert': 'Red',
        'Sub-Optimal': 'Orange',
        'Optimal': 'Green',
        'Suboptimal': 'Orange',
        '0': 'Not Available',
        '': 'Not Available',
    }

    # Work on a copy so the caller's frame is not modified as a side effect.
    df = df.copy()
    for cols in health_params.values():
        for col in (cols['old_tag'], cols['new_tag']):
            # Fill NaN first so missing values do not turn into the text 'nan'.
            standardized = (
                df[col]
                .fillna('Not Available')
                .astype(str)
                .str.strip()
                .str.title()
            )
            df[col] = standardized.replace(tag_aliases)
    return df, health_params
def calculate_transitions(df, health_params, location_filter=None):
    """Calculate the transition matrix and summary counts per health parameter.

    Args:
        df: DataFrame holding the tag columns named in ``health_params`` and,
            when a location filter is applied, a 'Location Shared' column.
        health_params: Mapping of parameter name to
            ``{'old_tag': <column>, 'new_tag': <column>}``.
        location_filter: Value of 'Location Shared' to restrict to. ``None``,
            an empty value, or "All Locations" means no filtering.

    Returns:
        Dict keyed by parameter name. Parameters with no row where both tags
        are available are omitted. Each entry contains:
        ``matrix`` (old x new crosstab including 'All' margins),
        ``total_users`` (rows with both tags available),
        ``improved`` / ``declined`` / ``stable`` (counts among rows where both
        tags are one of Red/Orange/Green), and the matching ``*_rate`` values
        as percentages of ``total_users``.
    """
    if location_filter and location_filter != "All Locations":
        df_filtered = df[df['Location Shared'] == location_filter]
    else:
        df_filtered = df

    # Higher score = worse status, so a drop in score is an improvement.
    tag_hierarchy = {'Red': 3, 'Orange': 2, 'Green': 1}

    transitions = {}
    for param, cols in health_params.items():
        old_col = cols['old_tag']
        new_col = cols['new_tag']

        both_available = (
            (df_filtered[old_col] != 'Not Available')
            & (df_filtered[new_col] != 'Not Available')
        )
        transition_df = df_filtered.loc[both_available, [old_col, new_col]]
        total_users = len(transition_df)
        if total_users == 0:
            continue

        transition_matrix = pd.crosstab(
            transition_df[old_col],
            transition_df[new_col],
            margins=True,
        )

        # Tags outside the hierarchy map to NaN; every comparison against NaN
        # is False, so such rows fall into none of the three buckets.
        old_score = transition_df[old_col].map(tag_hierarchy)
        new_score = transition_df[new_col].map(tag_hierarchy)
        improved = int((new_score < old_score).sum())
        declined = int((new_score > old_score).sum())
        stable = int((new_score == old_score).sum())

        transitions[param] = {
            'matrix': transition_matrix,
            'total_users': total_users,
            'improved': improved,
            'declined': declined,
            'stable': stable,
            'improvement_rate': improved / total_users * 100,
            'decline_rate': declined / total_users * 100,
            'stable_rate': stable / total_users * 100,
        }
    return transitions
def create_transition_heatmap(transition_matrix, param_name):
    """Build an annotated heatmap figure from a transition crosstab.

    Args:
        transition_matrix: Old-status x new-status count table that includes
            an 'All' margin row and column.
        param_name: Parameter name used in the figure title.

    Returns:
        The Plotly figure, with one text annotation per cell.
    """
    # Strip the 'All' margins for a cleaner visualization
    counts = transition_matrix.drop('All', axis=0).drop('All', axis=1)

    heatmap = px.imshow(
        counts.values,
        x=counts.columns,
        y=counts.index,
        color_continuous_scale='Blues',
        aspect="auto",
        title=f"{param_name} Transition Matrix",
    )

    # Cells above half of the largest count get white text, the rest black.
    half_of_max = counts.values.max() / 2
    for y_pos, old_status in enumerate(counts.index):
        for x_pos, new_status in enumerate(counts.columns):
            cell_count = counts.loc[old_status, new_status]
            text_color = "white" if cell_count > half_of_max else "black"
            heatmap.add_annotation(
                x=x_pos,
                y=y_pos,
                text=str(cell_count),
                showarrow=False,
                font=dict(color=text_color),
            )

    heatmap.update_layout(
        xaxis_title="New Status",
        yaxis_title="Old Status",
        height=400,
    )
    return heatmap
def create_summary_chart(transitions):
    """Build a stacked bar chart of improved/declined/stable rates per parameter.

    Args:
        transitions: Mapping of parameter name to a dict that contains
            'improvement_rate', 'decline_rate' and 'stable_rate'.

    Returns:
        The Plotly figure with one bar trace per outcome.
    """
    params = list(transitions.keys())

    # (legend name, key in the per-parameter dict, bar colour), in stack order
    outcome_series = (
        ('Improved', 'improvement_rate', '#2ca02c'),
        ('Declined', 'decline_rate', '#d62728'),
        ('Stable', 'stable_rate', '#ff7f0e'),
    )

    fig = go.Figure()
    for trace_name, rate_key, bar_color in outcome_series:
        rates = [transitions[p][rate_key] for p in params]
        fig.add_trace(go.Bar(
            name=trace_name,
            x=params,
            y=rates,
            marker_color=bar_color,
        ))

    fig.update_layout(
        title="Health Parameter Transition Summary",
        xaxis_title="Health Parameters",
        yaxis_title="Percentage of Users",
        barmode='stack',
        height=500,
    )
    return fig
def create_sankey_diagram(df, param, old_col, new_col, location_filter=None):
    """Create a Sankey diagram of old-status -> new-status flows for a parameter.

    Nodes are laid out as all distinct old statuses followed by all distinct
    new statuses, so every link runs from an "(Old)" node to a "(New)" node
    and each node carries the label and colour of its own status. Statuses
    are sorted so the node order is the same on every run.

    Args:
        df: DataFrame holding ``old_col`` and ``new_col`` and, when a location
            filter is applied, a 'Location Shared' column.
        param: Parameter name used in the figure title.
        old_col: Column with the old tag.
        new_col: Column with the new tag.
        location_filter: Value of 'Location Shared' to restrict to. ``None``,
            an empty value, or "All Locations" means no filtering.

    Returns:
        The Plotly figure, or ``None`` when no row has both tags available.
    """
    if location_filter and location_filter != "All Locations":
        df_filtered = df[df['Location Shared'] == location_filter]
    else:
        df_filtered = df

    # Filter out 'Not Available' values
    both_available = (
        (df_filtered[old_col] != 'Not Available')
        & (df_filtered[new_col] != 'Not Available')
    )
    df_filtered = df_filtered[both_available]
    if len(df_filtered) == 0:
        return None

    # One row per (old, new) pair with the number of users making that move
    transitions = df_filtered.groupby([old_col, new_col]).size().reset_index(name='count')

    # key=str keeps the sort well-defined even if a column holds non-string tags
    old_labels = sorted(transitions[old_col].unique(), key=str)
    new_labels = sorted(transitions[new_col].unique(), key=str)

    # Old nodes occupy indices [0, len(old_labels)); new nodes follow them.
    old_index = {label: i for i, label in enumerate(old_labels)}
    new_index = {label: len(old_labels) + i for i, label in enumerate(new_labels)}

    source = [old_index[old] for old in transitions[old_col]]
    target = [new_index[new] for new in transitions[new_col]]
    values = transitions['count'].tolist()

    # Status colours; any other tag falls back to the default blue
    color_map = {'Green': '#2ca02c', 'Orange': '#ff7f0e', 'Red': '#d62728'}
    node_labels = (
        [f"{label} (Old)" for label in old_labels]
        + [f"{label} (New)" for label in new_labels]
    )
    node_colors = [color_map.get(label, '#1f77b4') for label in old_labels + new_labels]

    fig = go.Figure(data=[go.Sankey(
        node=dict(
            pad=15,
            thickness=20,
            line=dict(color="black", width=0.5),
            label=node_labels,
            color=node_colors,
        ),
        link=dict(
            source=source,
            target=target,
            value=values,
        ),
    )])
    fig.update_layout(
        title_text=f"{param} Parameter Transitions",
        font_size=10,
        height=400,
    )
    return fig
def _render_overall_summary(transitions, selected_location):
    """Render the headline metric row and the stacked summary chart."""
    st.header("📈 Overall Summary")
    if selected_location != "All Locations":
        st.info(f"📍 Showing data for: **{selected_location}**")

    col1, col2, col3, col4 = st.columns(4)
    if transitions:
        per_param = list(transitions.values())
        # Floor of the mean per-parameter user count (each parameter has its
        # own number of rows with both tags available).
        total_users = sum(t['total_users'] for t in per_param) // len(per_param)
        avg_improvement = np.mean([t['improvement_rate'] for t in per_param])
        avg_decline = np.mean([t['decline_rate'] for t in per_param])
        avg_stable = np.mean([t['stable_rate'] for t in per_param])
    else:
        total_users = avg_improvement = avg_decline = avg_stable = 0

    with col1:
        st.metric("Total Users Analyzed", f"{total_users:,}")
    with col2:
        st.metric("Average Improvement Rate", f"{avg_improvement:.1f}%",
                  delta=f"+{avg_improvement:.1f}%" if avg_improvement > 0 else None)
    with col3:
        st.metric("Average Decline Rate", f"{avg_decline:.1f}%",
                  delta=f"-{avg_decline:.1f}%" if avg_decline > 0 else None)
    with col4:
        st.metric("Average Stable Rate", f"{avg_stable:.1f}%")

    if transitions:
        st.plotly_chart(create_summary_chart(transitions), use_container_width=True)


def _render_parameter_tab(df_clean, param, cols, stats, selected_location):
    """Render metrics, heatmap, Sankey diagram and table for one parameter."""
    left, right = st.columns([1, 1])
    with left:
        st.subheader(f"{param} Metrics")
        users_col, improved_col, declined_col = st.columns(3)
        with users_col:
            st.metric("Users", stats['total_users'])
        with improved_col:
            st.metric("Improved", f"{stats['improved']}",
                      f"{stats['improvement_rate']:.1f}%")
        with declined_col:
            # The delta here is a positive percentage of users who got worse;
            # invert the colouring so it is not shown as a green gain.
            st.metric("Declined", f"{stats['declined']}",
                      f"{stats['decline_rate']:.1f}%",
                      delta_color="inverse")
        st.plotly_chart(
            create_transition_heatmap(stats['matrix'], param),
            use_container_width=True
        )
    with right:
        sankey_fig = create_sankey_diagram(
            df_clean, param, cols['old_tag'], cols['new_tag'], selected_location
        )
        if sankey_fig is not None:
            st.plotly_chart(sankey_fig, use_container_width=True)
        else:
            st.info("No transition data available for Sankey diagram")

    st.subheader(f"{param} Detailed Transitions")
    st.dataframe(stats['matrix'], use_container_width=True)


def _render_key_insights(transitions):
    """List parameters whose improvement or decline rate crosses a threshold."""
    st.header("💡 Key Insights")
    insights = []
    for param, data in transitions.items():
        if data['total_users'] <= 0:
            continue
        if data['improvement_rate'] > 50:
            insights.append(f"✅ **{param}**: Excellent improvement rate of {data['improvement_rate']:.1f}%")
        elif data['improvement_rate'] > 30:
            insights.append(f"🟡 **{param}**: Good improvement rate of {data['improvement_rate']:.1f}%")
        if data['decline_rate'] > 30:
            insights.append(f"⚠️ **{param}**: High decline rate of {data['decline_rate']:.1f}% - needs attention")

    if insights:
        for insight in insights:
            st.markdown(insight)
    else:
        st.info("No significant insights to highlight at this time.")


def _render_export(transitions, selected_location):
    """Offer the per-parameter summary as a table and a CSV download."""
    st.header("📥 Export Data")
    # NOTE(review): the download button lives inside the st.button branch, so
    # it is only rendered on the run triggered by "Generate Summary Report" -
    # confirm this is the intended UX.
    if st.button("Generate Summary Report"):
        summary_df = pd.DataFrame([
            {
                'Parameter': param,
                'Total Users': data['total_users'],
                'Improved': data['improved'],
                'Declined': data['declined'],
                'Stable': data['stable'],
                'Improvement Rate (%)': round(data['improvement_rate'], 2),
                'Decline Rate (%)': round(data['decline_rate'], 2),
                'Stable Rate (%)': round(data['stable_rate'], 2),
            }
            for param, data in transitions.items()
        ])
        # str() guards against non-string location values (e.g. numeric codes).
        location_slug = str(selected_location).replace(' ', '_')
        st.download_button(
            label="Download Summary CSV",
            data=summary_df.to_csv(index=False),
            file_name=f"health_transitions_summary_{location_slug}.csv",
            mime="text/csv"
        )
        st.dataframe(summary_df, use_container_width=True)


def main():
    """Render the dashboard: load and clean data, apply filters, draw every section.

    Stops the Streamlit script early (after showing an error) when the data
    cannot be loaded.
    """
    st.markdown('<h1 class="main-header">🏥 Health Parameter Transition Dashboard</h1>', unsafe_allow_html=True)
    # Description text is kept flush-left so the markdown string is unchanged.
    st.markdown("""
This dashboard analyzes health parameter transitions between old and new measurements.
It tracks improvements, declines, and stability across different health metrics with location-based filtering.
**Health Parameters Analyzed:**
- **HbA1c**: Blood glucose control indicator
- **LDL**: Low-density lipoprotein cholesterol
- **BMI**: Body Mass Index
- **BP**: Blood Pressure
- **Biometrics**: Overall biometric assessment
- **MHI**: Mental Health Index
""")

    # Load data
    df = load_data()
    if df is None:
        st.error("Unable to load data. Please check if the data file is available.")
        st.stop()

    # Clean data
    df_clean, health_params = clean_tag_data(df)

    # Sidebar location filter
    st.sidebar.header("📊 Dashboard Filters")
    locations = ['All Locations'] + sorted(df_clean['Location Shared'].dropna().unique().tolist())
    selected_location = st.sidebar.selectbox("Select Location", locations)

    transitions = calculate_transitions(df_clean, health_params, selected_location)

    _render_overall_summary(transitions, selected_location)

    # Parameter-wise analysis: one tab per parameter
    st.header("🔍 Parameter-wise Analysis")
    if transitions:
        tabs = st.tabs(list(health_params.keys()))
        for tab, (param, cols) in zip(tabs, health_params.items()):
            with tab:
                stats = transitions.get(param)
                if stats is not None and stats['total_users'] > 0:
                    _render_parameter_tab(df_clean, param, cols, stats, selected_location)
                else:
                    st.warning(f"No data available for {param} parameter")
    else:
        st.warning("No transition data available for the selected location.")

    _render_key_insights(transitions)
    _render_export(transitions, selected_location)
# Script entry point: render the dashboard when this file is run directly.
if __name__ == "__main__":
    main()