Rakesh committed on
Commit
79c5988
·
verified ·
1 Parent(s): 58de496

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +459 -38
src/streamlit_app.py CHANGED
@@ -1,40 +1,461 @@
1
- import altair as alt
2
- import numpy as np
3
- import pandas as pd
4
  import streamlit as st
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
- """
7
- # Welcome to Streamlit!
8
-
9
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
- forums](https://discuss.streamlit.io).
12
-
13
- In the meantime, below is an example of what you can do with just a few lines of code:
14
- """
15
-
16
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
17
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
18
-
19
- indices = np.linspace(0, 1, num_points)
20
- theta = 2 * np.pi * num_turns * indices
21
- radius = indices
22
-
23
- x = radius * np.cos(theta)
24
- y = radius * np.sin(theta)
25
-
26
- df = pd.DataFrame({
27
- "x": x,
28
- "y": y,
29
- "idx": indices,
30
- "rand": np.random.randn(num_points),
31
- })
32
-
33
- st.altair_chart(alt.Chart(df, height=700, width=700)
34
- .mark_point(filled=True)
35
- .encode(
36
- x=alt.X("x", axis=None),
37
- y=alt.Y("y", axis=None),
38
- color=alt.Color("idx", legend=None, scale=alt.Scale()),
39
- size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
40
- ))
 
 
 
 
1
  import streamlit as st
2
+ import pandas as pd
3
+ import plotly.express as px
4
+ import plotly.graph_objects as go
5
+ from plotly.subplots import make_subplots
6
+ import numpy as np
7
+
8
# Page-level settings: browser-tab title and icon, wide layout, and a sidebar
# that starts expanded.
st.set_page_config(
    page_title="Health Parameter Transition Dashboard",
    page_icon="🏥",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Custom CSS for better styling. The classes below are injected as raw HTML;
# `.main-header` is the one used by the page title rendered in main().
st.markdown("""
<style>
    .main-header {
        font-size: 2.5rem;
        font-weight: bold;
        color: #1f77b4;
        text-align: center;
        margin-bottom: 2rem;
    }

    .metric-card {
        background-color: #f0f2f6;
        padding: 1rem;
        border-radius: 0.5rem;
        border-left: 4px solid #1f77b4;
    }

    .improvement {
        color: #2ca02c;
        font-weight: bold;
    }

    .decline {
        color: #d62728;
        font-weight: bold;
    }

    .stable {
        color: #ff7f0e;
        font-weight: bold;
    }
</style>
""", unsafe_allow_html=True)
50
+
51
@st.cache_data
def _read_health_csv(path):
    """Read the CSV at *path* into a DataFrame (result cached by Streamlit).

    Failures are raised rather than returned, so only successful reads are
    ever stored in the cache.
    """
    return pd.read_csv(path)


def load_data(path="Combines 2,3,7,9,11(Sheet1).csv"):
    """Load the health data CSV.

    Args:
        path: Location of the CSV file. Defaults to the export that ships
            with the app; a relative path is resolved against the current
            working directory.

    Returns:
        The loaded ``pandas.DataFrame``, or ``None`` when the file cannot be
        read or parsed (an error box is shown in the app in that case).
    """
    try:
        return _read_health_csv(path)
    # OSError covers a missing/unreadable file; ValueError covers pandas'
    # EmptyDataError/ParserError and UnicodeDecodeError. Anything else is a
    # programming error and is allowed to propagate.
    except (OSError, ValueError) as e:
        st.error(f"Error loading data: {e}")
        return None
60
+
61
def clean_tag_data(df):
    """Return a cleaned copy of *df* with standardized health-tag columns.

    For every old/new tag column the values are stringified, stripped,
    title-cased and mapped onto the labels used by the rest of the app
    (``Red`` / ``Orange`` / ``Green`` / ``Not Available``). Missing values,
    empty strings and zero (``'0'`` or ``'0.0'``, as produced when a numeric
    column is stringified) all become ``'Not Available'``.

    Args:
        df: Raw DataFrame containing the twelve tag columns listed in
            ``health_params`` below. It is not modified.

    Returns:
        ``(cleaned_df, health_params)`` where ``health_params`` maps each
        parameter name to its ``'old_tag'`` and ``'new_tag'`` column names.

    Raises:
        KeyError: If one of the expected tag columns is absent from *df*.
    """
    not_available = 'Not Available'

    # Define health parameters with their old and new tag columns
    health_params = {
        'HbA1c': {'old_tag': 'Hba1c tag old', 'new_tag': 'Hba1c tag'},
        'LDL': {'old_tag': 'LDLtag old', 'new_tag': 'LDLtag'},
        'BMI': {'old_tag': 'BMItag old', 'new_tag': 'BMItag'},
        'BP': {'old_tag': 'Bptag old', 'new_tag': 'Bptag'},
        'Biometrics': {'old_tag': 'biometric tag old', 'new_tag': 'biometric tag'},
        'MHI': {'old_tag': 'MHI old', 'new_tag': 'MHI NEW'},
    }

    # Keys are matched after title-casing, hence 'Sub-Optimal' / 'Suboptimal'.
    tag_aliases = {
        'Alert': 'Red',
        'Sub-Optimal': 'Orange',
        'Optimal': 'Green',
        'Suboptimal': 'Orange',
        '0': not_available,
        '0.0': not_available,
        '': not_available,
    }

    # Work on a copy so the caller's frame is left untouched.
    cleaned = df.copy()
    for cols in health_params.values():
        for col in (cols['old_tag'], cols['new_tag']):
            # Cast to object first so a numeric column can hold the
            # placeholder string.
            tags = cleaned[col].astype(object).fillna(not_available)
            tags = tags.astype(str).str.strip().str.title()
            cleaned[col] = tags.replace(tag_aliases)

    return cleaned, health_params
93
+
94
def calculate_transitions(df, health_params, location_filter=None):
    """Calculate transition matrices and summary counts per health parameter.

    Args:
        df: Cleaned DataFrame holding the tag columns named in
            *health_params* and, when a location filter is used, a
            ``'Location Shared'`` column.
        health_params: Mapping of parameter name to a dict with
            ``'old_tag'`` and ``'new_tag'`` column names.
        location_filter: Keep only rows whose ``'Location Shared'`` equals
            this value. ``None``, an empty string or ``"All Locations"``
            disables the filter.

    Returns:
        Dict keyed by parameter name. Parameters with no row where both tags
        are available are omitted. Each entry contains:

        * ``'matrix'``: ``pd.crosstab`` of old vs. new tag, with ``All``
          margins.
        * ``'total_users'``: number of rows where both tags are available.
        * ``'improved'`` / ``'declined'`` / ``'stable'``: row counts among
          rows whose tags are both one of Red/Orange/Green.
        * ``'improvement_rate'`` / ``'decline_rate'`` / ``'stable_rate'``:
          the counts above as a percentage of ``'total_users'``. Rows with a
          tag outside Red/Orange/Green count towards the denominator only,
          so the three rates may sum to less than 100.
    """
    if location_filter and location_filter != "All Locations":
        df_filtered = df[df['Location Shared'] == location_filter]
    else:
        df_filtered = df

    # Higher score = worse status, so a drop in score is an improvement.
    tag_hierarchy = {'Red': 3, 'Orange': 2, 'Green': 1}

    transitions = {}

    for param, cols in health_params.items():
        old_col = cols['old_tag']
        new_col = cols['new_tag']

        # Keep only rows where both measurements are available.
        both_known = (
            (df_filtered[old_col] != 'Not Available')
            & (df_filtered[new_col] != 'Not Available')
        )
        transition_df = df_filtered.loc[both_known, [old_col, new_col]]

        total_users = len(transition_df)
        if total_users == 0:
            continue

        transition_matrix = pd.crosstab(
            transition_df[old_col],
            transition_df[new_col],
            margins=True,
        )

        # Tags outside the hierarchy map to NaN; every comparison involving
        # NaN is False, so such rows land in none of the three buckets.
        old_score = transition_df[old_col].map(tag_hierarchy)
        new_score = transition_df[new_col].map(tag_hierarchy)
        improved = int((new_score < old_score).sum())
        declined = int((new_score > old_score).sum())
        stable = int((new_score == old_score).sum())

        transitions[param] = {
            'matrix': transition_matrix,
            'total_users': total_users,
            'improved': improved,
            'declined': declined,
            'stable': stable,
            'improvement_rate': improved / total_users * 100,
            'decline_rate': declined / total_users * 100,
            'stable_rate': stable / total_users * 100,
        }

    return transitions
158
+
159
def create_transition_heatmap(transition_matrix, param_name):
    """Create an annotated heatmap for a transition matrix.

    Args:
        transition_matrix: Old-status (rows) by new-status (columns) count
            table, e.g. the ``pd.crosstab`` result stored under ``'matrix'``
            by ``calculate_transitions``. An ``'All'`` margin row/column is
            removed if present.
        param_name: Health parameter name, used in the figure title.

    Returns:
        A Plotly figure with one cell per (old, new) pair and the count
        written inside each cell.
    """
    # Remove the 'All' row and column for cleaner visualization.
    # errors='ignore' keeps this working for a matrix built without margins.
    matrix_clean = transition_matrix.drop(index='All', columns='All', errors='ignore')

    fig = px.imshow(
        matrix_clean.values,
        x=matrix_clean.columns,
        y=matrix_clean.index,
        color_continuous_scale='Blues',
        aspect="auto",
        title=f"{param_name} Transition Matrix",
    )

    # Cells darker than the midpoint of the colour scale get white text.
    # The threshold is the same for every cell, so compute it once.
    counts = matrix_clean.to_numpy()
    threshold = counts.max() / 2 if counts.size else 0

    # Add text annotations; x/y are the zero-based category positions.
    for i, row_counts in enumerate(counts):
        for j, count in enumerate(row_counts):
            fig.add_annotation(
                x=j, y=i,
                text=str(count),
                showarrow=False,
                font=dict(color="white" if count > threshold else "black"),
            )

    fig.update_layout(
        xaxis_title="New Status",
        yaxis_title="Old Status",
        height=400,
    )

    return fig
190
+
191
def create_summary_chart(transitions):
    """Build a stacked bar chart of improved/declined/stable rates.

    Args:
        transitions: Mapping of parameter name to a dict holding the keys
            ``'improvement_rate'``, ``'decline_rate'`` and ``'stable_rate'``.

    Returns:
        A Plotly figure with one stacked bar per parameter.
    """
    param_names = list(transitions.keys())

    # (legend name, key in the per-parameter dict, bar colour), in the
    # order the traces are stacked.
    series_specs = (
        ('Improved', 'improvement_rate', '#2ca02c'),
        ('Declined', 'decline_rate', '#d62728'),
        ('Stable', 'stable_rate', '#ff7f0e'),
    )

    chart = go.Figure()
    for trace_name, rate_key, bar_color in series_specs:
        rates = [transitions[name][rate_key] for name in param_names]
        chart.add_trace(go.Bar(
            name=trace_name,
            x=param_names,
            y=rates,
            marker_color=bar_color,
        ))

    chart.update_layout(
        title="Health Parameter Transition Summary",
        xaxis_title="Health Parameters",
        yaxis_title="Percentage of Users",
        barmode='stack',
        height=500,
    )

    return chart
230
+
231
def create_sankey_diagram(df, param, old_col, new_col, location_filter=None):
    """Create a Sankey diagram of old-status to new-status transitions.

    Args:
        df: Cleaned DataFrame containing *old_col* and *new_col* and, when a
            location filter is used, a ``'Location Shared'`` column.
        param: Health parameter name, used in the figure title.
        old_col: Column holding the old tag.
        new_col: Column holding the new tag.
        location_filter: Keep only rows whose ``'Location Shared'`` equals
            this value. ``None``, an empty string or ``"All Locations"``
            disables the filter.

    Returns:
        A Plotly figure, or ``None`` when no row has both tags available.
    """
    if location_filter and location_filter != "All Locations":
        df_filtered = df[df['Location Shared'] == location_filter]
    else:
        df_filtered = df

    # Filter out 'Not Available' values
    df_filtered = df_filtered[
        (df_filtered[old_col] != 'Not Available')
        & (df_filtered[new_col] != 'Not Available')
    ]

    if len(df_filtered) == 0:
        return None

    # One row per (old, new) pair with its number of users.
    flows = df_filtered.groupby([old_col, new_col]).size().reset_index(name='count')

    # Nodes are laid out as [old labels..., new labels...]. Each side only
    # lists the labels that actually occur on it, and the "new" indices are
    # offset by the number of "old" nodes so links never point at the wrong
    # group. Sorting makes the node order reproducible between runs.
    old_labels = sorted(flows[old_col].unique(), key=str)
    new_labels = sorted(flows[new_col].unique(), key=str)
    old_index = {label: i for i, label in enumerate(old_labels)}
    new_index = {label: len(old_labels) + i for i, label in enumerate(new_labels)}

    # Prepare data for Sankey
    source = [old_index[label] for label in flows[old_col]]
    target = [new_index[label] for label in flows[new_col]]
    values = flows['count'].tolist()

    # Status colours; any other label falls back to the default blue.
    color_map = {'Green': '#2ca02c', 'Orange': '#ff7f0e', 'Red': '#d62728'}
    node_labels = (
        [f"{label} (Old)" for label in old_labels]
        + [f"{label} (New)" for label in new_labels]
    )
    node_colors = [color_map.get(label, '#1f77b4') for label in old_labels + new_labels]

    fig = go.Figure(data=[go.Sankey(
        node=dict(
            pad=15,
            thickness=20,
            line=dict(color="black", width=0.5),
            label=node_labels,
            color=node_colors,
        ),
        link=dict(
            source=source,
            target=target,
            value=values,
        ),
    )])

    fig.update_layout(
        title_text=f"{param} Parameter Transitions",
        font_size=10,
        height=400,
    )

    return fig
286
+
287
def main():
    """Render the dashboard.

    Loads and cleans the data, offers a location filter in the sidebar, then
    shows an overall summary, a tab per health parameter (metrics, heatmap,
    Sankey diagram and transition table), a list of threshold-based insights
    and a CSV export of the summary.
    """
    st.markdown(
        '<h1 class="main-header">🏥 Health Parameter Transition Dashboard</h1>',
        unsafe_allow_html=True,
    )

    # Add description
    st.markdown("""
    This dashboard analyzes health parameter transitions between old and new measurements.
    It tracks improvements, declines, and stability across different health metrics with location-based filtering.

    **Health Parameters Analyzed:**
    - **HbA1c**: Blood glucose control indicator
    - **LDL**: Low-density lipoprotein cholesterol
    - **BMI**: Body Mass Index
    - **BP**: Blood Pressure
    - **Biometrics**: Overall biometric assessment
    - **MHI**: Mental Health Index
    """)

    # Load data
    df = load_data()
    if df is None:
        st.error("Unable to load data. Please check if the data file is available.")
        st.stop()

    # Clean data
    df_clean, health_params = clean_tag_data(df)

    # Sidebar for filters
    st.sidebar.header("📊 Dashboard Filters")

    # Location filter
    locations = ['All Locations'] + sorted(df_clean['Location Shared'].dropna().unique().tolist())
    selected_location = st.sidebar.selectbox("Select Location", locations)

    # Calculate transitions
    transitions = calculate_transitions(df_clean, health_params, selected_location)

    # Display summary metrics
    st.header("📈 Overall Summary")

    if selected_location != "All Locations":
        st.info(f"📍 Showing data for: **{selected_location}**")

    # Create columns for summary metrics
    col1, col2, col3, col4 = st.columns(4)

    # Each parameter has its own number of users with both tags available,
    # so the headline figure is the (integer) average across parameters.
    total_users = sum(t['total_users'] for t in transitions.values()) // len(transitions) if transitions else 0
    avg_improvement = np.mean([t['improvement_rate'] for t in transitions.values()]) if transitions else 0
    avg_decline = np.mean([t['decline_rate'] for t in transitions.values()]) if transitions else 0
    avg_stable = np.mean([t['stable_rate'] for t in transitions.values()]) if transitions else 0

    with col1:
        st.metric(
            "Total Users Analyzed",
            f"{total_users:,}",
            help="Average, across health parameters, of the number of users "
                 "with both an old and a new tag available.",
        )

    with col2:
        st.metric("Average Improvement Rate", f"{avg_improvement:.1f}%",
                  delta=f"+{avg_improvement:.1f}%" if avg_improvement > 0 else None)

    with col3:
        st.metric("Average Decline Rate", f"{avg_decline:.1f}%",
                  delta=f"-{avg_decline:.1f}%" if avg_decline > 0 else None)

    with col4:
        st.metric("Average Stable Rate", f"{avg_stable:.1f}%")

    # Summary chart
    if transitions:
        st.plotly_chart(create_summary_chart(transitions), use_container_width=True)

    # Parameter-wise analysis
    st.header("🔍 Parameter-wise Analysis")

    if transitions:
        tabs = st.tabs(list(health_params.keys()))

        for i, (param, cols) in enumerate(health_params.items()):
            with tabs[i]:
                if param in transitions and transitions[param]['total_users'] > 0:
                    col1, col2 = st.columns([1, 1])

                    with col1:
                        # Display metrics for this parameter
                        st.subheader(f"{param} Metrics")

                        metrics_col1, metrics_col2, metrics_col3 = st.columns(3)

                        with metrics_col1:
                            st.metric("Users", transitions[param]['total_users'])

                        with metrics_col2:
                            improvement_rate = transitions[param]['improvement_rate']
                            st.metric("Improved", f"{transitions[param]['improved']}",
                                      f"{improvement_rate:.1f}%")

                        with metrics_col3:
                            decline_rate = transitions[param]['decline_rate']
                            # A larger decline share is bad news, so invert
                            # the default "positive delta = green" colouring.
                            st.metric("Declined", f"{transitions[param]['declined']}",
                                      f"{decline_rate:.1f}%", delta_color="inverse")

                        # Transition matrix heatmap
                        st.plotly_chart(
                            create_transition_heatmap(transitions[param]['matrix'], param),
                            use_container_width=True,
                        )

                    with col2:
                        # Sankey diagram
                        sankey_fig = create_sankey_diagram(
                            df_clean, param, cols['old_tag'], cols['new_tag'], selected_location
                        )
                        if sankey_fig:
                            st.plotly_chart(sankey_fig, use_container_width=True)
                        else:
                            st.info("No transition data available for Sankey diagram")

                    # Detailed transition table
                    st.subheader(f"{param} Detailed Transitions")
                    transition_table = transitions[param]['matrix']
                    st.dataframe(transition_table, use_container_width=True)

                else:
                    st.warning(f"No data available for {param} parameter")
    else:
        st.warning("No transition data available for the selected location.")

    # Data insights
    st.header("💡 Key Insights")

    insights = []

    for param, data in transitions.items():
        if data['total_users'] > 0:
            if data['improvement_rate'] > 50:
                insights.append(f"✅ **{param}**: Excellent improvement rate of {data['improvement_rate']:.1f}%")
            elif data['improvement_rate'] > 30:
                insights.append(f"🟡 **{param}**: Good improvement rate of {data['improvement_rate']:.1f}%")

            if data['decline_rate'] > 30:
                insights.append(f"⚠️ **{param}**: High decline rate of {data['decline_rate']:.1f}% - needs attention")

    if insights:
        for insight in insights:
            st.markdown(insight)
    else:
        st.info("No significant insights to highlight at this time.")

    # Export functionality
    st.header("📥 Export Data")

    if st.button("Generate Summary Report"):
        summary_data = [
            {
                'Parameter': param,
                'Total Users': data['total_users'],
                'Improved': data['improved'],
                'Declined': data['declined'],
                'Stable': data['stable'],
                'Improvement Rate (%)': round(data['improvement_rate'], 2),
                'Decline Rate (%)': round(data['decline_rate'], 2),
                'Stable Rate (%)': round(data['stable_rate'], 2),
            }
            for param, data in transitions.items()
        ]

        summary_df = pd.DataFrame(summary_data)

        st.download_button(
            label="Download Summary CSV",
            data=summary_df.to_csv(index=False),
            file_name=f"health_transitions_summary_{selected_location.replace(' ', '_')}.csv",
            mime="text/csv",
        )

        st.dataframe(summary_df, use_container_width=True)


if __name__ == "__main__":
    main()