Ashar086 committed on
Commit
459e711
·
verified ·
1 Parent(s): 1bb20a4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -112
app.py CHANGED
@@ -1,17 +1,18 @@
 
1
  import streamlit as st
2
  import pandas as pd
3
- import matplotlib.pyplot as plt
4
- import seaborn as sns
5
 
6
- # Load the dataset
7
  @st.cache_data
8
  def load_data():
9
  df = pd.read_csv("hospital_readmissions.csv")
10
- # Data Cleaning
11
  df['medical_specialty'] = df['medical_specialty'].replace('Missing', 'Unknown')
12
  df['diag_1'] = df['diag_1'].replace('Missing', 'Unknown_Diagnosis')
13
  df['diag_2'] = df['diag_2'].replace('Missing', 'Unknown_Diagnosis')
14
  df['diag_3'] = df['diag_3'].replace('Missing', 'Unknown_Diagnosis')
 
15
  df['age'] = df['age'].str.extract(r'\[(\d+)-').astype(float)
16
  return df
17
 
@@ -23,71 +24,45 @@ page = st.sidebar.radio("Go to", ["Overview", "Factors Influencing Readmissions"
23
  "Hospital Stay and Procedures", "Patient Health Insights",
24
  "Recommendations"])
25
 
 
 
 
 
 
 
 
 
26
  # Page 1: Overview
27
  if page == "Overview":
28
  st.title("Hospital Readmissions Overview")
29
 
30
- # Total Patients
31
- total_patients = len(df)
32
- readmission_rate = df['readmitted'].value_counts(normalize=True).get('yes', 0) * 100
33
- avg_hospital_stay = df['time_in_hospital'].mean()
34
 
35
- st.metric("Total Patients", total_patients)
36
- st.metric("Readmission Rate (%)", f"{readmission_rate:.2f}%")
37
- st.metric("Average Hospital Stay (Days)", f"{avg_hospital_stay:.2f}")
38
 
39
  # Age Distribution
40
- st.subheader("Age Distribution")
41
- fig, ax = plt.subplots()
42
- sns.histplot(df['age'], bins=10, kde=False, ax=ax)
43
- ax.set_title("Age Distribution")
44
- ax.set_xlabel("Age Group")
45
- ax.set_ylabel("Count")
46
- st.pyplot(fig)
47
-
48
- # Medical Specialties
49
- st.subheader("Top Medical Specialties")
50
- specialty_counts = df['medical_specialty'].value_counts().head(5)
51
- fig, ax = plt.subplots()
52
- specialty_counts.plot(kind='pie', autopct='%1.1f%%', ax=ax)
53
- ax.set_title("Top Medical Specialties")
54
- ax.set_ylabel("")
55
- st.pyplot(fig)
56
 
57
  # Page 2: Factors Influencing Readmissions
58
  elif page == "Factors Influencing Readmissions":
59
  st.title("Factors Influencing Readmissions")
60
 
61
- # Readmission by Age Group
62
- st.subheader("Readmission by Age Group")
63
- age_readmission = df.groupby(pd.cut(df['age'], bins=10))['readmitted'].value_counts(normalize=True).unstack().fillna(0)
64
- fig, ax = plt.subplots()
65
- age_readmission.plot(kind='bar', stacked=True, ax=ax)
66
- ax.set_title("Readmission by Age Group")
67
- ax.set_xlabel("Age Group")
68
- ax.set_ylabel("Proportion")
69
- st.pyplot(fig)
70
-
71
  # Readmission by Medical Specialty
72
- st.subheader("Readmission by Medical Specialty")
73
- specialty_readmission = df.groupby('medical_specialty')['readmitted'].value_counts(normalize=True).unstack().fillna(0)
74
  specialty_readmission = specialty_readmission.sort_values(by='yes', ascending=False).head(10)
75
- fig, ax = plt.subplots()
76
- specialty_readmission['yes'].plot(kind='barh', color='orange', ax=ax)
77
- ax.set_title("Top Specialties with High Readmission Rates")
78
- ax.set_xlabel("Readmission Rate")
79
- st.pyplot(fig)
80
-
81
- # Readmission by Diagnosis
82
- st.subheader("Readmission by Primary Diagnosis")
83
- diag_readmission = df.groupby('diag_1')['readmitted'].value_counts(normalize=True).unstack().fillna(0)
84
- diag_readmission = diag_readmission.sort_values(by='yes', ascending=False).head(10)
85
- fig, ax = plt.subplots()
86
- diag_readmission['yes'].plot(kind='bar', color='green', ax=ax)
87
- ax.set_title("Top Diagnoses with High Readmission Rates")
88
- ax.set_xlabel("Diagnosis")
89
- ax.set_ylabel("Readmission Rate")
90
- st.pyplot(fig)
91
 
92
  # Page 3: Hospital Stay and Procedures
93
  elif page == "Hospital Stay and Procedures":
@@ -95,64 +70,21 @@ elif page == "Hospital Stay and Procedures":
95
 
96
  # Hospital Stay Duration by Readmission Status
97
  st.subheader("Hospital Stay Duration by Readmission Status")
98
- fig, ax = plt.subplots()
99
- sns.boxplot(x='readmitted', y='time_in_hospital', data=df, ax=ax)
100
- ax.set_title("Hospital Stay Duration by Readmission Status")
101
- ax.set_xlabel("Readmitted")
102
- ax.set_ylabel("Hospital Stay (Days)")
103
- st.pyplot(fig)
104
-
105
- # Number of Lab Procedures
106
- st.subheader("Number of Lab Procedures")
107
- fig, ax = plt.subplots()
108
- sns.histplot(df['n_lab_procedures'], bins=20, kde=False, ax=ax)
109
- ax.set_title("Distribution of Lab Procedures")
110
- ax.set_xlabel("Number of Lab Procedures")
111
- ax.set_ylabel("Count")
112
- st.pyplot(fig)
113
-
114
- # Medication Count by Readmission Status
115
- st.subheader("Medication Count by Readmission Status")
116
- fig, ax = plt.subplots()
117
- sns.violinplot(x='readmitted', y='n_medications', data=df, ax=ax)
118
- ax.set_title("Medication Count by Readmission Status")
119
- ax.set_xlabel("Readmitted")
120
- ax.set_ylabel("Number of Medications")
121
- st.pyplot(fig)
122
 
123
  # Page 4: Patient Health Insights
124
  elif page == "Patient Health Insights":
125
  st.title("Patient Health Insights")
126
 
127
  # Diabetes and Readmissions
128
- st.subheader("Diabetes and Readmissions")
129
- diabetes_readmission = df.groupby('diabetes_med')['readmitted'].value_counts(normalize=True).unstack().fillna(0)
130
- fig, ax = plt.subplots()
131
- diabetes_readmission.plot(kind='bar', ax=ax)
132
- ax.set_title("Readmission Rates by Diabetes Medication")
133
- ax.set_xlabel("Diabetes Medication")
134
- ax.set_ylabel("Proportion")
135
- st.pyplot(fig)
136
-
137
- # Glucose Test Results
138
- st.subheader("Glucose Test Results and Readmissions")
139
- glucose_readmission = df.groupby('glucose_test')['readmitted'].value_counts(normalize=True).unstack().fillna(0)
140
- fig, ax = plt.subplots()
141
- glucose_readmission.plot(kind='bar', stacked=True, ax=ax)
142
- ax.set_title("Glucose Test Results and Readmissions")
143
- ax.set_xlabel("Glucose Test Result")
144
- ax.set_ylabel("Proportion")
145
- st.pyplot(fig)
146
-
147
- # Secondary Diagnoses
148
- st.subheader("Common Secondary Diagnoses")
149
- secondary_diagnoses = df['diag_2'].value_counts().head(10)
150
- fig, ax = plt.subplots()
151
- secondary_diagnoses.plot(kind='bar', color='purple', ax=ax)
152
- ax.set_title("Top Secondary Diagnoses")
153
- ax.set_xlabel("Diagnosis")
154
- ax.set_ylabel("Count")
155
- st.pyplot(fig)
156
 
157
  # Page 5: Recommendations
158
  elif page == "Recommendations":
@@ -174,9 +106,9 @@ elif page == "Recommendations":
174
 
175
  # Interactive Widget
176
  st.subheader("Personalized Risk Factors")
177
- age_group = st.selectbox("Select Age Group", sorted(df['age'].unique()))
178
- diagnosis = st.selectbox("Select Diagnosis", df['diag_1'].unique())
179
 
180
- filtered_df = df[(df['age'] == age_group) & (df['diag_1'] == diagnosis)]
181
- readmission_rate = filtered_df['readmitted'].value_counts(normalize=True).get('yes', 0) * 100
182
- st.metric("Readmission Rate for Selected Group", f"{readmission_rate:.2f}%")
 
1
+
2
  import streamlit as st
3
  import pandas as pd
4
+ import plotly.express as px
 
5
 
6
+ # Load and clean the dataset
7
  @st.cache_data
8
  def load_data():
9
  df = pd.read_csv("hospital_readmissions.csv")
10
+ # Clean missing values
11
  df['medical_specialty'] = df['medical_specialty'].replace('Missing', 'Unknown')
12
  df['diag_1'] = df['diag_1'].replace('Missing', 'Unknown_Diagnosis')
13
  df['diag_2'] = df['diag_2'].replace('Missing', 'Unknown_Diagnosis')
14
  df['diag_3'] = df['diag_3'].replace('Missing', 'Unknown_Diagnosis')
15
+ # Extract numerical age from range
16
  df['age'] = df['age'].str.extract(r'\[(\d+)-').astype(float)
17
  return df
18
 
 
24
  "Hospital Stay and Procedures", "Patient Health Insights",
25
  "Recommendations"])
26
 
27
# Sidebar filters: the user narrows the dataset by age group and primary
# diagnosis; every page below works from `filtered_df`.
st.sidebar.header("Filters")

# Compute the sorted age options once and reuse them for both the option list
# and the default selection (all ages selected initially).
age_options = sorted(df['age'].unique())
age_group = st.sidebar.multiselect("Select Age Group", options=age_options, default=age_options)

# st.multiselect raises if a default value is not one of the options, so only
# pre-select the preferred diagnoses that actually occur in the data. If none
# of them occur, fall back to selecting everything rather than starting with
# an empty dashboard.
diagnosis_options = df['diag_1'].unique()
available_diagnoses = set(diagnosis_options)
diagnosis_defaults = [d for d in ("Circulatory", "Diabetes") if d in available_diagnoses]
if not diagnosis_defaults:
    diagnosis_defaults = list(diagnosis_options)
diagnosis = st.sidebar.multiselect("Select Diagnosis", options=diagnosis_options, default=diagnosis_defaults)

# Filter the data based on user selection
filtered_df = df[df['age'].isin(age_group) & df['diag_1'].isin(diagnosis)]
34
+
35
  # Page 1: Overview
36
  if page == "Overview":
37
  st.title("Hospital Readmissions Overview")
38
 
39
+ # Total Patients and Readmission Rate
40
+ total_patients = len(filtered_df)
41
+ readmission_rate = filtered_df['readmitted'].value_counts(normalize=True).get('yes', 0) * 100
 
42
 
43
+ col1, col2 = st.columns(2)
44
+ col1.metric("Total Patients", total_patients)
45
+ col2.metric("Readmission Rate (%)", f"{readmission_rate:.2f}%")
46
 
47
  # Age Distribution
48
+ st.subheader("Age Distribution of Patients")
49
+ fig_age = px.histogram(filtered_df, x='age', nbins=10, title="Age Distribution",
50
+ labels={'age': 'Age Group', 'count': 'Number of Patients'})
51
+ fig_age.update_layout(showlegend=False)
52
+ st.plotly_chart(fig_age, use_container_width=True)
 
 
 
 
 
 
 
 
 
 
 
53
 
54
  # Page 2: Factors Influencing Readmissions
55
  elif page == "Factors Influencing Readmissions":
56
  st.title("Factors Influencing Readmissions")
57
 
 
 
 
 
 
 
 
 
 
 
58
  # Readmission by Medical Specialty
59
+ st.subheader("Readmission Rates by Medical Specialty")
60
+ specialty_readmission = filtered_df.groupby('medical_specialty')['readmitted'].value_counts(normalize=True).unstack().fillna(0)
61
  specialty_readmission = specialty_readmission.sort_values(by='yes', ascending=False).head(10)
62
+ fig_specialty = px.bar(specialty_readmission, y='yes', title="Top Specialties with High Readmission Rates",
63
+ labels={'index': 'Medical Specialty', 'yes': 'Readmission Rate (%)'})
64
+ fig_specialty.update_layout(yaxis_tickformat=".0%")
65
+ st.plotly_chart(fig_specialty, use_container_width=True)
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
  # Page 3: Hospital Stay and Procedures
68
  elif page == "Hospital Stay and Procedures":
 
70
 
71
  # Hospital Stay Duration by Readmission Status
72
  st.subheader("Hospital Stay Duration by Readmission Status")
73
+ fig_stay = px.box(filtered_df, x='readmitted', y='time_in_hospital', title="Hospital Stay Duration by Readmission Status",
74
+ labels={'readmitted': 'Readmitted', 'time_in_hospital': 'Hospital Stay (Days)'})
75
+ st.plotly_chart(fig_stay, use_container_width=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
 
77
  # Page 4: Patient Health Insights
78
  elif page == "Patient Health Insights":
79
  st.title("Patient Health Insights")
80
 
81
  # Diabetes and Readmissions
82
+ st.subheader("Diabetes Medication and Readmissions")
83
+ diabetes_readmission = filtered_df.groupby('diabetes_med')['readmitted'].value_counts(normalize=True).unstack().fillna(0)
84
+ fig_diabetes = px.bar(diabetes_readmission, barmode='group', title="Readmission Rates by Diabetes Medication",
85
+ labels={'index': 'Diabetes Medication', 'yes': 'Readmission Rate (%)'})
86
+ fig_diabetes.update_layout(yaxis_tickformat=".0%")
87
+ st.plotly_chart(fig_diabetes, use_container_width=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
 
89
  # Page 5: Recommendations
90
  elif page == "Recommendations":
 
106
 
107
  # Interactive Widget
108
  st.subheader("Personalized Risk Factors")
109
+ selected_age = st.selectbox("Select Age Group", sorted(filtered_df['age'].unique()))
110
+ selected_diagnosis = st.selectbox("Select Diagnosis", filtered_df['diag_1'].unique())
111
 
112
+ personalized_df = filtered_df[(filtered_df['age'] == selected_age) & (filtered_df['diag_1'] == selected_diagnosis)]
113
+ readmission_rate_personalized = personalized_df['readmitted'].value_counts(normalize=True).get('yes', 0) * 100
114
+ st.metric("Readmission Rate for Selected Group", f"{readmission_rate_personalized:.2f}%")