Update app.py
Browse files
app.py
CHANGED
|
@@ -1,17 +1,18 @@
|
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
import pandas as pd
|
| 3 |
-
import
|
| 4 |
-
import seaborn as sns
|
| 5 |
|
| 6 |
-
# Load the dataset
|
| 7 |
@st.cache_data
|
| 8 |
def load_data():
|
| 9 |
df = pd.read_csv("hospital_readmissions.csv")
|
| 10 |
-
#
|
| 11 |
df['medical_specialty'] = df['medical_specialty'].replace('Missing', 'Unknown')
|
| 12 |
df['diag_1'] = df['diag_1'].replace('Missing', 'Unknown_Diagnosis')
|
| 13 |
df['diag_2'] = df['diag_2'].replace('Missing', 'Unknown_Diagnosis')
|
| 14 |
df['diag_3'] = df['diag_3'].replace('Missing', 'Unknown_Diagnosis')
|
|
|
|
| 15 |
df['age'] = df['age'].str.extract(r'\[(\d+)-').astype(float)
|
| 16 |
return df
|
| 17 |
|
|
@@ -23,71 +24,45 @@ page = st.sidebar.radio("Go to", ["Overview", "Factors Influencing Readmissions"
|
|
| 23 |
"Hospital Stay and Procedures", "Patient Health Insights",
|
| 24 |
"Recommendations"])
|
| 25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
# Page 1: Overview
|
| 27 |
if page == "Overview":
|
| 28 |
st.title("Hospital Readmissions Overview")
|
| 29 |
|
| 30 |
-
# Total Patients
|
| 31 |
-
total_patients = len(
|
| 32 |
-
readmission_rate =
|
| 33 |
-
avg_hospital_stay = df['time_in_hospital'].mean()
|
| 34 |
|
| 35 |
-
st.
|
| 36 |
-
|
| 37 |
-
|
| 38 |
|
| 39 |
# Age Distribution
|
| 40 |
-
st.subheader("Age Distribution")
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
ax.set_ylabel("Count")
|
| 46 |
-
st.pyplot(fig)
|
| 47 |
-
|
| 48 |
-
# Medical Specialties
|
| 49 |
-
st.subheader("Top Medical Specialties")
|
| 50 |
-
specialty_counts = df['medical_specialty'].value_counts().head(5)
|
| 51 |
-
fig, ax = plt.subplots()
|
| 52 |
-
specialty_counts.plot(kind='pie', autopct='%1.1f%%', ax=ax)
|
| 53 |
-
ax.set_title("Top Medical Specialties")
|
| 54 |
-
ax.set_ylabel("")
|
| 55 |
-
st.pyplot(fig)
|
| 56 |
|
| 57 |
# Page 2: Factors Influencing Readmissions
|
| 58 |
elif page == "Factors Influencing Readmissions":
|
| 59 |
st.title("Factors Influencing Readmissions")
|
| 60 |
|
| 61 |
-
# Readmission by Age Group
|
| 62 |
-
st.subheader("Readmission by Age Group")
|
| 63 |
-
age_readmission = df.groupby(pd.cut(df['age'], bins=10))['readmitted'].value_counts(normalize=True).unstack().fillna(0)
|
| 64 |
-
fig, ax = plt.subplots()
|
| 65 |
-
age_readmission.plot(kind='bar', stacked=True, ax=ax)
|
| 66 |
-
ax.set_title("Readmission by Age Group")
|
| 67 |
-
ax.set_xlabel("Age Group")
|
| 68 |
-
ax.set_ylabel("Proportion")
|
| 69 |
-
st.pyplot(fig)
|
| 70 |
-
|
| 71 |
# Readmission by Medical Specialty
|
| 72 |
-
st.subheader("Readmission by Medical Specialty")
|
| 73 |
-
specialty_readmission =
|
| 74 |
specialty_readmission = specialty_readmission.sort_values(by='yes', ascending=False).head(10)
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
st.pyplot(fig)
|
| 80 |
-
|
| 81 |
-
# Readmission by Diagnosis
|
| 82 |
-
st.subheader("Readmission by Primary Diagnosis")
|
| 83 |
-
diag_readmission = df.groupby('diag_1')['readmitted'].value_counts(normalize=True).unstack().fillna(0)
|
| 84 |
-
diag_readmission = diag_readmission.sort_values(by='yes', ascending=False).head(10)
|
| 85 |
-
fig, ax = plt.subplots()
|
| 86 |
-
diag_readmission['yes'].plot(kind='bar', color='green', ax=ax)
|
| 87 |
-
ax.set_title("Top Diagnoses with High Readmission Rates")
|
| 88 |
-
ax.set_xlabel("Diagnosis")
|
| 89 |
-
ax.set_ylabel("Readmission Rate")
|
| 90 |
-
st.pyplot(fig)
|
| 91 |
|
| 92 |
# Page 3: Hospital Stay and Procedures
|
| 93 |
elif page == "Hospital Stay and Procedures":
|
|
@@ -95,64 +70,21 @@ elif page == "Hospital Stay and Procedures":
|
|
| 95 |
|
| 96 |
# Hospital Stay Duration by Readmission Status
|
| 97 |
st.subheader("Hospital Stay Duration by Readmission Status")
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
ax.set_xlabel("Readmitted")
|
| 102 |
-
ax.set_ylabel("Hospital Stay (Days)")
|
| 103 |
-
st.pyplot(fig)
|
| 104 |
-
|
| 105 |
-
# Number of Lab Procedures
|
| 106 |
-
st.subheader("Number of Lab Procedures")
|
| 107 |
-
fig, ax = plt.subplots()
|
| 108 |
-
sns.histplot(df['n_lab_procedures'], bins=20, kde=False, ax=ax)
|
| 109 |
-
ax.set_title("Distribution of Lab Procedures")
|
| 110 |
-
ax.set_xlabel("Number of Lab Procedures")
|
| 111 |
-
ax.set_ylabel("Count")
|
| 112 |
-
st.pyplot(fig)
|
| 113 |
-
|
| 114 |
-
# Medication Count by Readmission Status
|
| 115 |
-
st.subheader("Medication Count by Readmission Status")
|
| 116 |
-
fig, ax = plt.subplots()
|
| 117 |
-
sns.violinplot(x='readmitted', y='n_medications', data=df, ax=ax)
|
| 118 |
-
ax.set_title("Medication Count by Readmission Status")
|
| 119 |
-
ax.set_xlabel("Readmitted")
|
| 120 |
-
ax.set_ylabel("Number of Medications")
|
| 121 |
-
st.pyplot(fig)
|
| 122 |
|
| 123 |
# Page 4: Patient Health Insights
|
| 124 |
elif page == "Patient Health Insights":
|
| 125 |
st.title("Patient Health Insights")
|
| 126 |
|
| 127 |
# Diabetes and Readmissions
|
| 128 |
-
st.subheader("Diabetes and Readmissions")
|
| 129 |
-
diabetes_readmission =
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
ax.set_ylabel("Proportion")
|
| 135 |
-
st.pyplot(fig)
|
| 136 |
-
|
| 137 |
-
# Glucose Test Results
|
| 138 |
-
st.subheader("Glucose Test Results and Readmissions")
|
| 139 |
-
glucose_readmission = df.groupby('glucose_test')['readmitted'].value_counts(normalize=True).unstack().fillna(0)
|
| 140 |
-
fig, ax = plt.subplots()
|
| 141 |
-
glucose_readmission.plot(kind='bar', stacked=True, ax=ax)
|
| 142 |
-
ax.set_title("Glucose Test Results and Readmissions")
|
| 143 |
-
ax.set_xlabel("Glucose Test Result")
|
| 144 |
-
ax.set_ylabel("Proportion")
|
| 145 |
-
st.pyplot(fig)
|
| 146 |
-
|
| 147 |
-
# Secondary Diagnoses
|
| 148 |
-
st.subheader("Common Secondary Diagnoses")
|
| 149 |
-
secondary_diagnoses = df['diag_2'].value_counts().head(10)
|
| 150 |
-
fig, ax = plt.subplots()
|
| 151 |
-
secondary_diagnoses.plot(kind='bar', color='purple', ax=ax)
|
| 152 |
-
ax.set_title("Top Secondary Diagnoses")
|
| 153 |
-
ax.set_xlabel("Diagnosis")
|
| 154 |
-
ax.set_ylabel("Count")
|
| 155 |
-
st.pyplot(fig)
|
| 156 |
|
| 157 |
# Page 5: Recommendations
|
| 158 |
elif page == "Recommendations":
|
|
@@ -174,9 +106,9 @@ elif page == "Recommendations":
|
|
| 174 |
|
| 175 |
# Interactive Widget
|
| 176 |
st.subheader("Personalized Risk Factors")
|
| 177 |
-
|
| 178 |
-
|
| 179 |
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
st.metric("Readmission Rate for Selected Group", f"{
|
|
|
|
| 1 |
+
|
| 2 |
import streamlit as st
|
| 3 |
import pandas as pd
|
| 4 |
+
import plotly.express as px
|
|
|
|
| 5 |
|
| 6 |
+
# Load and clean the dataset
|
| 7 |
@st.cache_data
def load_data():
    """Read the hospital readmissions CSV and return a cleaned DataFrame.

    The 'Missing' placeholder in the specialty and diagnosis columns is
    swapped for an explicit label, and the textual age bracket is replaced
    by the number that follows its opening '[' (as a float).
    """
    df = pd.read_csv("hospital_readmissions.csv")

    # Column -> label that stands in for the 'Missing' placeholder.
    placeholder_labels = {
        'medical_specialty': 'Unknown',
        'diag_1': 'Unknown_Diagnosis',
        'diag_2': 'Unknown_Diagnosis',
        'diag_3': 'Unknown_Diagnosis',
    }
    for column, label in placeholder_labels.items():
        df[column] = df[column].replace('Missing', label)

    # Capture the digits between '[' and '-'; values that do not match the
    # pattern become NaN.
    bracket_start = df['age'].str.extract(r'\[(\d+)-', expand=False)
    df['age'] = bracket_start.astype(float)
    return df
|
| 18 |
|
|
|
|
| 24 |
"Hospital Stay and Procedures", "Patient Health Insights",
|
| 25 |
"Recommendations"])
|
| 26 |
|
| 27 |
+
# Filters for interactivity
|
| 28 |
+
st.sidebar.header("Filters")

# Build each option list once. Ages that could not be parsed (NaN) are left
# out so the sort order is well-defined and the widget never offers a "nan"
# entry; as a consequence rows without a parsed age are excluded below.
age_options = sorted(df['age'].dropna().unique())
diagnosis_options = list(df['diag_1'].unique())

age_group = st.sidebar.multiselect("Select Age Group", options=age_options, default=age_options)

# A default that is not among the options is rejected by the widget, so keep
# only the preferred defaults that actually occur in the data.
preferred_diagnoses = ["Circulatory", "Diabetes"]
default_diagnoses = [name for name in preferred_diagnoses if name in diagnosis_options]
diagnosis = st.sidebar.multiselect("Select Diagnosis", options=diagnosis_options, default=default_diagnoses)

# Filter the data based on user selection
filtered_df = df[df['age'].isin(age_group) & df['diag_1'].isin(diagnosis)]
|
| 34 |
+
|
| 35 |
# Page 1: Overview
|
| 36 |
if page == "Overview":
    st.title("Hospital Readmissions Overview")

    # Headline figures for the current filter selection.
    outcome_shares = filtered_df['readmitted'].value_counts(normalize=True)
    readmission_rate = outcome_shares.get('yes', 0) * 100
    total_patients = len(filtered_df)

    col1, col2 = st.columns(2)
    col1.metric("Total Patients", total_patients)
    col2.metric("Readmission Rate (%)", f"{readmission_rate:.2f}%")

    # Age Distribution
    st.subheader("Age Distribution of Patients")
    age_axis_labels = {'age': 'Age Group', 'count': 'Number of Patients'}
    fig_age = px.histogram(
        filtered_df,
        x='age',
        nbins=10,
        title="Age Distribution",
        labels=age_axis_labels,
    )
    fig_age.update_layout(showlegend=False)
    st.plotly_chart(fig_age, use_container_width=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
|
| 54 |
# Page 2: Factors Influencing Readmissions
|
| 55 |
elif page == "Factors Influencing Readmissions":
|
| 56 |
st.title("Factors Influencing Readmissions")
|
| 57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
# Readmission by Medical Specialty
|
| 59 |
+
st.subheader("Readmission Rates by Medical Specialty")
|
| 60 |
+
specialty_readmission = filtered_df.groupby('medical_specialty')['readmitted'].value_counts(normalize=True).unstack().fillna(0)
|
| 61 |
specialty_readmission = specialty_readmission.sort_values(by='yes', ascending=False).head(10)
|
| 62 |
+
fig_specialty = px.bar(specialty_readmission, y='yes', title="Top Specialties with High Readmission Rates",
|
| 63 |
+
labels={'index': 'Medical Specialty', 'yes': 'Readmission Rate (%)'})
|
| 64 |
+
fig_specialty.update_layout(yaxis_tickformat=".0%")
|
| 65 |
+
st.plotly_chart(fig_specialty, use_container_width=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
|
| 67 |
# Page 3: Hospital Stay and Procedures
|
| 68 |
elif page == "Hospital Stay and Procedures":
|
|
|
|
| 70 |
|
| 71 |
# Hospital Stay Duration by Readmission Status
|
| 72 |
st.subheader("Hospital Stay Duration by Readmission Status")
|
| 73 |
+
fig_stay = px.box(filtered_df, x='readmitted', y='time_in_hospital', title="Hospital Stay Duration by Readmission Status",
|
| 74 |
+
labels={'readmitted': 'Readmitted', 'time_in_hospital': 'Hospital Stay (Days)'})
|
| 75 |
+
st.plotly_chart(fig_stay, use_container_width=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
|
| 77 |
# Page 4: Patient Health Insights
|
| 78 |
elif page == "Patient Health Insights":
|
| 79 |
st.title("Patient Health Insights")
|
| 80 |
|
| 81 |
# Diabetes and Readmissions
|
| 82 |
+
st.subheader("Diabetes Medication and Readmissions")
|
| 83 |
+
diabetes_readmission = filtered_df.groupby('diabetes_med')['readmitted'].value_counts(normalize=True).unstack().fillna(0)
|
| 84 |
+
fig_diabetes = px.bar(diabetes_readmission, barmode='group', title="Readmission Rates by Diabetes Medication",
|
| 85 |
+
labels={'index': 'Diabetes Medication', 'yes': 'Readmission Rate (%)'})
|
| 86 |
+
fig_diabetes.update_layout(yaxis_tickformat=".0%")
|
| 87 |
+
st.plotly_chart(fig_diabetes, use_container_width=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
|
| 89 |
# Page 5: Recommendations
|
| 90 |
elif page == "Recommendations":
|
|
|
|
| 106 |
|
| 107 |
# Interactive Widget
|
| 108 |
st.subheader("Personalized Risk Factors")
|
| 109 |
+
selected_age = st.selectbox("Select Age Group", sorted(filtered_df['age'].unique()))
|
| 110 |
+
selected_diagnosis = st.selectbox("Select Diagnosis", filtered_df['diag_1'].unique())
|
| 111 |
|
| 112 |
+
personalized_df = filtered_df[(filtered_df['age'] == selected_age) & (filtered_df['diag_1'] == selected_diagnosis)]
|
| 113 |
+
readmission_rate_personalized = personalized_df['readmitted'].value_counts(normalize=True).get('yes', 0) * 100
|
| 114 |
+
st.metric("Readmission Rate for Selected Group", f"{readmission_rate_personalized:.2f}%")
|