Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import seaborn as sns | |
| import matplotlib.pyplot as plt | |
| import matplotlib.dates as mdates | |
| import pandas as pd | |
| # Load data | |
def load_data(path: str = "./processed_data.csv") -> pd.DataFrame:
    """Read the dashboard's dataset from a CSV file.

    Args:
        path: Location of the CSV file. Defaults to ``./processed_data.csv``
            (a path relative to the current working directory), so existing
            zero-argument callers keep their behavior. The file is expected
            to contain the columns: preg, plas, pres, skin, test, mass,
            pedi, age, class.

    Returns:
        The parsed file as a DataFrame, exactly as produced by
        ``pd.read_csv`` with its default options.
    """
    return pd.read_csv(path)
| # Create Streamlit app | |
_PAGE_TITLE = "Diabetes Outcomes Dashboard"

# Columns read by the dashboard sections; validated once at the top of app()
# so the individual sections do not need their own column guards.
_EXPECTED_COLUMNS = ("preg", "plas", "pres", "skin", "test", "mass", "pedi", "age", "class")

_OUTCOME_AXIS_LABEL = "Outcome (0 = No diabetes, 1 = Diabetes)"

# Custom CSS that widens the Streamlit sidebar.
_SIDEBAR_CSS = """
<style>
/* Sidebar width */
[data-testid="stSidebar"] {
    width: 600px;
    min-width: 600px;
}
</style>
"""

# column name -> human-readable label, for the stacked histograms.
_HISTOGRAM_FEATURES = {
    "preg": "Pregnancies",
    "plas": "Plasma glucose concentration",
    "pres": "Diastolic blood pressure (mm Hg)",
    "mass": "Body mass index (BMI)",
    "age": "Age (years)",
}

# column name -> human-readable label, for the per-outcome boxplots.
_BOXPLOT_FEATURES = {
    "plas": "Plasma glucose concentration",
    "mass": "Body mass index (BMI)",
    "pedi": "Diabetes pedigree function",
}


def _render_sidebar_metrics(data):
    """Show headline statistics of a non-empty dataset in the sidebar."""
    total_obs = len(data)
    # Summing the column counts positives only if it is coded 1 = diabetes,
    # 0 = no diabetes (assumption carried over from the original code).
    n_diabetes = int(data["class"].sum())
    diabetes_rate = n_diabetes / total_obs * 100

    sidebar = st.sidebar
    sidebar.header("Key Metrics")
    sidebar.metric("Total patients", total_obs)
    sidebar.metric("Patients with diabetes", n_diabetes)
    sidebar.metric("Diabetes prevalence (%)", f"{diabetes_rate:.1f}")
    sidebar.markdown("---")
    sidebar.metric("Avg age (years)", round(data["age"].mean(), 1))
    sidebar.metric("Median age (years)", round(data["age"].median(), 1))
    sidebar.markdown("---")
    sidebar.metric("Avg pregnancies", round(data["preg"].mean(), 1))
    sidebar.metric("Avg BMI", round(data["mass"].mean(), 1))
    sidebar.metric("Avg plasma glucose", round(data["plas"].mean(), 1))
    sidebar.metric("Avg blood pressure (mm Hg)", round(data["pres"].mean(), 1))
    sidebar.metric("Avg diabetes pedigree", round(data["pedi"].mean(), 3))


def _plot_outcome_counts(data):
    """Draw a bar chart of how many rows fall into each outcome class."""
    st.header("Diabetes outcome distribution")
    outcome_counts = data["class"].value_counts().sort_index()
    fig, ax = plt.subplots()
    try:
        sns.barplot(x=outcome_counts.index, y=outcome_counts.values, ax=ax)
        ax.set_xlabel(_OUTCOME_AXIS_LABEL)
        ax.set_ylabel("Count")
        ax.set_title("Diabetes outcome count")
        # Lift each count label slightly above its bar; the offset is the same
        # for every bar, so compute it once instead of per iteration.
        label_offset = outcome_counts.max() * 0.01
        for position, count in enumerate(outcome_counts.values):
            ax.text(position, count + label_offset, str(count),
                    ha="center", va="bottom", fontsize=9)
        st.pyplot(fig)
    finally:
        # Close even if plotting or rendering raised, so figures do not pile up.
        plt.close(fig)


def _plot_feature_histograms(data):
    """Draw one stacked histogram per selected feature, split by outcome."""
    st.header("Key feature distributions by outcome")
    for column, label in _HISTOGRAM_FEATURES.items():
        st.subheader(f"{label} by outcome")
        fig, ax = plt.subplots()
        try:
            sns.histplot(
                data=data,
                x=column,
                hue="class",
                multiple="stack",
                bins=30,
                ax=ax,
            )
            ax.set_xlabel(label)
            ax.set_title(f"{label} distribution (Diabetes vs No diabetes)")
            st.pyplot(fig)
        finally:
            plt.close(fig)


def _plot_feature_boxplots(data):
    """Draw one boxplot per selected feature, grouped by outcome."""
    st.header("Feature boxplots by outcome")
    for column, label in _BOXPLOT_FEATURES.items():
        st.subheader(f"{label} vs outcome")
        fig, ax = plt.subplots()
        try:
            sns.boxplot(data=data, x="class", y=column, ax=ax)
            ax.set_xlabel(_OUTCOME_AXIS_LABEL)
            ax.set_ylabel(label)
            ax.set_title(f"{label} vs diabetes outcome")
            st.pyplot(fig)
        finally:
            plt.close(fig)


def _plot_correlation_heatmap(data):
    """Draw an annotated correlation heatmap over the numeric columns."""
    st.header("Correlation heatmap (numeric features)")
    numeric_cols = data.select_dtypes(include=["number"]).columns
    if len(numeric_cols) <= 1:
        st.write("Not enough numeric columns to compute correlations.")
        return

    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        corr = data[numeric_cols].corr()
        sns.heatmap(corr, annot=True, fmt=".2f", cmap="coolwarm", ax=ax)
        ax.set_title("Correlation heatmap")
        st.pyplot(fig)
    finally:
        plt.close(fig)


def app():
    """Build the diabetes outcomes dashboard.

    Configures the page, loads the dataset via ``load_data()``, validates it,
    then renders the sidebar metrics, a data preview, and the plot sections
    (outcome counts, histograms, boxplots, correlation heatmap) in that order.

    Rendering stops early with an on-page message when the dataset lacks any
    expected column or contains no rows. Errors raised by ``load_data()``
    itself are not caught here.
    """
    # Page configuration is issued before any other Streamlit call.
    st.set_page_config(page_title=_PAGE_TITLE, layout="wide")
    st.markdown(_SIDEBAR_CSS, unsafe_allow_html=True)
    st.title(_PAGE_TITLE)

    data = load_data()

    # Validate once up front; every section below relies on these columns.
    missing = [name for name in _EXPECTED_COLUMNS if name not in data.columns]
    if missing:
        st.error(
            f"Dataset is missing expected columns: {missing}. "
            f"Found: {list(data.columns)}"
        )
        return

    # With zero rows the metrics would all be NaN and the plots empty, so
    # report the situation instead of rendering a meaningless dashboard.
    if data.empty:
        st.warning("Dataset contains no rows; nothing to display.")
        return

    _render_sidebar_metrics(data)

    st.markdown("### Data preview")
    st.dataframe(data.head())

    # Shared seaborn styling for all plots below.
    sns.set_style("whitegrid", {'grid.color': 'lightgrey', 'grid.linestyle': '--'})

    _plot_outcome_counts(data)
    _plot_feature_histograms(data)
    _plot_feature_boxplots(data)
    _plot_correlation_heatmap(data)
# Script entry point: build the dashboard only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    app()