Spaces:

AbdramaneB
/

Pima_reporting

Sleeping

App Files Files Community

AbdramaneB committed on Nov 20, 2025

Commit

b7a0a90

verified ·

1 Parent(s): 8e2efdb

Upload folder using huggingface_hub

Browse files

Files changed (5) hide show

.amlignore +6 -0
Dockerfile +32 -12
requirements.txt +6 -3
src/.amlignore +6 -0
src/streamlit_app.py +169 -38

.amlignore ADDED Viewed

	@@ -0,0 +1,6 @@

+## This file was auto generated by the Azure Machine Learning Studio. Please do not remove.
+## Read more about the .amlignore file here: https://docs.microsoft.com/azure/machine-learning/how-to-save-write-experiment-files#storage-limits-of-experiment-snapshots
+.ipynb_aml_checkpoints/
+*.amltmp
+*.amltemp

Dockerfile CHANGED Viewed

@@ -1,20 +1,40 @@
-FROM python:3.13.5-slim
-WORKDIR /app
-RUN apt-get update && apt-get install -y \
-    build-essential \
-    curl \
-    git \
-    && rm -rf /var/lib/apt/lists/*
 COPY requirements.txt ./
-COPY src/ ./src/
-RUN pip3 install -r requirements.txt
-EXPOSE 8501
 HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
-ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]

+# Use Python 3.10 to match your Azure env
+FROM python:3.10-slim
+# --- Create non-root user -----------------------------------------------------
+# The container is switched to this user (UID 1000) after the build steps below.
+RUN useradd -m -u 1000 user
+# HOME and PATH point at the new user's home directory; the PIP_* variables
+# turn off pip's download cache and its version-check notice during the build.
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:$PATH \
+    PIP_NO_CACHE_DIR=1 \
+    PIP_DISABLE_PIP_VERSION_CHECK=1
+WORKDIR $HOME/app
+# --- Copy requirements first for caching -------------------------------------
+# NOTE(review): this COPY sits before the apt-get layer, so editing
+# requirements.txt also invalidates the OS-package layer below; moving the COPY
+# after the apt-get step would let that layer stay cached.
 COPY requirements.txt ./
+# --- Install OS dependencies --------------------------------------------------
+# NOTE(review): no USER instruction precedes this one, so the build is
+# presumably already running as root here and the switch is only explicit.
+USER root
+ENV DEBIAN_FRONTEND=noninteractive
+# curl is needed by the HEALTHCHECK further down. Recommended packages are
+# skipped and the apt package lists are removed in the same layer.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+      curl \
+      git \
+      build-essential \
+  && rm -rf /var/lib/apt/lists/*
+# --- Install Python dependencies ---------------------------------------------
+# The trailing `streamlit --version` makes the build fail at this step if
+# Streamlit cannot be run after installation.
+RUN python -m pip install --upgrade pip \
+ && python -m pip install -r requirements.txt \
+ && python -m streamlit --version
+# --- Copy application code ----------------------------------------------------
+# Copies the whole build context into the app directory, owned by the non-root user.
+COPY --chown=user . $HOME/app
+# --- Switch to non-root user --------------------------------------------------
+USER user
+# --- Configure Streamlit server ----------------------------------------------
+EXPOSE 8501
+# Probes Streamlit's health endpoint on the port the server is started on below.
 HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
+# --- Launch the Streamlit app -------------------------------------------------
+# The script path is relative to WORKDIR ($HOME/app).
+# NOTE(review): --server.enableXsrfProtection=false turns off Streamlit's XSRF
+# protection; confirm the hosting environment actually requires this.
+ENTRYPOINT ["python", "-m", "streamlit", "run", "src/streamlit_app.py",  "--server.port=8501",  "--server.address=0.0.0.0",  "--server.headless=true",  "--server.enableXsrfProtection=false"]

requirements.txt CHANGED Viewed

@@ -1,3 +1,6 @@
-altair
-pandas
-streamlit

+pandas>=2.1,<3
+matplotlib>=3.8
+seaborn>=0.13
+scipy>=1.10,<2
+numpy>=1.23,<2
+streamlit>=1.30

src/.amlignore ADDED Viewed

	@@ -0,0 +1,6 @@

+## This file was auto generated by the Azure Machine Learning Studio. Please do not remove.
+## Read more about the .amlignore file here: https://docs.microsoft.com/azure/machine-learning/how-to-save-write-experiment-files#storage-limits-of-experiment-snapshots
+.ipynb_aml_checkpoints/
+*.amltmp
+*.amltemp

src/streamlit_app.py CHANGED Viewed

@@ -1,40 +1,171 @@
-import altair as alt
-import numpy as np
-import pandas as pd
 import streamlit as st
-"""
-# Welcome to Streamlit!
-Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
-If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
-forums](https://discuss.streamlit.io).
-In the meantime, below is an example of what you can do with just a few lines of code:
-"""
-num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
-num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
-indices = np.linspace(0, 1, num_points)
-theta = 2 * np.pi * num_turns * indices
-radius = indices
-x = radius * np.cos(theta)
-y = radius * np.sin(theta)
-df = pd.DataFrame({
-    "x": x,
-    "y": y,
-    "idx": indices,
-    "rand": np.random.randn(num_points),
-})
-st.altair_chart(alt.Chart(df, height=700, width=700)
-    .mark_point(filled=True)
-    .encode(
-        x=alt.X("x", axis=None),
-        y=alt.Y("y", axis=None),
-        color=alt.Color("idx", legend=None, scale=alt.Scale()),
-        size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
-    ))

 import streamlit as st
+import seaborn as sns
+import matplotlib.pyplot as plt
+import matplotlib.dates as mdates
+import pandas as pd
+# Load data
+def load_data():
+    df = pd.read_csv("./processed_data.csv")  # Make sure this file has the columns: preg, plas, pres, skin, test, mass, pedi, age, class
+    return df
+# Create Streamlit app
+def app():
+    # Title for the app
+    huggingface_page_title = "Diabetes Outcomes Dashboard"
+    st.set_page_config(page_title=huggingface_page_title, layout="wide")
+    # --- Custom CSS to adjust sidebar width ---
+    st.markdown(
+        """
+        <style>
+            /* Sidebar width */
+            [data-testid="stSidebar"] {
+                width: 600px;
+                min-width: 600px;
+            }
+        </style>
+        """,
+        unsafe_allow_html=True
+    )
+    # --- Page content ---
+    st.title(huggingface_page_title)
+    # Load data
+    data = load_data()
+    # Ensure expected columns exist (optional safety check)
+    expected_cols = {"preg", "plas", "pres", "skin", "test", "mass", "pedi", "age", "class"}
+    if not expected_cols.issubset(set(data.columns)):
+        st.error(f"Dataset is missing some expected columns. Found: {list(data.columns)}")
+        return
+    # --- Key Metrics from the data ---
+    total_obs = len(data)
+    n_diabetes = int(data["class"].sum())  # assuming 1 = diabetes, 0 = no diabetes
+    diabetes_rate = (n_diabetes / total_obs * 100) if total_obs > 0 else 0
+    avg_age = round(data["age"].mean(), 1)
+    median_age = round(data["age"].median(), 1)
+    avg_preg = round(data["preg"].mean(), 1)
+    avg_bmi = round(data["mass"].mean(), 1)
+    avg_glucose = round(data["plas"].mean(), 1)
+    avg_bp = round(data["pres"].mean(), 1)
+    avg_pedi = round(data["pedi"].mean(), 3)
+    # Display metrics in the sidebar
+    st.sidebar.header("Key Metrics")
+    st.sidebar.metric("Total patients", total_obs)
+    st.sidebar.metric("Patients with diabetes", n_diabetes)
+    st.sidebar.metric("Diabetes prevalence (%)", f"{diabetes_rate:.1f}")
+    st.sidebar.markdown("---")
+    st.sidebar.metric("Avg age (years)", avg_age)
+    st.sidebar.metric("Median age (years)", median_age)
+    st.sidebar.markdown("---")
+    st.sidebar.metric("Avg pregnancies", avg_preg)
+    st.sidebar.metric("Avg BMI", avg_bmi)
+    st.sidebar.metric("Avg plasma glucose", avg_glucose)
+    st.sidebar.metric("Avg blood pressure (mm Hg)", avg_bp)
+    st.sidebar.metric("Avg diabetes pedigree", avg_pedi)
+    # --- Data preview ---
+    st.markdown("### Data preview")
+    st.dataframe(data.head())
+    # Styling for seaborn plots
+    sns.set_style("whitegrid", {'grid.color': 'lightgrey', 'grid.linestyle': '--'})
+    #////////////////////////////////////////////////////////////////////////////
+    # Outcome distribution (class)
+    #////////////////////////////////////////////////////////////////////////////
+    if "class" in data.columns:
+        st.header("Diabetes outcome distribution")
+        fig, ax = plt.subplots()
+        outcome_counts = data["class"].value_counts().sort_index()
+        sns.barplot(x=outcome_counts.index, y=outcome_counts.values, ax=ax)
+        ax.set_xlabel("Outcome (0 = No diabetes, 1 = Diabetes)")
+        ax.set_ylabel("Count")
+        ax.set_title("Diabetes outcome count")
+        for i, v in enumerate(outcome_counts.values):
+            ax.text(i, v + max(outcome_counts.values) * 0.01, str(v), ha="center", va="bottom", fontsize=9)
+        st.pyplot(fig)
+        plt.close(fig)
+    #////////////////////////////////////////////////////////////////////////////
+    # Feature distributions by outcome (histograms)
+    #////////////////////////////////////////////////////////////////////////////
+    st.header("Key feature distributions by outcome")
+    feature_display_names = {
+        "preg": "Pregnancies",
+        "plas": "Plasma glucose concentration",
+        "pres": "Diastolic blood pressure (mm Hg)",
+        "mass": "Body mass index (BMI)",
+        "age": "Age (years)"
+    }
+    for col, label in feature_display_names.items():
+        if {col, "class"}.issubset(data.columns):
+            st.subheader(f"{label} by outcome")
+            fig, ax = plt.subplots()
+            sns.histplot(
+                data=data,
+                x=col,
+                hue="class",
+                multiple="stack",
+                bins=30,
+                ax=ax
+            )
+            ax.set_xlabel(label)
+            ax.set_title(f"{label} distribution (Diabetes vs No diabetes)")
+            st.pyplot(fig)
+            plt.close(fig)
+    #////////////////////////////////////////////////////////////////////////////
+    # Boxplots of selected features by outcome
+    #////////////////////////////////////////////////////////////////////////////
+    st.header("Feature boxplots by outcome")
+    box_features = {
+        "plas": "Plasma glucose concentration",
+        "mass": "Body mass index (BMI)",
+        "pedi": "Diabetes pedigree function"
+    }
+    for col, label in box_features.items():
+        if {col, "class"}.issubset(data.columns):
+            st.subheader(f"{label} vs outcome")
+            fig, ax = plt.subplots()
+            sns.boxplot(
+                data=data,
+                x="class",
+                y=col,
+                ax=ax
+            )
+            ax.set_xlabel("Outcome (0 = No diabetes, 1 = Diabetes)")
+            ax.set_ylabel(label)
+            ax.set_title(f"{label} vs diabetes outcome")
+            st.pyplot(fig)
+            plt.close(fig)
+    #////////////////////////////////////////////////////////////////////////////
+    # Correlation heatmap of numeric variables
+    #////////////////////////////////////////////////////////////////////////////
+    st.header("Correlation heatmap (numeric features)")
+    numeric_cols = data.select_dtypes(include=["number"]).columns
+    if len(numeric_cols) > 1:
+        fig, ax = plt.subplots(figsize=(8, 6))
+        corr = data[numeric_cols].corr()
+        sns.heatmap(corr, annot=True, fmt=".2f", cmap="coolwarm", ax=ax)
+        ax.set_title("Correlation heatmap")
+        st.pyplot(fig)
+        plt.close(fig)
+    else:
+        st.write("Not enough numeric columns to compute correlations.")
+# Script entry point: build the dashboard when this file is run as a script.
+if __name__ == "__main__":
+    app()