Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files- .amlignore +6 -0
- Dockerfile +32 -12
- requirements.txt +6 -3
- src/.amlignore +6 -0
- src/streamlit_app.py +169 -38
.amlignore
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## This file was auto generated by the Azure Machine Learning Studio. Please do not remove.
|
| 2 |
+
## Read more about the .amlignore file here: https://docs.microsoft.com/azure/machine-learning/how-to-save-write-experiment-files#storage-limits-of-experiment-snapshots
|
| 3 |
+
|
| 4 |
+
.ipynb_aml_checkpoints/
|
| 5 |
+
*.amltmp
|
| 6 |
+
*.amltemp
|
Dockerfile
CHANGED
|
@@ -1,20 +1,40 @@
|
|
| 1 |
-
|
|
|
|
| 2 |
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
|
|
|
|
| 11 |
COPY requirements.txt ./
|
| 12 |
-
COPY src/ ./src/
|
| 13 |
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
|
| 19 |
|
| 20 |
-
|
|
|
|
|
|
| 1 |
+
# Use Python 3.10 to match your Azure env
|
| 2 |
+
FROM python:3.10-slim
|
| 3 |
|
| 4 |
+
# --- Create non-root user -----------------------------------------------------
|
| 5 |
+
RUN useradd -m -u 1000 user
|
| 6 |
+
ENV HOME=/home/user \
|
| 7 |
+
PATH=/home/user/.local/bin:$PATH \
|
| 8 |
+
PIP_NO_CACHE_DIR=1 \
|
| 9 |
+
PIP_DISABLE_PIP_VERSION_CHECK=1
|
| 10 |
+
WORKDIR $HOME/app
|
| 11 |
|
| 12 |
+
# --- Copy requirements first for caching -------------------------------------
|
| 13 |
COPY requirements.txt ./
|
|
|
|
| 14 |
|
| 15 |
+
# --- Install OS dependencies --------------------------------------------------
|
| 16 |
+
USER root
|
| 17 |
+
ENV DEBIAN_FRONTEND=noninteractive
|
| 18 |
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 19 |
+
curl \
|
| 20 |
+
git \
|
| 21 |
+
build-essential \
|
| 22 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 23 |
|
| 24 |
+
# --- Install Python dependencies ---------------------------------------------
|
| 25 |
+
RUN python -m pip install --upgrade pip \
|
| 26 |
+
&& python -m pip install -r requirements.txt \
|
| 27 |
+
&& python -m streamlit --version
|
| 28 |
+
|
| 29 |
+
# --- Copy application code ----------------------------------------------------
|
| 30 |
+
COPY --chown=user . $HOME/app
|
| 31 |
|
| 32 |
+
# --- Switch to non-root user --------------------------------------------------
|
| 33 |
+
USER user
|
| 34 |
+
|
| 35 |
+
# --- Configure Streamlit server ----------------------------------------------
|
| 36 |
+
EXPOSE 8501
|
| 37 |
HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
|
| 38 |
|
| 39 |
+
# --- Launch the Streamlit app -------------------------------------------------
|
| 40 |
+
ENTRYPOINT ["python", "-m", "streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0", "--server.headless=true", "--server.enableXsrfProtection=false"]
|
requirements.txt
CHANGED
|
@@ -1,3 +1,6 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
pandas>=2.1,<3
|
| 2 |
+
matplotlib>=3.8
|
| 3 |
+
seaborn>=0.13
|
| 4 |
+
scipy>=1.10,<2
|
| 5 |
+
numpy>=1.23,<2
|
| 6 |
+
streamlit>=1.30
|
src/.amlignore
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## This file was auto generated by the Azure Machine Learning Studio. Please do not remove.
|
| 2 |
+
## Read more about the .amlignore file here: https://docs.microsoft.com/azure/machine-learning/how-to-save-write-experiment-files#storage-limits-of-experiment-snapshots
|
| 3 |
+
|
| 4 |
+
.ipynb_aml_checkpoints/
|
| 5 |
+
*.amltmp
|
| 6 |
+
*.amltemp
|
src/streamlit_app.py
CHANGED
|
@@ -1,40 +1,171 @@
|
|
| 1 |
-
import altair as alt
|
| 2 |
-
import numpy as np
|
| 3 |
-
import pandas as pd
|
| 4 |
import streamlit as st
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
-
""
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
|
| 10 |
-
If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
|
| 11 |
-
forums](https://discuss.streamlit.io).
|
| 12 |
-
|
| 13 |
-
In the meantime, below is an example of what you can do with just a few lines of code:
|
| 14 |
-
"""
|
| 15 |
-
|
| 16 |
-
num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
|
| 17 |
-
num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
|
| 18 |
-
|
| 19 |
-
indices = np.linspace(0, 1, num_points)
|
| 20 |
-
theta = 2 * np.pi * num_turns * indices
|
| 21 |
-
radius = indices
|
| 22 |
-
|
| 23 |
-
x = radius * np.cos(theta)
|
| 24 |
-
y = radius * np.sin(theta)
|
| 25 |
-
|
| 26 |
-
df = pd.DataFrame({
|
| 27 |
-
"x": x,
|
| 28 |
-
"y": y,
|
| 29 |
-
"idx": indices,
|
| 30 |
-
"rand": np.random.randn(num_points),
|
| 31 |
-
})
|
| 32 |
-
|
| 33 |
-
st.altair_chart(alt.Chart(df, height=700, width=700)
|
| 34 |
-
.mark_point(filled=True)
|
| 35 |
-
.encode(
|
| 36 |
-
x=alt.X("x", axis=None),
|
| 37 |
-
y=alt.Y("y", axis=None),
|
| 38 |
-
color=alt.Color("idx", legend=None, scale=alt.Scale()),
|
| 39 |
-
size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
|
| 40 |
-
))
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
+
import seaborn as sns
|
| 3 |
+
import matplotlib.pyplot as plt
|
| 4 |
+
import matplotlib.dates as mdates
|
| 5 |
+
import pandas as pd
|
| 6 |
+
|
| 7 |
+
# Load data
|
| 8 |
+
def load_data():
    """Read the dashboard dataset from ``./processed_data.csv``.

    The path is relative to the working directory of the running process.

    Returns:
        The DataFrame produced by ``pandas.read_csv`` for that file.
    """
    # The file must provide the columns:
    # preg, plas, pres, skin, test, mass, pedi, age, class
    return pd.read_csv("./processed_data.csv")
|
| 11 |
+
|
| 12 |
+
# Create Streamlit app
|
| 13 |
+
def _show_figure(fig):
    """Render a matplotlib figure in the page, then close it to free memory."""
    st.pyplot(fig)
    plt.close(fig)


def _render_sidebar_metrics(data):
    """Compute summary statistics from ``data`` and show them in the sidebar."""
    total_obs = len(data)
    # Summing the column counts positives only if "class" is coded
    # 1 = diabetes, 0 = no diabetes (assumption carried over from the
    # original code; verify against the dataset).
    n_diabetes = int(data["class"].sum())
    # Guard against an empty dataset to avoid a division by zero.
    diabetes_rate = (n_diabetes / total_obs * 100) if total_obs > 0 else 0

    avg_age = round(data["age"].mean(), 1)
    median_age = round(data["age"].median(), 1)

    avg_preg = round(data["preg"].mean(), 1)
    avg_bmi = round(data["mass"].mean(), 1)
    avg_glucose = round(data["plas"].mean(), 1)
    avg_bp = round(data["pres"].mean(), 1)
    avg_pedi = round(data["pedi"].mean(), 3)

    st.sidebar.header("Key Metrics")
    st.sidebar.metric("Total patients", total_obs)
    st.sidebar.metric("Patients with diabetes", n_diabetes)
    st.sidebar.metric("Diabetes prevalence (%)", f"{diabetes_rate:.1f}")

    st.sidebar.markdown("---")
    st.sidebar.metric("Avg age (years)", avg_age)
    st.sidebar.metric("Median age (years)", median_age)

    st.sidebar.markdown("---")
    st.sidebar.metric("Avg pregnancies", avg_preg)
    st.sidebar.metric("Avg BMI", avg_bmi)
    st.sidebar.metric("Avg plasma glucose", avg_glucose)
    st.sidebar.metric("Avg blood pressure (mm Hg)", avg_bp)
    st.sidebar.metric("Avg diabetes pedigree", avg_pedi)


def _plot_outcome_counts(data):
    """Draw a bar chart of how many rows fall into each ``class`` value."""
    st.header("Diabetes outcome distribution")
    fig, ax = plt.subplots()
    outcome_counts = data["class"].value_counts().sort_index()
    sns.barplot(x=outcome_counts.index, y=outcome_counts.values, ax=ax)
    ax.set_xlabel("Outcome (0 = No diabetes, 1 = Diabetes)")
    ax.set_ylabel("Count")
    ax.set_title("Diabetes outcome count")
    # The label offset (1% of the tallest bar) is the same for every bar,
    # so compute it once instead of on each iteration.
    label_offset = outcome_counts.max() * 0.01
    for i, v in enumerate(outcome_counts.values):
        ax.text(i, v + label_offset, str(v), ha="center", va="bottom", fontsize=9)
    _show_figure(fig)


def _plot_feature_histograms(data):
    """Draw one stacked histogram per key feature, split by ``class``."""
    st.header("Key feature distributions by outcome")

    feature_display_names = {
        "preg": "Pregnancies",
        "plas": "Plasma glucose concentration",
        "pres": "Diastolic blood pressure (mm Hg)",
        "mass": "Body mass index (BMI)",
        "age": "Age (years)",
    }

    for col, label in feature_display_names.items():
        st.subheader(f"{label} by outcome")
        fig, ax = plt.subplots()
        sns.histplot(
            data=data,
            x=col,
            hue="class",
            multiple="stack",
            bins=30,
            ax=ax,
        )
        ax.set_xlabel(label)
        ax.set_title(f"{label} distribution (Diabetes vs No diabetes)")
        _show_figure(fig)


def _plot_feature_boxplots(data):
    """Draw one boxplot per selected feature, grouped by ``class``."""
    st.header("Feature boxplots by outcome")

    box_features = {
        "plas": "Plasma glucose concentration",
        "mass": "Body mass index (BMI)",
        "pedi": "Diabetes pedigree function",
    }

    for col, label in box_features.items():
        st.subheader(f"{label} vs outcome")
        fig, ax = plt.subplots()
        sns.boxplot(
            data=data,
            x="class",
            y=col,
            ax=ax,
        )
        ax.set_xlabel("Outcome (0 = No diabetes, 1 = Diabetes)")
        ax.set_ylabel(label)
        ax.set_title(f"{label} vs diabetes outcome")
        _show_figure(fig)


def _plot_correlation_heatmap(data):
    """Draw an annotated correlation heatmap over the numeric columns."""
    st.header("Correlation heatmap (numeric features)")
    numeric_cols = data.select_dtypes(include=["number"]).columns
    # A correlation matrix needs at least two numeric columns to be useful.
    if len(numeric_cols) <= 1:
        st.write("Not enough numeric columns to compute correlations.")
        return

    fig, ax = plt.subplots(figsize=(8, 6))
    corr = data[numeric_cols].corr()
    sns.heatmap(corr, annot=True, fmt=".2f", cmap="coolwarm", ax=ax)
    ax.set_title("Correlation heatmap")
    _show_figure(fig)


# Create Streamlit app
def app():
    """Build the "Diabetes Outcomes Dashboard" Streamlit page.

    Configures the page, loads the dataset through ``load_data()``, and
    checks that the required columns are present. If any are missing, an
    error is shown and rendering stops. Otherwise it renders, in order:
    sidebar metrics, a data preview, the outcome bar chart, per-feature
    histograms, per-feature boxplots and a correlation heatmap.

    Returns:
        None.
    """
    # Title for the app
    huggingface_page_title = "Diabetes Outcomes Dashboard"
    st.set_page_config(page_title=huggingface_page_title, layout="wide")

    # --- Custom CSS to adjust sidebar width ---
    st.markdown(
        """
        <style>
            /* Sidebar width */
            [data-testid="stSidebar"] {
                width: 600px;
                min-width: 600px;
            }
        </style>
        """,
        unsafe_allow_html=True
    )

    # --- Page content ---
    st.title(huggingface_page_title)

    # Load data
    data = load_data()

    # Every section below reads these columns, so validate once up front.
    # After this guard the per-section column checks are unnecessary.
    expected_cols = {"preg", "plas", "pres", "skin", "test", "mass", "pedi", "age", "class"}
    if not expected_cols.issubset(set(data.columns)):
        st.error(f"Dataset is missing some expected columns. Found: {list(data.columns)}")
        return

    # --- Key metrics in the sidebar ---
    _render_sidebar_metrics(data)

    # --- Data preview ---
    st.markdown("### Data preview")
    st.dataframe(data.head())

    # Styling for seaborn plots
    sns.set_style("whitegrid", {'grid.color': 'lightgrey', 'grid.linestyle': '--'})

    # --- Chart sections, in display order ---
    _plot_outcome_counts(data)
    _plot_feature_histograms(data)
    _plot_feature_boxplots(data)
    _plot_correlation_heatmap(data)
|
| 169 |
|
| 170 |
+
# Script entry point: build the dashboard when this file is executed as the
# main module.
if __name__ == "__main__":
    app()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|