AbdramaneB committed on
Commit
b7a0a90
·
verified ·
1 Parent(s): 8e2efdb

Upload folder using huggingface_hub

Browse files
Files changed (5) hide show
  1. .amlignore +6 -0
  2. Dockerfile +32 -12
  3. requirements.txt +6 -3
  4. src/.amlignore +6 -0
  5. src/streamlit_app.py +169 -38
.amlignore ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ ## This file was auto generated by the Azure Machine Learning Studio. Please do not remove.
2
+ ## Read more about the .amlignore file here: https://docs.microsoft.com/azure/machine-learning/how-to-save-write-experiment-files#storage-limits-of-experiment-snapshots
3
+
4
+ .ipynb_aml_checkpoints/
5
+ *.amltmp
6
+ *.amltemp
Dockerfile CHANGED
@@ -1,20 +1,40 @@
1
- FROM python:3.13.5-slim
 
2
 
3
- WORKDIR /app
4
-
5
- RUN apt-get update && apt-get install -y \
6
- build-essential \
7
- curl \
8
- git \
9
- && rm -rf /var/lib/apt/lists/*
10
 
 
11
  COPY requirements.txt ./
12
- COPY src/ ./src/
13
 
14
- RUN pip3 install -r requirements.txt
 
 
 
 
 
 
 
15
 
16
- EXPOSE 8501
 
 
 
 
 
 
17
 
 
 
 
 
 
18
  HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
19
 
20
- ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]
 
 
1
+ # Use Python 3.10 to match your Azure env
2
+ FROM python:3.10-slim
3
 
4
+ # --- Create non-root user -----------------------------------------------------
5
+ RUN useradd -m -u 1000 user
6
+ ENV HOME=/home/user \
7
+ PATH=/home/user/.local/bin:$PATH \
8
+ PIP_NO_CACHE_DIR=1 \
9
+ PIP_DISABLE_PIP_VERSION_CHECK=1
10
+ WORKDIR $HOME/app
11
 
12
+ # --- Copy requirements first for caching -------------------------------------
13
  COPY requirements.txt ./
 
14
 
15
+ # --- Install OS dependencies --------------------------------------------------
16
+ USER root
17
+ ENV DEBIAN_FRONTEND=noninteractive
18
+ RUN apt-get update && apt-get install -y --no-install-recommends \
19
+ curl \
20
+ git \
21
+ build-essential \
22
+ && rm -rf /var/lib/apt/lists/*
23
 
24
+ # --- Install Python dependencies ---------------------------------------------
25
+ RUN python -m pip install --upgrade pip \
26
+ && python -m pip install -r requirements.txt \
27
+ && python -m streamlit --version
28
+
29
+ # --- Copy application code ----------------------------------------------------
30
+ COPY --chown=user . $HOME/app
31
 
32
+ # --- Switch to non-root user --------------------------------------------------
33
+ USER user
34
+
35
+ # --- Configure Streamlit server ----------------------------------------------
36
+ EXPOSE 8501
37
  HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
38
 
39
+ # --- Launch the Streamlit app -------------------------------------------------
40
+ ENTRYPOINT ["python", "-m", "streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0", "--server.headless=true", "--server.enableXsrfProtection=false"]
requirements.txt CHANGED
@@ -1,3 +1,6 @@
1
- altair
2
- pandas
3
- streamlit
 
 
 
 
1
+ pandas>=2.1,<3
2
+ matplotlib>=3.8
3
+ seaborn>=0.13
4
+ scipy>=1.10,<2
5
+ numpy>=1.23,<2
6
+ streamlit>=1.30
src/.amlignore ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ ## This file was auto generated by the Azure Machine Learning Studio. Please do not remove.
2
+ ## Read more about the .amlignore file here: https://docs.microsoft.com/azure/machine-learning/how-to-save-write-experiment-files#storage-limits-of-experiment-snapshots
3
+
4
+ .ipynb_aml_checkpoints/
5
+ *.amltmp
6
+ *.amltemp
src/streamlit_app.py CHANGED
@@ -1,40 +1,171 @@
1
- import altair as alt
2
- import numpy as np
3
- import pandas as pd
4
  import streamlit as st
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
- """
7
- # Welcome to Streamlit!
8
-
9
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
- forums](https://discuss.streamlit.io).
12
-
13
- In the meantime, below is an example of what you can do with just a few lines of code:
14
- """
15
-
16
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
17
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
18
-
19
- indices = np.linspace(0, 1, num_points)
20
- theta = 2 * np.pi * num_turns * indices
21
- radius = indices
22
-
23
- x = radius * np.cos(theta)
24
- y = radius * np.sin(theta)
25
-
26
- df = pd.DataFrame({
27
- "x": x,
28
- "y": y,
29
- "idx": indices,
30
- "rand": np.random.randn(num_points),
31
- })
32
-
33
- st.altair_chart(alt.Chart(df, height=700, width=700)
34
- .mark_point(filled=True)
35
- .encode(
36
- x=alt.X("x", axis=None),
37
- y=alt.Y("y", axis=None),
38
- color=alt.Color("idx", legend=None, scale=alt.Scale()),
39
- size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
40
- ))
 
 
 
 
1
  import streamlit as st
2
+ import seaborn as sns
3
+ import matplotlib.pyplot as plt
4
+ import matplotlib.dates as mdates
5
+ import pandas as pd
6
+
7
+ # Load data
8
+ def load_data():
9
+ df = pd.read_csv("./processed_data.csv") # Make sure this file has the columns: preg, plas, pres, skin, test, mass, pedi, age, class
10
+ return df
11
+
12
+ # Create Streamlit app
13
+ def app():
14
+ # Title for the app
15
+ huggingface_page_title = "Diabetes Outcomes Dashboard"
16
+ st.set_page_config(page_title=huggingface_page_title, layout="wide")
17
+
18
+ # --- Custom CSS to adjust sidebar width ---
19
+ st.markdown(
20
+ """
21
+ <style>
22
+ /* Sidebar width */
23
+ [data-testid="stSidebar"] {
24
+ width: 600px;
25
+ min-width: 600px;
26
+ }
27
+ </style>
28
+ """,
29
+ unsafe_allow_html=True
30
+ )
31
+
32
+ # --- Page content ---
33
+ st.title(huggingface_page_title)
34
+
35
+ # Load data
36
+ data = load_data()
37
+
38
+ # Ensure expected columns exist (optional safety check)
39
+ expected_cols = {"preg", "plas", "pres", "skin", "test", "mass", "pedi", "age", "class"}
40
+ if not expected_cols.issubset(set(data.columns)):
41
+ st.error(f"Dataset is missing some expected columns. Found: {list(data.columns)}")
42
+ return
43
+
44
+ # --- Key Metrics from the data ---
45
+ total_obs = len(data)
46
+ n_diabetes = int(data["class"].sum()) # assuming 1 = diabetes, 0 = no diabetes
47
+ diabetes_rate = (n_diabetes / total_obs * 100) if total_obs > 0 else 0
48
+
49
+ avg_age = round(data["age"].mean(), 1)
50
+ median_age = round(data["age"].median(), 1)
51
+
52
+ avg_preg = round(data["preg"].mean(), 1)
53
+ avg_bmi = round(data["mass"].mean(), 1)
54
+ avg_glucose = round(data["plas"].mean(), 1)
55
+ avg_bp = round(data["pres"].mean(), 1)
56
+ avg_pedi = round(data["pedi"].mean(), 3)
57
+
58
+ # Display metrics in the sidebar
59
+ st.sidebar.header("Key Metrics")
60
+ st.sidebar.metric("Total patients", total_obs)
61
+ st.sidebar.metric("Patients with diabetes", n_diabetes)
62
+ st.sidebar.metric("Diabetes prevalence (%)", f"{diabetes_rate:.1f}")
63
+
64
+ st.sidebar.markdown("---")
65
+ st.sidebar.metric("Avg age (years)", avg_age)
66
+ st.sidebar.metric("Median age (years)", median_age)
67
+
68
+ st.sidebar.markdown("---")
69
+ st.sidebar.metric("Avg pregnancies", avg_preg)
70
+ st.sidebar.metric("Avg BMI", avg_bmi)
71
+ st.sidebar.metric("Avg plasma glucose", avg_glucose)
72
+ st.sidebar.metric("Avg blood pressure (mm Hg)", avg_bp)
73
+ st.sidebar.metric("Avg diabetes pedigree", avg_pedi)
74
+
75
+ # --- Data preview ---
76
+ st.markdown("### Data preview")
77
+ st.dataframe(data.head())
78
+
79
+ # Styling for seaborn plots
80
+ sns.set_style("whitegrid", {'grid.color': 'lightgrey', 'grid.linestyle': '--'})
81
+
82
+ #////////////////////////////////////////////////////////////////////////////
83
+ # Outcome distribution (class)
84
+ #////////////////////////////////////////////////////////////////////////////
85
+ if "class" in data.columns:
86
+ st.header("Diabetes outcome distribution")
87
+ fig, ax = plt.subplots()
88
+ outcome_counts = data["class"].value_counts().sort_index()
89
+ sns.barplot(x=outcome_counts.index, y=outcome_counts.values, ax=ax)
90
+ ax.set_xlabel("Outcome (0 = No diabetes, 1 = Diabetes)")
91
+ ax.set_ylabel("Count")
92
+ ax.set_title("Diabetes outcome count")
93
+ for i, v in enumerate(outcome_counts.values):
94
+ ax.text(i, v + max(outcome_counts.values) * 0.01, str(v), ha="center", va="bottom", fontsize=9)
95
+ st.pyplot(fig)
96
+ plt.close(fig)
97
+
98
+ #////////////////////////////////////////////////////////////////////////////
99
+ # Feature distributions by outcome (histograms)
100
+ #////////////////////////////////////////////////////////////////////////////
101
+ st.header("Key feature distributions by outcome")
102
+
103
+ feature_display_names = {
104
+ "preg": "Pregnancies",
105
+ "plas": "Plasma glucose concentration",
106
+ "pres": "Diastolic blood pressure (mm Hg)",
107
+ "mass": "Body mass index (BMI)",
108
+ "age": "Age (years)"
109
+ }
110
+
111
+ for col, label in feature_display_names.items():
112
+ if {col, "class"}.issubset(data.columns):
113
+ st.subheader(f"{label} by outcome")
114
+ fig, ax = plt.subplots()
115
+ sns.histplot(
116
+ data=data,
117
+ x=col,
118
+ hue="class",
119
+ multiple="stack",
120
+ bins=30,
121
+ ax=ax
122
+ )
123
+ ax.set_xlabel(label)
124
+ ax.set_title(f"{label} distribution (Diabetes vs No diabetes)")
125
+ st.pyplot(fig)
126
+ plt.close(fig)
127
+
128
+ #////////////////////////////////////////////////////////////////////////////
129
+ # Boxplots of selected features by outcome
130
+ #////////////////////////////////////////////////////////////////////////////
131
+ st.header("Feature boxplots by outcome")
132
+
133
+ box_features = {
134
+ "plas": "Plasma glucose concentration",
135
+ "mass": "Body mass index (BMI)",
136
+ "pedi": "Diabetes pedigree function"
137
+ }
138
+
139
+ for col, label in box_features.items():
140
+ if {col, "class"}.issubset(data.columns):
141
+ st.subheader(f"{label} vs outcome")
142
+ fig, ax = plt.subplots()
143
+ sns.boxplot(
144
+ data=data,
145
+ x="class",
146
+ y=col,
147
+ ax=ax
148
+ )
149
+ ax.set_xlabel("Outcome (0 = No diabetes, 1 = Diabetes)")
150
+ ax.set_ylabel(label)
151
+ ax.set_title(f"{label} vs diabetes outcome")
152
+ st.pyplot(fig)
153
+ plt.close(fig)
154
+
155
+ #////////////////////////////////////////////////////////////////////////////
156
+ # Correlation heatmap of numeric variables
157
+ #////////////////////////////////////////////////////////////////////////////
158
+ st.header("Correlation heatmap (numeric features)")
159
+ numeric_cols = data.select_dtypes(include=["number"]).columns
160
+ if len(numeric_cols) > 1:
161
+ fig, ax = plt.subplots(figsize=(8, 6))
162
+ corr = data[numeric_cols].corr()
163
+ sns.heatmap(corr, annot=True, fmt=".2f", cmap="coolwarm", ax=ax)
164
+ ax.set_title("Correlation heatmap")
165
+ st.pyplot(fig)
166
+ plt.close(fig)
167
+ else:
168
+ st.write("Not enough numeric columns to compute correlations.")
169
 
170
+ if __name__ == "__main__":
171
+ app()