AbdramaneB commited on
Commit
1733b93
·
verified ·
1 Parent(s): 9811692

Upload folder using huggingface_hub

Browse files
Files changed (5) hide show
  1. .amlignore +6 -0
  2. Dockerfile +32 -12
  3. requirements.txt +6 -3
  4. src/.amlignore +6 -0
  5. src/streamlit_app.py +231 -38
.amlignore ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ ## This file was auto generated by the Azure Machine Learning Studio. Please do not remove.
2
+ ## Read more about the .amlignore file here: https://docs.microsoft.com/azure/machine-learning/how-to-save-write-experiment-files#storage-limits-of-experiment-snapshots
3
+
4
+ .ipynb_aml_checkpoints/
5
+ *.amltmp
6
+ *.amltemp
Dockerfile CHANGED
@@ -1,20 +1,40 @@
1
- FROM python:3.13.5-slim
 
2
 
3
- WORKDIR /app
4
-
5
- RUN apt-get update && apt-get install -y \
6
- build-essential \
7
- curl \
8
- git \
9
- && rm -rf /var/lib/apt/lists/*
10
 
 
11
  COPY requirements.txt ./
12
- COPY src/ ./src/
13
 
14
- RUN pip3 install -r requirements.txt
 
 
 
 
 
 
 
15
 
16
- EXPOSE 8501
 
 
 
 
 
 
17
 
 
 
 
 
 
18
  HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
19
 
20
- ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]
 
 
1
# Use Python 3.10 to match your Azure env
FROM python:3.10-slim

# --- Create non-root user -----------------------------------------------------
# Hugging Face Spaces convention: run as uid 1000, keep pip installs in $HOME.
RUN useradd -m -u 1000 user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1
WORKDIR $HOME/app

# --- Copy requirements first for caching -------------------------------------
# Copying only requirements.txt here lets Docker reuse the dependency layer
# when application code changes but dependencies do not.
COPY requirements.txt ./

# --- Install OS dependencies --------------------------------------------------
USER root
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get install -y --no-install-recommends \
    curl \
    git \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# --- Install Python dependencies ---------------------------------------------
# `python -m streamlit --version` acts as a cheap smoke test that the install
# actually produced a runnable streamlit.
RUN python -m pip install --upgrade pip \
    && python -m pip install -r requirements.txt \
    && python -m streamlit --version

# --- Copy application code ----------------------------------------------------
COPY --chown=user . $HOME/app

# --- Switch to non-root user --------------------------------------------------
USER user

# --- Configure Streamlit server ----------------------------------------------
EXPOSE 8501
HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health

# --- Launch the Streamlit app -------------------------------------------------
# NOTE(review): --server.enableXsrfProtection=false disables CSRF protection;
# presumably needed for the hosting proxy — confirm this is intentional before
# exposing the container outside a trusted environment.
ENTRYPOINT ["python", "-m", "streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0", "--server.headless=true", "--server.enableXsrfProtection=false"]
requirements.txt CHANGED
@@ -1,3 +1,6 @@
1
- altair
2
- pandas
3
- streamlit
 
 
 
 
1
+ pandas>=2.1,<3
2
+ matplotlib>=3.8
3
+ seaborn>=0.13
4
+ scipy>=1.10,<2
5
+ numpy>=1.23,<2
6
+ streamlit>=1.30
src/.amlignore ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ ## This file was auto generated by the Azure Machine Learning Studio. Please do not remove.
2
+ ## Read more about the .amlignore file here: https://docs.microsoft.com/azure/machine-learning/how-to-save-write-experiment-files#storage-limits-of-experiment-snapshots
3
+
4
+ .ipynb_aml_checkpoints/
5
+ *.amltmp
6
+ *.amltemp
src/streamlit_app.py CHANGED
@@ -1,40 +1,233 @@
1
- import altair as alt
2
- import numpy as np
3
- import pandas as pd
4
  import streamlit as st
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
- """
7
- # Welcome to Streamlit!
8
-
9
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
- forums](https://discuss.streamlit.io).
12
-
13
- In the meantime, below is an example of what you can do with just a few lines of code:
14
- """
15
-
16
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
17
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
18
-
19
- indices = np.linspace(0, 1, num_points)
20
- theta = 2 * np.pi * num_turns * indices
21
- radius = indices
22
-
23
- x = radius * np.cos(theta)
24
- y = radius * np.sin(theta)
25
-
26
- df = pd.DataFrame({
27
- "x": x,
28
- "y": y,
29
- "idx": indices,
30
- "rand": np.random.randn(num_points),
31
- })
32
-
33
- st.altair_chart(alt.Chart(df, height=700, width=700)
34
- .mark_point(filled=True)
35
- .encode(
36
- x=alt.X("x", axis=None),
37
- y=alt.Y("y", axis=None),
38
- color=alt.Color("idx", legend=None, scale=alt.Scale()),
39
- size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
40
- ))
 
 
 
 
1
  import streamlit as st
2
+ import seaborn as sns
3
+ import matplotlib.pyplot as plt
4
+ import matplotlib.dates as mdates
5
+ import pandas as pd
6
+
7
# Load data
def load_data(path: str = "./processed_data.csv") -> pd.DataFrame:
    """Read the processed machine-tool dataset into a DataFrame.

    Parameters
    ----------
    path : str, optional
        Location of the CSV file. Defaults to ``./processed_data.csv`` so
        existing callers (``app()`` calls this with no arguments) keep the
        original behavior; tests and alternative deployments can pass a
        different file without editing this function.

    Returns
    -------
    pd.DataFrame
        The raw dataset exactly as stored in the CSV.
    """
    df = pd.read_csv(path)  # Replace with your dataset
    return df
11
+
12
# Create Streamlit app
def _display_figure(fig):
    """Render a Matplotlib figure in the page, then close it.

    The previous code called ``plt.show()`` after every ``st.pyplot``; on a
    headless Streamlit server that is a no-op at best, and it leaves each
    figure registered with pyplot, so memory grows on every rerun. Closing
    the figure after handing it to Streamlit fixes both problems.
    """
    st.pyplot(fig)
    plt.close(fig)


def app():
    """Build the Machine Tool Failure dashboard.

    Loads the processed dataset via :func:`load_data`, derives helper columns
    (Celsius temperatures, process-air temperature delta, rotational-speed
    quartile categories), shows key metrics in the sidebar and a set of
    seaborn charts in the main pane. Every derived column and chart is
    guarded by a column-existence check, so the page degrades gracefully
    when a column is absent from the CSV.
    """
    # Title for the app
    huggingface_page_title = "Machine Tool Failure Dashboard"
    st.set_page_config(page_title=huggingface_page_title, layout="wide")

    # --- Custom CSS to adjust sidebar width ---
    st.markdown(
        """
        <style>
        /* Sidebar width */
        [data-testid="stSidebar"] {
            width: 600px;
            min-width: 600px;
        }
        </style>
        """,
        unsafe_allow_html=True
    )

    # --- Page content ---
    st.title(huggingface_page_title)

    # Load data
    data = load_data()

    # --- Create some helper/derived columns ---
    # Kelvin -> Celsius for readability (if temps exist).
    # NOTE(review): assumes the raw temperature columns are in Kelvin —
    # confirm against the dataset documentation.
    if "Air temperature" in data.columns:
        data["Air temperature (°C)"] = data["Air temperature"] - 273.15
    if "Process temperature" in data.columns:
        data["Process temperature (°C)"] = data["Process temperature"] - 273.15

    # Temperature delta (process - air)
    if {"Air temperature", "Process temperature"}.issubset(data.columns):
        data["Temperature delta (°C)"] = data["Process temperature"] - data["Air temperature"]

    # Rotational speed categories (quartiles)
    if "Rotational speed" in data.columns:
        try:
            data["Rotational speed category"] = pd.qcut(
                data["Rotational speed"],
                q=4,
                labels=["Very Low", "Low", "High", "Very High"]
            )
        except ValueError:
            # qcut can fail if there are too many identical values; fallback to cut
            data["Rotational speed category"] = pd.cut(
                data["Rotational speed"],
                bins=4,
                labels=["Very Low", "Low", "High", "Very High"]
            )

    # --- Key Metrics from the data ---
    total_obs = len(data)
    n_failures = int(data["Failure"].sum()) if "Failure" in data.columns else 0
    failure_rate = (n_failures / total_obs * 100) if total_obs > 0 else 0

    n_types = data["Type"].nunique() if "Type" in data.columns else 0
    # Guard: value_counts().idxmax() raises ValueError on an empty/all-NaN
    # column, which would crash the whole page for an empty dataset.
    if "Type" in data.columns and not data["Type"].dropna().empty:
        most_common_type = data["Type"].value_counts().idxmax()
    else:
        most_common_type = "N/A"

    avg_rot_speed = round(data["Rotational speed"].mean(), 2) if "Rotational speed" in data.columns else None
    avg_torque = round(data["Torque"].mean(), 2) if "Torque" in data.columns else None
    avg_tool_wear = round(data["Tool wear"].mean(), 2) if "Tool wear" in data.columns else None

    avg_air_temp_c = (
        round(data["Air temperature (°C)"].mean(), 2)
        if "Air temperature (°C)" in data.columns
        else None
    )
    avg_proc_temp_c = (
        round(data["Process temperature (°C)"].mean(), 2)
        if "Process temperature (°C)" in data.columns
        else None
    )
    avg_temp_delta = (
        round(data["Temperature delta (°C)"].mean(), 2)
        if "Temperature delta (°C)" in data.columns
        else None
    )

    # Display metrics in the sidebar
    st.sidebar.header("Key Metrics")
    st.sidebar.metric("Total observations", total_obs)
    st.sidebar.metric("Number of failures", n_failures)
    st.sidebar.metric("Failure rate (%)", f"{failure_rate:.2f}")

    st.sidebar.metric("Number of machine types", n_types)
    st.sidebar.metric("Most common type", most_common_type)

    # Averages are only shown when the corresponding column exists.
    if avg_rot_speed is not None:
        st.sidebar.metric("Avg rotational speed (RPM)", avg_rot_speed)
    if avg_torque is not None:
        st.sidebar.metric("Avg torque (Nm)", avg_torque)
    if avg_tool_wear is not None:
        st.sidebar.metric("Avg tool wear (min)", avg_tool_wear)
    if avg_air_temp_c is not None:
        st.sidebar.metric("Avg air temperature (°C)", avg_air_temp_c)
    if avg_proc_temp_c is not None:
        st.sidebar.metric("Avg process temperature (°C)", avg_proc_temp_c)
    if avg_temp_delta is not None:
        st.sidebar.metric("Avg temp delta (°C)", avg_temp_delta)

    st.markdown("### Data preview")
    st.dataframe(data.head())

    sns.set_style("whitegrid", {'grid.color': 'lightgrey', 'grid.linestyle': '--'})

    #////////////////////////////////////////////////////////////////////////////
    # Failure distribution
    #////////////////////////////////////////////////////////////////////////////
    if "Failure" in data.columns:
        st.header("Failure distribution")
        fig, ax = plt.subplots()
        failure_counts = data["Failure"].value_counts().sort_index()
        sns.barplot(x=failure_counts.index, y=failure_counts.values, ax=ax)
        ax.set_xlabel("Failure (0 = No, 1 = Yes)")
        ax.set_ylabel("Count")
        ax.set_title("Failure Count")
        if not failure_counts.empty:
            # Hoist the max once; also guards the empty-frame case where
            # max() of an empty sequence would raise.
            top = failure_counts.values.max()
            for i, v in enumerate(failure_counts.values):
                ax.text(i, v + top * 0.01, str(v), ha="center", va="bottom", fontsize=9)
        _display_figure(fig)

    #////////////////////////////////////////////////////////////////////////////
    # Failure rate by machine Type
    #////////////////////////////////////////////////////////////////////////////
    if {"Type", "Failure"}.issubset(data.columns):
        st.header("Failure rate by machine type")
        fig, ax = plt.subplots()
        failure_by_type = data.groupby("Type")["Failure"].mean().reset_index()
        failure_by_type["Failure_rate_%"] = failure_by_type["Failure"] * 100
        sns.barplot(data=failure_by_type, x="Type", y="Failure_rate_%", ax=ax)
        ax.set_ylabel("Failure rate (%)")
        ax.set_title("Failure rate by machine type")
        for i, row in failure_by_type.iterrows():
            ax.text(i, row["Failure_rate_%"] + 0.5, f"{row['Failure_rate_%']:.1f}%", ha="center", fontsize=9)
        _display_figure(fig)

    #////////////////////////////////////////////////////////////////////////////
    # Rotational speed distribution by failure
    #////////////////////////////////////////////////////////////////////////////
    if {"Rotational speed", "Failure"}.issubset(data.columns):
        st.header("Rotational speed distribution by failure")
        fig, ax = plt.subplots()
        sns.histplot(
            data=data,
            x="Rotational speed",
            hue="Failure",
            multiple="stack",
            bins=30,
            ax=ax
        )
        ax.set_xlabel("Rotational speed (RPM)")
        ax.set_title("Rotational speed distribution (Failed vs Not Failed)")
        _display_figure(fig)

    #////////////////////////////////////////////////////////////////////////////
    # Torque vs Rotational speed (scatter)
    #////////////////////////////////////////////////////////////////////////////
    if {"Torque", "Rotational speed"}.issubset(data.columns):
        st.header("Torque vs Rotational speed")
        fig, ax = plt.subplots()
        # Color by failure status only when the column is available.
        hue = "Failure" if "Failure" in data.columns else None
        sns.scatterplot(
            data=data,
            x="Rotational speed",
            y="Torque",
            hue=hue,
            alpha=0.6,
            ax=ax
        )
        ax.set_xlabel("Rotational speed (RPM)")
        ax.set_ylabel("Torque (Nm)")
        ax.set_title("Torque vs Rotational speed")
        _display_figure(fig)

    #////////////////////////////////////////////////////////////////////////////
    # Tool wear distribution by failure
    #////////////////////////////////////////////////////////////////////////////
    if {"Tool wear", "Failure"}.issubset(data.columns):
        st.header("Tool wear distribution by failure")
        fig, ax = plt.subplots()
        sns.boxplot(
            data=data,
            x="Failure",
            y="Tool wear",
            ax=ax
        )
        ax.set_xlabel("Failure (0 = No, 1 = Yes)")
        ax.set_ylabel("Tool wear (min)")
        ax.set_title("Tool wear vs Failure")
        _display_figure(fig)

    #////////////////////////////////////////////////////////////////////////////
    # Correlation heatmap of numeric variables
    #////////////////////////////////////////////////////////////////////////////
    st.header("Correlation heatmap (numeric features)")
    numeric_cols = data.select_dtypes(include=["number"]).columns
    if len(numeric_cols) > 1:
        fig, ax = plt.subplots(figsize=(8, 6))
        corr = data[numeric_cols].corr()
        sns.heatmap(corr, annot=True, fmt=".2f", cmap="coolwarm", ax=ax)
        ax.set_title("Correlation heatmap")
        _display_figure(fig)
    else:
        st.write("Not enough numeric columns to compute correlations.")
231
 
232
# Script entry point: build the dashboard when this file is executed directly
# (e.g. via `streamlit run src/streamlit_app.py`).
if __name__ == "__main__":
    app()