simnid committed on
Commit
9533aba
·
verified ·
1 Parent(s): 82250cf

Upload folder using huggingface_hub

Browse files
Files changed (5) hide show
  1. Dockerfile +20 -12
  2. __pycache__/app.cpython-312.pyc +0 -0
  3. app.py +308 -0
  4. bulk_data_upload.py +54 -0
  5. requirements.txt +13 -3
Dockerfile CHANGED
@@ -1,20 +1,28 @@
1
- FROM python:3.13.5-slim
 
2
 
 
3
  WORKDIR /app
4
 
5
- RUN apt-get update && apt-get install -y \
6
- build-essential \
7
- curl \
8
- git \
9
- && rm -rf /var/lib/apt/lists/*
10
-
11
- COPY requirements.txt ./
12
- COPY src/ ./src/
13
 
 
14
  RUN pip3 install -r requirements.txt
15
 
16
- EXPOSE 8501
 
 
 
 
 
 
17
 
18
- HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
 
19
 
20
- ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]
 
 
 
 
 
1
# Use a minimal base image with Python 3.9 installed
FROM python:3.9-slim

# Install Python dependencies from requirements.txt alone.
# Copying only this file (instead of the whole build context) keeps the
# dependency layer cached until requirements.txt itself changes, and avoids
# leaving a second, root-owned copy of the project in /app.
WORKDIR /app
COPY requirements.txt .
RUN pip3 install --no-cache-dir -r requirements.txt

# Create a non-root user for security
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# The application runs from the non-root user's home directory
WORKDIR $HOME/app

# Copy app files with proper ownership
COPY --chown=user . $HOME/app

# Define the command to run the Streamlit app
# NOTE(review): XSRF protection is disabled here — presumably so file uploads
# work behind the hosting proxy; confirm this is still required.
CMD ["streamlit", "run", "app.py", \
     "--server.port=8501", \
     "--server.address=0.0.0.0", \
     "--server.enableXsrfProtection=false"]
__pycache__/app.cpython-312.pyc ADDED
Binary file (12.9 kB). View file
 
app.py ADDED
@@ -0,0 +1,308 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Importing packages
2
+ import streamlit as st
3
+ import pandas as pd
4
+ import numpy as np
5
+ from huggingface_hub import hf_hub_download
6
+ import joblib
7
+ import io
8
+
9
+ # App Configuration
10
+ st.set_page_config(
11
+ page_title="Engine Predictive Maintenance",
12
+ page_icon="🛠️",
13
+ layout="wide"
14
+ )
15
+
16
+ st.title("🛠️ Smart Engine Predictive Maintenance App")
17
+ st.markdown("""
18
+ This application predicts whether an engine is **Faulty (maintenance required)** or **Normal**
19
+ based on sensor readings.
20
+
21
+ **Target:**
22
+ - **0 = Normal**
23
+ - **1 = Faulty**
24
+
25
+ **Note:** The model expects engineered features, so the app computes the same feature engineering
26
+ used during training to ensure schema consistency.
27
+ """)
28
+
29
+ # Model Settings (Hugging Face)
30
+ MODEL_REPO_ID = "simnid/predictive-maintenance-model"
31
+ MODEL_FILENAME = "best_predictive_maintenance_model.joblib"
32
+
33
+ # Dataset repo (for pulling bulk sample)
34
+ DATA_REPO_ID = "simnid/predictive-engine-maintenance-dataset"
35
+ BULK_TEST_FILENAME = "bulk_test_sample.csv"
36
+
37
+ RAW_COLS = [
38
+ "Engine rpm",
39
+ "Lub oil pressure",
40
+ "Fuel pressure",
41
+ "Coolant pressure",
42
+ "lub oil temp",
43
+ "Coolant temp"
44
+ ]
45
+
46
+ ENGINEERED_COLS = [
47
+ "RPM_FuelPressure_Ratio",
48
+ "Power_Index",
49
+ "Thermal_Pressure_Index",
50
+ "Mech_Cooling_Balance",
51
+ "Pressure_Coordination",
52
+ "Low_Oil_Pressure_Flag",
53
+ "High_Coolant_Temp_Flag",
54
+ "Low_RPM_Flag"
55
+ ]
56
+
57
+ FINAL_FEATURE_ORDER = RAW_COLS + ENGINEERED_COLS
58
+
59
+ # Feature Engineering
60
def add_engineered_features(df: pd.DataFrame) -> pd.DataFrame:
    """Build the model input frame from raw sensor readings.

    The input is copied, every column listed in ``RAW_COLS`` is coerced to a
    numeric dtype, and the engineered columns are appended. The result holds
    exactly the columns in ``FINAL_FEATURE_ORDER``, in that order.

    Args:
        df: Frame containing at least the columns named in ``RAW_COLS``.

    Returns:
        A new DataFrame with raw plus engineered columns; ``df`` itself is
        not modified.

    Raises:
        ValueError: If a required raw column is absent, or if any raw column
            contains a value that is missing or cannot be parsed as a number.
    """
    absent = [name for name in RAW_COLS if name not in df.columns]
    if absent:
        raise ValueError(f"Missing required columns: {absent}")

    frame = df.copy()

    # Unparseable entries become NaN here and are rejected just below.
    for name in RAW_COLS:
        frame[name] = pd.to_numeric(frame[name], errors="coerce")

    null_by_column = frame[RAW_COLS].isnull().any()
    if null_by_column.any():
        bad_cols = null_by_column[null_by_column].index.tolist()
        raise ValueError(f"Non-numeric / missing values detected in: {bad_cols}")

    rpm = frame["Engine rpm"]
    oil_pressure = frame["Lub oil pressure"]
    fuel_pressure = frame["Fuel pressure"]
    coolant_pressure = frame["Coolant pressure"]
    coolant_temp = frame["Coolant temp"]

    # Small offset keeps the ratios finite when fuel pressure is zero.
    fuel_pressure_eps = fuel_pressure + 1e-5

    # Interaction features
    frame["RPM_FuelPressure_Ratio"] = rpm / fuel_pressure_eps
    frame["Power_Index"] = (rpm * fuel_pressure) / 1000

    # System stress indicators
    frame["Thermal_Pressure_Index"] = coolant_temp / fuel_pressure_eps
    mechanical_load = rpm + oil_pressure
    cooling_load = coolant_temp + coolant_pressure
    frame["Mech_Cooling_Balance"] = mechanical_load - cooling_load
    frame["Pressure_Coordination"] = fuel_pressure - coolant_pressure

    # Early-warning flags encoded as 0/1 integers (thresholds are described
    # by the original author as data-driven).
    frame["Low_Oil_Pressure_Flag"] = (oil_pressure < 1.5).astype(int)
    frame["High_Coolant_Temp_Flag"] = (coolant_temp > 100).astype(int)
    frame["Low_RPM_Flag"] = (rpm < 600).astype(int)

    return frame[FINAL_FEATURE_ORDER]
94
+
95
+ # Load Model
96
@st.cache_resource
def _fetch_model():
    """Download the model artifact from the Hugging Face Hub and load it.

    Only this helper is cached. It lets exceptions propagate so that a failed
    download is never stored in the cache; the next rerun simply tries again.

    Returns:
        The object deserialized by ``joblib.load`` from ``MODEL_FILENAME``.
    """
    model_path = hf_hub_download(
        repo_id=MODEL_REPO_ID,
        filename=MODEL_FILENAME,
        repo_type="model",
    )
    # NOTE(review): joblib.load unpickles the artifact, which can execute
    # arbitrary code — presumably acceptable because MODEL_REPO_ID is a
    # first-party repo; confirm before pointing this at any other source.
    return joblib.load(model_path)


def load_model():
    """Return the trained model, or ``None`` if it could not be loaded.

    On failure an error message is rendered in the app and ``None`` is
    returned. The failure is not cached, so a transient network problem does
    not leave the app permanently without a model.
    """
    try:
        return _fetch_model()
    except Exception as e:
        st.error(f"Error loading model from Hugging Face: {e}")
        return None
108
+
109
+ model = load_model()
110
+ if model is None:
111
+ st.warning("Model could not be loaded. Please verify model repo + filename.")
112
+ st.stop()
113
+
114
+
115
+ # Sidebar: Business + Model Context
116
+ with st.sidebar:
117
+ st.header("About This Model")
118
+ st.markdown("""
119
+ **Model Details**
120
+ - **Model Type:** Gradient Boosting Classifier
121
+ - **Optimization Objective:** Maximize recall for faulty engines (minimize missed failures)
122
+ - **Artifact Source:** Hugging Face Model Hub
123
+
124
+ **Why Recall Matters**
125
+ A false negative means a failure was missed, leading to downtime, safety risks, and costly repairs.
126
+ """)
127
+
128
+ st.subheader("Production Metrics (Reference)")
129
+ st.metric("Recall (Faulty)", "0.84")
130
+ st.metric("ROC-AUC", "0.70")
131
+ st.metric("PR-AUC", "0.80")
132
+
133
+ st.markdown("---")
134
+ st.subheader("Decision Threshold")
135
+ threshold = st.slider(
136
+ "Classification Threshold (Faulty if P ≥ threshold)",
137
+ min_value=0.05, max_value=0.95, value=0.50, step=0.01
138
+ )
139
+ st.caption("Lower threshold → higher recall (fewer missed failures), but more false alarms.")
140
+
141
+
142
+ # Tabs: Single + Bulk Prediction
143
+ tab1, tab2 = st.tabs(["🔎 Single Prediction", "📦 Bulk Prediction"])
144
+
145
+
146
+ # Single Prediction
147
+ with tab1:
148
+ st.subheader("Engine Sensor Inputs")
149
+
150
+ c1, c2, c3 = st.columns(3)
151
+
152
+ with c1:
153
+ engine_rpm = st.number_input("Engine rpm", min_value=0.0, value=700.0, step=1.0)
154
+ lub_oil_pressure = st.number_input("Lub oil pressure", min_value=0.0, value=2.50, step=0.01)
155
+
156
+ with c2:
157
+ fuel_pressure = st.number_input("Fuel pressure", min_value=0.0, value=12.00, step=0.01)
158
+ coolant_pressure = st.number_input("Coolant pressure", min_value=0.0, value=2.50, step=0.01)
159
+
160
+ with c3:
161
+ lub_oil_temp = st.number_input("lub oil temp", min_value=0.0, value=80.0, step=0.1)
162
+ coolant_temp = st.number_input("Coolant temp", min_value=0.0, value=85.0, step=0.1)
163
+
164
+ raw_input_df = pd.DataFrame([{
165
+ "Engine rpm": engine_rpm,
166
+ "Lub oil pressure": lub_oil_pressure,
167
+ "Fuel pressure": fuel_pressure,
168
+ "Coolant pressure": coolant_pressure,
169
+ "lub oil temp": lub_oil_temp,
170
+ "Coolant temp": coolant_temp
171
+ }])
172
+
173
+ try:
174
+ feature_df = add_engineered_features(raw_input_df)
175
+ except Exception as e:
176
+ st.error(f"Feature engineering failed: {e}")
177
+ st.stop()
178
+
179
+ with st.expander("View engineered input dataframe"):
180
+ st.dataframe(feature_df)
181
+ csv = feature_df.to_csv(index=False).encode("utf-8")
182
+ st.download_button("Download Engineered Input CSV", csv, "engine_input_features.csv", "text/csv")
183
+
184
+ st.subheader("Prediction Output")
185
+
186
+ if st.button("Predict Engine Condition", type="primary", use_container_width=True):
187
+ try:
188
+ proba_faulty = None
189
+ if hasattr(model, "predict_proba"):
190
+ proba_faulty = float(model.predict_proba(feature_df)[0][1])
191
+
192
+ # Threshold-based classification (business control)
193
+ if proba_faulty is not None:
194
+ pred_class = int(proba_faulty >= threshold)
195
+ else:
196
+ pred_class = int(model.predict(feature_df)[0])
197
+
198
+ colA, colB = st.columns(2)
199
+
200
+ with colA:
201
+ if pred_class == 1:
202
+ st.error("⚠️ Prediction: FAULTY (Maintenance Recommended)")
203
+ else:
204
+ st.success("✅ Prediction: NORMAL (No Immediate Maintenance Required)")
205
+
206
+ with colB:
207
+ if proba_faulty is not None:
208
+ st.metric("Probability of Faulty (Class 1)", f"{proba_faulty*100:.1f}%")
209
+ st.progress(int(proba_faulty * 100))
210
+ else:
211
+ st.info("Probability score unavailable (model does not support predict_proba).")
212
+
213
+ except Exception as e:
214
+ st.error(f"Prediction failed: {e}")
215
+
216
+ # Bulk Prediction
217
+ with tab2:
218
+ st.subheader("Bulk CSV Prediction")
219
+
220
+ st.markdown("""
221
+ Upload a CSV containing **raw sensor columns only**:
222
+
223
+ - Engine rpm
224
+ - Lub oil pressure
225
+ - Fuel pressure
226
+ - Coolant pressure
227
+ - lub oil temp
228
+ - Coolant temp
229
+
230
+ The app will automatically engineer features and return:
231
+ - `Predicted_Class` (0/1)
232
+ - `Faulty_Probability` (if available)
233
+ """)
234
+
235
+ # Try pulling a sample file from HF dataset repo (like tourism project pattern)
236
@st.cache_data
def _read_bulk_sample() -> pd.DataFrame:
    """Download the bulk sample CSV from the dataset repo and parse it.

    Cached with ``st.cache_data`` because the result is plain data. Errors
    propagate so that a failed download is not stored in the cache.
    """
    path = hf_hub_download(
        repo_id=DATA_REPO_ID,
        filename=BULK_TEST_FILENAME,
        repo_type="dataset",
    )
    return pd.read_csv(path)


def load_bulk_sample():
    """Return the bulk sample DataFrame, or ``None`` if it is unavailable.

    Loading the sample is best-effort: a failure must not break the page, so
    the error is reduced to a short caption and ``None`` is returned. The
    failure is not cached, so a later rerun will retry the download.
    """
    try:
        return _read_bulk_sample()
    except Exception as e:
        st.caption(
            f"Bulk sample could not be loaded from Hugging Face ({type(e).__name__})."
        )
        return None
247
+
248
+ sample_df = load_bulk_sample()
249
+ if sample_df is not None:
250
+ with st.expander("Preview bulk sample from Hugging Face"):
251
+ st.dataframe(sample_df.head())
252
+
253
+ uploaded_file = st.file_uploader("Upload CSV for bulk prediction", type=["csv"])
254
+
255
+ bulk_df = None
256
+ if uploaded_file is not None:
257
+ bulk_df = pd.read_csv(uploaded_file)
258
+ elif sample_df is not None:
259
+ bulk_df = sample_df.copy()
260
+
261
+ if bulk_df is not None:
262
+ st.markdown("✅ Bulk data loaded.")
263
+ st.dataframe(bulk_df.head())
264
+
265
+ if st.button("Run Bulk Prediction", use_container_width=True):
266
+ try:
267
+ # Ensure required columns exist
268
+ missing = [c for c in RAW_COLS if c not in bulk_df.columns]
269
+ if missing:
270
+ st.error(f"Missing required columns: {missing}")
271
+ st.stop()
272
+
273
+ bulk_features = add_engineered_features(bulk_df[RAW_COLS])
274
+
275
+ # Predict
276
+ preds = model.predict(bulk_features).astype(int)
277
+
278
+ if hasattr(model, "predict_proba"):
279
+ probs = model.predict_proba(bulk_features)[:, 1]
280
+ else:
281
+ probs = np.full(shape=(len(bulk_features),), fill_value=np.nan)
282
+
283
+ # Threshold override if proba exists
284
+ if hasattr(model, "predict_proba"):
285
+ preds = (probs >= threshold).astype(int)
286
+
287
+ out = bulk_df.copy()
288
+ out["Predicted_Class"] = preds
289
+ out["Faulty_Probability"] = probs
290
+
291
+ st.success("Bulk predictions completed.")
292
+ st.dataframe(out.head(50))
293
+
294
+ out_csv = out.to_csv(index=False).encode("utf-8")
295
+ st.download_button(
296
+ "Download Bulk Predictions CSV",
297
+ out_csv,
298
+ "bulk_engine_predictions.csv",
299
+ "text/csv"
300
+ )
301
+
302
+ except Exception as e:
303
+ st.error(f"Bulk prediction failed: {e}")
304
+
305
+
306
+ # Footer
307
+ st.markdown("---")
308
+ st.caption("Predictive Maintenance | Gradient Boosting + Streamlit + Hugging Face Model Hub")
bulk_data_upload.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Importing packages
2
+ from huggingface_hub import HfApi
3
+ import os
4
+ import pandas as pd
5
+
6
+
7
+ # Create Bulk Test Sample Data
8
+ bulk_data = [
9
+ # Engine rpm, Lub oil pressure, Fuel pressure, Coolant pressure, lub oil temp, Coolant temp
10
+ [700, 2.49, 11.79, 3.18, 84.14, 81.63],
11
+ [520, 2.96, 6.55, 1.06, 77.75, 79.65],
12
+ [900, 3.50, 18.20, 2.90, 88.00, 95.00],
13
+ [450, 1.20, 7.50, 2.00, 70.00, 110.0], # high coolant temp + low oil pressure regime
14
+ [1100, 4.10, 20.00, 3.50, 90.00, 85.00]
15
+ ]
16
+
17
+ columns = [
18
+ "Engine rpm",
19
+ "Lub oil pressure",
20
+ "Fuel pressure",
21
+ "Coolant pressure",
22
+ "lub oil temp",
23
+ "Coolant temp"
24
+ ]
25
+
26
+ df_bulk = pd.DataFrame(bulk_data, columns=columns)
27
+
28
+ # Save locally inside data folder (consistent pattern)
29
+ local_path = "predictive_maintenance/data/bulk_test_sample.csv"
30
+ os.makedirs("predictive_maintenance/data", exist_ok=True)
31
+ df_bulk.to_csv(local_path, index=False)
32
+ print(f"Bulk CSV saved locally at {local_path}")
33
+
34
+ # Hugging Face Upload
35
+ HF_TOKEN = os.getenv("HF_TOKEN")
36
+ if HF_TOKEN:
37
+ HF_TOKEN = HF_TOKEN.strip()
38
+ else:
39
+ raise EnvironmentError("HF_TOKEN not set!")
40
+
41
+ DATA_REPO_ID = "simnid/predictive-engine-maintenance-dataset"
42
+ BULK_FILENAME = "bulk_test_sample.csv"
43
+
44
+ api = HfApi(token=HF_TOKEN)
45
+
46
+ api.upload_file(
47
+ path_or_fileobj=local_path,
48
+ path_in_repo=BULK_FILENAME,
49
+ repo_id=DATA_REPO_ID,
50
+ repo_type="dataset",
51
+ token=HF_TOKEN
52
+ )
53
+
54
+ print(f"Bulk CSV uploaded to Hugging Face dataset repo: {DATA_REPO_ID}/{BULK_FILENAME}")
requirements.txt CHANGED
@@ -1,3 +1,13 @@
1
- altair
2
- pandas
3
- streamlit
 
 
 
 
 
 
 
 
 
 
 
1
+ # Data manipulation
2
+ pandas==2.2.2
3
+ numpy==1.26.0
4
+
5
+ # Machine learning (model inference only)
6
+ scikit-learn==1.6.0
7
+ joblib==1.5.1
8
+
9
+ # Hugging Face model access
10
+ huggingface_hub==0.32.6
11
+
12
+ # Streamlit frontend
13
+ streamlit==1.43.2