Upload folder using huggingface_hub
Browse files- Dockerfile +20 -12
- __pycache__/app.cpython-312.pyc +0 -0
- app.py +308 -0
- bulk_data_upload.py +54 -0
- requirements.txt +13 -3
Dockerfile
CHANGED
|
@@ -1,20 +1,28 @@
|
|
| 1 |
-
|
|
|
|
| 2 |
|
|
|
|
| 3 |
WORKDIR /app
|
| 4 |
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
curl \
|
| 8 |
-
git \
|
| 9 |
-
&& rm -rf /var/lib/apt/lists/*
|
| 10 |
-
|
| 11 |
-
COPY requirements.txt ./
|
| 12 |
-
COPY src/ ./src/
|
| 13 |
|
|
|
|
| 14 |
RUN pip3 install -r requirements.txt
|
| 15 |
|
| 16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
-
|
|
|
|
| 19 |
|
| 20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Use a minimal base image with Python 3.9 installed
FROM python:3.9-slim

# Working directory used only for the dependency-install step
WORKDIR /app

# Copy just the dependency manifest first: the pip layer below is then reused
# from the build cache until requirements.txt itself changes, instead of being
# rebuilt on every source edit
COPY requirements.txt ./

# Install Python dependencies (as root, into the system site-packages);
# --no-cache-dir keeps pip's download cache out of the image
RUN pip3 install --no-cache-dir -r requirements.txt

# Create a non-root user for security
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

WORKDIR $HOME/app

# Copy app files with proper ownership (the only copy of the project in the image)
COPY --chown=user . $HOME/app

# Define the command to run the Streamlit app
# NOTE(review): XSRF protection is disabled below, presumably so file uploads
# work behind the hosting proxy - confirm this is still required
CMD ["streamlit", "run", "app.py", \
     "--server.port=8501", \
     "--server.address=0.0.0.0", \
     "--server.enableXsrfProtection=false"]
|
__pycache__/app.cpython-312.pyc
ADDED
|
Binary file (12.9 kB). View file
|
|
|
app.py
ADDED
|
@@ -0,0 +1,308 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Importing packages
|
| 2 |
+
import streamlit as st
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import numpy as np
|
| 5 |
+
from huggingface_hub import hf_hub_download
|
| 6 |
+
import joblib
|
| 7 |
+
import io
|
| 8 |
+
|
| 9 |
+
# App Configuration
|
| 10 |
+
# Page-level settings; this is the first Streamlit call in the script.
st.set_page_config(
    layout="wide",
    page_icon="🛠️",
    page_title="Engine Predictive Maintenance",
)

# Headline followed by a short description of what the app predicts.
st.title(body="🛠️ Smart Engine Predictive Maintenance App")
st.markdown(
    body="""
This application predicts whether an engine is **Faulty (maintenance required)** or **Normal**
based on sensor readings.

**Target:**
- **0 = Normal**
- **1 = Faulty**

**Note:** The model expects engineered features, so the app computes the same feature engineering
used during training to ensure schema consistency.
"""
)
|
| 28 |
+
|
| 29 |
+
# Model Settings (Hugging Face)
|
| 30 |
+
# Hugging Face model repository and the artifact file to download from it.
MODEL_REPO_ID = "simnid/predictive-maintenance-model"
MODEL_FILENAME = "best_predictive_maintenance_model.joblib"

# Hugging Face dataset repository holding the optional bulk sample CSV.
DATA_REPO_ID = "simnid/predictive-engine-maintenance-dataset"
BULK_TEST_FILENAME = "bulk_test_sample.csv"

# Raw sensor columns every input frame must provide. Kept as a list because
# it is used directly as a pandas column selector.
RAW_COLS = [
    "Engine rpm",
    "Lub oil pressure",
    "Fuel pressure",
    "Coolant pressure",
    "lub oil temp",
    "Coolant temp",
]

# Columns derived from the raw sensors by add_engineered_features.
ENGINEERED_COLS = [
    "RPM_FuelPressure_Ratio",
    "Power_Index",
    "Thermal_Pressure_Index",
    "Mech_Cooling_Balance",
    "Pressure_Coordination",
    "Low_Oil_Pressure_Flag",
    "High_Coolant_Temp_Flag",
    "Low_RPM_Flag",
]

# Column order of the frame handed to the model: raw first, engineered after.
FINAL_FEATURE_ORDER = [*RAW_COLS, *ENGINEERED_COLS]
|
| 58 |
+
|
| 59 |
+
# Feature Engineering
|
| 60 |
+
def add_engineered_features(df: pd.DataFrame) -> pd.DataFrame:
    """Return a new frame holding the raw sensor columns plus derived features.

    The input frame is not modified. Every column named in ``RAW_COLS`` is
    coerced to numeric, the engineered columns are computed from them, and the
    result is restricted to (and ordered by) ``FINAL_FEATURE_ORDER``.

    Args:
        df: Frame containing at least the columns listed in ``RAW_COLS``.

    Returns:
        A frame with exactly the ``FINAL_FEATURE_ORDER`` columns.

    Raises:
        ValueError: If a required column is absent, or if any required column
            holds a value that is missing or cannot be parsed as a number.
    """
    # Reject frames that lack any required sensor column.
    absent = [name for name in RAW_COLS if name not in df.columns]
    if absent:
        raise ValueError(f"Missing required columns: {absent}")

    # Work on a copy so the caller's frame is left untouched.
    frame = df.copy()

    # Unparseable entries become NaN here and are reported just below.
    for name in RAW_COLS:
        frame[name] = pd.to_numeric(frame[name], errors="coerce")

    has_null = frame[RAW_COLS].isnull().any()
    if has_null.any():
        bad_cols = has_null[has_null].index.tolist()
        raise ValueError(f"Non-numeric / missing values detected in: {bad_cols}")

    rpm = frame["Engine rpm"]
    oil_pressure = frame["Lub oil pressure"]
    fuel_pressure = frame["Fuel pressure"]
    coolant_pressure = frame["Coolant pressure"]
    coolant_temp = frame["Coolant temp"]

    # Small offset keeps the two ratios finite when fuel pressure is zero.
    fuel_denominator = fuel_pressure + 1e-5

    # Interaction features
    frame["RPM_FuelPressure_Ratio"] = rpm / fuel_denominator
    frame["Power_Index"] = (rpm * fuel_pressure) / 1000

    # System stress indicators
    frame["Thermal_Pressure_Index"] = coolant_temp / fuel_denominator
    frame["Mech_Cooling_Balance"] = (rpm + oil_pressure) - (coolant_temp + coolant_pressure)
    frame["Pressure_Coordination"] = fuel_pressure - coolant_pressure

    # Early-warning flags: 1 when the reading crosses its fixed threshold.
    frame["Low_Oil_Pressure_Flag"] = (oil_pressure < 1.5).astype(int)
    frame["High_Coolant_Temp_Flag"] = (coolant_temp > 100).astype(int)
    frame["Low_RPM_Flag"] = (rpm < 600).astype(int)

    return frame[FINAL_FEATURE_ORDER]
|
| 94 |
+
|
| 95 |
+
# Load Model
|
| 96 |
+
@st.cache_resource
def _fetch_model():
    """Download the model artifact from the Hugging Face Hub and deserialize it.

    Errors are deliberately allowed to propagate: st.cache_resource stores a
    value only when the function returns, so a failed attempt is not cached
    and the next script run retries the download.
    """
    model_path = hf_hub_download(
        repo_id=MODEL_REPO_ID,
        filename=MODEL_FILENAME,
        repo_type="model"
    )
    # NOTE: joblib.load unpickles the file, which can execute arbitrary code.
    # Only point MODEL_REPO_ID at a repository you control and trust.
    return joblib.load(model_path)


def load_model():
    """Return the loaded model, or None if it could not be fetched or loaded.

    On failure the error is shown in the app and None is returned so the
    caller can halt the script. The failure itself is never cached, so a
    transient network problem does not leave the app broken until restart.
    """
    try:
        return _fetch_model()
    except Exception as e:
        st.error(f"Error loading model from Hugging Face: {e}")
        return None
|
| 108 |
+
|
| 109 |
+
# Nothing below can work without a model, so halt the script if loading failed.
model = load_model()
if model is None:
    st.warning("Model could not be loaded. Please verify model repo + filename.")
    st.stop()


# Sidebar: model background, reference metrics and the decision threshold
with st.sidebar:
    st.header("About This Model")
    st.markdown("""
**Model Details**
- **Model Type:** Gradient Boosting Classifier
- **Optimization Objective:** Maximize recall for faulty engines (minimize missed failures)
- **Artifact Source:** Hugging Face Model Hub

**Why Recall Matters**
A false negative means a failure was missed, leading to downtime, safety risks, and costly repairs.
""")

    st.subheader("Production Metrics (Reference)")
    # Static reference figures; they are not recomputed by the app.
    for metric_label, metric_value in (
        ("Recall (Faulty)", "0.84"),
        ("ROC-AUC", "0.70"),
        ("PR-AUC", "0.80"),
    ):
        st.metric(metric_label, metric_value)

    st.markdown("---")
    st.subheader("Decision Threshold")
    # Probability cut-off applied by both prediction tabs.
    threshold = st.slider(
        "Classification Threshold (Faulty if P ≥ threshold)",
        min_value=0.05,
        max_value=0.95,
        value=0.50,
        step=0.01,
    )
    st.caption("Lower threshold → higher recall (fewer missed failures), but more false alarms.")


# One tab per workflow: a single reading, or a whole CSV
tab_labels = ["🔎 Single Prediction", "📦 Bulk Prediction"]
tab1, tab2 = st.tabs(tab_labels)
|
| 144 |
+
|
| 145 |
+
|
| 146 |
+
# Single Prediction
|
| 147 |
+
with tab1:
    st.subheader("Engine Sensor Inputs")

    # Three side-by-side columns, two sensor inputs in each.
    left_col, mid_col, right_col = st.columns(3)

    engine_rpm = left_col.number_input("Engine rpm", min_value=0.0, value=700.0, step=1.0)
    lub_oil_pressure = left_col.number_input("Lub oil pressure", min_value=0.0, value=2.50, step=0.01)

    fuel_pressure = mid_col.number_input("Fuel pressure", min_value=0.0, value=12.00, step=0.01)
    coolant_pressure = mid_col.number_input("Coolant pressure", min_value=0.0, value=2.50, step=0.01)

    lub_oil_temp = right_col.number_input("lub oil temp", min_value=0.0, value=80.0, step=0.1)
    coolant_temp = right_col.number_input("Coolant temp", min_value=0.0, value=85.0, step=0.1)

    # One-row frame keyed by the raw sensor column names.
    sensor_row = {
        "Engine rpm": engine_rpm,
        "Lub oil pressure": lub_oil_pressure,
        "Fuel pressure": fuel_pressure,
        "Coolant pressure": coolant_pressure,
        "lub oil temp": lub_oil_temp,
        "Coolant temp": coolant_temp,
    }
    raw_input_df = pd.DataFrame([sensor_row])

    # Without engineered features there is nothing to predict on, so stop.
    try:
        feature_df = add_engineered_features(raw_input_df)
    except Exception as e:
        st.error(f"Feature engineering failed: {e}")
        st.stop()

    # NOTE(review): the download button is assumed to live inside the expander
    # together with the table - confirm against the original layout.
    with st.expander("View engineered input dataframe"):
        st.dataframe(feature_df)
        features_csv = feature_df.to_csv(index=False).encode("utf-8")
        st.download_button("Download Engineered Input CSV", features_csv, "engine_input_features.csv", "text/csv")

    st.subheader("Prediction Output")

    if st.button("Predict Engine Condition", type="primary", use_container_width=True):
        try:
            # Use the probability of class 1 when the model can provide one.
            if hasattr(model, "predict_proba"):
                proba_faulty = float(model.predict_proba(feature_df)[0][1])
            else:
                proba_faulty = None

            # With a probability the sidebar threshold decides the class;
            # otherwise fall back to the model's own hard prediction.
            if proba_faulty is None:
                pred_class = int(model.predict(feature_df)[0])
            else:
                pred_class = int(proba_faulty >= threshold)

            verdict_col, score_col = st.columns(2)

            if pred_class == 1:
                verdict_col.error("⚠️ Prediction: FAULTY (Maintenance Recommended)")
            else:
                verdict_col.success("✅ Prediction: NORMAL (No Immediate Maintenance Required)")

            if proba_faulty is None:
                score_col.info("Probability score unavailable (model does not support predict_proba).")
            else:
                score_col.metric("Probability of Faulty (Class 1)", f"{proba_faulty*100:.1f}%")
                score_col.progress(int(proba_faulty * 100))

        except Exception as e:
            st.error(f"Prediction failed: {e}")
|
| 215 |
+
|
| 216 |
+
# Bulk Prediction
|
| 217 |
+
with tab2:
    st.subheader("Bulk CSV Prediction")

    st.markdown("""
Upload a CSV containing **raw sensor columns only**:

- Engine rpm
- Lub oil pressure
- Fuel pressure
- Coolant pressure
- lub oil temp
- Coolant temp

The app will automatically engineer features and return:
- `Predicted_Class` (0/1)
- `Faulty_Probability` (if available)
""")

    @st.cache_data
    def _fetch_bulk_sample():
        """Download the sample CSV from the dataset repo and parse it.

        Uses st.cache_data (the cache intended for data such as DataFrames).
        Errors propagate on purpose so that a failed download is not cached
        and is retried on the next script run.
        """
        path = hf_hub_download(
            repo_id=DATA_REPO_ID,
            filename=BULK_TEST_FILENAME,
            repo_type="dataset"
        )
        return pd.read_csv(path)

    def load_bulk_sample():
        """Return the sample DataFrame, or None when it is unavailable."""
        try:
            return _fetch_bulk_sample()
        except Exception:
            # Deliberate best-effort: the sample is optional and the user can
            # still upload a file, so a failure here must not break the tab.
            return None

    sample_df = load_bulk_sample()
    if sample_df is not None:
        with st.expander("Preview bulk sample from Hugging Face"):
            st.dataframe(sample_df.head())

    uploaded_file = st.file_uploader("Upload CSV for bulk prediction", type=["csv"])

    # An uploaded file takes precedence; otherwise fall back to the sample.
    bulk_df = None
    if uploaded_file is not None:
        try:
            bulk_df = pd.read_csv(uploaded_file)
        except (pd.errors.ParserError, pd.errors.EmptyDataError, UnicodeDecodeError) as e:
            # A malformed upload is a user error: report it instead of crashing.
            st.error(f"Could not read uploaded CSV: {e}")
    elif sample_df is not None:
        bulk_df = sample_df.copy()

    if bulk_df is not None:
        st.markdown("✅ Bulk data loaded.")
        st.dataframe(bulk_df.head())

        if st.button("Run Bulk Prediction", use_container_width=True):
            # Ensure required columns exist before doing any work. The error is
            # reported without st.stop() so the rest of the page still renders.
            missing = [c for c in RAW_COLS if c not in bulk_df.columns]
            if missing:
                st.error(f"Missing required columns: {missing}")
            else:
                try:
                    bulk_features = add_engineered_features(bulk_df[RAW_COLS])

                    if hasattr(model, "predict_proba"):
                        # The sidebar threshold decides the class, so the
                        # model's own hard predictions are not needed here.
                        probs = model.predict_proba(bulk_features)[:, 1]
                        preds = (probs >= threshold).astype(int)
                    else:
                        # No probabilities available: keep the column, filled
                        # with NaN, and use the model's hard predictions.
                        probs = np.full(shape=(len(bulk_features),), fill_value=np.nan)
                        preds = model.predict(bulk_features).astype(int)

                    # Append results to a copy of everything the user supplied.
                    out = bulk_df.copy()
                    out["Predicted_Class"] = preds
                    out["Faulty_Probability"] = probs

                    st.success("Bulk predictions completed.")
                    st.dataframe(out.head(50))

                    out_csv = out.to_csv(index=False).encode("utf-8")
                    st.download_button(
                        "Download Bulk Predictions CSV",
                        out_csv,
                        "bulk_engine_predictions.csv",
                        "text/csv"
                    )

                except Exception as e:
                    st.error(f"Bulk prediction failed: {e}")
|
| 304 |
+
|
| 305 |
+
|
| 306 |
+
# Footer
|
| 307 |
+
# Horizontal rule followed by a one-line credit caption.
st.markdown(body="---")
st.caption(body="Predictive Maintenance | Gradient Boosting + Streamlit + Hugging Face Model Hub")
|
bulk_data_upload.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Importing packages
|
| 2 |
+
from huggingface_hub import HfApi
|
| 3 |
+
import os
|
| 4 |
+
import pandas as pd
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
# Create Bulk Test Sample Data
|
| 8 |
+
# Five hand-written sensor readings used as the bulk-prediction sample.
bulk_data = [
    # Engine rpm, Lub oil pressure, Fuel pressure, Coolant pressure, lub oil temp, Coolant temp
    [700, 2.49, 11.79, 3.18, 84.14, 81.63],
    [520, 2.96, 6.55, 1.06, 77.75, 79.65],
    [900, 3.50, 18.20, 2.90, 88.00, 95.00],
    [450, 1.20, 7.50, 2.00, 70.00, 110.0],   # high coolant temp + low oil pressure regime
    [1100, 4.10, 20.00, 3.50, 90.00, 85.00]
]

columns = [
    "Engine rpm",
    "Lub oil pressure",
    "Fuel pressure",
    "Coolant pressure",
    "lub oil temp",
    "Coolant temp"
]

df_bulk = pd.DataFrame(bulk_data, columns=columns)

# Save locally inside data folder (consistent pattern). The directory is
# derived from the file path so the two can never drift apart.
local_path = "predictive_maintenance/data/bulk_test_sample.csv"
os.makedirs(os.path.dirname(local_path), exist_ok=True)
df_bulk.to_csv(local_path, index=False)
print(f"Bulk CSV saved locally at {local_path}")

# Hugging Face Upload
# Strip BEFORE validating: a whitespace-only value would otherwise pass the
# truthiness check and only fail later as an empty token.
HF_TOKEN = (os.getenv("HF_TOKEN") or "").strip()
if not HF_TOKEN:
    raise EnvironmentError("HF_TOKEN not set!")

DATA_REPO_ID = "simnid/predictive-engine-maintenance-dataset"
BULK_FILENAME = "bulk_test_sample.csv"

# The token given to the client is used for its API calls, so it is not
# repeated on upload_file.
api = HfApi(token=HF_TOKEN)

api.upload_file(
    path_or_fileobj=local_path,
    path_in_repo=BULK_FILENAME,
    repo_id=DATA_REPO_ID,
    repo_type="dataset"
)

print(f"Bulk CSV uploaded to Hugging Face dataset repo: {DATA_REPO_ID}/{BULK_FILENAME}")
|
requirements.txt
CHANGED
|
@@ -1,3 +1,13 @@
|
|
| 1 |
-
|
| 2 |
-
pandas
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Data manipulation
|
| 2 |
+
pandas==2.2.2
|
| 3 |
+
numpy==1.26.0
|
| 4 |
+
|
| 5 |
+
# Machine learning (model inference only)
|
| 6 |
+
scikit-learn==1.6.0
|
| 7 |
+
joblib==1.5.1
|
| 8 |
+
|
| 9 |
+
# Hugging Face model access
|
| 10 |
+
huggingface_hub==0.32.6
|
| 11 |
+
|
| 12 |
+
# Streamlit frontend
|
| 13 |
+
streamlit==1.43.2
|