Spaces:

simnid
/

Engine-Predictive-Maintenance

Sleeping

App Files Files Community

simnid committed on Feb 2

Commit

9533aba

verified ·

1 Parent(s): 82250cf

Upload folder using huggingface_hub

Browse files

Files changed (5) hide show

Dockerfile +20 -12
__pycache__/app.cpython-312.pyc +0 -0
app.py +308 -0
bulk_data_upload.py +54 -0
requirements.txt +13 -3

Dockerfile CHANGED Viewed

@@ -1,20 +1,28 @@
-FROM python:3.13.5-slim
 WORKDIR /app
-RUN apt-get update && apt-get install -y \
-    build-essential \
-    curl \
-    git \
-    && rm -rf /var/lib/apt/lists/*
-COPY requirements.txt ./
-COPY src/ ./src/
 RUN pip3 install -r requirements.txt
-EXPOSE 8501
-HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
-ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]

+# Use a minimal base image with Python 3.9 installed
+FROM python:3.9-slim
+# Set the working directory inside the container to /app
 WORKDIR /app
+# Copy all files from the project directory to /app
+# NOTE(review): the project is copied again into $HOME/app below, so the image holds two
+# copies of the sources; the app is started from the $HOME/app copy.
+COPY . .
+# Install Python dependencies
+# NOTE(review): this step runs before `USER user`, i.e. not as the non-root user created below.
 RUN pip3 install -r requirements.txt
+# Create a non-root user for security
+RUN useradd -m -u 1000 user
+USER user
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:$PATH
+# From here on the working directory is /home/user/app
+WORKDIR $HOME/app
+# Copy app files with proper ownership
+COPY --chown=user . $HOME/app
+# Define the command to run the Streamlit app
+# NOTE(review): the last flag explicitly switches off Streamlit's XSRF protection — confirm this is intentional.
+CMD ["streamlit", "run", "app.py", \
+     "--server.port=8501", \
+     "--server.address=0.0.0.0", \
+     "--server.enableXsrfProtection=false"]

__pycache__/app.cpython-312.pyc ADDED Viewed

Binary file (12.9 kB). View file

app.py ADDED Viewed

	@@ -0,0 +1,308 @@

+# Importing packages
+import streamlit as st
+import pandas as pd
+import numpy as np
+from huggingface_hub import hf_hub_download
+import joblib
+import io
+# App Configuration
+# Page-level settings (browser title, icon, wide layout), then the visible heading and intro text.
+st.set_page_config(
+    page_title="Engine Predictive Maintenance",
+    page_icon="🛠️",
+    layout="wide"
+)
+st.title("🛠️ Smart Engine Predictive Maintenance App")
+st.markdown("""
+This application predicts whether an engine is **Faulty (maintenance required)** or **Normal**
+based on sensor readings.
+**Target:**
+- **0 = Normal**
+- **1 = Faulty**
+**Note:** The model expects engineered features, so the app computes the same feature engineering
+used during training to ensure schema consistency.
+""")
+# Model Settings (Hugging Face)
+# Repo id and artifact filename handed to hf_hub_download when the model is loaded.
+MODEL_REPO_ID = "simnid/predictive-maintenance-model"
+MODEL_FILENAME = "best_predictive_maintenance_model.joblib"
+# Dataset repo (for pulling bulk sample)
+DATA_REPO_ID = "simnid/predictive-engine-maintenance-dataset"
+BULK_TEST_FILENAME = "bulk_test_sample.csv"
+# Raw sensor columns every input frame must contain; names are matched exactly
+# (note the lower-case "lub oil temp").
+RAW_COLS = [
+    "Engine rpm",
+    "Lub oil pressure",
+    "Fuel pressure",
+    "Coolant pressure",
+    "lub oil temp",
+    "Coolant temp"
+]
+# Names of the columns that add_engineered_features derives from the raw sensors.
+ENGINEERED_COLS = [
+    "RPM_FuelPressure_Ratio",
+    "Power_Index",
+    "Thermal_Pressure_Index",
+    "Mech_Cooling_Balance",
+    "Pressure_Coordination",
+    "Low_Oil_Pressure_Flag",
+    "High_Coolant_Temp_Flag",
+    "Low_RPM_Flag"
+]
+# Column order of the frame passed to the model: raw columns first, then engineered ones.
+# NOTE(review): presumably this must match the column order used at training time — confirm.
+FINAL_FEATURE_ORDER = RAW_COLS + ENGINEERED_COLS
+# Feature Engineering
+def add_engineered_features(df: pd.DataFrame) -> pd.DataFrame:
+    df = df.copy()
+    # Ensure required raw columns exist
+    missing = [c for c in RAW_COLS if c not in df.columns]
+    if missing:
+        raise ValueError(f"Missing required columns: {missing}")
+    # Convert to numeric (safe conversion)
+    for c in RAW_COLS:
+        df[c] = pd.to_numeric(df[c], errors="coerce")
+    if df[RAW_COLS].isnull().any().any():
+        bad_cols = df[RAW_COLS].columns[df[RAW_COLS].isnull().any()].tolist()
+        raise ValueError(f"Non-numeric / missing values detected in: {bad_cols}")
+    # Interaction Features
+    df["RPM_FuelPressure_Ratio"] = df["Engine rpm"] / (df["Fuel pressure"] + 1e-5)
+    df["Power_Index"] = (df["Engine rpm"] * df["Fuel pressure"]) / 1000
+    # System Stress Indicators
+    df["Thermal_Pressure_Index"] = df["Coolant temp"] / (df["Fuel pressure"] + 1e-5)
+    df["Mech_Cooling_Balance"] = (
+        (df["Engine rpm"] + df["Lub oil pressure"]) -
+        (df["Coolant temp"] + df["Coolant pressure"])
+    )
+    df["Pressure_Coordination"] = df["Fuel pressure"] - df["Coolant pressure"]
+    # Early Warning Flags (data-driven thresholds)
+    df["Low_Oil_Pressure_Flag"] = (df["Lub oil pressure"] < 1.5).astype(int)
+    df["High_Coolant_Temp_Flag"] = (df["Coolant temp"] > 100).astype(int)
+    df["Low_RPM_Flag"] = (df["Engine rpm"] < 600).astype(int)
+    return df[FINAL_FEATURE_ORDER]
+# Load Model
+@st.cache_resource
+def load_model():
+    try:
+        model_path = hf_hub_download(
+            repo_id=MODEL_REPO_ID,
+            filename=MODEL_FILENAME,
+            repo_type="model"
+        )
+        return joblib.load(model_path)
+    except Exception as e:
+        st.error(f"Error loading model from Hugging Face: {e}")
+        return None
+# Nothing below can work without a model, so warn and halt this script run if loading failed.
+model = load_model()
+if model is None:
+    st.warning("Model could not be loaded. Please verify model repo + filename.")
+    st.stop()
+# Sidebar: Business + Model Context
+with st.sidebar:
+    st.header("About This Model")
+    st.markdown("""
+**Model Details**
+- **Model Type:** Gradient Boosting Classifier
+- **Optimization Objective:** Maximize recall for faulty engines (minimize missed failures)
+- **Artifact Source:** Hugging Face Model Hub
+**Why Recall Matters**
+A false negative means a failure was missed, leading to downtime, safety risks, and costly repairs.
+""")
+    # Static reference figures: hard-coded strings, not computed from the loaded model
+    st.subheader("Production Metrics (Reference)")
+    st.metric("Recall (Faulty)", "0.84")
+    st.metric("ROC-AUC", "0.70")
+    st.metric("PR-AUC", "0.80")
+    st.markdown("---")
+    st.subheader("Decision Threshold")
+    # User-adjustable cut-off; both prediction tabs compare the faulty-class probability against it
+    threshold = st.slider(
+        "Classification Threshold (Faulty if P ≥ threshold)",
+        min_value=0.05, max_value=0.95, value=0.50, step=0.01
+    )
+    st.caption("Lower threshold → higher recall (fewer missed failures), but more false alarms.")
+# Tabs: Single + Bulk Prediction
+tab1, tab2 = st.tabs(["🔎 Single Prediction", "📦 Bulk Prediction"])
+# Single Prediction
+with tab1:
+    st.subheader("Engine Sensor Inputs")
+    # Six raw sensor inputs laid out in three columns; all are constrained to be non-negative
+    c1, c2, c3 = st.columns(3)
+    with c1:
+        engine_rpm = st.number_input("Engine rpm", min_value=0.0, value=700.0, step=1.0)
+        lub_oil_pressure = st.number_input("Lub oil pressure", min_value=0.0, value=2.50, step=0.01)
+    with c2:
+        fuel_pressure = st.number_input("Fuel pressure", min_value=0.0, value=12.00, step=0.01)
+        coolant_pressure = st.number_input("Coolant pressure", min_value=0.0, value=2.50, step=0.01)
+    with c3:
+        lub_oil_temp = st.number_input("lub oil temp", min_value=0.0, value=80.0, step=0.1)
+        coolant_temp = st.number_input("Coolant temp", min_value=0.0, value=85.0, step=0.1)
+    # One-row frame keyed by the exact RAW_COLS names expected by add_engineered_features
+    raw_input_df = pd.DataFrame([{
+        "Engine rpm": engine_rpm,
+        "Lub oil pressure": lub_oil_pressure,
+        "Fuel pressure": fuel_pressure,
+        "Coolant pressure": coolant_pressure,
+        "lub oil temp": lub_oil_temp,
+        "Coolant temp": coolant_temp
+    }])
+    # Features are computed on every rerun (not only on button click) so the preview below is always current
+    try:
+        feature_df = add_engineered_features(raw_input_df)
+    except Exception as e:
+        st.error(f"Feature engineering failed: {e}")
+        st.stop()
+    with st.expander("View engineered input dataframe"):
+        st.dataframe(feature_df)
+        csv = feature_df.to_csv(index=False).encode("utf-8")
+        st.download_button("Download Engineered Input CSV", csv, "engine_input_features.csv", "text/csv")
+    st.subheader("Prediction Output")
+    if st.button("Predict Engine Condition", type="primary", use_container_width=True):
+        try:
+            # Probability of class 1 (faulty), when the model can provide one
+            proba_faulty = None
+            if hasattr(model, "predict_proba"):
+                proba_faulty = float(model.predict_proba(feature_df)[0][1])
+            # Threshold-based classification (business control)
+            if proba_faulty is not None:
+                pred_class = int(proba_faulty >= threshold)
+            else:
+                # No probability available: fall back to the model's own predicted label
+                pred_class = int(model.predict(feature_df)[0])
+            colA, colB = st.columns(2)
+            with colA:
+                if pred_class == 1:
+                    st.error("⚠️ Prediction: FAULTY (Maintenance Recommended)")
+                else:
+                    st.success("✅ Prediction: NORMAL (No Immediate Maintenance Required)")
+            with colB:
+                if proba_faulty is not None:
+                    st.metric("Probability of Faulty (Class 1)", f"{proba_faulty*100:.1f}%")
+                    # st.progress is given an integer percentage here
+                    st.progress(int(proba_faulty * 100))
+                else:
+                    st.info("Probability score unavailable (model does not support predict_proba).")
+        except Exception as e:
+            st.error(f"Prediction failed: {e}")
+# Bulk Prediction
+with tab2:
+    st.subheader("Bulk CSV Prediction")
+    st.markdown("""
+Upload a CSV containing **raw sensor columns only**:
+- Engine rpm
+- Lub oil pressure
+- Fuel pressure
+- Coolant pressure
+- lub oil temp
+- Coolant temp
+The app will automatically engineer features and return:
+- `Predicted_Class` (0/1)
+- `Faulty_Probability` (if available)
+""")
+    # Try pulling a sample file from HF dataset repo (like tourism project pattern)
+    @st.cache_resource
+    def load_bulk_sample():
+        try:
+            path = hf_hub_download(
+                repo_id=DATA_REPO_ID,
+                filename=BULK_TEST_FILENAME,
+                repo_type="dataset"
+            )
+            return pd.read_csv(path)
+        except Exception:
+            return None
+    sample_df = load_bulk_sample()
+    if sample_df is not None:
+        with st.expander("Preview bulk sample from Hugging Face"):
+            st.dataframe(sample_df.head())
+    uploaded_file = st.file_uploader("Upload CSV for bulk prediction", type=["csv"])
+    bulk_df = None
+    if uploaded_file is not None:
+        bulk_df = pd.read_csv(uploaded_file)
+    elif sample_df is not None:
+        bulk_df = sample_df.copy()
+    if bulk_df is not None:
+        st.markdown("✅ Bulk data loaded.")
+        st.dataframe(bulk_df.head())
+        if st.button("Run Bulk Prediction", use_container_width=True):
+            try:
+                # Ensure required columns exist
+                missing = [c for c in RAW_COLS if c not in bulk_df.columns]
+                if missing:
+                    st.error(f"Missing required columns: {missing}")
+                    st.stop()
+                bulk_features = add_engineered_features(bulk_df[RAW_COLS])
+                # Predict
+                preds = model.predict(bulk_features).astype(int)
+                if hasattr(model, "predict_proba"):
+                    probs = model.predict_proba(bulk_features)[:, 1]
+                else:
+                    probs = np.full(shape=(len(bulk_features),), fill_value=np.nan)
+                # Threshold override if proba exists
+                if hasattr(model, "predict_proba"):
+                    preds = (probs >= threshold).astype(int)
+                out = bulk_df.copy()
+                out["Predicted_Class"] = preds
+                out["Faulty_Probability"] = probs
+                st.success("Bulk predictions completed.")
+                st.dataframe(out.head(50))
+                out_csv = out.to_csv(index=False).encode("utf-8")
+                st.download_button(
+                    "Download Bulk Predictions CSV",
+                    out_csv,
+                    "bulk_engine_predictions.csv",
+                    "text/csv"
+                )
+            except Exception as e:
+                st.error(f"Bulk prediction failed: {e}")
+# Footer
+st.markdown("---")
+st.caption("Predictive Maintenance | Gradient Boosting + Streamlit + Hugging Face Model Hub")

bulk_data_upload.py ADDED Viewed

	@@ -0,0 +1,54 @@

+# Importing packages
+from huggingface_hub import HfApi
+import os
+import pandas as pd
+# Create Bulk Test Sample Data
+bulk_data = [
+    # Engine rpm, Lub oil pressure, Fuel pressure, Coolant pressure, lub oil temp, Coolant temp
+    [700, 2.49, 11.79, 3.18, 84.14, 81.63],
+    [520, 2.96,  6.55, 1.06, 77.75, 79.65],
+    [900, 3.50, 18.20, 2.90, 88.00, 95.00],
+    [450, 1.20,  7.50, 2.00, 70.00, 110.0],  # high coolant temp + low oil pressure regime
+    [1100, 4.10, 20.00, 3.50, 90.00, 85.00]
+]
+columns = [
+    "Engine rpm",
+    "Lub oil pressure",
+    "Fuel pressure",
+    "Coolant pressure",
+    "lub oil temp",
+    "Coolant temp"
+]
+df_bulk = pd.DataFrame(bulk_data, columns=columns)
+# Save locally inside data folder (consistent pattern)
+local_path = "predictive_maintenance/data/bulk_test_sample.csv"
+os.makedirs("predictive_maintenance/data", exist_ok=True)
+df_bulk.to_csv(local_path, index=False)
+print(f"Bulk CSV saved locally at {local_path}")
+# Hugging Face Upload
+HF_TOKEN = os.getenv("HF_TOKEN")
+if HF_TOKEN:
+    HF_TOKEN = HF_TOKEN.strip()
+else:
+    raise EnvironmentError("HF_TOKEN not set!")
+DATA_REPO_ID = "simnid/predictive-engine-maintenance-dataset"
+BULK_FILENAME = "bulk_test_sample.csv"
+api = HfApi(token=HF_TOKEN)
+api.upload_file(
+    path_or_fileobj=local_path,
+    path_in_repo=BULK_FILENAME,
+    repo_id=DATA_REPO_ID,
+    repo_type="dataset",
+    token=HF_TOKEN
+)
+print(f"Bulk CSV uploaded to Hugging Face dataset repo: {DATA_REPO_ID}/{BULK_FILENAME}")

requirements.txt CHANGED Viewed

@@ -1,3 +1,13 @@
-altair
-pandas
-streamlit

+# Data manipulation
+pandas==2.2.2
+numpy==1.26.0
+# Machine learning (model inference only)
+scikit-learn==1.6.0
+joblib==1.5.1
+# Hugging Face model access
+huggingface_hub==0.32.6
+# Streamlit frontend
+streamlit==1.43.2