|
|
|
|
|
import streamlit as st |
|
|
import pandas as pd |
|
|
import numpy as np |
|
|
from huggingface_hub import hf_hub_download |
|
|
import joblib |
|
|
import io |
|
|
|
|
|
|
|
|
# Page-level settings; this is the first Streamlit call made by the script.
_PAGE_SETTINGS = {
    "page_title": "Engine Predictive Maintenance",
    "page_icon": "🛠️",
    "layout": "wide",
}

# Introductory copy rendered under the title (markdown).
_INTRO_MD = """
This application predicts whether an engine is **Faulty (maintenance required)** or **Normal**
based on sensor readings.

**Target:**
- **0 = Normal**
- **1 = Faulty**

**Note:** The model expects engineered features, so the app computes the same feature engineering
used during training to ensure schema consistency.
"""

st.set_page_config(**_PAGE_SETTINGS)
st.title("🛠️ Smart Engine Predictive Maintenance App")
st.markdown(_INTRO_MD)
|
|
|
|
|
|
|
|
# Hugging Face Hub location of the serialized model artifact.
MODEL_REPO_ID = "simnid/predictive-maintenance-model"
MODEL_FILENAME = "best_predictive_maintenance_model.joblib"

# Hugging Face Hub location of the sample CSV used by the bulk-prediction tab.
DATA_REPO_ID = "simnid/predictive-engine-maintenance-dataset"
BULK_TEST_FILENAME = "bulk_test_sample.csv"

# Raw sensor columns every input (form or CSV) must provide.
# Kept as lists because they are used directly as DataFrame column selectors.
RAW_COLS = [
    "Engine rpm",
    "Lub oil pressure",
    "Fuel pressure",
    "Coolant pressure",
    "lub oil temp",
    "Coolant temp",
]

# Columns derived from the raw readings by add_engineered_features().
ENGINEERED_COLS = [
    "RPM_FuelPressure_Ratio",
    "Power_Index",
    "Thermal_Pressure_Index",
    "Mech_Cooling_Balance",
    "Pressure_Coordination",
    "Low_Oil_Pressure_Flag",
    "High_Coolant_Temp_Flag",
    "Low_RPM_Flag",
]

# Column order of the frame handed to the model: raw columns first,
# engineered columns after.
FINAL_FEATURE_ORDER = [*RAW_COLS, *ENGINEERED_COLS]
|
|
|
|
|
|
|
|
def add_engineered_features(df: pd.DataFrame) -> pd.DataFrame:
    """Validate the raw sensor columns and derive the engineered features.

    The input frame is not modified; all work happens on a copy.

    Args:
        df: Frame containing at least every column listed in ``RAW_COLS``.

    Returns:
        A new frame holding exactly the ``FINAL_FEATURE_ORDER`` columns,
        in that order.

    Raises:
        ValueError: If a required raw column is absent, or if any raw column
            contains a value that is missing or cannot be parsed as a number.
    """
    # Guard clause: every raw sensor column has to be present.
    missing = [col for col in RAW_COLS if col not in df.columns]
    if missing:
        raise ValueError(f"Missing required columns: {missing}")

    frame = df.copy()

    # Coerce to numeric; unparseable entries become NaN and are rejected below.
    for col in RAW_COLS:
        frame[col] = pd.to_numeric(frame[col], errors="coerce")

    has_null = frame[RAW_COLS].isnull().any()
    if has_null.any():
        bad_cols = has_null[has_null].index.tolist()
        raise ValueError(f"Non-numeric / missing values detected in: {bad_cols}")

    rpm = frame["Engine rpm"]
    oil_pressure = frame["Lub oil pressure"]
    fuel_pressure = frame["Fuel pressure"]
    coolant_pressure = frame["Coolant pressure"]
    coolant_temp = frame["Coolant temp"]

    # The 1e-5 offset keeps the denominator non-zero for a zero fuel-pressure reading.
    fuel_denominator = fuel_pressure + 1e-5

    # Ratio / product features.
    frame["RPM_FuelPressure_Ratio"] = rpm / fuel_denominator
    frame["Power_Index"] = (rpm * fuel_pressure) / 1000

    # Combined thermal / pressure features.
    frame["Thermal_Pressure_Index"] = coolant_temp / fuel_denominator
    frame["Mech_Cooling_Balance"] = (rpm + oil_pressure) - (coolant_temp + coolant_pressure)
    frame["Pressure_Coordination"] = fuel_pressure - coolant_pressure

    # Binary threshold flags stored as 0/1 integers.
    frame["Low_Oil_Pressure_Flag"] = (oil_pressure < 1.5).astype(int)
    frame["High_Coolant_Temp_Flag"] = (coolant_temp > 100).astype(int)
    frame["Low_RPM_Flag"] = (rpm < 600).astype(int)

    return frame[FINAL_FEATURE_ORDER]
|
|
|
|
|
|
|
|
@st.cache_resource
def _fetch_model():
    """Download the model artifact from the Hugging Face Hub and deserialize it.

    Cached with ``st.cache_resource`` so the download and deserialization are
    not repeated on every script rerun. Errors are deliberately NOT handled
    here: an exception raised by a cached function is not stored, so a
    transient failure is retried on the next rerun instead of pinning a
    ``None`` result in the cache.
    """
    model_path = hf_hub_download(
        repo_id=MODEL_REPO_ID,
        filename=MODEL_FILENAME,
        repo_type="model",
    )
    # NOTE(security): joblib.load unpickles the file and can therefore execute
    # arbitrary code. Only point MODEL_REPO_ID at a repository you trust.
    return joblib.load(model_path)


def load_model():
    """Return the trained model, or ``None`` if it could not be loaded.

    On failure an error box with the exception text is shown to the user and
    ``None`` is returned, so callers can check ``model is None``. Only a
    successful load is cached.
    """
    try:
        return _fetch_model()
    except Exception as e:
        st.error(f"Error loading model from Hugging Face: {e}")
        return None


# Keep the cache-control handle reachable under the public name
# (e.g. ``load_model.clear()``), as it was when load_model itself was decorated.
load_model.clear = _fetch_model.clear
|
|
|
|
|
# Nothing below can work without a model: tell the user what to check and
# halt this script run.
model = load_model()
if model is None:
    st.warning("Model could not be loaded. Please verify model repo + filename.")
    st.stop()
|
|
|
|
|
|
|
|
|
|
|
# Static "about" copy for the sidebar (markdown).
_SIDEBAR_ABOUT_MD = """
**Model Details**
- **Model Type:** Gradient Boosting Classifier
- **Optimization Objective:** Maximize recall for faulty engines (minimize missed failures)
- **Artifact Source:** Hugging Face Model Hub

**Why Recall Matters**
A false negative means a failure was missed, leading to downtime, safety risks, and costly repairs.
"""

# Hard-coded reference figures shown in the sidebar as (label, value) pairs;
# they are display-only and not computed by this script.
_REFERENCE_METRICS = (
    ("Recall (Faulty)", "0.84"),
    ("ROC-AUC", "0.70"),
    ("PR-AUC", "0.80"),
)

with st.sidebar:
    st.header("About This Model")
    st.markdown(_SIDEBAR_ABOUT_MD)

    st.subheader("Production Metrics (Reference)")
    for _metric_label, _metric_value in _REFERENCE_METRICS:
        st.metric(_metric_label, _metric_value)

    st.markdown("---")
    st.subheader("Decision Threshold")
    # Probability cut-off applied by both prediction tabs when the model
    # exposes predict_proba.
    threshold = st.slider(
        "Classification Threshold (Faulty if P ≥ threshold)",
        min_value=0.05,
        max_value=0.95,
        value=0.50,
        step=0.01,
    )
    st.caption("Lower threshold → higher recall (fewer missed failures), but more false alarms.")
|
|
|
|
|
|
|
|
|
|
|
tab1, tab2 = st.tabs(["🔎 Single Prediction", "📦 Bulk Prediction"])

# ---- Tab 1: predict for one set of manually entered sensor readings ----
with tab1:
    st.subheader("Engine Sensor Inputs")

    left_col, middle_col, right_col = st.columns(3)

    with left_col:
        engine_rpm = st.number_input("Engine rpm", min_value=0.0, value=700.0, step=1.0)
        lub_oil_pressure = st.number_input("Lub oil pressure", min_value=0.0, value=2.50, step=0.01)

    with middle_col:
        fuel_pressure = st.number_input("Fuel pressure", min_value=0.0, value=12.00, step=0.01)
        coolant_pressure = st.number_input("Coolant pressure", min_value=0.0, value=2.50, step=0.01)

    with right_col:
        lub_oil_temp = st.number_input("lub oil temp", min_value=0.0, value=80.0, step=0.1)
        coolant_temp = st.number_input("Coolant temp", min_value=0.0, value=85.0, step=0.1)

    # One-row frame keyed by the raw sensor column names.
    raw_input_df = pd.DataFrame(
        {
            "Engine rpm": [engine_rpm],
            "Lub oil pressure": [lub_oil_pressure],
            "Fuel pressure": [fuel_pressure],
            "Coolant pressure": [coolant_pressure],
            "lub oil temp": [lub_oil_temp],
            "Coolant temp": [coolant_temp],
        }
    )

    try:
        feature_df = add_engineered_features(raw_input_df)
    except Exception as e:
        # Without features there is nothing to predict on: report and halt the run.
        st.error(f"Feature engineering failed: {e}")
        st.stop()

    with st.expander("View engineered input dataframe"):
        st.dataframe(feature_df)
        csv = feature_df.to_csv(index=False).encode("utf-8")
        st.download_button(
            "Download Engineered Input CSV",
            data=csv,
            file_name="engine_input_features.csv",
            mime="text/csv",
        )

    st.subheader("Prediction Output")

    if st.button("Predict Engine Condition", type="primary", use_container_width=True):
        try:
            if hasattr(model, "predict_proba"):
                # Column 1 is read as the faulty-class probability — assumes the
                # model's classes are ordered [0, 1]; TODO confirm.
                proba_faulty = float(model.predict_proba(feature_df)[0][1])
                # The sidebar threshold decides the class when a probability exists.
                pred_class = int(proba_faulty >= threshold)
            else:
                proba_faulty = None
                pred_class = int(model.predict(feature_df)[0])

            verdict_col, score_col = st.columns(2)

            with verdict_col:
                if pred_class == 1:
                    st.error("⚠️ Prediction: FAULTY (Maintenance Recommended)")
                else:
                    st.success("✅ Prediction: NORMAL (No Immediate Maintenance Required)")

            with score_col:
                if proba_faulty is None:
                    st.info("Probability score unavailable (model does not support predict_proba).")
                else:
                    st.metric("Probability of Faulty (Class 1)", f"{proba_faulty*100:.1f}%")
                    st.progress(int(proba_faulty * 100))

        except Exception as e:
            st.error(f"Prediction failed: {e}")
|
|
|
|
|
|
|
|
# Instructions shown at the top of the bulk-prediction tab (markdown).
_BULK_HELP_MD = """
Upload a CSV containing **raw sensor columns only**:

- Engine rpm
- Lub oil pressure
- Fuel pressure
- Coolant pressure
- lub oil temp
- Coolant temp

The app will automatically engineer features and return:
- `Predicted_Class` (0/1)
- `Faulty_Probability` (if available)
"""


@st.cache_data
def _download_bulk_sample():
    """Fetch the sample CSV from the Hugging Face dataset repo as a DataFrame.

    ``st.cache_data`` is used because the result is plain data: each caller
    receives its own copy, so no session can mutate a shared frame. Errors
    propagate, so a failed download is not cached and is retried on the next
    rerun.
    """
    path = hf_hub_download(
        repo_id=DATA_REPO_ID,
        filename=BULK_TEST_FILENAME,
        repo_type="dataset",
    )
    return pd.read_csv(path)


def load_bulk_sample():
    """Return the sample DataFrame, or ``None`` when it cannot be fetched.

    The sample is optional (best-effort), so failures are intentionally
    silent; only a successful download is cached.
    """
    try:
        return _download_bulk_sample()
    except Exception:
        return None


# ---- Tab 2: predict for every row of an uploaded (or sample) CSV ----
with tab2:
    st.subheader("Bulk CSV Prediction")
    st.markdown(_BULK_HELP_MD)

    sample_df = load_bulk_sample()
    if sample_df is not None:
        with st.expander("Preview bulk sample from Hugging Face"):
            st.dataframe(sample_df.head())

    uploaded_file = st.file_uploader("Upload CSV for bulk prediction", type=["csv"])

    # An uploaded file takes precedence; otherwise fall back to the sample.
    bulk_df = None
    if uploaded_file is not None:
        try:
            bulk_df = pd.read_csv(uploaded_file)
        except Exception as e:
            # Malformed / empty / mis-encoded uploads are reported in the page
            # instead of surfacing as an unhandled traceback.
            st.error(f"Could not read uploaded CSV: {e}")
    elif sample_df is not None:
        bulk_df = sample_df

    if bulk_df is not None:
        st.markdown("✅ Bulk data loaded.")
        st.dataframe(bulk_df.head())

        # NOTE(review): results and the download button are rendered only on the
        # run triggered by this button; a later rerun (e.g. after clicking the
        # download button) presumably hides them again — confirm, and persist
        # results in st.session_state if that matters.
        if st.button("Run Bulk Prediction", use_container_width=True):
            # Checked before selecting columns so the user gets a clear message
            # rather than a KeyError.
            missing = [c for c in RAW_COLS if c not in bulk_df.columns]
            if missing:
                st.error(f"Missing required columns: {missing}")
                st.stop()

            try:
                bulk_features = add_engineered_features(bulk_df[RAW_COLS])

                if hasattr(model, "predict_proba"):
                    # Single inference pass: classes come from thresholding the
                    # probabilities with the sidebar threshold.
                    probs = model.predict_proba(bulk_features)[:, 1]
                    preds = (probs >= threshold).astype(int)
                else:
                    # No probabilities available: use the model's own labels
                    # and fill the probability column with NaN.
                    probs = np.full(len(bulk_features), np.nan)
                    preds = np.asarray(model.predict(bulk_features)).astype(int)

                # Keep every original column and append the results.
                out = bulk_df.copy()
                out["Predicted_Class"] = preds
                out["Faulty_Probability"] = probs

                st.success("Bulk predictions completed.")
                st.dataframe(out.head(50))

                out_csv = out.to_csv(index=False).encode("utf-8")
                st.download_button(
                    "Download Bulk Predictions CSV",
                    out_csv,
                    "bulk_engine_predictions.csv",
                    "text/csv",
                )

            except Exception as e:
                st.error(f"Bulk prediction failed: {e}")
|
|
|
|
|
|
|
|
|
|
|
# Page footer: a horizontal rule followed by a one-line caption.
_FOOTER_CAPTION = "Predictive Maintenance | Gradient Boosting + Streamlit + Hugging Face Model Hub"

st.markdown("---")
st.caption(_FOOTER_CAPTION)
|
|
|