"""Streamlit app: predictive maintenance for engine health.

Downloads a trained classifier from the Hugging Face Model Hub and exposes
two prediction modes:

- a single-prediction form with manual sensor inputs, and
- bulk prediction over an uploaded CSV, with downloadable results.

Input dataframes are aligned to the model's training feature schema
(``feature_names_in_``) before prediction to avoid feature-mismatch errors.
"""

import os

import joblib
import pandas as pd
import streamlit as st
from huggingface_hub import hf_hub_download

# -----------------------------
# Streamlit Page Config
# -----------------------------
st.set_page_config(page_title="Predictive Maintenance", layout="centered")

# -----------------------------
# Banner (Public HF Space URL)
# -----------------------------
BANNER_URL = (
    "https://huggingface.co/spaces/Yashwanthsairam/engine-predictive-maintenance"
    "/resolve/main/predictive_maintenance_banner.png"
)
try:
    # `use_container_width` superseded the deprecated `use_column_width`
    # (removed in recent Streamlit releases).
    st.image(BANNER_URL, use_container_width=True)
except TypeError:
    try:
        # Older Streamlit: only `use_column_width` exists.
        st.image(BANNER_URL, use_column_width=True)
    except TypeError:
        # Very old Streamlit fallback: no width kwarg at all.
        st.image(BANNER_URL)

st.title("Predictive Maintenance for Engine Health")
st.write("Enter engine sensor values and get a maintenance prediction.")

# -----------------------------
# Config (env-driven)
# -----------------------------
HF_MODEL_REPO_ID = os.getenv(
    "HF_MODEL_REPO_ID",
    "Yashwanthsairam/mlops-predictive-maintenance-engine-final-xgb",
).strip()
MODEL_FILENAME = os.getenv("MODEL_FILENAME", "model.joblib").strip()


# -----------------------------
# Load Model from Hugging Face Model Hub (rubric requirement)
# -----------------------------
@st.cache_resource(show_spinner=True)
def load_model(repo_id: str, filename: str):
    """Download *filename* from the HF model repo *repo_id* and unpickle it.

    Cached with ``st.cache_resource`` so the download/deserialization
    happens once per server process, not on every script rerun.
    """
    model_path = hf_hub_download(repo_id=repo_id, filename=filename, repo_type="model")
    return joblib.load(model_path)


try:
    model = load_model(HF_MODEL_REPO_ID, MODEL_FILENAME)
    st.caption(f"✅ Model loaded from HF Model Hub: `{HF_MODEL_REPO_ID}`")
except Exception as e:
    # Without a model nothing below can work — surface the error and halt.
    st.error("❌ Failed to load model from Hugging Face Model Hub.")
    st.code(str(e))
    st.stop()

# -----------------------------
# Feature mapping (snake_case)
# -----------------------------
FEATURES = [
    "engine_rpm",
    "lub_oil_pressure",
    "fuel_pressure",
    "coolant_pressure",
    "lub_oil_temp",
    "coolant_temp",
]

# Human-readable widget labels, keyed by feature name.
LABELS = {
    "engine_rpm": "Engine RPM",
    "lub_oil_pressure": "Lub Oil Pressure",
    "fuel_pressure": "Fuel Pressure",
    "coolant_pressure": "Coolant Pressure",
    "lub_oil_temp": "Lub Oil Temperature",
    "coolant_temp": "Coolant Temperature",
}

# Initial widget values for the single-prediction form.
DEFAULTS = {
    "engine_rpm": 1200.0,
    "lub_oil_pressure": 3.0,
    "fuel_pressure": 20.0,
    "coolant_pressure": 2.0,
    "lub_oil_temp": 80.0,
    "coolant_temp": 85.0,
}

# Resolve expected feature order from the trained model if available;
# fall back to our declared FEATURES list otherwise.
MODEL_EXPECTED_FEATURES = (
    list(model.feature_names_in_)
    if hasattr(model, "feature_names_in_")
    else FEATURES
)

# -----------------------------
# Single Prediction (manual inputs)
# -----------------------------
st.subheader("Single Prediction")

with st.form("predict_form"):
    st.write("Provide sensor values and click **Predict**.")
    col1, col2 = st.columns(2)

    # Build the six inputs from FEATURES so the form stays in sync with
    # LABELS/DEFAULTS: first three sensors in the left column, rest in the
    # right — identical widget order/layout to laying them out by hand.
    user_input = {}
    for column, feature_names in ((col1, FEATURES[:3]), (col2, FEATURES[3:])):
        with column:
            for feature in feature_names:
                user_input[feature] = st.number_input(
                    LABELS[feature], min_value=0.0, value=DEFAULTS[feature]
                )

    submitted = st.form_submit_button("Predict")

if submitted:
    input_df = pd.DataFrame([user_input])

    # Align columns to the model's training schema (names + order).
    missing = [c for c in MODEL_EXPECTED_FEATURES if c not in input_df.columns]
    if missing:
        st.error("❌ Missing required input columns for this model:")
        st.write(missing)
        st.stop()

    input_df = input_df[MODEL_EXPECTED_FEATURES]

    st.subheader("Input Data (Aligned to Model Features)")
    st.dataframe(input_df, use_container_width=True)

    try:
        pred = model.predict(input_df)[0]
    except Exception as e:
        st.error("❌ Prediction failed. Likely feature mismatch or datatype issue.")
        st.code(str(e))
        st.stop()

    st.subheader("Prediction Result")
    if int(pred) == 1:
        st.error("⚠️ Maintenance Required")
    else:
        st.success("✅ Engine Healthy")

# -----------------------------
# Bulk Prediction (CSV upload)
# -----------------------------
st.divider()
st.subheader("Bulk Prediction (CSV Upload)")
st.write(
    "Upload a CSV with columns (snake_case): "
    "`engine_rpm, lub_oil_pressure, fuel_pressure, coolant_pressure, lub_oil_temp, coolant_temp`"
)

uploaded_file = st.file_uploader("Upload CSV", type=["csv"])

if uploaded_file is not None:
    try:
        bulk_df = pd.read_csv(uploaded_file)
    except Exception as e:
        st.error("❌ Failed to read CSV.")
        st.code(str(e))
        st.stop()

    # Normalize columns to snake_case (handles spaces/case differences),
    # e.g. "Engine RPM" -> "engine_rpm".
    bulk_df.columns = (
        bulk_df.columns.astype(str)
        .str.strip()
        .str.lower()
        .str.replace(r"[^a-z0-9]+", "_", regex=True)
        .str.strip("_")
    )

    missing_cols = [c for c in MODEL_EXPECTED_FEATURES if c not in bulk_df.columns]
    if missing_cols:
        st.error("❌ Missing required columns in uploaded file:")
        st.write(missing_cols)
        st.stop()

    X_bulk = bulk_df[MODEL_EXPECTED_FEATURES].copy()

    try:
        bulk_pred = model.predict(X_bulk)
    except Exception as e:
        st.error("❌ Bulk prediction failed.")
        st.code(str(e))
        st.stop()

    result_df = bulk_df.copy()
    result_df["prediction"] = bulk_pred
    result_df["prediction_label"] = result_df["prediction"].map(
        {0: "Engine Healthy", 1: "Maintenance Required"}
    )

    st.success(f"✅ Bulk prediction completed for {len(result_df)} rows.")
    # Preview only the first 50 rows; the full set is in the download.
    st.dataframe(result_df.head(50), use_container_width=True)

    out_csv = result_df.to_csv(index=False).encode("utf-8")
    st.download_button(
        label="⬇️ Download Predictions CSV",
        data=out_csv,
        file_name="bulk_predictions.csv",
        mime="text/csv",
    )

# -----------------------------
# Footer / About
# -----------------------------
with st.expander("About this app"):
    st.write(
        """
        This Streamlit app loads a trained XGBoost model from the Hugging Face **Model Hub**
        and predicts whether an engine requires maintenance based on sensor inputs.

        It supports:
        - **Single prediction** via manual input form
        - **Bulk prediction** via CSV upload + downloadable results

        The input dataframe is aligned to the model’s training feature schema
        to prevent feature mismatch issues.
        """
    )