"""
Predictive Maintenance Streamlit Application

This application provides a web interface for predicting engine maintenance
needs based on sensor readings. It loads a trained AdaBoost model from
HuggingFace and performs real-time inference on user-provided sensor values.

Features:
- Configurable alert sensitivity with two operating modes
- Single engine prediction with interactive sensor inputs
- Bulk CSV import for fleet-wide batch predictions
"""

import io
import json
from typing import Any

import joblib
import numpy as np
import pandas as pd
import streamlit as st
from huggingface_hub import hf_hub_download

# Configuration
MODEL_REPO = "jskswamy/predictive-maintenance-model"
MODEL_FILE = "best_model.joblib"
METADATA_FILE = "model_metadata.json"

# Operating modes with threshold and performance metrics
OPERATING_MODES = {
    "Optimal -- Maximum Safety": {
        "threshold": 0.3163,
        "recall": "99.8%",
        "precision": "63.2%",
        "description": (
            "Catches virtually all failures. Best for fleets where "
            "breakdown costs are high ($350-$700 roadside + $448/hr downtime)."
        ),
    },
    "Default -- Balanced": {
        "threshold": 0.50,
        "recall": "84.6%",
        "precision": "68.4%",
        "description": (
            "Balanced detection vs false alarms. Best for fleets "
            "where inspection costs are significant."
        ),
    },
}

DEFAULT_MODE = "Optimal -- Maximum Safety"

# Column name aliases for CSV import (maps variations to canonical names).
# Keys are matched against the stripped, lowercased CSV header.
COLUMN_ALIASES = {
    "engine rpm": "Engine RPM",
    "engine_rpm": "Engine RPM",
    "rpm": "Engine RPM",
    "lub oil pressure": "Lub Oil Pressure",
    "lub_oil_pressure": "Lub Oil Pressure",
    "oil pressure": "Lub Oil Pressure",
    "oil_pressure": "Lub Oil Pressure",
    "fuel pressure": "Fuel Pressure",
    "fuel_pressure": "Fuel Pressure",
    "coolant pressure": "Coolant Pressure",
    "coolant_pressure": "Coolant Pressure",
    "lub oil temp": "Lub Oil Temp",
    "lub_oil_temp": "Lub Oil Temp",
    "oil temp": "Lub Oil Temp",
    "oil_temp": "Lub Oil Temp",
    "oil temperature": "Lub Oil Temp",
    "oil_temperature": "Lub Oil Temp",
    "coolant temp": "Coolant Temp",
    "coolant_temp": "Coolant Temp",
    "coolant temperature": "Coolant Temp",
    "coolant_temperature": "Coolant Temp",
}

REQUIRED_COLUMNS = [
    "Engine RPM",
    "Lub Oil Pressure",
    "Fuel Pressure",
    "Coolant Pressure",
    "Lub Oil Temp",
    "Coolant Temp",
]

MAX_BULK_ROWS = 10_000

# Sensor input ranges (from training data)
SENSOR_RANGES = {
    "engine_rpm": {"min": 0, "max": 2239, "default": 800, "unit": "RPM"},
    "lub_oil_pressure": {"min": 0.0, "max": 7.26, "default": 3.0, "unit": "bar"},
    "fuel_pressure": {"min": 0.0, "max": 21.14, "default": 8.0, "unit": "bar"},
    "coolant_pressure": {"min": 0.0, "max": 7.53, "default": 2.0, "unit": "bar"},
    "lub_oil_temp": {"min": 0.0, "max": 164.35, "default": 80.0, "unit": "°C"},
    "coolant_temp": {"min": 0.0, "max": 194.59, "default": 75.0, "unit": "°C"},
}

# Sensor descriptions for help text
SENSOR_HELP = {
    "engine_rpm": "Engine revolutions per minute. Normal idle: 600-800 RPM. Higher values indicate increased load.",
    "lub_oil_pressure": "Lubrication oil pressure. Low pressure may indicate bearing wear or pump issues.",
    "fuel_pressure": "Fuel system delivery pressure. Erratic values may suggest injector problems.",
    "coolant_pressure": "Cooling system pressure. Low values can indicate leaks or pump failure.",
    "lub_oil_temp": "Lubrication oil temperature. High temperatures accelerate oil breakdown.",
    "coolant_temp": "Engine coolant temperature. Overheating can cause severe engine damage.",
}


@st.cache_resource(show_spinner=False)
def _load_model_cached() -> tuple[Any, dict]:
    """Download and deserialize the model and its metadata, raising on failure.

    The cache lives on this helper rather than on ``load_model`` so that only
    successful loads are stored: ``st.cache_resource`` caches return values,
    so a swallowed error returned as ``(None, None)`` would otherwise be
    served on every rerun. ``show_spinner`` is disabled because ``main``
    already wraps the load in its own spinner.

    Returns:
        Tuple of (deserialized model, metadata dict parsed from JSON).
    """
    model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)
    metadata_path = hf_hub_download(repo_id=MODEL_REPO, filename=METADATA_FILE)
    # NOTE: joblib.load unpickles the file and can therefore execute arbitrary
    # code; MODEL_REPO must remain a repository the operator trusts.
    model = joblib.load(model_path)
    with open(metadata_path, encoding="utf-8") as f:
        metadata = json.load(f)
    return model, metadata


def load_model() -> tuple[Any, dict] | tuple[None, None]:
    """Load the trained model and metadata from HuggingFace Hub.

    Successful loads are cached for the lifetime of the Streamlit process.
    Failures are reported with ``st.error`` and are not cached, so the next
    rerun retries the download.

    Returns:
        Tuple of (model, metadata), or (None, None) if loading failed.
    """
    try:
        return _load_model_cached()
    except Exception as e:
        st.error(f"Failed to load model: {e}")
        return None, None


# Keep the cache-clearing handle that the previously decorated function exposed.
load_model.clear = _load_model_cached.clear


def engineer_features_batch(df: pd.DataFrame) -> pd.DataFrame:
    """
    Create the 22 engineered features for a batch of sensor readings.

    Args:
        df: DataFrame with columns: Engine RPM, Lub Oil Pressure, Fuel Pressure,
            Coolant Pressure, Lub Oil Temp, Coolant Temp.

    Returns:
        DataFrame with 22 features in the correct order for model input.
    """
    # Small offset added to every denominator so zero readings do not divide
    # by zero.
    eps = 1e-6
    rpm = df["Engine RPM"]
    oil_press = df["Lub Oil Pressure"]
    fuel_press = df["Fuel Pressure"]
    cool_press = df["Coolant Pressure"]
    oil_temp = df["Lub Oil Temp"]
    cool_temp = df["Coolant Temp"]

    # Dict insertion order defines the column order handed to the model.
    features = pd.DataFrame(
        {
            # Base features (6)
            "Engine RPM": rpm,
            "Lub Oil Pressure": oil_press,
            "Fuel Pressure": fuel_press,
            "Coolant Pressure": cool_press,
            "Lub Oil Temp": oil_temp,
            "Coolant Temp": cool_temp,
            # Interaction features (5)
            "RPM_x_OilPressure": rpm * oil_press,
            "RPM_x_FuelPressure": rpm * fuel_press,
            "RPM_x_CoolantPressure": rpm * cool_press,
            "OilTemp_x_OilPressure": oil_temp * oil_press,
            "CoolantTemp_x_CoolantPressure": cool_temp * cool_press,
            # Polynomial features (2)
            "RPM_squared": rpm**2,
            "OilPressure_squared": oil_press**2,
            # Domain features (9)
            "TempDiff": oil_temp - cool_temp,
            "OilFuelPressureRatio": oil_press / (fuel_press + eps),
            "CoolantOilPressureRatio": cool_press / (oil_press + eps),
            "OilHealthIndex": oil_press / (oil_temp + eps),
            "CoolantStress": cool_temp / (cool_press + eps),
            "OilTempPerRPM": oil_temp / (rpm + eps),
            "CoolantTempPerRPM": cool_temp / (rpm + eps),
            "PressureSum": oil_press + fuel_press + cool_press,
            "TempSum": oil_temp + cool_temp,
        }
    )

    return features


def engineer_features(
    rpm: float,
    oil_press: float,
    fuel_press: float,
    cool_press: float,
    oil_temp: float,
    cool_temp: float,
) -> pd.DataFrame:
    """Create the 22 engineered features for a single engine reading.

    Wraps the six readings in a one-row DataFrame and delegates to
    ``engineer_features_batch`` so both paths share one feature definition.
    """
    row = pd.DataFrame(
        [
            {
                "Engine RPM": rpm,
                "Lub Oil Pressure": oil_press,
                "Fuel Pressure": fuel_press,
                "Coolant Pressure": cool_press,
                "Lub Oil Temp": oil_temp,
                "Coolant Temp": cool_temp,
            }
        ]
    )
    return engineer_features_batch(row)


def validate_input(value: float, sensor: str) -> list[str]:
    """Validate sensor input against expected ranges.

    Args:
        value: The reading entered by the user.
        sensor: Key into ``SENSOR_RANGES`` (e.g. ``"engine_rpm"``).

    Returns:
        A list with one warning message if the value lies outside the
        configured min/max for the sensor, otherwise an empty list.
    """
    warnings = []
    config = SENSOR_RANGES[sensor]

    if value < config["min"]:
        warnings.append(
            f"{sensor.replace('_', ' ').title()}: Value {value} is below expected minimum ({config['min']} {config['unit']})"
        )
    elif value > config["max"]:
        warnings.append(
            f"{sensor.replace('_', ' ').title()}: Value {value} is above expected maximum ({config['max']} {config['unit']})"
        )

    return warnings


def predict(model: Any, features: pd.DataFrame, threshold: float) -> dict:
    """Make a prediction using the model for a single observation.

    Args:
        model: Object exposing ``predict_proba``.
        features: Feature frame; only the first row is used.
        threshold: Probability at or above which maintenance is flagged.

    Returns:
        Dict with keys ``prediction`` (0 or 1), ``probability`` (the value in
        column 1 of ``predict_proba``), ``label`` and ``threshold``.
    """
    probability = model.predict_proba(features)[0, 1]
    prediction = 1 if probability >= threshold else 0
    label = "Maintenance Required" if prediction == 1 else "Normal Operation"

    return {
        "prediction": prediction,
        "probability": probability,
        "label": label,
        "threshold": threshold,
    }


def predict_batch(
    model: Any, features: pd.DataFrame, threshold: float
) -> pd.DataFrame:
    """
    Make predictions for a batch of observations.

    Args:
        model: Object exposing ``predict_proba``.
        features: Feature frame with one row per observation.
        threshold: Probability at or above which maintenance is flagged.

    Returns:
        DataFrame with Probability, Prediction, and Status columns.
    """
    probabilities = model.predict_proba(features)[:, 1]
    predictions = (probabilities >= threshold).astype(int)
    statuses = np.where(predictions == 1, "Maintenance Required", "Normal Operation")

    return pd.DataFrame(
        {
            "Probability": probabilities,
            "Prediction": predictions,
            "Status": statuses,
        }
    )


def validate_csv(df: pd.DataFrame) -> tuple[pd.DataFrame, list[str]]:
    """
    Validate and normalize an uploaded CSV for batch prediction.

    Headers are matched against ``COLUMN_ALIASES`` after stripping and
    lowercasing. Rows containing missing, non-numeric or infinite values in
    any required column are dropped, and the result is capped at
    ``MAX_BULK_ROWS`` rows.

    Args:
        df: Raw DataFrame as read from the uploaded file.

    Returns:
        Tuple of (cleaned DataFrame with canonical column names, list of warnings).
        If a required column is missing, the DataFrame is empty and the list
        holds a single "Missing required columns" message.
    """
    warnings = []

    # Normalize column names. str() guards against non-string labels, which
    # would otherwise fail on .strip().
    rename_map = {}
    for col in df.columns:
        key = str(col).strip().lower()
        if key in COLUMN_ALIASES:
            rename_map[col] = COLUMN_ALIASES[key]
    df = df.rename(columns=rename_map)

    # Check for missing required columns
    missing = [c for c in REQUIRED_COLUMNS if c not in df.columns]
    if missing:
        return pd.DataFrame(), [f"Missing required columns: {', '.join(missing)}"]

    # Several headers can normalize to the same canonical name (for example
    # "rpm" and "Engine RPM"). Duplicate labels would make df[col] a DataFrame
    # and break pd.to_numeric, so keep only the first occurrence.
    dup_mask = df.columns.duplicated()
    if dup_mask.any():
        dup_labels = set(df.columns[dup_mask])
        dup_required = [c for c in REQUIRED_COLUMNS if c in dup_labels]
        if dup_required:
            warnings.append(
                f"Duplicate column(s) for {', '.join(dup_required)} after "
                "header normalization; using the first occurrence."
            )
        df = df.loc[:, ~dup_mask]

    # Keep only required columns
    df = df[REQUIRED_COLUMNS].copy()

    # Coerce non-numeric values to NaN
    original_len = len(df)
    for col in REQUIRED_COLUMNS:
        df[col] = pd.to_numeric(df[col], errors="coerce")

    # Infinite readings cannot be scored; treat them like missing values.
    df = df.replace([np.inf, -np.inf], np.nan)

    # Drop rows with NaN values
    df = df.dropna().reset_index(drop=True)
    dropped = original_len - len(df)
    if dropped > 0:
        warnings.append(
            f"{dropped} row(s) dropped due to missing or non-numeric values."
        )

    # Row limit (the count reported is the number of rows left after cleaning)
    if len(df) > MAX_BULK_ROWS:
        warnings.append(
            f"CSV truncated to {MAX_BULK_ROWS:,} rows (uploaded {len(df):,})."
        )
        df = df.head(MAX_BULK_ROWS)

    if len(df) == 0:
        warnings.append("No valid rows remaining after cleaning.")

    return df, warnings


def generate_sample_csv() -> str:
    """Generate a sample CSV template with correct headers and example rows."""
    sample = pd.DataFrame(
        {
            "Engine RPM": [800, 1200, 1500, 600, 1800],
            "Lub Oil Pressure": [3.0, 2.5, 4.1, 1.2, 3.8],
            "Fuel Pressure": [8.0, 6.5, 10.0, 3.0, 9.5],
            "Coolant Pressure": [2.0, 1.8, 2.5, 0.8, 2.2],
            "Lub Oil Temp": [80.0, 95.0, 75.0, 120.0, 85.0],
            "Coolant Temp": [75.0, 88.0, 70.0, 110.0, 80.0],
        }
    )
    return sample.to_csv(index=False)


def color_prediction_rows(row: pd.Series) -> list[str]:
    """Apply row-level styling based on prediction status.

    Returns one CSS string per cell: a red background when the row's Status is
    "Maintenance Required", a green background otherwise.
    """
    if row.get("Status") == "Maintenance Required":
        return ["background-color: #ffcccc"] * len(row)
    return ["background-color: #ccffcc"] * len(row)


def render_sidebar() -> float:
    """Render the sidebar with operating mode selection.

    Returns the active threshold.
    """
    mode_names = list(OPERATING_MODES)

    with st.sidebar:
        st.header("Alert Sensitivity")

        mode = st.radio(
            "Operating Mode",
            options=mode_names,
            index=mode_names.index(DEFAULT_MODE),
            label_visibility="collapsed",
        )

        config = OPERATING_MODES[mode]

        st.metric("Threshold", f"{config['threshold']:.4f}")

        col1, col2 = st.columns(2)
        with col1:
            st.metric("Recall", config["recall"])
        with col2:
            st.metric("Precision", config["precision"])

        st.info(config["description"])

    return config["threshold"]


def render_single_prediction(model: Any, metadata: dict, threshold: float):
    """Render the single engine prediction tab.

    Args:
        model: Object exposing ``predict_proba``.
        metadata: Model metadata; accepted for signature parity with
            ``render_bulk_import`` and not read here.
        threshold: Probability at or above which maintenance is flagged.
    """
    st.subheader("Sensor Readings")

    # The widget limits are deliberately wider than SENSOR_RANGES so that
    # out-of-range readings can be entered and flagged by validate_input.
    col1, col2 = st.columns(2)

    with col1:
        engine_rpm = st.number_input(
            "Engine RPM",
            min_value=0.0,
            max_value=5000.0,
            value=float(SENSOR_RANGES["engine_rpm"]["default"]),
            step=10.0,
            help=SENSOR_HELP["engine_rpm"],
        )
        lub_oil_pressure = st.number_input(
            "Lub Oil Pressure (bar)",
            min_value=0.0,
            max_value=20.0,
            value=float(SENSOR_RANGES["lub_oil_pressure"]["default"]),
            step=0.1,
            help=SENSOR_HELP["lub_oil_pressure"],
        )
        fuel_pressure = st.number_input(
            "Fuel Pressure (bar)",
            min_value=0.0,
            max_value=50.0,
            value=float(SENSOR_RANGES["fuel_pressure"]["default"]),
            step=0.5,
            help=SENSOR_HELP["fuel_pressure"],
        )

    with col2:
        coolant_pressure = st.number_input(
            "Coolant Pressure (bar)",
            min_value=0.0,
            max_value=20.0,
            value=float(SENSOR_RANGES["coolant_pressure"]["default"]),
            step=0.1,
            help=SENSOR_HELP["coolant_pressure"],
        )
        lub_oil_temp = st.number_input(
            "Lub Oil Temp (°C)",
            min_value=0.0,
            max_value=250.0,
            value=float(SENSOR_RANGES["lub_oil_temp"]["default"]),
            step=1.0,
            help=SENSOR_HELP["lub_oil_temp"],
        )
        coolant_temp = st.number_input(
            "Coolant Temp (°C)",
            min_value=0.0,
            max_value=250.0,
            value=float(SENSOR_RANGES["coolant_temp"]["default"]),
            step=1.0,
            help=SENSOR_HELP["coolant_temp"],
        )

    # Validate inputs
    all_warnings = []
    inputs = {
        "engine_rpm": engine_rpm,
        "lub_oil_pressure": lub_oil_pressure,
        "fuel_pressure": fuel_pressure,
        "coolant_pressure": coolant_pressure,
        "lub_oil_temp": lub_oil_temp,
        "coolant_temp": coolant_temp,
    }

    for sensor, value in inputs.items():
        all_warnings.extend(validate_input(value, sensor))

    if all_warnings:
        st.warning("**Input Validation Warnings:**")
        for warning in all_warnings:
            st.caption(f"- {warning}")
        st.caption(
            "*Values outside training data ranges may produce less reliable predictions.*"
        )

    st.divider()

    if st.button(
        "Predict Maintenance Status", type="primary", use_container_width=True
    ):
        try:
            features = engineer_features(
                engine_rpm,
                lub_oil_pressure,
                fuel_pressure,
                coolant_pressure,
                lub_oil_temp,
                coolant_temp,
            )
            result = predict(model, features, threshold)

            st.subheader("Prediction Result")

            if result["prediction"] == 0:
                st.success(f"**{result['label']}**")
                st.markdown(
                    "The engine is operating within normal parameters. "
                    "Continue regular monitoring."
                )
            else:
                st.error(f"**{result['label']}**")
                st.markdown(
                    "The model indicates potential engine issues. "
                    "**Schedule maintenance inspection.**"
                )

            # NOTE(review): "Confidence" shows the maintenance-class
            # probability even when the prediction is "Normal Operation", and
            # duplicates "Raw Probability" in a different format - confirm
            # that this is the intended meaning.
            col1, col2, col3 = st.columns(3)
            with col1:
                st.metric("Confidence", f"{result['probability']:.1%}")
            with col2:
                st.metric("Threshold", f"{result['threshold']:.4f}")
            with col3:
                st.metric("Raw Probability", f"{result['probability']:.4f}")

        except Exception as e:
            st.error(f"Prediction failed: {e}")


def render_bulk_import(model: Any, metadata: dict, threshold: float):
    """Render the bulk CSV import tab.

    Offers a sample template, reads and validates the uploaded CSV, and on
    request scores every valid row and offers the results for download.

    Args:
        model: Object exposing ``predict_proba``.
        metadata: Model metadata; accepted for signature parity with
            ``render_single_prediction`` and not read here.
        threshold: Probability at or above which maintenance is flagged.
    """
    st.markdown(
        "Upload a CSV file with engine sensor readings to predict maintenance "
        "status for an entire fleet."
    )
    st.markdown(
        f"**Required columns:** {', '.join(REQUIRED_COLUMNS)}"
    )

    # Sample CSV download
    sample_csv = generate_sample_csv()
    st.download_button(
        "Download Sample CSV Template",
        data=sample_csv,
        file_name="sensor_template.csv",
        mime="text/csv",
    )

    uploaded_file = st.file_uploader("Upload sensor readings CSV", type=["csv"])

    if uploaded_file is None:
        return

    # Read and validate CSV
    try:
        raw_df = pd.read_csv(uploaded_file)
    except Exception as e:
        st.error(f"Failed to read CSV: {e}")
        return

    if raw_df.empty:
        st.warning("The uploaded CSV file is empty.")
        return

    cleaned_df, warnings = validate_csv(raw_df)

    # Blocking problems are shown as errors, everything else as warnings.
    for w in warnings:
        if "Missing required" in w or "No valid rows" in w:
            st.error(w)
        else:
            st.warning(w)

    if cleaned_df.empty:
        return

    # Data preview
    st.subheader("Data Preview")
    st.dataframe(cleaned_df.head(), use_container_width=True)
    st.caption(f"{len(cleaned_df):,} valid rows loaded.")

    st.divider()

    # NOTE(review): the results below are rendered only on the run in which
    # this button was clicked; presumably a later widget interaction (such as
    # the results download) reruns the script and hides them - verify.
    if st.button(
        "Run Batch Predictions", type="primary", use_container_width=True
    ):
        # Report inference failures the same way the single-prediction tab
        # does instead of surfacing a raw traceback.
        try:
            with st.spinner("Running predictions..."):
                features = engineer_features_batch(cleaned_df)
                results = predict_batch(model, features, threshold)
        except Exception as e:
            st.error(f"Batch prediction failed: {e}")
            return

        # Summary metrics
        st.subheader("Batch Results")
        total = len(results)
        maint_count = int((results["Prediction"] == 1).sum())
        normal_count = total - maint_count

        col1, col2, col3 = st.columns(3)
        with col1:
            st.metric("Total Engines", f"{total:,}")
        with col2:
            st.metric("Maintenance Required", f"{maint_count:,}")
        with col3:
            st.metric("Normal Operation", f"{normal_count:,}")

        # Results table with color coding
        display_df = pd.concat(
            [cleaned_df.reset_index(drop=True), results.reset_index(drop=True)],
            axis=1,
        )
        display_df["Probability"] = display_df["Probability"].round(4)

        styled = display_df.style.apply(color_prediction_rows, axis=1)
        st.dataframe(styled, use_container_width=True)

        # Download results
        csv_buffer = io.StringIO()
        display_df.to_csv(csv_buffer, index=False)
        st.download_button(
            "Download Results CSV",
            data=csv_buffer.getvalue(),
            file_name="maintenance_predictions.csv",
            mime="text/csv",
        )


def main():
    """Main application entry point."""
    st.set_page_config(
        page_title="Predictive Maintenance",
        page_icon="🔧",
        layout="centered",
        initial_sidebar_state="expanded",
    )

    st.title("Engine Predictive Maintenance")
    st.markdown(
        "Predict engine maintenance needs based on sensor readings. "
        "Select an operating mode in the sidebar and use the tabs below."
    )

    # Load model
    with st.spinner("Loading model from HuggingFace..."):
        model, metadata = load_model()

    if model is None:
        st.error(
            "Unable to load the prediction model. "
            "Please try again later or contact support."
        )
        st.stop()

    # Sidebar: operating mode selection
    threshold = render_sidebar()

    # render_sidebar returns only the threshold, so recover the mode name by
    # reverse lookup; fall back to a placeholder rather than raising if no
    # mode matches.
    active_mode = next(
        (
            name
            for name, cfg in OPERATING_MODES.items()
            if cfg["threshold"] == threshold
        ),
        "Unknown",
    )

    # Model info expander
    with st.expander("Model Information"):
        st.markdown(f"**Model**: {metadata.get('model_name', 'Unknown')}")
        st.markdown(f"**Algorithm**: {metadata.get('algorithm', 'Unknown')}")
        st.markdown(
            f"**Test Recall**: {metadata.get('test_metrics', {}).get('recall', 0):.2%}"
        )
        st.markdown(f"**Active Mode**: {active_mode}")
        st.markdown(f"**Decision Threshold**: {threshold:.4f}")

    st.divider()

    # Tabs
    tab1, tab2 = st.tabs(["Single Prediction", "Bulk Import"])

    with tab1:
        render_single_prediction(model, metadata, threshold)

    with tab2:
        render_bulk_import(model, metadata, threshold)

    # Footer
    st.divider()
    st.caption(
        "Built with Streamlit | Model hosted on HuggingFace | "
        "[PGP-AIML Capstone Project](https://github.com/jskswamy/AIML-LearningBytes)"
    )


if __name__ == "__main__":
    main()