# Hugging Face Hub artifact header: uploaded as app.py by jskswamy via
# huggingface_hub (commit 3f1113b, verified). Kept as a comment so the
# file remains valid Python.
"""
Predictive Maintenance Streamlit Application
This application provides a web interface for predicting engine maintenance needs
based on sensor readings. It loads a trained AdaBoost model from HuggingFace and
performs real-time inference on user-provided sensor values.
Features:
- Configurable alert sensitivity with two operating modes
- Single engine prediction with interactive sensor inputs
- Bulk CSV import for fleet-wide batch predictions
"""
import io
import json
from typing import Any
import joblib
import numpy as np
import pandas as pd
import streamlit as st
from huggingface_hub import hf_hub_download
# Configuration: HuggingFace Hub repository and artifact filenames.
MODEL_REPO = "jskswamy/predictive-maintenance-model"
MODEL_FILE = "best_model.joblib"
METADATA_FILE = "model_metadata.json"

# Operating modes with threshold and performance metrics.
# NOTE(review): the thresholds and recall/precision figures look like they
# come from an offline evaluation run -- confirm against the training repo.
OPERATING_MODES = {
    "Optimal -- Maximum Safety": {
        "threshold": 0.3163,
        "recall": "99.8%",
        "precision": "63.2%",
        "description": (
            "Catches virtually all failures. Best for fleets where "
            "breakdown costs are high ($350-$700 roadside + $448/hr downtime)."
        ),
    },
    "Default -- Balanced": {
        "threshold": 0.50,
        "recall": "84.6%",
        "precision": "68.4%",
        "description": (
            "Balanced detection vs false alarms. Best for fleets "
            "where inspection costs are significant."
        ),
    },
}
DEFAULT_MODE = "Optimal -- Maximum Safety"

# Column name aliases for CSV import (maps variations to canonical names).
# Keys are lowercase because validate_csv lowercases headers before matching.
COLUMN_ALIASES = {
    "engine rpm": "Engine RPM",
    "engine_rpm": "Engine RPM",
    "rpm": "Engine RPM",
    "lub oil pressure": "Lub Oil Pressure",
    "lub_oil_pressure": "Lub Oil Pressure",
    "oil pressure": "Lub Oil Pressure",
    "oil_pressure": "Lub Oil Pressure",
    "fuel pressure": "Fuel Pressure",
    "fuel_pressure": "Fuel Pressure",
    "coolant pressure": "Coolant Pressure",
    "coolant_pressure": "Coolant Pressure",
    "lub oil temp": "Lub Oil Temp",
    "lub_oil_temp": "Lub Oil Temp",
    "oil temp": "Lub Oil Temp",
    "oil_temp": "Lub Oil Temp",
    "oil temperature": "Lub Oil Temp",
    "oil_temperature": "Lub Oil Temp",
    "coolant temp": "Coolant Temp",
    "coolant_temp": "Coolant Temp",
    "coolant temperature": "Coolant Temp",
    "coolant_temperature": "Coolant Temp",
}

# Canonical sensor column names the model expects, in order.
REQUIRED_COLUMNS = [
    "Engine RPM",
    "Lub Oil Pressure",
    "Fuel Pressure",
    "Coolant Pressure",
    "Lub Oil Temp",
    "Coolant Temp",
]

# Hard cap on rows processed in a single bulk upload.
MAX_BULK_ROWS = 10_000

# Sensor input ranges (from training data); "default" seeds the input widgets
# and min/max drive the soft out-of-range warnings in validate_input.
SENSOR_RANGES = {
    "engine_rpm": {"min": 0, "max": 2239, "default": 800, "unit": "RPM"},
    "lub_oil_pressure": {"min": 0.0, "max": 7.26, "default": 3.0, "unit": "bar"},
    "fuel_pressure": {"min": 0.0, "max": 21.14, "default": 8.0, "unit": "bar"},
    "coolant_pressure": {"min": 0.0, "max": 7.53, "default": 2.0, "unit": "bar"},
    "lub_oil_temp": {"min": 0.0, "max": 164.35, "default": 80.0, "unit": "°C"},
    "coolant_temp": {"min": 0.0, "max": 194.59, "default": 75.0, "unit": "°C"},
}

# Sensor descriptions for help text shown next to each input widget.
SENSOR_HELP = {
    "engine_rpm": "Engine revolutions per minute. Normal idle: 600-800 RPM. Higher values indicate increased load.",
    "lub_oil_pressure": "Lubrication oil pressure. Low pressure may indicate bearing wear or pump issues.",
    "fuel_pressure": "Fuel system delivery pressure. Erratic values may suggest injector problems.",
    "coolant_pressure": "Cooling system pressure. Low values can indicate leaks or pump failure.",
    "lub_oil_temp": "Lubrication oil temperature. High temperatures accelerate oil breakdown.",
    "coolant_temp": "Engine coolant temperature. Overheating can cause severe engine damage.",
}
@st.cache_resource
def load_model() -> tuple[Any, dict] | tuple[None, None]:
    """Load the trained model and metadata from HuggingFace Hub.

    Cached with st.cache_resource so the download and deserialization run
    once per server process rather than on every Streamlit rerun.

    Returns:
        Tuple of (fitted model, metadata dict), or (None, None) on any
        failure (the error is surfaced in the UI via st.error).
    """
    try:
        # hf_hub_download caches the files locally and returns local paths.
        model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)
        metadata_path = hf_hub_download(repo_id=MODEL_REPO, filename=METADATA_FILE)
        model = joblib.load(model_path)
        with open(metadata_path) as f:
            metadata = json.load(f)
        return model, metadata
    except Exception as e:
        # Broad catch is deliberate at this boundary: network, auth, or
        # deserialization errors should all degrade to a visible UI error
        # (and a (None, None) sentinel) rather than crash the app.
        st.error(f"Failed to load model: {e}")
        return None, None
def engineer_features_batch(df: pd.DataFrame) -> pd.DataFrame:
    """
    Build the 22-column model input from raw sensor readings.

    Args:
        df: DataFrame holding the six raw sensor columns: Engine RPM,
            Lub Oil Pressure, Fuel Pressure, Coolant Pressure,
            Lub Oil Temp, Coolant Temp.

    Returns:
        DataFrame whose 22 columns match the order the model was trained on:
        6 base sensors, 5 interactions, 2 polynomials, 9 domain features.
    """
    # Small constant keeps every ratio finite when a denominator reads zero.
    eps = 1e-6
    rpm = df["Engine RPM"]
    oil_p = df["Lub Oil Pressure"]
    fuel_p = df["Fuel Pressure"]
    cool_p = df["Coolant Pressure"]
    oil_t = df["Lub Oil Temp"]
    cool_t = df["Coolant Temp"]

    # Insertion order of this dict defines the model's column order.
    cols: dict[str, pd.Series] = {}
    # Base sensors (6) -- passed through unchanged.
    cols["Engine RPM"] = rpm
    cols["Lub Oil Pressure"] = oil_p
    cols["Fuel Pressure"] = fuel_p
    cols["Coolant Pressure"] = cool_p
    cols["Lub Oil Temp"] = oil_t
    cols["Coolant Temp"] = cool_t
    # Interaction features (5) -- pairwise products.
    cols["RPM_x_OilPressure"] = rpm * oil_p
    cols["RPM_x_FuelPressure"] = rpm * fuel_p
    cols["RPM_x_CoolantPressure"] = rpm * cool_p
    cols["OilTemp_x_OilPressure"] = oil_t * oil_p
    cols["CoolantTemp_x_CoolantPressure"] = cool_t * cool_p
    # Polynomial features (2).
    cols["RPM_squared"] = rpm**2
    cols["OilPressure_squared"] = oil_p**2
    # Domain features (9) -- differences, ratios, and aggregates; eps guards
    # every division.
    cols["TempDiff"] = oil_t - cool_t
    cols["OilFuelPressureRatio"] = oil_p / (fuel_p + eps)
    cols["CoolantOilPressureRatio"] = cool_p / (oil_p + eps)
    cols["OilHealthIndex"] = oil_p / (oil_t + eps)
    cols["CoolantStress"] = cool_t / (cool_p + eps)
    cols["OilTempPerRPM"] = oil_t / (rpm + eps)
    cols["CoolantTempPerRPM"] = cool_t / (rpm + eps)
    cols["PressureSum"] = oil_p + fuel_p + cool_p
    cols["TempSum"] = oil_t + cool_t
    return pd.DataFrame(cols)
def engineer_features(
    rpm: float,
    oil_press: float,
    fuel_press: float,
    cool_press: float,
    oil_temp: float,
    cool_temp: float,
) -> pd.DataFrame:
    """Create the 22 engineered features for a single engine reading.

    Thin wrapper: packs the six readings into a one-row DataFrame and
    delegates to engineer_features_batch.
    """
    reading = {
        "Engine RPM": rpm,
        "Lub Oil Pressure": oil_press,
        "Fuel Pressure": fuel_press,
        "Coolant Pressure": cool_press,
        "Lub Oil Temp": oil_temp,
        "Coolant Temp": cool_temp,
    }
    single_row = pd.DataFrame([reading])
    return engineer_features_batch(single_row)
def validate_input(value: float, sensor: str) -> list[str]:
    """Check one sensor value against its training-data range.

    Args:
        value: The user-supplied reading.
        sensor: Key into SENSOR_RANGES (e.g. "engine_rpm").

    Returns:
        A list with one warning string if the value falls outside the
        expected [min, max] range, otherwise an empty list.
    """
    bounds = SENSOR_RANGES[sensor]
    label = sensor.replace("_", " ").title()
    issues: list[str] = []
    if value < bounds["min"]:
        issues.append(
            f"{label}: Value {value} is below expected minimum ({bounds['min']} {bounds['unit']})"
        )
    elif value > bounds["max"]:
        issues.append(
            f"{label}: Value {value} is above expected maximum ({bounds['max']} {bounds['unit']})"
        )
    return issues
def predict(model: Any, features: pd.DataFrame, threshold: float) -> dict:
    """Score a single engineered-feature row and apply the decision threshold.

    Args:
        model: Fitted classifier exposing predict_proba.
        features: One-row DataFrame of engineered features.
        threshold: Probability cutoff; probabilities at or above it are
            classified as needing maintenance.

    Returns:
        Dict with keys: prediction (0/1), probability (positive-class
        probability), label (human-readable status), threshold.
    """
    # Column 1 of predict_proba is the positive (maintenance) class.
    positive_prob = model.predict_proba(features)[0, 1]
    needs_maintenance = positive_prob >= threshold
    return {
        "prediction": 1 if needs_maintenance else 0,
        "probability": positive_prob,
        "label": "Maintenance Required" if needs_maintenance else "Normal Operation",
        "threshold": threshold,
    }
def predict_batch(
    model: Any, features: pd.DataFrame, threshold: float
) -> pd.DataFrame:
    """Score many engineered-feature rows at once.

    Args:
        model: Fitted classifier exposing predict_proba.
        features: DataFrame of engineered features, one row per engine.
        threshold: Probability cutoff for the positive class.

    Returns:
        DataFrame with Probability, Prediction (0/1), and Status columns,
        row-aligned with the input.
    """
    # Positive-class probabilities for every row in one vectorized call.
    probs = model.predict_proba(features)[:, 1]
    result = pd.DataFrame({"Probability": probs})
    result["Prediction"] = (probs >= threshold).astype(int)
    result["Status"] = np.where(
        result["Prediction"] == 1, "Maintenance Required", "Normal Operation"
    )
    return result
def validate_csv(df: pd.DataFrame) -> tuple[pd.DataFrame, list[str]]:
    """
    Validate and normalize an uploaded CSV for batch prediction.

    Steps: map alias headers to canonical names, collapse duplicate columns,
    verify all required columns are present, coerce values to numeric, drop
    incomplete rows, and enforce the bulk row limit.

    Args:
        df: Raw DataFrame as read from the uploaded CSV.

    Returns:
        Tuple of (cleaned DataFrame with canonical column names, list of
        human-readable warnings). The DataFrame is empty when required
        columns are missing or no valid rows remain.
    """
    warnings = []
    # Normalize column names: lowercase for case-insensitive alias matching.
    rename_map = {}
    for col in df.columns:
        key = col.strip().lower()
        if key in COLUMN_ALIASES:
            rename_map[col] = COLUMN_ALIASES[key]
        elif col in REQUIRED_COLUMNS:
            rename_map[col] = col
    df = df.rename(columns=rename_map)
    # A CSV may carry two alias spellings of the same sensor (e.g. both
    # "Oil Pressure" and "Lub Oil Pressure"); after renaming they collide.
    # Keep only the first occurrence so column selection below yields a
    # Series, not a DataFrame (which would break pd.to_numeric).
    if df.columns.duplicated().any():
        warnings.append(
            "Duplicate sensor columns detected after header normalization; "
            "keeping the first occurrence of each."
        )
        df = df.loc[:, ~df.columns.duplicated()]
    # Check for missing required columns
    missing = [c for c in REQUIRED_COLUMNS if c not in df.columns]
    if missing:
        return pd.DataFrame(), [f"Missing required columns: {', '.join(missing)}"]
    # Keep only required columns, in canonical model order
    df = df[REQUIRED_COLUMNS].copy()
    # Coerce non-numeric values to NaN so they are dropped uniformly below
    original_len = len(df)
    for col in REQUIRED_COLUMNS:
        df[col] = pd.to_numeric(df[col], errors="coerce")
    # Drop rows with NaN values
    df = df.dropna().reset_index(drop=True)
    dropped = original_len - len(df)
    if dropped > 0:
        warnings.append(
            f"{dropped} row(s) dropped due to missing or non-numeric values."
        )
    # Row limit: cap the batch to bound inference time
    if len(df) > MAX_BULK_ROWS:
        warnings.append(
            f"CSV truncated to {MAX_BULK_ROWS:,} rows (uploaded {len(df):,})."
        )
        df = df.head(MAX_BULK_ROWS)
    if len(df) == 0:
        warnings.append("No valid rows remaining after cleaning.")
    return df, warnings
def generate_sample_csv() -> str:
    """Generate a sample CSV template with correct headers and example rows."""
    # Five representative readings spanning idle, cruise, and stressed states.
    example_rows = [
        (800, 3.0, 8.0, 2.0, 80.0, 75.0),
        (1200, 2.5, 6.5, 1.8, 95.0, 88.0),
        (1500, 4.1, 10.0, 2.5, 75.0, 70.0),
        (600, 1.2, 3.0, 0.8, 120.0, 110.0),
        (1800, 3.8, 9.5, 2.2, 85.0, 80.0),
    ]
    header = [
        "Engine RPM",
        "Lub Oil Pressure",
        "Fuel Pressure",
        "Coolant Pressure",
        "Lub Oil Temp",
        "Coolant Temp",
    ]
    template = pd.DataFrame(example_rows, columns=header)
    return template.to_csv(index=False)
def color_prediction_rows(row: pd.Series) -> list[str]:
    """Return a CSS background per cell: red for flagged rows, green otherwise.

    Intended for pd.DataFrame.style.apply(..., axis=1); rows without a
    "Status" key fall through to green.
    """
    flagged = row.get("Status") == "Maintenance Required"
    shade = "#ffcccc" if flagged else "#ccffcc"
    return [f"background-color: {shade}"] * len(row)
def render_sidebar() -> float:
    """Render the sidebar with operating mode selection.

    Returns:
        The decision threshold of the currently selected operating mode.
    """
    with st.sidebar:
        st.header("Alert Sensitivity")
        mode = st.radio(
            "Operating Mode",
            options=list(OPERATING_MODES.keys()),
            # Pre-select DEFAULT_MODE on first render.
            index=list(OPERATING_MODES.keys()).index(DEFAULT_MODE),
            # The header above already labels this control.
            label_visibility="collapsed",
        )
        config = OPERATING_MODES[mode]
        st.metric("Threshold", f"{config['threshold']:.4f}")
        # Recall/precision shown side by side for the chosen mode.
        col1, col2 = st.columns(2)
        with col1:
            st.metric("Recall", config["recall"])
        with col2:
            st.metric("Precision", config["precision"])
        st.info(config["description"])
    return config["threshold"]
def render_single_prediction(model: Any, metadata: dict, threshold: float):
    """Render the single engine prediction tab.

    Args:
        model: Fitted classifier exposing predict_proba.
        metadata: Model metadata dict (unused in this tab; kept for signature
            symmetry with render_bulk_import).
        threshold: Decision threshold from the active operating mode.
    """
    st.subheader("Sensor Readings")
    # Two-column layout: three sensor inputs per side.
    col1, col2 = st.columns(2)
    with col1:
        # Widget bounds are deliberately wider than the training ranges;
        # out-of-range values trigger soft warnings below, not hard blocks.
        engine_rpm = st.number_input(
            "Engine RPM",
            min_value=0.0,
            max_value=5000.0,
            value=float(SENSOR_RANGES["engine_rpm"]["default"]),
            step=10.0,
            help=SENSOR_HELP["engine_rpm"],
        )
        lub_oil_pressure = st.number_input(
            "Lub Oil Pressure (bar)",
            min_value=0.0,
            max_value=20.0,
            value=float(SENSOR_RANGES["lub_oil_pressure"]["default"]),
            step=0.1,
            help=SENSOR_HELP["lub_oil_pressure"],
        )
        fuel_pressure = st.number_input(
            "Fuel Pressure (bar)",
            min_value=0.0,
            max_value=50.0,
            value=float(SENSOR_RANGES["fuel_pressure"]["default"]),
            step=0.5,
            help=SENSOR_HELP["fuel_pressure"],
        )
    with col2:
        coolant_pressure = st.number_input(
            "Coolant Pressure (bar)",
            min_value=0.0,
            max_value=20.0,
            value=float(SENSOR_RANGES["coolant_pressure"]["default"]),
            step=0.1,
            help=SENSOR_HELP["coolant_pressure"],
        )
        lub_oil_temp = st.number_input(
            "Lub Oil Temp (°C)",
            min_value=0.0,
            max_value=250.0,
            value=float(SENSOR_RANGES["lub_oil_temp"]["default"]),
            step=1.0,
            help=SENSOR_HELP["lub_oil_temp"],
        )
        coolant_temp = st.number_input(
            "Coolant Temp (°C)",
            min_value=0.0,
            max_value=250.0,
            value=float(SENSOR_RANGES["coolant_temp"]["default"]),
            step=1.0,
            help=SENSOR_HELP["coolant_temp"],
        )
    # Validate inputs against training-data ranges; warnings are advisory.
    all_warnings = []
    inputs = {
        "engine_rpm": engine_rpm,
        "lub_oil_pressure": lub_oil_pressure,
        "fuel_pressure": fuel_pressure,
        "coolant_pressure": coolant_pressure,
        "lub_oil_temp": lub_oil_temp,
        "coolant_temp": coolant_temp,
    }
    for sensor, value in inputs.items():
        all_warnings.extend(validate_input(value, sensor))
    if all_warnings:
        st.warning("**Input Validation Warnings:**")
        for warning in all_warnings:
            st.caption(f"- {warning}")
        st.caption(
            "*Values outside training data ranges may produce less reliable predictions.*"
        )
    st.divider()
    # Inference is triggered explicitly by the button; it is not re-run on
    # every widget change.
    if st.button(
        "Predict Maintenance Status", type="primary", use_container_width=True
    ):
        try:
            features = engineer_features(
                engine_rpm,
                lub_oil_pressure,
                fuel_pressure,
                coolant_pressure,
                lub_oil_temp,
                coolant_temp,
            )
            result = predict(model, features, threshold)
            st.subheader("Prediction Result")
            if result["prediction"] == 0:
                st.success(f"**{result['label']}**")
                st.markdown(
                    "The engine is operating within normal parameters. "
                    "Continue regular monitoring."
                )
            else:
                st.error(f"**{result['label']}**")
                st.markdown(
                    "The model indicates potential engine issues. "
                    "**Schedule maintenance inspection.**"
                )
            # Metrics shown for both outcomes.
            col1, col2, col3 = st.columns(3)
            with col1:
                st.metric("Confidence", f"{result['probability']:.1%}")
            with col2:
                st.metric("Threshold", f"{result['threshold']:.4f}")
            with col3:
                st.metric("Raw Probability", f"{result['probability']:.4f}")
        except Exception as e:
            # Surface inference failures in the UI rather than crashing.
            st.error(f"Prediction failed: {e}")
def render_bulk_import(model: Any, metadata: dict, threshold: float):
    """Render the bulk CSV import tab.

    Args:
        model: Fitted classifier exposing predict_proba.
        metadata: Model metadata dict (unused in this tab; kept for signature
            symmetry with render_single_prediction).
        threshold: Decision threshold from the active operating mode.
    """
    st.markdown(
        "Upload a CSV file with engine sensor readings to predict maintenance "
        "status for an entire fleet."
    )
    st.markdown(
        f"**Required columns:** {', '.join(REQUIRED_COLUMNS)}"
    )
    # Sample CSV download so users can see the expected header format.
    sample_csv = generate_sample_csv()
    st.download_button(
        "Download Sample CSV Template",
        data=sample_csv,
        file_name="sensor_template.csv",
        mime="text/csv",
    )
    uploaded_file = st.file_uploader("Upload sensor readings CSV", type=["csv"])
    if uploaded_file is None:
        # Nothing uploaded yet; render nothing further.
        return
    # Read and validate CSV; parse failures end the tab with an error.
    try:
        raw_df = pd.read_csv(uploaded_file)
    except Exception as e:
        st.error(f"Failed to read CSV: {e}")
        return
    if raw_df.empty:
        st.warning("The uploaded CSV file is empty.")
        return
    cleaned_df, warnings = validate_csv(raw_df)
    # Fatal issues (missing columns, no rows) render as errors; the rest as
    # non-blocking warnings.
    for w in warnings:
        if "Missing required" in w or "No valid rows" in w:
            st.error(w)
        else:
            st.warning(w)
    if cleaned_df.empty:
        return
    # Data preview before the user commits to running inference.
    st.subheader("Data Preview")
    st.dataframe(cleaned_df.head(), use_container_width=True)
    st.caption(f"{len(cleaned_df):,} valid rows loaded.")
    st.divider()
    if st.button(
        "Run Batch Predictions", type="primary", use_container_width=True
    ):
        with st.spinner("Running predictions..."):
            features = engineer_features_batch(cleaned_df)
            results = predict_batch(model, features, threshold)
        # Summary metrics for the whole fleet.
        st.subheader("Batch Results")
        total = len(results)
        maint_count = int((results["Prediction"] == 1).sum())
        normal_count = total - maint_count
        col1, col2, col3 = st.columns(3)
        with col1:
            st.metric("Total Engines", f"{total:,}")
        with col2:
            st.metric("Maintenance Required", f"{maint_count:,}")
        with col3:
            st.metric("Normal Operation", f"{normal_count:,}")
        # Results table: inputs side by side with predictions, color coded.
        display_df = pd.concat(
            [cleaned_df.reset_index(drop=True), results.reset_index(drop=True)],
            axis=1,
        )
        display_df["Probability"] = display_df["Probability"].round(4)
        styled = display_df.style.apply(color_prediction_rows, axis=1)
        st.dataframe(styled, use_container_width=True)
        # Download results as CSV (unstyled values).
        csv_buffer = io.StringIO()
        display_df.to_csv(csv_buffer, index=False)
        st.download_button(
            "Download Results CSV",
            data=csv_buffer.getvalue(),
            file_name="maintenance_predictions.csv",
            mime="text/csv",
        )
def main():
    """Main application entry point: page config, model load, sidebar, tabs."""
    # set_page_config must be the first Streamlit call on the page.
    st.set_page_config(
        page_title="Predictive Maintenance",
        page_icon="🔧",
        layout="centered",
        initial_sidebar_state="expanded",
    )
    st.title("Engine Predictive Maintenance")
    st.markdown(
        "Predict engine maintenance needs based on sensor readings. "
        "Select an operating mode in the sidebar and use the tabs below."
    )
    # Load model (cached across reruns by load_model's st.cache_resource).
    with st.spinner("Loading model from HuggingFace..."):
        model, metadata = load_model()
    if model is None:
        # load_model already showed the underlying error; stop the script.
        st.error(
            "Unable to load the prediction model. "
            "Please try again later or contact support."
        )
        st.stop()
    # Sidebar: operating mode selection drives the decision threshold.
    threshold = render_sidebar()
    # Reverse-lookup the mode name from its threshold; assumes thresholds in
    # OPERATING_MODES are unique (true for the two modes defined above).
    active_mode = [
        name
        for name, cfg in OPERATING_MODES.items()
        if cfg["threshold"] == threshold
    ][0]
    with st.expander("Model Information"):
        st.markdown(f"**Model**: {metadata.get('model_name', 'Unknown')}")
        st.markdown(f"**Algorithm**: {metadata.get('algorithm', 'Unknown')}")
        st.markdown(
            f"**Test Recall**: {metadata.get('test_metrics', {}).get('recall', 0):.2%}"
        )
        st.markdown(f"**Active Mode**: {active_mode}")
        st.markdown(f"**Decision Threshold**: {threshold:.4f}")
    st.divider()
    # Tabs: single engine vs fleet-wide CSV predictions.
    tab1, tab2 = st.tabs(["Single Prediction", "Bulk Import"])
    with tab1:
        render_single_prediction(model, metadata, threshold)
    with tab2:
        render_bulk_import(model, metadata, threshold)
    # Footer
    st.divider()
    st.caption(
        "Built with Streamlit | Model hosted on HuggingFace | "
        "[PGP-AIML Capstone Project](https://github.com/jskswamy/AIML-LearningBytes)"
    )
# Standard script entry guard: run the app only when executed directly.
if __name__ == "__main__":
    main()