# Hugging Face Hub artifact header: uploaded as app.py by jskswamy via
# huggingface_hub (commit 3f1113b, verified). Kept as a comment so the
# file remains valid Python.
"""
Predictive Maintenance Streamlit Application
This application provides a web interface for predicting engine maintenance needs
based on sensor readings. It loads a trained AdaBoost model from HuggingFace and
performs real-time inference on user-provided sensor values.
Features:
- Configurable alert sensitivity with two operating modes
- Single engine prediction with interactive sensor inputs
- Bulk CSV import for fleet-wide batch predictions
"""
import io
import json
from typing import Any
import joblib
import numpy as np
import pandas as pd
import streamlit as st
from huggingface_hub import hf_hub_download
# Configuration: HuggingFace Hub repository and artifact filenames.
MODEL_REPO = "jskswamy/predictive-maintenance-model"
MODEL_FILE = "best_model.joblib"
METADATA_FILE = "model_metadata.json"

# Operating modes with threshold and performance metrics.
# NOTE(review): the thresholds and recall/precision figures look like they
# come from an offline evaluation run -- confirm against the training repo.
OPERATING_MODES = {
    "Optimal -- Maximum Safety": {
        "threshold": 0.3163,
        "recall": "99.8%",
        "precision": "63.2%",
        "description": (
            "Catches virtually all failures. Best for fleets where "
            "breakdown costs are high ($350-$700 roadside + $448/hr downtime)."
        ),
    },
    "Default -- Balanced": {
        "threshold": 0.50,
        "recall": "84.6%",
        "precision": "68.4%",
        "description": (
            "Balanced detection vs false alarms. Best for fleets "
            "where inspection costs are significant."
        ),
    },
}
DEFAULT_MODE = "Optimal -- Maximum Safety"

# Column name aliases for CSV import (maps variations to canonical names).
# Keys are lowercase because validate_csv lowercases headers before matching.
COLUMN_ALIASES = {
    "engine rpm": "Engine RPM",
    "engine_rpm": "Engine RPM",
    "rpm": "Engine RPM",
    "lub oil pressure": "Lub Oil Pressure",
    "lub_oil_pressure": "Lub Oil Pressure",
    "oil pressure": "Lub Oil Pressure",
    "oil_pressure": "Lub Oil Pressure",
    "fuel pressure": "Fuel Pressure",
    "fuel_pressure": "Fuel Pressure",
    "coolant pressure": "Coolant Pressure",
    "coolant_pressure": "Coolant Pressure",
    "lub oil temp": "Lub Oil Temp",
    "lub_oil_temp": "Lub Oil Temp",
    "oil temp": "Lub Oil Temp",
    "oil_temp": "Lub Oil Temp",
    "oil temperature": "Lub Oil Temp",
    "oil_temperature": "Lub Oil Temp",
    "coolant temp": "Coolant Temp",
    "coolant_temp": "Coolant Temp",
    "coolant temperature": "Coolant Temp",
    "coolant_temperature": "Coolant Temp",
}

# Canonical sensor column names the model expects, in order.
REQUIRED_COLUMNS = [
    "Engine RPM",
    "Lub Oil Pressure",
    "Fuel Pressure",
    "Coolant Pressure",
    "Lub Oil Temp",
    "Coolant Temp",
]

# Hard cap on rows processed in a single bulk upload.
MAX_BULK_ROWS = 10_000

# Sensor input ranges (from training data); "default" seeds the input widgets
# and min/max drive the soft out-of-range warnings in validate_input.
SENSOR_RANGES = {
    "engine_rpm": {"min": 0, "max": 2239, "default": 800, "unit": "RPM"},
    "lub_oil_pressure": {"min": 0.0, "max": 7.26, "default": 3.0, "unit": "bar"},
    "fuel_pressure": {"min": 0.0, "max": 21.14, "default": 8.0, "unit": "bar"},
    "coolant_pressure": {"min": 0.0, "max": 7.53, "default": 2.0, "unit": "bar"},
    "lub_oil_temp": {"min": 0.0, "max": 164.35, "default": 80.0, "unit": "°C"},
    "coolant_temp": {"min": 0.0, "max": 194.59, "default": 75.0, "unit": "°C"},
}

# Sensor descriptions for help text shown next to each input widget.
SENSOR_HELP = {
    "engine_rpm": "Engine revolutions per minute. Normal idle: 600-800 RPM. Higher values indicate increased load.",
    "lub_oil_pressure": "Lubrication oil pressure. Low pressure may indicate bearing wear or pump issues.",
    "fuel_pressure": "Fuel system delivery pressure. Erratic values may suggest injector problems.",
    "coolant_pressure": "Cooling system pressure. Low values can indicate leaks or pump failure.",
    "lub_oil_temp": "Lubrication oil temperature. High temperatures accelerate oil breakdown.",
    "coolant_temp": "Engine coolant temperature. Overheating can cause severe engine damage.",
}
@st.cache_resource
def load_model() -> tuple[Any, dict] | tuple[None, None]:
    """Load the trained model and metadata from HuggingFace Hub.

    Cached with st.cache_resource so the download and deserialization run
    once per server process rather than on every Streamlit rerun.

    Returns:
        Tuple of (fitted model, metadata dict), or (None, None) on any
        failure (the error is surfaced in the UI via st.error).
    """
    try:
        # hf_hub_download caches the files locally and returns local paths.
        model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)
        metadata_path = hf_hub_download(repo_id=MODEL_REPO, filename=METADATA_FILE)
        model = joblib.load(model_path)
        with open(metadata_path) as f:
            metadata = json.load(f)
        return model, metadata
    except Exception as e:
        # Broad catch is deliberate at this boundary: network, auth, or
        # deserialization errors should all degrade to a visible UI error
        # (and a (None, None) sentinel) rather than crash the app.
        st.error(f"Failed to load model: {e}")
        return None, None
def engineer_features_batch(df: pd.DataFrame) -> pd.DataFrame:
    """
    Build the 22-column model input from raw sensor readings.

    Args:
        df: DataFrame holding the six raw sensor columns: Engine RPM,
            Lub Oil Pressure, Fuel Pressure, Coolant Pressure,
            Lub Oil Temp, Coolant Temp.

    Returns:
        DataFrame whose 22 columns match the order the model was trained on:
        6 base sensors, 5 interactions, 2 polynomials, 9 domain features.
    """
    # Small constant keeps every ratio finite when a denominator reads zero.
    eps = 1e-6
    rpm = df["Engine RPM"]
    oil_p = df["Lub Oil Pressure"]
    fuel_p = df["Fuel Pressure"]
    cool_p = df["Coolant Pressure"]
    oil_t = df["Lub Oil Temp"]
    cool_t = df["Coolant Temp"]

    # Insertion order of this dict defines the model's column order.
    cols: dict[str, pd.Series] = {}
    # Base sensors (6) -- passed through unchanged.
    cols["Engine RPM"] = rpm
    cols["Lub Oil Pressure"] = oil_p
    cols["Fuel Pressure"] = fuel_p
    cols["Coolant Pressure"] = cool_p
    cols["Lub Oil Temp"] = oil_t
    cols["Coolant Temp"] = cool_t
    # Interaction features (5) -- pairwise products.
    cols["RPM_x_OilPressure"] = rpm * oil_p
    cols["RPM_x_FuelPressure"] = rpm * fuel_p
    cols["RPM_x_CoolantPressure"] = rpm * cool_p
    cols["OilTemp_x_OilPressure"] = oil_t * oil_p
    cols["CoolantTemp_x_CoolantPressure"] = cool_t * cool_p
    # Polynomial features (2).
    cols["RPM_squared"] = rpm**2
    cols["OilPressure_squared"] = oil_p**2
    # Domain features (9) -- differences, ratios, and aggregates; eps guards
    # every division.
    cols["TempDiff"] = oil_t - cool_t
    cols["OilFuelPressureRatio"] = oil_p / (fuel_p + eps)
    cols["CoolantOilPressureRatio"] = cool_p / (oil_p + eps)
    cols["OilHealthIndex"] = oil_p / (oil_t + eps)
    cols["CoolantStress"] = cool_t / (cool_p + eps)
    cols["OilTempPerRPM"] = oil_t / (rpm + eps)
    cols["CoolantTempPerRPM"] = cool_t / (rpm + eps)
    cols["PressureSum"] = oil_p + fuel_p + cool_p
    cols["TempSum"] = oil_t + cool_t
    return pd.DataFrame(cols)
def engineer_features(
    rpm: float,
    oil_press: float,
    fuel_press: float,
    cool_press: float,
    oil_temp: float,
    cool_temp: float,
) -> pd.DataFrame:
    """Create the 22 engineered features for a single engine reading.

    Thin wrapper: packs the six readings into a one-row DataFrame and
    delegates to engineer_features_batch.
    """
    reading = {
        "Engine RPM": rpm,
        "Lub Oil Pressure": oil_press,
        "Fuel Pressure": fuel_press,
        "Coolant Pressure": cool_press,
        "Lub Oil Temp": oil_temp,
        "Coolant Temp": cool_temp,
    }
    single_row = pd.DataFrame([reading])
    return engineer_features_batch(single_row)
def validate_input(value: float, sensor: str) -> list[str]:
    """Check one sensor value against its training-data range.

    Args:
        value: The user-supplied reading.
        sensor: Key into SENSOR_RANGES (e.g. "engine_rpm").

    Returns:
        A list with one warning string if the value falls outside the
        expected [min, max] range, otherwise an empty list.
    """
    bounds = SENSOR_RANGES[sensor]
    label = sensor.replace("_", " ").title()
    issues: list[str] = []
    if value < bounds["min"]:
        issues.append(
            f"{label}: Value {value} is below expected minimum ({bounds['min']} {bounds['unit']})"
        )
    elif value > bounds["max"]:
        issues.append(
            f"{label}: Value {value} is above expected maximum ({bounds['max']} {bounds['unit']})"
        )
    return issues
def predict(model: Any, features: pd.DataFrame, threshold: float) -> dict:
    """Score a single engineered-feature row and apply the decision threshold.

    Args:
        model: Fitted classifier exposing predict_proba.
        features: One-row DataFrame of engineered features.
        threshold: Probability cutoff; probabilities at or above it are
            classified as needing maintenance.

    Returns:
        Dict with keys: prediction (0/1), probability (positive-class
        probability), label (human-readable status), threshold.
    """
    # Column 1 of predict_proba is the positive (maintenance) class.
    positive_prob = model.predict_proba(features)[0, 1]
    needs_maintenance = positive_prob >= threshold
    return {
        "prediction": 1 if needs_maintenance else 0,
        "probability": positive_prob,
        "label": "Maintenance Required" if needs_maintenance else "Normal Operation",
        "threshold": threshold,
    }
def predict_batch(
    model: Any, features: pd.DataFrame, threshold: float
) -> pd.DataFrame:
    """Score many engineered-feature rows at once.

    Args:
        model: Fitted classifier exposing predict_proba.
        features: DataFrame of engineered features, one row per engine.
        threshold: Probability cutoff for the positive class.

    Returns:
        DataFrame with Probability, Prediction (0/1), and Status columns,
        row-aligned with the input.
    """
    # Positive-class probabilities for every row in one vectorized call.
    probs = model.predict_proba(features)[:, 1]
    result = pd.DataFrame({"Probability": probs})
    result["Prediction"] = (probs >= threshold).astype(int)
    result["Status"] = np.where(
        result["Prediction"] == 1, "Maintenance Required", "Normal Operation"
    )
    return result
def validate_csv(df: pd.DataFrame) -> tuple[pd.DataFrame, list[str]]:
    """
    Validate and normalize an uploaded CSV for batch prediction.

    Steps: map alias headers to canonical names, collapse duplicate columns,
    verify all required columns are present, coerce values to numeric, drop
    incomplete rows, and enforce the bulk row limit.

    Args:
        df: Raw DataFrame as read from the uploaded CSV.

    Returns:
        Tuple of (cleaned DataFrame with canonical column names, list of
        human-readable warnings). The DataFrame is empty when required
        columns are missing or no valid rows remain.
    """
    warnings = []
    # Normalize column names: lowercase for case-insensitive alias matching.
    rename_map = {}
    for col in df.columns:
        key = col.strip().lower()
        if key in COLUMN_ALIASES:
            rename_map[col] = COLUMN_ALIASES[key]
        elif col in REQUIRED_COLUMNS:
            rename_map[col] = col
    df = df.rename(columns=rename_map)
    # A CSV may carry two alias spellings of the same sensor (e.g. both
    # "Oil Pressure" and "Lub Oil Pressure"); after renaming they collide.
    # Keep only the first occurrence so column selection below yields a
    # Series, not a DataFrame (which would break pd.to_numeric).
    if df.columns.duplicated().any():
        warnings.append(
            "Duplicate sensor columns detected after header normalization; "
            "keeping the first occurrence of each."
        )
        df = df.loc[:, ~df.columns.duplicated()]
    # Check for missing required columns
    missing = [c for c in REQUIRED_COLUMNS if c not in df.columns]
    if missing:
        return pd.DataFrame(), [f"Missing required columns: {', '.join(missing)}"]
    # Keep only required columns, in canonical model order
    df = df[REQUIRED_COLUMNS].copy()
    # Coerce non-numeric values to NaN so they are dropped uniformly below
    original_len = len(df)
    for col in REQUIRED_COLUMNS:
        df[col] = pd.to_numeric(df[col], errors="coerce")
    # Drop rows with NaN values
    df = df.dropna().reset_index(drop=True)
    dropped = original_len - len(df)
    if dropped > 0:
        warnings.append(
            f"{dropped} row(s) dropped due to missing or non-numeric values."
        )
    # Row limit: cap the batch to bound inference time
    if len(df) > MAX_BULK_ROWS:
        warnings.append(
            f"CSV truncated to {MAX_BULK_ROWS:,} rows (uploaded {len(df):,})."
        )
        df = df.head(MAX_BULK_ROWS)
    if len(df) == 0:
        warnings.append("No valid rows remaining after cleaning.")
    return df, warnings
def generate_sample_csv() -> str:
    """Generate a sample CSV template with correct headers and example rows."""
    # Five representative readings spanning idle, cruise, and stressed states.
    example_rows = [
        (800, 3.0, 8.0, 2.0, 80.0, 75.0),
        (1200, 2.5, 6.5, 1.8, 95.0, 88.0),
        (1500, 4.1, 10.0, 2.5, 75.0, 70.0),
        (600, 1.2, 3.0, 0.8, 120.0, 110.0),
        (1800, 3.8, 9.5, 2.2, 85.0, 80.0),
    ]
    header = [
        "Engine RPM",
        "Lub Oil Pressure",
        "Fuel Pressure",
        "Coolant Pressure",
        "Lub Oil Temp",
        "Coolant Temp",
    ]
    template = pd.DataFrame(example_rows, columns=header)
    return template.to_csv(index=False)
def color_prediction_rows(row: pd.Series) -> list[str]:
    """Return a CSS background per cell: red for flagged rows, green otherwise.

    Intended for pd.DataFrame.style.apply(..., axis=1); rows without a
    "Status" key fall through to green.
    """
    flagged = row.get("Status") == "Maintenance Required"
    shade = "#ffcccc" if flagged else "#ccffcc"
    return [f"background-color: {shade}"] * len(row)
def render_sidebar() -> float:
    """Render the sidebar with operating mode selection.

    Returns:
        The decision threshold of the currently selected operating mode.
    """
    with st.sidebar:
        st.header("Alert Sensitivity")
        mode = st.radio(
            "Operating Mode",
            options=list(OPERATING_MODES.keys()),
            # Pre-select DEFAULT_MODE on first render.
            index=list(OPERATING_MODES.keys()).index(DEFAULT_MODE),
            # The header above already labels this control.
            label_visibility="collapsed",
        )
        config = OPERATING_MODES[mode]
        st.metric("Threshold", f"{config['threshold']:.4f}")
        # Recall/precision shown side by side for the chosen mode.
        col1, col2 = st.columns(2)
        with col1:
            st.metric("Recall", config["recall"])
        with col2:
            st.metric("Precision", config["precision"])
        st.info(config["description"])
    return config["threshold"]
def render_single_prediction(model: Any, metadata: dict, threshold: float):
    """Render the single engine prediction tab.

    Args:
        model: Fitted classifier exposing predict_proba.
        metadata: Model metadata dict (unused in this tab; kept for signature
            symmetry with render_bulk_import).
        threshold: Decision threshold from the active operating mode.
    """
    st.subheader("Sensor Readings")
    # Two-column layout: three sensor inputs per side.
    col1, col2 = st.columns(2)
    with col1:
        # Widget bounds are deliberately wider than the training ranges;
        # out-of-range values trigger soft warnings below, not hard blocks.
        engine_rpm = st.number_input(
            "Engine RPM",
            min_value=0.0,
            max_value=5000.0,
            value=float(SENSOR_RANGES["engine_rpm"]["default"]),
            step=10.0,
            help=SENSOR_HELP["engine_rpm"],
        )
        lub_oil_pressure = st.number_input(
            "Lub Oil Pressure (bar)",
            min_value=0.0,
            max_value=20.0,
            value=float(SENSOR_RANGES["lub_oil_pressure"]["default"]),
            step=0.1,
            help=SENSOR_HELP["lub_oil_pressure"],
        )
        fuel_pressure = st.number_input(
            "Fuel Pressure (bar)",
            min_value=0.0,
            max_value=50.0,
            value=float(SENSOR_RANGES["fuel_pressure"]["default"]),
            step=0.5,
            help=SENSOR_HELP["fuel_pressure"],
        )
    with col2:
        coolant_pressure = st.number_input(
            "Coolant Pressure (bar)",
            min_value=0.0,
            max_value=20.0,
            value=float(SENSOR_RANGES["coolant_pressure"]["default"]),
            step=0.1,
            help=SENSOR_HELP["coolant_pressure"],
        )
        lub_oil_temp = st.number_input(
            "Lub Oil Temp (°C)",
            min_value=0.0,
            max_value=250.0,
            value=float(SENSOR_RANGES["lub_oil_temp"]["default"]),
            step=1.0,
            help=SENSOR_HELP["lub_oil_temp"],
        )
        coolant_temp = st.number_input(
            "Coolant Temp (°C)",
            min_value=0.0,
            max_value=250.0,
            value=float(SENSOR_RANGES["coolant_temp"]["default"]),
            step=1.0,
            help=SENSOR_HELP["coolant_temp"],
        )
    # Validate inputs against training-data ranges; warnings are advisory.
    all_warnings = []
    inputs = {
        "engine_rpm": engine_rpm,
        "lub_oil_pressure": lub_oil_pressure,
        "fuel_pressure": fuel_pressure,
        "coolant_pressure": coolant_pressure,
        "lub_oil_temp": lub_oil_temp,
        "coolant_temp": coolant_temp,
    }
    for sensor, value in inputs.items():
        all_warnings.extend(validate_input(value, sensor))
    if all_warnings:
        st.warning("**Input Validation Warnings:**")
        for warning in all_warnings:
            st.caption(f"- {warning}")
        st.caption(
            "*Values outside training data ranges may produce less reliable predictions.*"
        )
    st.divider()
    # Inference is triggered explicitly by the button; it is not re-run on
    # every widget change.
    if st.button(
        "Predict Maintenance Status", type="primary", use_container_width=True
    ):
        try:
            features = engineer_features(
                engine_rpm,
                lub_oil_pressure,
                fuel_pressure,
                coolant_pressure,
                lub_oil_temp,
                coolant_temp,
            )
            result = predict(model, features, threshold)
            st.subheader("Prediction Result")
            if result["prediction"] == 0:
                st.success(f"**{result['label']}**")
                st.markdown(
                    "The engine is operating within normal parameters. "
                    "Continue regular monitoring."
                )
            else:
                st.error(f"**{result['label']}**")
                st.markdown(
                    "The model indicates potential engine issues. "
                    "**Schedule maintenance inspection.**"
                )
            # Metrics shown for both outcomes.
            col1, col2, col3 = st.columns(3)
            with col1:
                st.metric("Confidence", f"{result['probability']:.1%}")
            with col2:
                st.metric("Threshold", f"{result['threshold']:.4f}")
            with col3:
                st.metric("Raw Probability", f"{result['probability']:.4f}")
        except Exception as e:
            # Surface inference failures in the UI rather than crashing.
            st.error(f"Prediction failed: {e}")
def render_bulk_import(model: Any, metadata: dict, threshold: float):
    """Render the bulk CSV import tab.

    Args:
        model: Fitted classifier exposing predict_proba.
        metadata: Model metadata dict (unused in this tab; kept for signature
            symmetry with render_single_prediction).
        threshold: Decision threshold from the active operating mode.
    """
    st.markdown(
        "Upload a CSV file with engine sensor readings to predict maintenance "
        "status for an entire fleet."
    )
    st.markdown(
        f"**Required columns:** {', '.join(REQUIRED_COLUMNS)}"
    )
    # Sample CSV download so users can see the expected header format.
    sample_csv = generate_sample_csv()
    st.download_button(
        "Download Sample CSV Template",
        data=sample_csv,
        file_name="sensor_template.csv",
        mime="text/csv",
    )
    uploaded_file = st.file_uploader("Upload sensor readings CSV", type=["csv"])
    if uploaded_file is None:
        # Nothing uploaded yet; render nothing further.
        return
    # Read and validate CSV; parse failures end the tab with an error.
    try:
        raw_df = pd.read_csv(uploaded_file)
    except Exception as e:
        st.error(f"Failed to read CSV: {e}")
        return
    if raw_df.empty:
        st.warning("The uploaded CSV file is empty.")
        return
    cleaned_df, warnings = validate_csv(raw_df)
    # Fatal issues (missing columns, no rows) render as errors; the rest as
    # non-blocking warnings.
    for w in warnings:
        if "Missing required" in w or "No valid rows" in w:
            st.error(w)
        else:
            st.warning(w)
    if cleaned_df.empty:
        return
    # Data preview before the user commits to running inference.
    st.subheader("Data Preview")
    st.dataframe(cleaned_df.head(), use_container_width=True)
    st.caption(f"{len(cleaned_df):,} valid rows loaded.")
    st.divider()
    if st.button(
        "Run Batch Predictions", type="primary", use_container_width=True
    ):
        with st.spinner("Running predictions..."):
            features = engineer_features_batch(cleaned_df)
            results = predict_batch(model, features, threshold)
        # Summary metrics for the whole fleet.
        st.subheader("Batch Results")
        total = len(results)
        maint_count = int((results["Prediction"] == 1).sum())
        normal_count = total - maint_count
        col1, col2, col3 = st.columns(3)
        with col1:
            st.metric("Total Engines", f"{total:,}")
        with col2:
            st.metric("Maintenance Required", f"{maint_count:,}")
        with col3:
            st.metric("Normal Operation", f"{normal_count:,}")
        # Results table: inputs side by side with predictions, color coded.
        display_df = pd.concat(
            [cleaned_df.reset_index(drop=True), results.reset_index(drop=True)],
            axis=1,
        )
        display_df["Probability"] = display_df["Probability"].round(4)
        styled = display_df.style.apply(color_prediction_rows, axis=1)
        st.dataframe(styled, use_container_width=True)
        # Download results as CSV (unstyled values).
        csv_buffer = io.StringIO()
        display_df.to_csv(csv_buffer, index=False)
        st.download_button(
            "Download Results CSV",
            data=csv_buffer.getvalue(),
            file_name="maintenance_predictions.csv",
            mime="text/csv",
        )
def main():
    """Main application entry point: page config, model load, sidebar, tabs."""
    # set_page_config must be the first Streamlit call on the page.
    st.set_page_config(
        page_title="Predictive Maintenance",
        page_icon="🔧",
        layout="centered",
        initial_sidebar_state="expanded",
    )
    st.title("Engine Predictive Maintenance")
    st.markdown(
        "Predict engine maintenance needs based on sensor readings. "
        "Select an operating mode in the sidebar and use the tabs below."
    )
    # Load model (cached across reruns by load_model's st.cache_resource).
    with st.spinner("Loading model from HuggingFace..."):
        model, metadata = load_model()
    if model is None:
        # load_model already showed the underlying error; stop the script.
        st.error(
            "Unable to load the prediction model. "
            "Please try again later or contact support."
        )
        st.stop()
    # Sidebar: operating mode selection drives the decision threshold.
    threshold = render_sidebar()
    # Reverse-lookup the mode name from its threshold; assumes thresholds in
    # OPERATING_MODES are unique (true for the two modes defined above).
    active_mode = [
        name
        for name, cfg in OPERATING_MODES.items()
        if cfg["threshold"] == threshold
    ][0]
    with st.expander("Model Information"):
        st.markdown(f"**Model**: {metadata.get('model_name', 'Unknown')}")
        st.markdown(f"**Algorithm**: {metadata.get('algorithm', 'Unknown')}")
        st.markdown(
            f"**Test Recall**: {metadata.get('test_metrics', {}).get('recall', 0):.2%}"
        )
        st.markdown(f"**Active Mode**: {active_mode}")
        st.markdown(f"**Decision Threshold**: {threshold:.4f}")
    st.divider()
    # Tabs: single engine vs fleet-wide CSV predictions.
    tab1, tab2 = st.tabs(["Single Prediction", "Bulk Import"])
    with tab1:
        render_single_prediction(model, metadata, threshold)
    with tab2:
        render_bulk_import(model, metadata, threshold)
    # Footer
    st.divider()
    st.caption(
        "Built with Streamlit | Model hosted on HuggingFace | "
        "[PGP-AIML Capstone Project](https://github.com/jskswamy/AIML-LearningBytes)"
    )
# Standard script entry guard: run the app only when executed directly.
if __name__ == "__main__":
    main()