Spaces:

sriharimudakavi
/

engine-condition-predictor

Sleeping

App Files Files Community

engine-condition-predictor / app.py

sriharimudakavi

Upload app.py with huggingface_hub

b056742 verified 4 months ago

raw

history blame contribute delete

12.9 kB


	import os
	import numpy as np
	import pandas as pd
	import joblib
	import streamlit as st
	import matplotlib.pyplot as plt
	from huggingface_hub import hf_hub_download

	# Hugging Face Hub locations of the two serialized artifacts.
	# load_artifacts() fetches the model with repo_type="model" and the scaler
	# with repo_type="dataset", so the two repo ids live in different namespaces.
	MODEL_REPO_ID = "sriharimudakavi/engine-condition-xgboost-tuned"
	MODEL_FILENAME = "xgboost_tuned_model.joblib"

	SCALER_REPO_ID = "sriharimudakavi/engine-data"
	SCALER_FILENAME = "scaler.joblib"

	# Sensor columns fed to the scaler and the model. The list order is the
	# positional order of the values handed to scaler.transform(), so it must
	# not be rearranged. Keep this a list: it is used as a DataFrame column
	# selector and as the `columns=` argument when building the input frame.
	FEATURE_COLS = [
	    "Engine rpm",
	    "Lub oil pressure",
	    "Fuel pressure",
	    "Coolant pressure",
	    "lub oil temp",
	    "Coolant temp",
	]

	# Acceptable (low, high) bounds per feature for a car engine. They are used
	# only for the human-readable notes: a reading below `low` is reported as
	# "low", above `high` as "high", otherwise "normal". They do not influence
	# the model's prediction.
	HEALTHY_RANGES = {
	    "Engine rpm": (650, 3000),
	    "Lub oil pressure": (1.0, 4.0),
	    "Fuel pressure": (2.5, 4.0),
	    "Coolant pressure": (0.9, 1.4),
	    "lub oil temp": (85.0, 110.0),
	    "Coolant temp": (85.0, 105.0),
	}

	# Corrective recommendations, keyed by (feature name, "high" | "low").
	# There is deliberately no entry for the "normal" state; callers supply
	# their own default when a key is absent.
	FIX_SUGGESTIONS = {
	    # Engine speed
	    ("Engine rpm", "high"): "Reduce engine load and inspect governor calibration.",
	    ("Engine rpm", "low"): "Inspect air intake and fuel delivery; reduce excessive load.",
	    # Lubrication oil pressure
	    ("Lub oil pressure", "high"): "Inspect relief valve and verify oil grade.",
	    ("Lub oil pressure", "low"): "Check oil level, pump health, and replace filters.",
	    # Fuel pressure
	    ("Fuel pressure", "high"): "Check injector return line and regulator blockage.",
	    ("Fuel pressure", "low"): "Inspect fuel filter and pump; remove air ingress.",
	    # Coolant pressure
	    ("Coolant pressure", "high"): "Inspect thermostat, radiator cap, and coolant passages.",
	    ("Coolant pressure", "low"): "Refill coolant, bleed air, and inspect pump.",
	    # Lubrication oil temperature
	    ("lub oil temp", "high"): "Inspect lubrication cooling circuit and bearings.",
	    ("lub oil temp", "low"): "Allow proper warm-up and verify heating system.",
	    # Coolant temperature
	    ("Coolant temp", "high"): "Inspect radiator, fan, and coolant flow.",
	    ("Coolant temp", "low"): "Verify thermostat and sensor calibration.",
	}

	# Freeze plot to prevent shaking: create the session-state slot that holds
	# the most recently rendered figure on the first run of a session, so later
	# code can assign to and read from it unconditionally.
	if "saved_fig" not in st.session_state:
	    st.session_state.saved_fig = None


	# ----------------------------------------------------------
	# Load model & scaler
	# ----------------------------------------------------------
	@st.cache_resource
	def load_artifacts():
	    """Download and deserialize the classifier and the feature scaler.

	    The result is cached by ``st.cache_resource``, so the download and
	    deserialization happen once per server process rather than on every
	    script rerun.

	    Returns:
	        tuple: ``(model, scaler, importances)`` where ``importances`` is a
	        1-D float array with one weight per entry of ``FEATURE_COLS``. When
	        the model exposes no usable ``feature_importances_`` (attribute
	        missing, not computable, or of the wrong length) every feature gets
	        weight 1.0.
	    """
	    model_path = hf_hub_download(repo_id=MODEL_REPO_ID, filename=MODEL_FILENAME, repo_type="model")
	    scaler_path = hf_hub_download(repo_id=SCALER_REPO_ID, filename=SCALER_FILENAME, repo_type="dataset")

	    # NOTE(security): joblib.load unpickles the files, which can execute
	    # arbitrary code. Only point the repo constants at repositories you trust.
	    model = joblib.load(model_path)
	    scaler = joblib.load(scaler_path)

	    # Not every estimator exposes feature_importances_; catch only the errors
	    # that signal "unavailable" so real failures are not silently hidden.
	    try:
	        importances = np.asarray(model.feature_importances_, dtype=float).ravel()
	    except (AttributeError, NotImplementedError, TypeError, ValueError):
	        importances = None

	    # A length mismatch would raise IndexError later when the importances are
	    # indexed per feature, so fall back to uniform weights here as well.
	    if importances is None or importances.size != len(FEATURE_COLS):
	        importances = np.ones(len(FEATURE_COLS))

	    return model, scaler, importances


	# ----------------------------------------------------------
	# Predict with threshold=0.5
	# ----------------------------------------------------------
	def predict_with_proba(model, Xs, threshold=0.5):
	    """Predict binary labels and, when available, positive-class scores.

	    Args:
	        model: Estimator offering ``predict_proba`` and/or ``predict``.
	        Xs: Already-scaled feature matrix, one row per sample.
	        threshold: Score at or above which a sample is labelled 1.

	    Returns:
	        tuple: ``(preds, proba)``. ``preds`` is an integer array of labels.
	        ``proba`` is the array of scores that were thresholded, or ``None``
	        when the model only yields hard labels/values outside ``[0, 1]``.
	    """
	    # Preferred path: column 1 of predict_proba is the positive-class score.
	    # Only "this model cannot give probabilities" errors trigger the fallback;
	    # anything else (bad input, interrupts, ...) propagates to the caller.
	    try:
	        proba = np.asarray(model.predict_proba(Xs))[:, 1]
	    except (AttributeError, NotImplementedError, IndexError):
	        proba = None

	    if proba is not None:
	        return (proba >= threshold).astype(int), proba

	    # Fallback: predict() may return a plain list, so normalise to an array
	    # before using array methods.
	    raw = np.asarray(model.predict(Xs))

	    # Values confined to [0, 1] are treated as scores and thresholded. The
	    # size check avoids min()/max() raising on an empty prediction array.
	    if raw.size and raw.min() >= 0 and raw.max() <= 1:
	        return (raw >= threshold).astype(int), raw

	    return raw.astype(int), None


	# ----------------------------------------------------------
	# Feature effect estimation
	# ----------------------------------------------------------
	def local_effects(row_df, model, scaler, importances):
	    """Estimate how each feature locally influences the fault probability.

	    Each feature of the first row of ``row_df`` is shifted up and down by one
	    scaler standard deviation (in raw units) and the resulting change in the
	    predicted fault probability is measured with a central difference.

	    Args:
	        row_df: DataFrame containing every column of ``FEATURE_COLS``; only
	            its first row is used.
	        model: Estimator passed to ``predict_with_proba``.
	        scaler: Fitted transformer with ``transform``; ``scale_`` and
	            ``mean_`` are used when present.
	        importances: One global importance weight per feature.

	    Returns:
	        tuple: ``(pred, base_p, effects)``. ``pred`` is the predicted label,
	        ``base_p`` the fault probability for the unperturbed row (``None``
	        when the model gives no probability), and ``effects`` a list of
	        ``(feature, value, z, delta, importance, score)`` tuples sorted by
	        ``score = |delta| * importance`` in descending order.
	    """
	    n_features = len(FEATURE_COLS)
	    x_orig = row_df[FEATURE_COLS].iloc[0].to_numpy(dtype=float, copy=True)

	    base_preds, base_proba = predict_with_proba(model, scaler.transform([x_orig]))
	    pred = int(base_preds[0])
	    base_p = base_proba[0] if base_proba is not None else None

	    # scale_/mean_ may be absent, or present but None (e.g. a StandardScaler
	    # fitted with with_std=False / with_mean=False); treat both as identity.
	    scale = getattr(scaler, "scale_", None)
	    scale = np.ones(n_features) if scale is None else np.asarray(scale, dtype=float)
	    mean = getattr(scaler, "mean_", None)
	    mean = np.zeros(n_features) if mean is None else np.asarray(mean, dtype=float)

	    # A zero scale would make the perturbation a no-op and the z-score a
	    # division by zero, so substitute 1 for those entries.
	    safe_scale = np.where(scale != 0, scale, 1.0)

	    # Guard against an importance vector that does not line up with the
	    # features; uniform weights keep the ranking driven by delta alone.
	    weights = np.asarray(importances, dtype=float).ravel()
	    if weights.size != n_features:
	        weights = np.ones(n_features)

	    effects = []
	    for i, feat in enumerate(FEATURE_COLS):
	        step = safe_scale[i]

	        x_plus = x_orig.copy()
	        x_minus = x_orig.copy()
	        x_plus[i] += step
	        x_minus[i] -= step

	        _, p_plus = predict_with_proba(model, scaler.transform([x_plus]))
	        _, p_minus = predict_with_proba(model, scaler.transform([x_minus]))

	        if base_p is None or p_plus is None or p_minus is None:
	            # No probability available anywhere: the effect is undefined.
	            delta = 0.0
	        else:
	            # Central difference; the baseline probability cancels out.
	            delta = float(p_plus[0] - p_minus[0]) / 2

	        z = float((x_orig[i] - mean[i]) / safe_scale[i])
	        imp = float(weights[i])
	        score = abs(delta) * imp

	        effects.append((feat, x_orig[i], z, delta, imp, score))

	    effects.sort(key=lambda effect: effect[5], reverse=True)
	    return pred, base_p, effects


	# ----------------------------------------------------------
	# Build explanation text
	# ----------------------------------------------------------
	def build_explanation(effects, pred, base_p):
	    """Turn ranked feature effects into a headline and per-feature notes.

	    Args:
	        effects: Sequence of ``(feature, value, z, delta, importance, score)``
	            tuples, most influential first. Only the first five are described.
	            Every feature name must be a key of ``HEALTHY_RANGES``.
	        pred: Predicted class; ``1`` is reported as Faulty, anything else as
	            Normal.
	        base_p: Fault probability as a fraction, or ``None`` to leave the
	            probability out of the header.

	    Returns:
	        tuple: ``(header, lines)`` where ``header`` is the classification
	        sentence and ``lines`` is a list of numbered explanation strings.

	    Raises:
	        KeyError: If a feature name has no entry in ``HEALTHY_RANGES``.
	    """
	    lines = []

	    for rank, (feat, val, z, delta, _imp, _score) in enumerate(effects[:5], start=1):
	        low, high = HEALTHY_RANGES[feat]

	        # Position of the raw reading relative to the healthy band.
	        if val < low:
	            state, relation = "low", "below"
	        elif val > high:
	            state, relation = "high", "above"
	        else:
	            state, relation = "normal", "within"
	        range_info = f"(value {val:.1f} {relation} {low}-{high})"

	        # Severity wording from the magnitude of the z-score.
	        deviation = abs(z)
	        if deviation >= 2.5:
	            level = "shows extreme deviation"
	        elif deviation >= 1.5:
	            level = "is strongly abnormal"
	        elif deviation >= 0.5:
	            level = "is moderately shifted"
	        else:
	            level = "is close to expected behaviour"

	        # Direction of the local effect on the fault probability.
	        if delta > 0:
	            effect, weight = "increases fault likelihood", "major contributor"
	        elif delta < 0:
	            effect, weight = "reduces fault likelihood", "stabilizing factor"
	        else:
	            effect, weight = "has minimal impact", "weak driver"

	        fix = FIX_SUGGESTIONS.get((feat, state), f"Inspect {feat} subsystem.")
	        # Suggestions already end with a period and the template adds one;
	        # strip it so the sentence does not end in "..".
	        fix = fix.rstrip(".")

	        lines.append(
	            f"{rank}. {feat} {level} {range_info}. A 1σ change {effect} by {delta:+.3f}, "
	            f"making it a {weight}. Recommended action: {fix}."
	        )

	    if pred == 1:
	        header = "The engine is classified as Faulty (1)."
	    else:
	        header = "The engine is classified as Normal (0)."

	    if base_p is not None:
	        header += f" Fault probability: {base_p*100:.1f}%."

	    return header, lines


	# ----------------------------------------------------------
	# Plot z-score bar chart
	# ----------------------------------------------------------
	def zscore_plot(effects):
	    """Draw a horizontal bar chart of feature z-scores, largest |z| on top.

	    Args:
	        effects: Sequence of effect tuples whose element 0 is the feature
	            name and element 2 is its z-score.

	    Returns:
	        matplotlib.figure.Figure: The rendered figure.
	    """
	    # Build the figure with the object-oriented API instead of
	    # plt.subplots(): pyplot keeps every figure it creates in a global
	    # registry until it is explicitly closed, so a long-running app would
	    # accumulate one figure per prediction. A bare Figure is released as soon
	    # as nothing references it.
	    from matplotlib.figure import Figure

	    fig = Figure(figsize=(7, 3))
	    ax = fig.subplots()

	    z_vals = np.array([e[2] for e in effects], dtype=float)
	    feats = np.array([e[0] for e in effects])

	    # Descending order of absolute z-score.
	    order = np.argsort(np.abs(z_vals))[::-1]
	    positions = range(len(z_vals))

	    ax.barh(positions, z_vals[order])
	    ax.set_yticks(positions)
	    ax.set_yticklabels(feats[order])
	    # barh draws position 0 at the bottom; invert so the top bar is the
	    # largest deviation.
	    ax.invert_yaxis()
	    ax.set_xlabel("z-score")
	    fig.tight_layout()

	    return fig


	# ----------------------------------------------------------
	# MAIN APP
	# ----------------------------------------------------------
	# Column names recognised as ground-truth labels in an uploaded CSV.
	_POSSIBLE_LABELS = ("ActualLabel", "Label", "Engine_Condition", "Condition", "Status")


	def _inject_styles():
	    """Emit the anti-shake script and the page CSS."""
	    st.markdown("<script>window.parent.document.body.style.overflow = 'hidden';</script>", unsafe_allow_html=True)

	    # Assembled from per-line literals so that no line of the HTML carries
	    # source-code indentation (Markdown renders indented lines as code).
	    css = (
	        "<style>\n"
	        "body { background: #f4f4f4 !important; }\n"
	        "* { animation: none !important; transition: none !important; }\n"
	        "canvas { transform: translateZ(0) !important; }\n"
	        ".element-container { will-change: auto !important; }\n"
	        ".card {\n"
	        "padding: 1rem;\n"
	        "border:1px solid #ddd;\n"
	        "border-radius: 1rem;\n"
	        "background:#fff;\n"
	        "}\n"
	        "</style>\n"
	    )
	    st.markdown(css, unsafe_allow_html=True)


	def _convert_label(value):
	    """Map a raw label cell to 1 (faulty), 0 (normal) or None (unknown).

	    Strings starting with "f"/"n" (case-insensitive) map to 1/0; anything
	    else is parsed as a number. Missing or unparseable cells yield None
	    instead of raising, so one bad cell does not abort the whole upload.
	    """
	    if isinstance(value, str):
	        text = value.strip().lower()
	        if text.startswith("f"):
	            return 1
	        if text.startswith("n"):
	            return 0
	        value = text
	    try:
	        return int(float(value))
	    except (TypeError, ValueError, OverflowError):
	        return None


	def _render_manual_mode(model, scaler, importances):
	    """Render the six numeric inputs and, on click, a single prediction."""
	    c1, c2, c3 = st.columns(3)

	    with c1:
	        rpm = st.number_input("Engine RPM", value=800)
	        oilp = st.number_input("Lub oil pressure", value=3.0)

	    with c2:
	        fuelp = st.number_input("Fuel pressure", value=6.0)
	        coolp = st.number_input("Coolant pressure", value=2.0)

	    with c3:
	        oilt = st.number_input("Lub oil temp", value=80.0)
	        coolt = st.number_input("Coolant temp", value=90.0)

	    # Value order must follow FEATURE_COLS.
	    df = pd.DataFrame([[rpm, oilp, fuelp, coolp, oilt, coolt]], columns=FEATURE_COLS)
	    st.dataframe(df)

	    if not st.button("Predict"):
	        return

	    pred, base_p, effects = local_effects(df, model, scaler, importances)
	    header, lines = build_explanation(effects, pred, base_p)

	    a, b = st.columns([1, 2])

	    with a:
	        st.markdown("<div class='card'>", unsafe_allow_html=True)
	        st.markdown("### 🔴 Faulty" if pred == 1 else "### 🟢 Normal")
	        if base_p is not None:
	            st.markdown(f"Probability: {base_p*100:.1f}%")
	        st.markdown("</div>", unsafe_allow_html=True)

	    st.session_state.saved_fig = zscore_plot(effects)

	    with b:
	        st.pyplot(st.session_state.saved_fig)

	    st.subheader("Detailed Analysis")
	    st.write(header)
	    for line in lines:
	        st.markdown("- " + line)


	def _render_csv_mode(model, scaler, importances):
	    """Render the CSV uploader, per-row predictions and optional metrics."""
	    st.subheader("📁 Upload CSV (Raw Sensor Data + Optional Labels)")

	    f = st.file_uploader("Upload CSV", type="csv")
	    if f is None:
	        return

	    try:
	        df = pd.read_csv(f)
	    except (pd.errors.ParserError, pd.errors.EmptyDataError, UnicodeDecodeError) as exc:
	        st.error(f"Could not read the uploaded CSV: {exc}")
	        return

	    st.write("### Preview of Uploaded File")
	    st.dataframe(df.head())

	    # Fail with a readable message instead of a KeyError traceback.
	    missing = [col for col in FEATURE_COLS if col not in df.columns]
	    if missing:
	        st.error("Uploaded CSV is missing required column(s): " + ", ".join(missing))
	        return

	    try:
	        feature_df = df[FEATURE_COLS].astype(float)
	    except (TypeError, ValueError) as exc:
	        st.error(f"Feature columns must contain numeric values: {exc}")
	        return

	    # 1. Detect the ground-truth label column (first match in file order).
	    actual_label_col = next((col for col in df.columns if col in _POSSIBLE_LABELS), None)

	    if actual_label_col is not None:
	        # Object dtype keeps integer labels and None side by side instead of
	        # coercing the whole column to float.
	        df["Actual"] = pd.Series(
	            [_convert_label(value) for value in df[actual_label_col]],
	            index=df.index,
	            dtype=object,
	        )
	    else:
	        df["Actual"] = None

	    # 2. Predict and explain every row.
	    preds, probs, explanations = [], [], []
	    for start in range(len(feature_df)):
	        row = feature_df.iloc[start:start + 1]  # one-row DataFrame
	        p, bp, eff = local_effects(row, model, scaler, importances)
	        _, lines = build_explanation(eff, p, bp)

	        preds.append(p)
	        probs.append(bp)
	        explanations.append(lines[0] if lines else "")

	    df["Predicted"] = preds
	    df["Pred_Prob"] = probs
	    df["Explanation"] = explanations

	    # 3. Evaluation metrics, computed only on rows that carry a usable label.
	    labelled = df["Actual"].notnull()
	    if labelled.any():
	        from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

	        y_true = df.loc[labelled, "Actual"].astype(int)
	        y_pred = df.loc[labelled, "Predicted"].astype(int)

	        acc = accuracy_score(y_true, y_pred)
	        # Pin labels to [0, 1] so both calls keep a fixed layout and
	        # classification_report does not raise when only one class occurs.
	        cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
	        report = classification_report(
	            y_true,
	            y_pred,
	            labels=[0, 1],
	            target_names=["Normal", "Faulty"],
	            zero_division=0,
	        )

	        st.subheader("📊 Evaluation Metrics")
	        st.write(f"Accuracy: {acc*100:.2f}%")

	        st.write("### Confusion Matrix")
	        st.write(cm)

	        st.write("### Classification Report")
	        st.text(report)

	    def color_row(row):
	        """Green for a correct prediction, red for a wrong one, none if unlabelled."""
	        # pd.isna covers both None and NaN.
	        if pd.isna(row["Actual"]):
	            return [""] * len(row)
	        if row["Actual"] == row["Predicted"]:
	            return ["background-color: #d4edda"] * len(row)  # green
	        return ["background-color: #f8d7da"] * len(row)  # red

	    st.subheader("📍 Detailed Prediction Results")
	    st.dataframe(df.style.apply(color_row, axis=1))


	def main():
	    """Entry point of the Streamlit app.

	    Configures the page, loads the cached model artifacts and dispatches to
	    the manual-entry or CSV-upload view selected in the sidebar.
	    """
	    st.set_page_config(page_title="Engine Predictor", layout="wide", page_icon="⚙️")

	    # Anti-shake + styling
	    _inject_styles()

	    model, scaler, importances = load_artifacts()

	    st.title("⚙️ Engine Condition Predictor")
	    st.write("Advanced diagnostics, explanations, and CSV evaluation.")

	    mode = st.sidebar.radio("Input Mode", ["Manual Entry", "Upload CSV"])

	    if mode == "Manual Entry":
	        _render_manual_mode(model, scaler, importances)
	    else:
	        _render_csv_mode(model, scaler, importances)


	# ----------------------------------------------------------
	# Script entry point: build the UI when the file is executed as a script.
	if __name__ == "__main__":
	    main()