import os import numpy as np import pandas as pd import joblib import streamlit as st import matplotlib.pyplot as plt from huggingface_hub import hf_hub_download # HF model + scaler locations MODEL_REPO_ID = "sriharimudakavi/engine-condition-xgboost-tuned" MODEL_FILENAME = "xgboost_tuned_model.joblib" SCALER_REPO_ID = "sriharimudakavi/engine-data" SCALER_FILENAME = "scaler.joblib" # Feature columns used for prediction FEATURE_COLS = [ "Engine rpm", "Lub oil pressure", "Fuel pressure", "Coolant pressure", "lub oil temp", "Coolant temp" ] # Acceptable ranges for human-readable notes (car engine) HEALTHY_RANGES = { "Engine rpm": (650, 3000), "Lub oil pressure": (1.0, 4.0), "Fuel pressure": (2.5, 4.0), "Coolant pressure": (0.9, 1.4), "lub oil temp": (85.0, 110.0), "Coolant temp": (85.0, 105.0) } # Corrective recommendations FIX_SUGGESTIONS = { ("Engine rpm", "high"): "Reduce engine load and inspect governor calibration.", ("Engine rpm", "low"): "Inspect air intake and fuel delivery; reduce excessive load.", ("Lub oil pressure", "high"): "Inspect relief valve and verify oil grade.", ("Lub oil pressure", "low"): "Check oil level, pump health, and replace filters.", ("Fuel pressure", "high"): "Check injector return line and regulator blockage.", ("Fuel pressure", "low"): "Inspect fuel filter and pump; remove air ingress.", ("Coolant pressure", "high"): "Inspect thermostat, radiator cap, and coolant passages.", ("Coolant pressure", "low"): "Refill coolant, bleed air, and inspect pump.", ("lub oil temp", "high"): "Inspect lubrication cooling circuit and bearings.", ("lub oil temp", "low"): "Allow proper warm-up and verify heating system.", ("Coolant temp", "high"): "Inspect radiator, fan, and coolant flow.", ("Coolant temp", "low"): "Verify thermostat and sensor calibration." } # Freeze plot to prevent shaking if "saved_fig" not in st.session_state: st.session_state.saved_fig = None # ---------------------------------------------------------- # Load model & scaler # ---------------------------------------------------------- @st.cache_resource def load_artifacts(): mp = hf_hub_download(repo_id=MODEL_REPO_ID, filename=MODEL_FILENAME, repo_type="model") sp = hf_hub_download(repo_id=SCALER_REPO_ID, filename=SCALER_FILENAME, repo_type="dataset") model = joblib.load(mp) scaler = joblib.load(sp) try: importances = model.feature_importances_ except: importances = np.ones(len(FEATURE_COLS)) return model, scaler, importances # ---------------------------------------------------------- # Predict with threshold=0.5 # ---------------------------------------------------------- def predict_with_proba(model, Xs, threshold=0.5): try: proba = model.predict_proba(Xs)[:, 1] preds = (proba >= threshold).astype(int) return preds, proba except: raw = model.predict(Xs) if raw.min() >= 0 and raw.max() <= 1: preds = (raw >= threshold).astype(int) return preds, raw return raw.astype(int), None # ---------------------------------------------------------- # Feature effect estimation # ---------------------------------------------------------- def local_effects(row_df, model, scaler, importances): base_row = row_df[FEATURE_COLS].iloc[0].astype(float) x_orig = base_row.values X_base = scaler.transform([x_orig]) preds, proba = predict_with_proba(model, X_base) pred = int(preds[0]) base_p = proba[0] if proba is not None else None scale = getattr(scaler, "scale_", np.ones(len(FEATURE_COLS))) mean = getattr(scaler, "mean_", np.zeros(len(FEATURE_COLS))) effects = [] for i, feat in enumerate(FEATURE_COLS): step = scale[i] if scale[i] != 0 else 1 x_plus = x_orig.copy() x_minus = x_orig.copy() x_plus[i] += step x_minus[i] -= step Xp = scaler.transform([x_plus]) Xm = scaler.transform([x_minus]) _, pp = predict_with_proba(model, Xp) _, pm = predict_with_proba(model, Xm) if base_p is None or pp is None or pm is None: delta = 0 else: delta = ((pp[0] - base_p) - (pm[0] - base_p)) / 2 z = (x_orig[i] - mean[i]) / (scale[i] if scale[i] != 0 else 1) imp = float(importances[i]) score = abs(delta) * imp effects.append((feat, x_orig[i], z, delta, imp, score)) effects.sort(key=lambda x: x[5], reverse=True) return pred, base_p, effects # ---------------------------------------------------------- # Build explanation text # ---------------------------------------------------------- def build_explanation(effects, pred, base_p): lines = [] for rank, (feat, val, z, delta, imp, score) in enumerate(effects[:5], start=1): low, high = HEALTHY_RANGES[feat] if val < low: state = "low" range_info = f"(value {val:.1f} below {low}-{high})" elif val > high: state = "high" range_info = f"(value {val:.1f} above {low}-{high})" else: state = "normal" range_info = f"(value {val:.1f} within {low}-{high})" if abs(z) >= 2.5: level = "shows extreme deviation" elif abs(z) >= 1.5: level = "is strongly abnormal" elif abs(z) >= 0.5: level = "is moderately shifted" else: level = "is close to expected behaviour" if delta > 0: effect = "increases fault likelihood" weight = "major contributor" elif delta < 0: effect = "reduces fault likelihood" weight = "stabilizing factor" else: effect = "has minimal impact" weight = "weak driver" fix = FIX_SUGGESTIONS.get((feat, state), f"Inspect {feat} subsystem.") lines.append( f"{rank}. {feat} {level} {range_info}. A 1σ change {effect} by {delta:+.3f}, " f"making it a {weight}. Recommended action: {fix}." ) header = "The engine is classified as Faulty (1)." if pred == 1 else "The engine is classified as Normal (0)." if base_p is not None: header += f" Fault probability: {base_p*100:.1f}%." return header, lines # ---------------------------------------------------------- # Plot z-score bar chart # ---------------------------------------------------------- def zscore_plot(effects): fig, ax = plt.subplots(figsize=(7, 3)) z_vals = [e[2] for e in effects] feats = [e[0] for e in effects] idx = np.argsort(np.abs(z_vals))[::-1] ax.barh(range(len(z_vals)), np.array(z_vals)[idx]) ax.set_yticks(range(len(z_vals))) ax.set_yticklabels(np.array(feats)[idx]) ax.invert_yaxis() ax.set_xlabel("z-score") fig.tight_layout() return fig # ---------------------------------------------------------- # MAIN APP # ---------------------------------------------------------- def main(): st.set_page_config(page_title="Engine Predictor", layout="wide", page_icon="⚙️") # Anti-shake + styling st.markdown("", unsafe_allow_html=True) st.markdown( """ """, unsafe_allow_html=True ) model, scaler, importances = load_artifacts() st.title("⚙️ Engine Condition Predictor") st.write("Advanced diagnostics, explanations, and CSV evaluation.") mode = st.sidebar.radio("Input Mode", ["Manual Entry", "Upload CSV"]) # ------------------------------------------------------ # MANUAL MODE # ------------------------------------------------------ if mode == "Manual Entry": c1, c2, c3 = st.columns(3) with c1: rpm = st.number_input("Engine RPM", value=800) oilp = st.number_input("Lub oil pressure", value=3.0) with c2: fuelp = st.number_input("Fuel pressure", value=6.0) coolp = st.number_input("Coolant pressure", value=2.0) with c3: oilt = st.number_input("Lub oil temp", value=80.0) coolt = st.number_input("Coolant temp", value=90.0) df = pd.DataFrame([[rpm, oilp, fuelp, coolp, oilt, coolt]], columns=FEATURE_COLS) st.dataframe(df) if st.button("Predict"): pred, base_p, effects = local_effects(df, model, scaler, importances) header, lines = build_explanation(effects, pred, base_p) a, b = st.columns([1, 2]) with a: st.markdown("