# Hugging Face page header captured with the file (not part of the program):
#   uploader: sriharimudakavi
#   commit b056742 (verified): "Upload app.py with huggingface_hub"
import os
import numpy as np
import pandas as pd
import joblib
import streamlit as st
import matplotlib.pyplot as plt
from huggingface_hub import hf_hub_download
# Hugging Face Hub locations of the two serialized artifacts the app needs.
# The model is fetched from a model repo; the scaler is fetched from a
# separate repo that load_artifacts() downloads with repo_type="dataset".
MODEL_REPO_ID = "sriharimudakavi/engine-condition-xgboost-tuned"
MODEL_FILENAME = "xgboost_tuned_model.joblib"
SCALER_REPO_ID = "sriharimudakavi/engine-data"
SCALER_FILENAME = "scaler.joblib"
# Sensor columns passed to the scaler and the model.
# Order is significant: positions in this list are used to index the scaler
# statistics and the feature-importance vector elsewhere in this file.
FEATURE_COLS = [
    "Engine rpm",
    "Lub oil pressure",
    "Fuel pressure",
    "Coolant pressure",
    "lub oil temp",
    "Coolant temp",
]
# Inclusive (low, high) band per feature that the explanation text treats as
# "normal" for a car engine. These bands only drive the human-readable notes;
# they are not inputs to the model.
HEALTHY_RANGES = {
    "Engine rpm": (650, 3000),
    "Lub oil pressure": (1.0, 4.0),
    "Fuel pressure": (2.5, 4.0),
    "Coolant pressure": (0.9, 1.4),
    "lub oil temp": (85.0, 110.0),
    "Coolant temp": (85.0, 105.0),
}
# Corrective action text keyed by (feature name, "high" | "low").
# A reading inside its healthy band has no entry here.
FIX_SUGGESTIONS = {
    # Engine speed
    ("Engine rpm", "high"): "Reduce engine load and inspect governor calibration.",
    ("Engine rpm", "low"): "Inspect air intake and fuel delivery; reduce excessive load.",
    # Lubrication pressure
    ("Lub oil pressure", "high"): "Inspect relief valve and verify oil grade.",
    ("Lub oil pressure", "low"): "Check oil level, pump health, and replace filters.",
    # Fuel pressure
    ("Fuel pressure", "high"): "Check injector return line and regulator blockage.",
    ("Fuel pressure", "low"): "Inspect fuel filter and pump; remove air ingress.",
    # Coolant pressure
    ("Coolant pressure", "high"): "Inspect thermostat, radiator cap, and coolant passages.",
    ("Coolant pressure", "low"): "Refill coolant, bleed air, and inspect pump.",
    # Lubrication temperature
    ("lub oil temp", "high"): "Inspect lubrication cooling circuit and bearings.",
    ("lub oil temp", "low"): "Allow proper warm-up and verify heating system.",
    # Coolant temperature
    ("Coolant temp", "high"): "Inspect radiator, fan, and coolant flow.",
    ("Coolant temp", "low"): "Verify thermostat and sensor calibration.",
}
# Reserve a session-state slot for the most recent chart so it exists before
# any widget code reads or writes it (the original intent: keep the plot from
# "shaking" between reruns).
if "saved_fig" not in st.session_state:
    st.session_state["saved_fig"] = None
# ----------------------------------------------------------
# Load model & scaler
# ----------------------------------------------------------
@st.cache_resource
def load_artifacts():
    """Download and deserialize the model and scaler from the Hugging Face Hub.

    The result is cached by ``st.cache_resource``.

    Returns:
        tuple: ``(model, scaler, importances)`` where ``importances`` is a
        float array with one weight per entry of ``FEATURE_COLS``. When the
        model exposes no usable ``feature_importances_`` (missing attribute,
        unsupported booster, or a length that does not match the feature
        list) a vector of ones is returned so every feature is weighted
        equally.
    """
    model_path = hf_hub_download(
        repo_id=MODEL_REPO_ID, filename=MODEL_FILENAME, repo_type="model"
    )
    scaler_path = hf_hub_download(
        repo_id=SCALER_REPO_ID, filename=SCALER_FILENAME, repo_type="dataset"
    )

    # NOTE(security): joblib.load unpickles the file and can execute arbitrary
    # code; only point the repo constants at repositories you trust.
    model = joblib.load(model_path)
    scaler = joblib.load(scaler_path)

    n_features = len(FEATURE_COLS)
    try:
        importances = np.asarray(model.feature_importances_, dtype=float)
    except (AttributeError, NotImplementedError, TypeError, ValueError):
        # The model has no importances (e.g. it is not a tree ensemble):
        # fall back to equal weights rather than failing to start the app.
        importances = np.ones(n_features)
    else:
        # Callers index importances by feature position, so a vector of the
        # wrong length would raise or silently mis-weight features later.
        if importances.shape != (n_features,):
            importances = np.ones(n_features)
    return model, scaler, importances
# ----------------------------------------------------------
# Predict with threshold=0.5
# ----------------------------------------------------------
def predict_with_proba(model, Xs, threshold=0.5):
    """Predict class labels and, when available, positive-class probabilities.

    Args:
        model: Estimator exposing ``predict_proba`` and/or ``predict``.
        Xs: Feature matrix accepted by the model (already scaled by callers
            in this file).
        threshold: Probability at or above which a row is labelled 1.

    Returns:
        tuple: ``(preds, proba)``. ``preds`` is an int array of labels.
        ``proba`` is the second column of ``predict_proba``; if the model has
        no usable ``predict_proba`` it is the raw ``predict`` output when all
        values lie in [0, 1], otherwise ``None`` (the raw output is then cast
        to int and returned as the labels).

    Note:
        Only "this model cannot produce probabilities" conditions trigger the
        fallback. Any other exception raised by ``predict_proba`` propagates
        instead of being masked by a second, less informative failure.
    """
    try:
        proba = np.asarray(model.predict_proba(Xs))[:, 1]
    except (AttributeError, NotImplementedError, IndexError):
        # No predict_proba, or it has no positive-class column: use predict().
        raw = np.asarray(model.predict(Xs))
        # Values confined to [0, 1] are treated as scores and thresholded.
        if raw.size and raw.min() >= 0 and raw.max() <= 1:
            return (raw >= threshold).astype(int), raw
        return raw.astype(int), None
    return (proba >= threshold).astype(int), proba
# ----------------------------------------------------------
# Feature effect estimation
# ----------------------------------------------------------
def local_effects(row_df, model, scaler, importances):
    """Estimate how each feature locally moves the predicted fault probability.

    The first row of ``row_df`` is scored, then each feature in turn is shifted
    by plus and minus one scaler ``scale_`` unit (in raw, unscaled units) and
    re-scored. Half the difference between the two probabilities is the
    feature's local effect.

    Args:
        row_df: DataFrame containing at least the ``FEATURE_COLS`` columns;
            only its first row is used.
        model: Estimator passed through to ``predict_with_proba``.
        scaler: Fitted transformer with ``transform``; ``scale_`` and ``mean_``
            are used when present and not ``None``.
        importances: Sequence indexable by feature position.

    Returns:
        tuple: ``(pred, base_p, effects)``. ``pred`` is the int class label,
        ``base_p`` the probability for the unperturbed row (``None`` if the
        model yields no probabilities), and ``effects`` a list of
        ``(feature, value, z, delta, importance, score)`` tuples sorted by
        ``score = abs(delta) * importance`` in descending order.
    """
    n_features = len(FEATURE_COLS)
    x_orig = row_df[FEATURE_COLS].iloc[0].astype(float).values

    preds, proba = predict_with_proba(model, scaler.transform([x_orig]))
    pred = int(preds[0])
    base_p = proba[0] if proba is not None else None

    # The attributes can exist but be None (e.g. a StandardScaler fitted with
    # with_std=False / with_mean=False), which a getattr default does not cover.
    scale = getattr(scaler, "scale_", None)
    if scale is None:
        scale = np.ones(n_features)
    mean = getattr(scaler, "mean_", None)
    if mean is None:
        mean = np.zeros(n_features)

    effects = []
    for i, feat in enumerate(FEATURE_COLS):
        # Guard against a zero scale so the step and the z-score stay defined.
        step = scale[i] if scale[i] != 0 else 1

        x_plus = x_orig.copy()
        x_minus = x_orig.copy()
        x_plus[i] += step
        x_minus[i] -= step
        _, p_plus = predict_with_proba(model, scaler.transform([x_plus]))
        _, p_minus = predict_with_proba(model, scaler.transform([x_minus]))

        if base_p is None or p_plus is None or p_minus is None:
            delta = 0
        else:
            # Central difference; the baseline probability cancels out.
            delta = (p_plus[0] - p_minus[0]) / 2

        z = (x_orig[i] - mean[i]) / step
        imp = float(importances[i])
        effects.append((feat, x_orig[i], z, delta, imp, abs(delta) * imp))

    effects.sort(key=lambda item: item[5], reverse=True)
    return pred, base_p, effects
# ----------------------------------------------------------
# Build explanation text
# ----------------------------------------------------------
def build_explanation(effects, pred, base_p):
    """Turn feature effects into a headline and ranked explanation sentences.

    Args:
        effects: List of ``(feature, value, z, delta, importance, score)``
            tuples, already ordered by relevance; only the first five are
            described.
        pred: Predicted class; 1 is reported as Faulty, anything else as
            Normal.
        base_p: Fault probability in [0, 1], or ``None`` to omit it from the
            headline.

    Returns:
        tuple: ``(header, lines)`` where ``header`` is the classification
        sentence and ``lines`` is a list with one sentence per described
        feature.
    """
    lines = []
    for rank, (feat, val, z, delta, _imp, _score) in enumerate(effects[:5], start=1):
        # Position of the raw value relative to the healthy band.
        low, high = HEALTHY_RANGES[feat]
        if val < low:
            state = "low"
            range_info = f"(value {val:.1f} below {low}-{high})"
        elif val > high:
            state = "high"
            range_info = f"(value {val:.1f} above {low}-{high})"
        else:
            state = "normal"
            range_info = f"(value {val:.1f} within {low}-{high})"

        # Severity wording from the magnitude of the z-score.
        abs_z = abs(z)
        if abs_z >= 2.5:
            level = "shows extreme deviation"
        elif abs_z >= 1.5:
            level = "is strongly abnormal"
        elif abs_z >= 0.5:
            level = "is moderately shifted"
        else:
            level = "is close to expected behaviour"

        # Direction of the local effect on the fault probability.
        if delta > 0:
            effect = "increases fault likelihood"
            weight = "major contributor"
        elif delta < 0:
            effect = "reduces fault likelihood"
            weight = "stabilizing factor"
        else:
            effect = "has minimal impact"
            weight = "weak driver"

        fix = FIX_SUGGESTIONS.get((feat, state), f"Inspect {feat} subsystem.")
        # Suggestions already end with a period; strip it so the sentence
        # template below does not produce "..".
        fix = fix.rstrip(".")
        lines.append(
            f"{rank}. {feat} {level} {range_info}. A 1σ change {effect} by {delta:+.3f}, "
            f"making it a {weight}. Recommended action: {fix}."
        )

    if pred == 1:
        header = "The engine is classified as Faulty (1)."
    else:
        header = "The engine is classified as Normal (0)."
    if base_p is not None:
        header += f" Fault probability: {base_p*100:.1f}%."
    return header, lines
# ----------------------------------------------------------
# Plot z-score bar chart
# ----------------------------------------------------------
def zscore_plot(effects):
    """Return a horizontal bar chart of feature z-scores.

    Bars are ordered by absolute z-score, with the largest at the top.
    ``effects`` is a sequence of tuples whose element 0 is the feature name
    and element 2 is its z-score.
    """
    names = np.array([entry[0] for entry in effects])
    scores = np.array([entry[2] for entry in effects])
    order = np.argsort(np.abs(scores))[::-1]  # descending |z|
    positions = range(len(scores))

    fig, ax = plt.subplots(figsize=(7, 3))
    ax.barh(positions, scores[order])
    ax.set_yticks(positions)
    ax.set_yticklabels(names[order])
    ax.invert_yaxis()  # put the first (largest) bar at the top
    ax.set_xlabel("z-score")
    fig.tight_layout()
    return fig
# ----------------------------------------------------------
# MAIN APP
# ----------------------------------------------------------
# Column names accepted as ground-truth labels in an uploaded CSV. "Actual" is
# included so a user column with that name is read instead of being overwritten.
_LABEL_COLUMNS = ("Actual", "ActualLabel", "Label", "Engine_Condition", "Condition", "Status")

# Page-wide styling: disables animations/transitions and defines the "card" box.
_PAGE_STYLE = """
<style>
  body { background: #f4f4f4 !important; }
  * { animation: none !important; transition: none !important; }
  canvas { transform: translateZ(0) !important; }
  .element-container { will-change: auto !important; }
  .card {
    padding: 1rem;
    border:1px solid #ddd;
    border-radius: 1rem;
    background:#fff;
  }
</style>
"""


def _convert_label(x):
    """Map a raw label cell to 1 (faulty), 0 (normal) or None (missing/unparseable).

    Strings starting with "f" or "n" (case-insensitive) map to 1 and 0;
    anything else is parsed as a number and truncated to int.
    """
    if isinstance(x, str):
        x = x.strip().lower()
        if x.startswith("f"):
            return 1
        if x.startswith("n"):
            return 0
    try:
        return int(float(x))
    except (TypeError, ValueError, OverflowError):
        # NaN, None, or free text that is not a recognised label.
        return None


def _color_row(row):
    """Style a result row: green when prediction matches the label, red otherwise."""
    if pd.isna(row["Actual"]):
        return [""] * len(row)
    if row["Actual"] == row["Predicted"]:
        return ["background-color: #d4edda"] * len(row)  # green
    return ["background-color: #f8d7da"] * len(row)  # red


def _render_manual_mode(model, scaler, importances):
    """Render the single-reading form and, after "Predict", the diagnosis."""
    c1, c2, c3 = st.columns(3)
    with c1:
        rpm = st.number_input("Engine RPM", value=800)
        oilp = st.number_input("Lub oil pressure", value=3.0)
    with c2:
        fuelp = st.number_input("Fuel pressure", value=6.0)
        coolp = st.number_input("Coolant pressure", value=2.0)
    with c3:
        oilt = st.number_input("Lub oil temp", value=80.0)
        coolt = st.number_input("Coolant temp", value=90.0)

    df = pd.DataFrame([[rpm, oilp, fuelp, coolp, oilt, coolt]], columns=FEATURE_COLS)
    st.dataframe(df)
    if not st.button("Predict"):
        return

    pred, base_p, effects = local_effects(df, model, scaler, importances)
    header, lines = build_explanation(effects, pred, base_p)

    a, b = st.columns([1, 2])
    with a:
        st.markdown("<div class='card'>", unsafe_allow_html=True)
        st.markdown("### 🔴 Faulty" if pred == 1 else "### 🟢 Normal")
        if base_p is not None:
            st.markdown(f"Probability: {base_p*100:.1f}%")
        st.markdown("</div>", unsafe_allow_html=True)

    fig = zscore_plot(effects)
    st.session_state.saved_fig = fig
    with b:
        st.pyplot(st.session_state.saved_fig)
    # pyplot keeps every figure it creates alive until closed; without this
    # each click on "Predict" would leave another figure registered.
    plt.close(fig)

    st.subheader("Detailed Analysis")
    st.write(header)
    for line in lines:
        st.markdown("- " + line)


def _render_csv_mode(model, scaler, importances):
    """Render the CSV upload flow: per-row predictions plus optional evaluation."""
    st.subheader("📁 Upload CSV (Raw Sensor Data + Optional Labels)")
    f = st.file_uploader("Upload CSV", type="csv")
    if not f:
        return

    try:
        df = pd.read_csv(f)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as err:
        st.error(f"Could not read the uploaded CSV: {err}")
        return

    st.write("### Preview of Uploaded File")
    st.dataframe(df.head())

    # Validate the feature columns before doing any work.
    missing = [col for col in FEATURE_COLS if col not in df.columns]
    if missing:
        st.error("Uploaded CSV is missing required column(s): " + ", ".join(missing))
        return
    try:
        feature_df = df[FEATURE_COLS].astype(float)
    except (TypeError, ValueError) as err:
        st.error(f"Feature columns must be numeric: {err}")
        return

    # 1. Detect the ground-truth label column (first match in file order).
    actual_label_col = next((col for col in df.columns if col in _LABEL_COLUMNS), None)
    if actual_label_col is not None:
        df["Actual"] = df[actual_label_col].apply(_convert_label)
    else:
        df["Actual"] = None

    # 2. Score every row; keep only the top-ranked explanation sentence.
    preds, probs, explanations = [], [], []
    for start in range(len(feature_df)):
        row = feature_df.iloc[start:start + 1]
        p, bp, eff = local_effects(row, model, scaler, importances)
        _header, lines = build_explanation(eff, p, bp)
        preds.append(p)
        probs.append(bp)
        explanations.append(lines[0] if lines else "")
    df["Predicted"] = preds
    df["Pred_Prob"] = probs
    df["Explanation"] = explanations

    # 3. Evaluation metrics, computed only on rows that carry a usable label.
    labelled = df["Actual"].notna()
    if labelled.any():
        from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

        y_true = df.loc[labelled, "Actual"].astype(int)
        y_pred = df.loc[labelled, "Predicted"].astype(int)
        acc = accuracy_score(y_true, y_pred)
        # Pin labels so a file containing a single class still yields a 2x2
        # matrix and does not break target_names.
        cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
        report = classification_report(
            y_true,
            y_pred,
            labels=[0, 1],
            target_names=["Normal", "Faulty"],
            zero_division=0,
        )
        st.subheader("📊 Evaluation Metrics")
        st.write(f"**Accuracy:** {acc*100:.2f}%")
        st.write("### Confusion Matrix")
        st.write(cm)
        st.write("### Classification Report")
        st.text(report)

    st.subheader("📝 Detailed Prediction Results")
    st.dataframe(df.style.apply(_color_row, axis=1))


def main():
    """Configure the page, load the artifacts and dispatch to the chosen input mode."""
    st.set_page_config(page_title="Engine Predictor", layout="wide", page_icon="⚙️")

    # Anti-shake + styling
    # NOTE(review): a <script> tag injected through st.markdown may not be
    # executed by the browser; confirm this line has any effect before relying on it.
    st.markdown(
        "<script>window.parent.document.body.style.overflow = 'hidden';</script>",
        unsafe_allow_html=True,
    )
    st.markdown(_PAGE_STYLE, unsafe_allow_html=True)

    model, scaler, importances = load_artifacts()

    st.title("⚙️ Engine Condition Predictor")
    st.write("Advanced diagnostics, explanations, and CSV evaluation.")

    mode = st.sidebar.radio("Input Mode", ["Manual Entry", "Upload CSV"])
    if mode == "Manual Entry":
        _render_manual_mode(model, scaler, importances)
    else:
        _render_csv_mode(model, scaler, importances)
# ----------------------------------------------------------
# Entry point: build the UI when the file is executed as a script.
if __name__ == "__main__":
    main()