import pandas as pd
import streamlit as st
import joblib
from huggingface_hub import hf_hub_download

st.set_page_config(page_title="Engine Predictive Maintenance", layout="centered")

st.title("Engine Predictive Maintenance")
st.write("Enter sensor values to predict whether maintenance is needed.")

MODEL_REPO = "vinayakdnrdd/engine-pm-model"
MODEL_FILE = "engine_pm_model.joblib"


@st.cache_resource
def _load_model(repo_id, filename):
    """Download the model artifact from the Hugging Face Hub and deserialize it.

    Streamlit re-executes the whole script on every widget interaction, so the
    download and deserialization are cached: they run once per
    ``(repo_id, filename)`` pair for the lifetime of the server process
    instead of on every rerun.

    Args:
        repo_id: Hugging Face Hub repository id holding the artifact.
        filename: Name of the joblib file inside that repository.

    Returns:
        A ``(local_path, model)`` tuple.
    """
    local_path = hf_hub_download(repo_id=repo_id, filename=filename)
    # SECURITY NOTE: joblib.load unpickles the file, which can execute
    # arbitrary code. Only point MODEL_REPO at a repository you trust.
    return local_path, joblib.load(local_path)


# Load model (cached across Streamlit reruns)
model_path, model = _load_model(MODEL_REPO, MODEL_FILE)

# ---- Helper: get expected input columns from sklearn Pipeline ----
def get_expected_columns(m):
    """Return the input column names the model expects, in order.

    The lookup tries, in this order:

    1. ``m.feature_names_in_``.
    2. ``m.named_steps["preprocess"].feature_names_in_``.
    3. The column selectors declared in that step's ``transformers``
       (``(name, transformer, columns)`` triples), de-duplicated while
       keeping first-seen order.
    4. A hard-coded list of default sensor column names.

    Args:
        m: The loaded model object. It only needs to expose whichever of the
            attributes above it actually has.

    Returns:
        list: Column names to build the prediction DataFrame with.
    """
    default_cols = [
        "Engine rpm",
        "Lub oil pressure",
        "Fuel pressure",
        "Coolant pressure",
        "Lub oil temp",
        "Coolant temp",
    ]

    # Best case: the model itself records the feature names.
    cols = getattr(m, "feature_names_in_", None)
    if cols is not None:
        return list(cols)

    # Typical: the "preprocess" step of a pipeline records them.
    pre = getattr(m, "named_steps", {}).get("preprocess")
    if pre is None:
        return default_cols

    cols = getattr(pre, "feature_names_in_", None)
    if cols is not None:
        return list(cols)

    # Fallback: derive the names from the transformers definition.
    declared = []
    try:
        for _name, _transformer, selector in pre.transformers:
            if isinstance(selector, str):
                # A bare string selects a single column.
                declared.append(selector)
            elif isinstance(selector, (list, tuple)):
                # Keep only real names: positional indices and boolean masks
                # are not column labels and cannot be mapped to UI inputs.
                declared.extend(c for c in selector if isinstance(c, str))
    except (AttributeError, TypeError, ValueError):
        # The step has no usable `transformers` (missing, not iterable, or
        # not made of 3-tuples). Best effort: keep whatever was collected.
        pass

    # dict.fromkeys drops duplicates while preserving first-seen order.
    unique_cols = list(dict.fromkeys(declared))
    return unique_cols if unique_cols else default_cols

# Feature names the loaded model was trained on.
expected_cols = get_expected_columns(model)

# Collapsible panel exposing those names, useful when a column fails to map.
with st.expander("Debug (Expected input columns from model)"):
    st.write(expected_cols)

# ---- UI inputs ----
# One numeric widget per sensor reading; `value` is the initial default.
engine_rpm = st.number_input("Engine RPM", value=1500.0)
lub_oil_pressure = st.number_input("Lub Oil Pressure", value=2.5)
fuel_pressure = st.number_input("Fuel Pressure", value=3.0)
coolant_pressure = st.number_input("Coolant Pressure", value=1.2)
lub_oil_temp = st.number_input("Lub Oil Temperature", value=85.0)
coolant_temp = st.number_input("Coolant Temperature", value=90.0)

# Each reading paired with every column-name spelling it should answer to.
_alias_groups = (
    (engine_rpm, ("Engine rpm", "engine rpm", "Engine_RPM", "Engine_Rpm")),
    (
        lub_oil_pressure,
        ("Lub oil pressure", "lub oil pressure", "Lub_Oil_Pressure"),
    ),
    (fuel_pressure, ("Fuel pressure", "fuel pressure", "Fuel_Pressure")),
    (
        coolant_pressure,
        ("Coolant pressure", "coolant pressure", "Coolant_Pressure"),
    ),
    (
        lub_oil_temp,
        (
            "Lub oil temp",
            "lub oil temp",
            "Lub oil temperature",
            "lub oil temperature",
            "Lub_Oil_Temperature",
        ),
    ),
    (
        coolant_temp,
        (
            "Coolant temp",
            "coolant temp",
            "Coolant temperature",
            "coolant temperature",
            "Coolant_Temperature",
        ),
    ),
)

# Flatten into the alias -> value lookup table used when building the model row.
inputs = {
    alias: reading
    for reading, aliases in _alias_groups
    for alias in aliases
}

def _normalize_col_name(name):
    """Return *name* reduced to a case- and separator-insensitive key."""
    # Treat "_" and "-" like spaces, collapse whitespace runs, ignore case.
    text = str(name).replace("_", " ").replace("-", " ")
    return " ".join(text.split()).casefold()


def find_value_for_col(colname: str, values=None):
    """Look up the UI value that corresponds to a model column name.

    An exact key match is tried first. Otherwise names are compared after
    normalization, so differences in case, surrounding whitespace, repeated
    spaces, and ``_``/``-``/space separators are ignored
    (``"lub_oil_temp"`` matches the alias ``"Lub oil temp"``).

    Args:
        colname: Column name expected by the model. Non-string names are
            compared by their ``str()`` form instead of raising.
        values: Optional alias -> value mapping to search. Defaults to the
            module-level ``inputs`` table.

    Returns:
        The matching value, or ``None`` when no alias matches.
    """
    if values is None:
        values = inputs

    # exact match
    if colname in values:
        return values[colname]

    # normalized match
    wanted = _normalize_col_name(colname)
    for alias, value in values.items():
        if _normalize_col_name(alias) == wanted:
            return value
    return None

if st.button("Predict"):
    # Resolve every model column to a UI value; None marks an unmapped column.
    lookups = [(name, find_value_for_col(name)) for name in expected_cols]
    missing = [name for name, value in lookups if value is None]
    row = {name: value for name, value in lookups if value is not None}

    # Abort this run if any column the model needs could not be mapped.
    if missing:
        st.error("Model expects these columns but app couldn't map them:")
        st.write(missing)
        st.stop()

    # Single-row frame with columns in the order the model expects.
    X = pd.DataFrame([row], columns=expected_cols)

    # Take row 0, column 1 of predict_proba as the maintenance probability,
    # then threshold it at 0.5.
    class_probabilities = model.predict_proba(X)
    proba = float(class_probabilities[0, 1])
    pred = int(proba >= 0.5)

    st.write(f"Probability (Maintenance Needed): **{proba:.2f}**")
    if pred != 1:
        st.success("✅ Prediction: **Normal Operation**")
    else:
        st.error("⚠️ Prediction: **Maintenance Needed**")