import os
import numpy as np
import pandas as pd
import joblib
import streamlit as st
import matplotlib.pyplot as plt
from huggingface_hub import hf_hub_download

# Hugging Face Hub coordinates of the trained model artifact
MODEL_REPO_ID = "sriharimudakavi/engine-condition-xgboost-tuned"
MODEL_FILENAME = "xgboost_tuned_model.joblib"

# Hugging Face Hub coordinates of the fitted scaler artifact
SCALER_REPO_ID = "sriharimudakavi/engine-data"
SCALER_FILENAME = "scaler.joblib"

# Sensor columns used for prediction. The order matters: the rest of the file
# indexes scaler statistics and importances by position in this list.
# Must stay a list (pandas treats a tuple key as a single column label).
FEATURE_COLS = [
    "Engine rpm",
    "Lub oil pressure",
    "Fuel pressure",
    "Coolant pressure",
    "lub oil temp",
    "Coolant temp",
]

# Acceptable (low, high) bounds per feature, used only for the human-readable
# notes in the explanation text (car engine).
HEALTHY_RANGES = {
    "Engine rpm": (650, 3000),
    "Lub oil pressure": (1.0, 4.0),
    "Fuel pressure": (2.5, 4.0),
    "Coolant pressure": (0.9, 1.4),
    "lub oil temp": (85.0, 110.0),
    "Coolant temp": (85.0, 105.0),
}

# Corrective recommendations keyed by (feature, "high" | "low")
FIX_SUGGESTIONS = {
    # Engine speed
    ("Engine rpm", "high"): "Reduce engine load and inspect governor calibration.",
    ("Engine rpm", "low"): "Inspect air intake and fuel delivery; reduce excessive load.",
    # Pressures
    ("Lub oil pressure", "high"): "Inspect relief valve and verify oil grade.",
    ("Lub oil pressure", "low"): "Check oil level, pump health, and replace filters.",
    ("Fuel pressure", "high"): "Check injector return line and regulator blockage.",
    ("Fuel pressure", "low"): "Inspect fuel filter and pump; remove air ingress.",
    ("Coolant pressure", "high"): "Inspect thermostat, radiator cap, and coolant passages.",
    ("Coolant pressure", "low"): "Refill coolant, bleed air, and inspect pump.",
    # Temperatures
    ("lub oil temp", "high"): "Inspect lubrication cooling circuit and bearings.",
    ("lub oil temp", "low"): "Allow proper warm-up and verify heating system.",
    ("Coolant temp", "high"): "Inspect radiator, fan, and coolant flow.",
    ("Coolant temp", "low"): "Verify thermostat and sensor calibration.",
}

# Freeze plot to prevent shaking
# Make sure the `saved_fig` key exists in Streamlit's session state before any
# code reads it. It starts as None; main() stores the most recent z-score
# figure there.
if "saved_fig" not in st.session_state:
    st.session_state.saved_fig = None


# ----------------------------------------------------------
# Load model & scaler
# ----------------------------------------------------------
@st.cache_resource
def load_artifacts():
    """Download and load the model and scaler, plus per-feature importances.

    Both artifacts are fetched with ``hf_hub_download`` and deserialised with
    ``joblib``. The result is cached by ``st.cache_resource``.

    Returns:
        tuple: ``(model, scaler, importances)`` where ``importances`` is a
        1-D float array with one entry per ``FEATURE_COLS`` item. When the
        model exposes no usable ``feature_importances_`` (missing, raising,
        or of the wrong length) a vector of ones is returned so every feature
        is weighted equally.
    """
    model_path = hf_hub_download(
        repo_id=MODEL_REPO_ID, filename=MODEL_FILENAME, repo_type="model"
    )
    scaler_path = hf_hub_download(
        repo_id=SCALER_REPO_ID, filename=SCALER_FILENAME, repo_type="dataset"
    )

    # NOTE(security): joblib.load unpickles arbitrary Python objects. This is
    # only safe while the two Hub repositories above are trusted.
    model = joblib.load(model_path)
    scaler = joblib.load(scaler_path)

    n_features = len(FEATURE_COLS)
    try:
        importances = np.asarray(model.feature_importances_, dtype=float).ravel()
    except (AttributeError, NotImplementedError, TypeError, ValueError):
        # Best-effort: models without importances fall back to equal weights.
        importances = np.ones(n_features)

    # A vector of the wrong length would be indexed out of range (or silently
    # misaligned) by callers that iterate FEATURE_COLS by position.
    if importances.shape != (n_features,):
        importances = np.ones(n_features)

    return model, scaler, importances


# ----------------------------------------------------------
# Predict with threshold=0.5
# ----------------------------------------------------------
def predict_with_proba(model, Xs, threshold=0.5):
    """Return hard predictions and, when available, positive-class scores.

    Args:
        model: Object exposing ``predict_proba(Xs)`` and/or ``predict(Xs)``.
        Xs: Feature matrix passed to the model unchanged.
        threshold: Score at or above which a sample is labelled 1.

    Returns:
        tuple: ``(preds, proba)``. ``preds`` is an integer array. ``proba`` is
        the column-1 output of ``predict_proba`` when the model provides it.
        Otherwise, if the raw ``predict`` output lies entirely within
        ``[0, 1]`` it is treated as a score and thresholded; if not, the raw
        output is cast to int and ``proba`` is ``None``.

    Raises:
        Whatever the model raises, except that a missing or unsupported
        ``predict_proba`` (``AttributeError`` / ``NotImplementedError``)
        triggers the ``predict`` fallback instead of propagating.
    """
    proba_fn = getattr(model, "predict_proba", None)
    if callable(proba_fn):
        try:
            scores = np.asarray(proba_fn(Xs))
        except (AttributeError, NotImplementedError):
            # The model advertises predict_proba but does not support it.
            scores = None
        if scores is not None:
            proba = scores[:, 1]
            return (proba >= threshold).astype(int), proba

    # np.asarray lets models that return plain lists use the same code path.
    raw = np.asarray(model.predict(Xs))
    # raw.size guard: min()/max() raise on an empty array.
    if raw.size and raw.min() >= 0 and raw.max() <= 1:
        return (raw >= threshold).astype(int), raw
    return raw.astype(int), None


# ----------------------------------------------------------
# Feature effect estimation
# ----------------------------------------------------------
def _scaler_stat(scaler, attr, fill, size):
    """Return ``scaler.<attr>`` as a float array, or ``fill`` repeated ``size`` times.

    The fallback covers both a missing attribute and one that is set to None.
    """
    value = getattr(scaler, attr, None)
    if value is None:
        return np.full(size, fill, dtype=float)
    return np.asarray(value, dtype=float)


def local_effects(row_df, model, scaler, importances):
    """Estimate how each feature locally influences the prediction for one row.

    Every feature is perturbed by +/- one step (the scaler's ``scale_`` entry,
    or 1 when that entry is zero or unavailable) and the change in the model's
    score is measured with a central difference.

    Args:
        row_df: DataFrame containing the ``FEATURE_COLS`` columns; only the
            first row is used.
        model: Fitted model accepted by ``predict_with_proba``.
        scaler: Fitted transformer exposing ``transform``; ``scale_`` and
            ``mean_`` are used when present.
        importances: Per-feature weights, indexed in ``FEATURE_COLS`` order.

    Returns:
        tuple: ``(pred, base_p, effects)``. ``pred`` is the integer class for
        the unperturbed row, ``base_p`` its score (``None`` when the model
        yields no score), and ``effects`` a list of
        ``(feature, value, z, delta, importance, score)`` tuples sorted by
        ``score = |delta| * importance`` in descending order. ``delta`` is 0
        for every feature when no score is available.
    """
    n_feat = len(FEATURE_COLS)
    x_orig = row_df[FEATURE_COLS].iloc[0].astype(float).to_numpy()

    scale = _scaler_stat(scaler, "scale_", 1.0, n_feat)
    mean = _scaler_stat(scaler, "mean_", 0.0, n_feat)
    steps = np.where(scale != 0, scale, 1.0)  # avoid zero steps / division by zero
    z_scores = (x_orig - mean) / steps

    # Build every input in one matrix so the scaler and the model are each
    # called once instead of once per perturbation:
    #   row 0               -> unperturbed sample
    #   rows 1 .. n         -> feature i increased by its step
    #   rows n+1 .. 2n      -> feature i decreased by its step
    cols = np.arange(n_feat)
    batch = np.tile(x_orig, (2 * n_feat + 1, 1))
    batch[1 + cols, cols] += steps
    batch[1 + n_feat + cols, cols] -= steps

    preds, proba = predict_with_proba(model, scaler.transform(batch))
    pred = int(preds[0])

    if proba is None:
        base_p = None
        deltas = np.zeros(n_feat)
    else:
        proba = np.asarray(proba, dtype=float)
        base_p = float(proba[0])
        # Central difference: ((p+ - p0) - (p- - p0)) / 2 == (p+ - p-) / 2
        deltas = (proba[1:n_feat + 1] - proba[n_feat + 1:]) / 2

    effects = []
    for i, feat in enumerate(FEATURE_COLS):
        imp = float(importances[i])
        delta = float(deltas[i])
        effects.append(
            (feat, float(x_orig[i]), float(z_scores[i]), delta, imp, abs(delta) * imp)
        )

    effects.sort(key=lambda item: item[5], reverse=True)
    return pred, base_p, effects


# ----------------------------------------------------------
# Build explanation text
# ----------------------------------------------------------
def build_explanation(effects, pred, base_p):
    """Turn feature effects into a header sentence and ranked explanation lines.

    Args:
        effects: Sequence of ``(feature, value, z, delta, importance, score)``
            tuples; only the first five entries are described.
        pred: Predicted class; 1 is reported as Faulty, anything else as Normal.
        base_p: Fault score in ``[0, 1]`` for the header, or ``None`` to omit it.

    Returns:
        tuple: ``(header, lines)`` where ``lines`` holds one sentence per
        described feature.
    """
    lines = []

    for rank, (feat, val, z, delta, _imp, _score) in enumerate(effects[:5], start=1):
        low, high = HEALTHY_RANGES[feat]

        if val < low:
            state = "low"
            range_info = f"(value {val:.1f} below {low}-{high})"
        elif val > high:
            state = "high"
            range_info = f"(value {val:.1f} above {low}-{high})"
        else:
            state = "normal"
            range_info = f"(value {val:.1f} within {low}-{high})"

        deviation = abs(z)
        if deviation >= 2.5:
            level = "shows extreme deviation"
        elif deviation >= 1.5:
            level = "is strongly abnormal"
        elif deviation >= 0.5:
            level = "is moderately shifted"
        else:
            level = "is close to expected behaviour"

        # Report the magnitude next to the verb; "reduces ... by -0.123" would
        # read as a double negative.
        if delta > 0:
            effect = f"increases fault likelihood by {delta:.3f}"
            weight = "major contributor"
        elif delta < 0:
            effect = f"reduces fault likelihood by {-delta:.3f}"
            weight = "stabilizing factor"
        else:
            effect = "has minimal impact on fault likelihood"
            weight = "weak driver"

        fix = FIX_SUGGESTIONS.get((feat, state), f"Inspect {feat} subsystem.")
        # The suggestions already end with a period; normalise so the sentence
        # ends with exactly one.
        fix = fix.rstrip(".") + "."

        lines.append(
            f"{rank}. {feat} {level} {range_info}. A 1σ increase {effect}, "
            f"making it a {weight}. Recommended action: {fix}"
        )

    if pred == 1:
        header = "The engine is classified as Faulty (1)."
    else:
        header = "The engine is classified as Normal (0)."

    if base_p is not None:
        header += f" Fault probability: {base_p*100:.1f}%."

    return header, lines


# ----------------------------------------------------------
# Plot z-score bar chart
# ----------------------------------------------------------
def zscore_plot(effects):
    """Draw a horizontal bar chart of feature z-scores, largest |z| on top.

    Args:
        effects: Sequence of tuples whose element 0 is the feature name and
            element 2 its z-score.

    Returns:
        matplotlib.figure.Figure: The rendered figure.
    """
    # Build the figure without pyplot: pyplot keeps every figure it creates in
    # a global registry until it is explicitly closed, which leaks memory in a
    # long-running server process that redraws on each interaction.
    from matplotlib.figure import Figure

    names = [e[0] for e in effects]
    z_vals = [e[2] for e in effects]
    order = np.argsort(np.abs(z_vals))[::-1]

    fig = Figure(figsize=(7, 3))
    ax = fig.subplots()

    positions = range(len(z_vals))
    ax.barh(positions, [z_vals[i] for i in order])
    ax.set_yticks(positions)
    ax.set_yticklabels([names[i] for i in order])
    ax.invert_yaxis()  # first (largest |z|) bar at the top
    ax.set_xlabel("z-score")
    fig.tight_layout()

    return fig


# ----------------------------------------------------------
# MAIN APP
# ----------------------------------------------------------
# Normalised header names (lower-cased, spaces replaced by underscores) that
# are treated as the ground-truth label column of an uploaded CSV.
_LABEL_COLUMN_KEYS = frozenset(
    {"actuallabel", "actual_label", "label", "engine_condition", "condition", "status"}
)


def _inject_page_style():
    """Emit the page-level HTML/CSS tweaks (anti-shake + card styling)."""
    # NOTE(review): Streamlit is not expected to execute <script> tags passed
    # through st.markdown, so this line may have no effect — confirm.
    st.markdown("<script>window.parent.document.body.style.overflow = 'hidden';</script>", unsafe_allow_html=True)
    st.markdown(
        """
        <style>
        body { background: #f4f4f4 !important; }
        * { animation: none !important; transition: none !important; }
        canvas { transform: translateZ(0) !important; }
        .element-container { will-change: auto !important; }
        .card {
            padding: 1rem;
            border:1px solid #ddd;
            border-radius: 1rem;
            background:#fff;
        }
        </style>
        """,
        unsafe_allow_html=True
    )


def _render_manual_mode(model, scaler, importances):
    """Render the six sensor inputs and, on "Predict", the single-row analysis."""
    c1, c2, c3 = st.columns(3)

    with c1:
        rpm = st.number_input("Engine RPM", value=800)
        oilp = st.number_input("Lub oil pressure", value=3.0)

    with c2:
        fuelp = st.number_input("Fuel pressure", value=6.0)
        coolp = st.number_input("Coolant pressure", value=2.0)

    with c3:
        oilt = st.number_input("Lub oil temp", value=80.0)
        coolt = st.number_input("Coolant temp", value=90.0)

    df = pd.DataFrame([[rpm, oilp, fuelp, coolp, oilt, coolt]], columns=FEATURE_COLS)
    st.dataframe(df)

    if not st.button("Predict"):
        return

    pred, base_p, effects = local_effects(df, model, scaler, importances)
    header, lines = build_explanation(effects, pred, base_p)

    a, b = st.columns([1, 2])

    with a:
        st.markdown("<div class='card'>", unsafe_allow_html=True)
        st.markdown("### 🔴 Faulty" if pred == 1 else "### 🟢 Normal")
        if base_p is not None:
            st.markdown(f"Probability: {base_p*100:.1f}%")
        st.markdown("</div>", unsafe_allow_html=True)

    fig = zscore_plot(effects)
    st.session_state.saved_fig = fig

    with b:
        st.pyplot(st.session_state.saved_fig)

    st.subheader("Detailed Analysis")
    st.write(header)
    for line in lines:
        st.markdown("- " + line)


def _detect_label_column(columns):
    """Return the first column whose normalised name is a known label header, else None."""
    for col in columns:
        key = str(col).strip().lower().replace(" ", "_")
        if key in _LABEL_COLUMN_KEYS:
            return col
    return None


def _convert_label(value):
    """Map a raw label cell to 1/0 (or another int), or None when unusable.

    Strings starting with "f" map to 1 and strings starting with "n" map to 0
    (case-insensitive). Anything else is parsed as a number; cells that cannot
    be parsed (including NaN) yield None instead of raising.
    """
    if isinstance(value, str):
        text = value.strip().lower()
        if text.startswith("f"):
            return 1
        if text.startswith("n"):
            return 0
        value = text
    try:
        return int(float(value))
    except (TypeError, ValueError, OverflowError):
        return None


def _render_evaluation(df):
    """Show accuracy, confusion matrix and report for rows that carry a label."""
    labeled = df["Actual"].notna()
    if not labeled.any():
        return

    from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

    y_true = df.loc[labeled, "Actual"].astype(int)
    y_pred = df.loc[labeled, "Predicted"].astype(int)

    acc = accuracy_score(y_true, y_pred)
    # Fix the label set explicitly: without it, a file containing a single
    # class makes classification_report reject the two target_names, and the
    # confusion matrix changes shape.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    report = classification_report(
        y_true,
        y_pred,
        labels=[0, 1],
        target_names=["Normal", "Faulty"],
        zero_division=0,
    )

    st.subheader("📊 Evaluation Metrics")
    if not labeled.all():
        st.caption(
            f"Metrics computed on {int(labeled.sum())} of {len(df)} rows with a usable label."
        )
    st.write(f"**Accuracy:** {acc*100:.2f}%")

    st.write("### Confusion Matrix")
    st.write(cm)

    st.write("### Classification Report")
    st.text(report)


def _color_row(row):
    """Style a result row: green when prediction matches the label, red when not, plain when unlabeled."""
    if pd.isna(row["Actual"]):
        return [""] * len(row)
    if row["Actual"] == row["Predicted"]:
        return ["background-color: #d4edda"] * len(row)  # green
    return ["background-color: #f8d7da"] * len(row)      # red


def _render_csv_mode(model, scaler, importances):
    """Render the CSV upload flow: preview, per-row prediction, metrics, results table."""
    st.subheader("📁 Upload CSV (Raw Sensor Data + Optional Labels)")

    uploaded = st.file_uploader("Upload CSV", type="csv")
    if not uploaded:
        return

    try:
        df = pd.read_csv(uploaded)
    except (pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        st.error(f"Could not read the uploaded CSV: {exc}")
        return

    st.write("### Preview of Uploaded File")
    st.dataframe(df.head())

    # Validate the input up front so the user sees a message, not a traceback.
    missing = [col for col in FEATURE_COLS if col not in df.columns]
    if missing:
        st.error("The uploaded CSV is missing required column(s): " + ", ".join(missing))
        return

    try:
        feature_df = df[FEATURE_COLS].astype(float)
    except (TypeError, ValueError) as exc:
        st.error(f"Feature columns must contain numeric values: {exc}")
        return

    # Optional ground-truth labels. Unusable cells become missing values and
    # are skipped by the metrics and the row colouring.
    label_col = _detect_label_column(df.columns)
    if label_col is not None:
        df["Actual"] = pd.array(
            [_convert_label(v) for v in df[label_col]], dtype="Int64"
        )
    else:
        df["Actual"] = None

    preds, probs, explanations = [], [], []
    for pos in range(len(feature_df)):
        row = feature_df.iloc[pos:pos + 1]  # one-row DataFrame, as local_effects expects
        pred, base_p, effects = local_effects(row, model, scaler, importances)
        _, lines = build_explanation(effects, pred, base_p)

        preds.append(pred)
        probs.append(base_p)
        explanations.append(lines[0] if lines else "")

    df["Predicted"] = preds
    df["Pred_Prob"] = probs
    df["Explanation"] = explanations

    _render_evaluation(df)

    st.subheader("📍 Detailed Prediction Results")
    st.dataframe(df.style.apply(_color_row, axis=1))


def main():
    """Build the Streamlit page: styling, artifact loading, and the selected input mode."""
    st.set_page_config(page_title="Engine Predictor", layout="wide", page_icon="⚙️")

    # Anti-shake + styling
    _inject_page_style()

    model, scaler, importances = load_artifacts()

    st.title("⚙️ Engine Condition Predictor")
    st.write("Advanced diagnostics, explanations, and CSV evaluation.")

    mode = st.sidebar.radio("Input Mode", ["Manual Entry", "Upload CSV"])

    if mode == "Manual Entry":
        _render_manual_mode(model, scaler, importances)
    else:
        _render_csv_mode(model, scaler, importances)


# ----------------------------------------------------------
# Script entry point: build the page only when this file is executed as the
# main module, not when it is imported.
if __name__ == "__main__":
    main()