# Importing packages
import streamlit as st
import pandas as pd
import numpy as np
from huggingface_hub import hf_hub_download
import joblib
import io

# App Configuration
st.set_page_config(
    page_title="Engine Predictive Maintenance",
    page_icon="🛠️",
    layout="wide"
)

st.title("🛠️ Smart Engine Predictive Maintenance App")
st.markdown("""
This application predicts whether an engine is **Faulty (maintenance required)** or **Normal**
based on sensor readings.

**Target:**
- **0 = Normal**
- **1 = Faulty**

**Note:** The model expects engineered features, so the app computes the same feature engineering
used during training to ensure schema consistency.
""")

# Model Settings (Hugging Face)
MODEL_REPO_ID = "simnid/predictive-maintenance-model"
MODEL_FILENAME = "best_predictive_maintenance_model.joblib"

# Dataset repo (for pulling bulk sample)
DATA_REPO_ID = "simnid/predictive-engine-maintenance-dataset"
BULK_TEST_FILENAME = "bulk_test_sample.csv"

RAW_COLS = [
    "Engine rpm",
    "Lub oil pressure",
    "Fuel pressure",
    "Coolant pressure",
    "lub oil temp",
    "Coolant temp"
]

ENGINEERED_COLS = [
    "RPM_FuelPressure_Ratio",
    "Power_Index",
    "Thermal_Pressure_Index",
    "Mech_Cooling_Balance",
    "Pressure_Coordination",
    "Low_Oil_Pressure_Flag",
    "High_Coolant_Temp_Flag",
    "Low_RPM_Flag"
]

FINAL_FEATURE_ORDER = RAW_COLS + ENGINEERED_COLS

# Feature Engineering
def add_engineered_features(df: pd.DataFrame) -> pd.DataFrame:
    df = df.copy()

    # Ensure required raw columns exist
    missing = [c for c in RAW_COLS if c not in df.columns]
    if missing:
        raise ValueError(f"Missing required columns: {missing}")

    # Convert to numeric (safe conversion)
    for c in RAW_COLS:
        df[c] = pd.to_numeric(df[c], errors="coerce")

    if df[RAW_COLS].isnull().any().any():
        bad_cols = df[RAW_COLS].columns[df[RAW_COLS].isnull().any()].tolist()
        raise ValueError(f"Non-numeric / missing values detected in: {bad_cols}")

    # Interaction Features
    df["RPM_FuelPressure_Ratio"] = df["Engine rpm"] / (df["Fuel pressure"] + 1e-5)
    df["Power_Index"] = (df["Engine rpm"] * df["Fuel pressure"]) / 1000

    # System Stress Indicators
    df["Thermal_Pressure_Index"] = df["Coolant temp"] / (df["Fuel pressure"] + 1e-5)
    df["Mech_Cooling_Balance"] = (
        (df["Engine rpm"] + df["Lub oil pressure"]) -
        (df["Coolant temp"] + df["Coolant pressure"])
    )
    df["Pressure_Coordination"] = df["Fuel pressure"] - df["Coolant pressure"]

    # Early Warning Flags (data-driven thresholds)
    df["Low_Oil_Pressure_Flag"] = (df["Lub oil pressure"] < 1.5).astype(int)
    df["High_Coolant_Temp_Flag"] = (df["Coolant temp"] > 100).astype(int)
    df["Low_RPM_Flag"] = (df["Engine rpm"] < 600).astype(int)

    return df[FINAL_FEATURE_ORDER]

# Load Model
@st.cache_resource
def load_model():
    try:
        model_path = hf_hub_download(
            repo_id=MODEL_REPO_ID,
            filename=MODEL_FILENAME,
            repo_type="model"
        )
        return joblib.load(model_path)
    except Exception as e:
        st.error(f"Error loading model from Hugging Face: {e}")
        return None

model = load_model()
if model is None:
    st.warning("Model could not be loaded. Please verify model repo + filename.")
    st.stop()


# Sidebar: Business + Model Context
with st.sidebar:
    st.header("About This Model")
    st.markdown("""
**Model Details**
- **Model Type:** Gradient Boosting Classifier
- **Optimization Objective:** Maximize recall for faulty engines (minimize missed failures)
- **Artifact Source:** Hugging Face Model Hub

**Why Recall Matters**
A false negative means a failure was missed, leading to downtime, safety risks, and costly repairs.
""")

    st.subheader("Production Metrics (Reference)")
    st.metric("Recall (Faulty)", "0.84")
    st.metric("ROC-AUC", "0.70")
    st.metric("PR-AUC", "0.80")

    st.markdown("---")
    st.subheader("Decision Threshold")
    threshold = st.slider(
        "Classification Threshold (Faulty if P ≥ threshold)",
        min_value=0.05, max_value=0.95, value=0.50, step=0.01
    )
    st.caption("Lower threshold → higher recall (fewer missed failures), but more false alarms.")


# Tabs: Single + Bulk Prediction
tab1, tab2 = st.tabs(["🔎 Single Prediction", "📦 Bulk Prediction"])


# Single Prediction
with tab1:
    st.subheader("Engine Sensor Inputs")

    c1, c2, c3 = st.columns(3)

    with c1:
        engine_rpm = st.number_input("Engine rpm", min_value=0.0, value=700.0, step=1.0)
        lub_oil_pressure = st.number_input("Lub oil pressure", min_value=0.0, value=2.50, step=0.01)

    with c2:
        fuel_pressure = st.number_input("Fuel pressure", min_value=0.0, value=12.00, step=0.01)
        coolant_pressure = st.number_input("Coolant pressure", min_value=0.0, value=2.50, step=0.01)

    with c3:
        lub_oil_temp = st.number_input("lub oil temp", min_value=0.0, value=80.0, step=0.1)
        coolant_temp = st.number_input("Coolant temp", min_value=0.0, value=85.0, step=0.1)

    raw_input_df = pd.DataFrame([{
        "Engine rpm": engine_rpm,
        "Lub oil pressure": lub_oil_pressure,
        "Fuel pressure": fuel_pressure,
        "Coolant pressure": coolant_pressure,
        "lub oil temp": lub_oil_temp,
        "Coolant temp": coolant_temp
    }])

    try:
        feature_df = add_engineered_features(raw_input_df)
    except Exception as e:
        st.error(f"Feature engineering failed: {e}")
        st.stop()

    with st.expander("View engineered input dataframe"):
        st.dataframe(feature_df)
        csv = feature_df.to_csv(index=False).encode("utf-8")
        st.download_button("Download Engineered Input CSV", csv, "engine_input_features.csv", "text/csv")

    st.subheader("Prediction Output")

    if st.button("Predict Engine Condition", type="primary", use_container_width=True):
        try:
            proba_faulty = None
            if hasattr(model, "predict_proba"):
                proba_faulty = float(model.predict_proba(feature_df)[0][1])

            # Threshold-based classification (business control)
            if proba_faulty is not None:
                pred_class = int(proba_faulty >= threshold)
            else:
                pred_class = int(model.predict(feature_df)[0])

            colA, colB = st.columns(2)

            with colA:
                if pred_class == 1:
                    st.error("⚠️ Prediction: FAULTY (Maintenance Recommended)")
                else:
                    st.success("✅ Prediction: NORMAL (No Immediate Maintenance Required)")

            with colB:
                if proba_faulty is not None:
                    st.metric("Probability of Faulty (Class 1)", f"{proba_faulty*100:.1f}%")
                    st.progress(int(proba_faulty * 100))
                else:
                    st.info("Probability score unavailable (model does not support predict_proba).")

        except Exception as e:
            st.error(f"Prediction failed: {e}")

# Bulk Prediction
with tab2:
    st.subheader("Bulk CSV Prediction")

    st.markdown("""
Upload a CSV containing **raw sensor columns only**:

- Engine rpm
- Lub oil pressure
- Fuel pressure
- Coolant pressure
- lub oil temp
- Coolant temp

The app will automatically engineer features and return:
- `Predicted_Class` (0/1)
- `Faulty_Probability` (if available)
""")

    # Try pulling a sample file from HF dataset repo (like tourism project pattern)
    @st.cache_resource
    def load_bulk_sample():
        try:
            path = hf_hub_download(
                repo_id=DATA_REPO_ID,
                filename=BULK_TEST_FILENAME,
                repo_type="dataset"
            )
            return pd.read_csv(path)
        except Exception:
            return None

    sample_df = load_bulk_sample()
    if sample_df is not None:
        with st.expander("Preview bulk sample from Hugging Face"):
            st.dataframe(sample_df.head())

    uploaded_file = st.file_uploader("Upload CSV for bulk prediction", type=["csv"])

    bulk_df = None
    if uploaded_file is not None:
        bulk_df = pd.read_csv(uploaded_file)
    elif sample_df is not None:
        bulk_df = sample_df.copy()

    if bulk_df is not None:
        st.markdown("✅ Bulk data loaded.")
        st.dataframe(bulk_df.head())

        if st.button("Run Bulk Prediction", use_container_width=True):
            try:
                # Ensure required columns exist
                missing = [c for c in RAW_COLS if c not in bulk_df.columns]
                if missing:
                    st.error(f"Missing required columns: {missing}")
                    st.stop()

                bulk_features = add_engineered_features(bulk_df[RAW_COLS])

                # Predict
                preds = model.predict(bulk_features).astype(int)

                if hasattr(model, "predict_proba"):
                    probs = model.predict_proba(bulk_features)[:, 1]
                else:
                    probs = np.full(shape=(len(bulk_features),), fill_value=np.nan)

                # Threshold override if proba exists
                if hasattr(model, "predict_proba"):
                    preds = (probs >= threshold).astype(int)

                out = bulk_df.copy()
                out["Predicted_Class"] = preds
                out["Faulty_Probability"] = probs

                st.success("Bulk predictions completed.")
                st.dataframe(out.head(50))

                out_csv = out.to_csv(index=False).encode("utf-8")
                st.download_button(
                    "Download Bulk Predictions CSV",
                    out_csv,
                    "bulk_engine_predictions.csv",
                    "text/csv"
                )

            except Exception as e:
                st.error(f"Bulk prediction failed: {e}")


# Footer
st.markdown("---")
st.caption("Predictive Maintenance | Gradient Boosting + Streamlit + Hugging Face Model Hub")