import streamlit as st
import pandas as pd
import numpy as np
import joblib

# -----------------------------
# Config
# -----------------------------
# set_page_config is issued before any other Streamlit call in this script.
st.set_page_config(page_title="Engine Failure Prediction", layout="centered")
st.title("Engine Failure Prediction 🚀")

from huggingface_hub import hf_hub_download

# Resolve the serialized model file from the Hugging Face Hub model repo.
# The call is guarded so that a download problem (network, authentication,
# missing file) is reported in the page and the script run is halted, instead
# of surfacing as an unhandled traceback before any error handling is reached.
try:
    MODEL_PATH = hf_hub_download(
        repo_id="chaitram/Engine-Failure-Prediction",
        filename="best_engine_failure_model_v1.joblib",
        repo_type="model",
    )
except Exception as e:
    st.error(
        "Could not download best_engine_failure_model_v1.joblib from the Hugging Face Hub "
        "repo chaitram/Engine-Failure-Prediction.\n\n"
        f"Error: {e}"
    )
    st.stop()

# hf:// URL of the reference CSV. Reading it relies on the Hugging Face
# filesystem integration being available and on the dataset being public;
# the loader that consumes this path tolerates failure.
DATASET_PATH = "hf://datasets/chaitram/Engine-Failure-Prediction/engine_data.csv"

# -----------------------------
# Helpers
# -----------------------------
@st.cache_resource
def load_model():
    """Deserialize and return the trained model stored at ``MODEL_PATH``.

    The function is wrapped in ``st.cache_resource`` and takes no arguments.

    NOTE(review): ``joblib.load`` unpickles the file, so ``MODEL_PATH`` must
    only ever point at a trusted artifact.
    """
    estimator = joblib.load(MODEL_PATH)
    return estimator

@st.cache_data
def load_reference_dataset():
    """Read the reference CSV at ``DATASET_PATH`` into a DataFrame.

    The data is optional: it only serves to derive option lists. Any
    exception raised while reading (for example when the hf:// filesystem
    or one of its dependencies is unavailable) is suppressed.

    Returns:
        The loaded ``pandas.DataFrame``, or ``None`` when reading failed.
    """
    try:
        return pd.read_csv(DATASET_PATH)
    except Exception:
        # Best-effort by design: callers fall back to manual choices.
        return None

def build_feature_vector(raw_df: pd.DataFrame, model):
    """Shape ``raw_df`` into the column layout the saved model expects.

    Three situations are handled, in this order:

    1. ``model`` has no ``feature_names_in_`` attribute: ``raw_df`` is
       returned untouched.
    2. Every name in ``feature_names_in_`` is already a column of
       ``raw_df``: those columns are returned in the model's order.
    3. Otherwise the model is assumed to have been trained on
       ``pd.get_dummies`` output: ``raw_df`` is one-hot encoded and aligned
       to ``feature_names_in_``. Dummy columns the input did not produce are
       filled with 0, and columns the model does not know are dropped.

    Args:
        raw_df: Input rows using the raw (un-encoded) column names.
        model: Fitted estimator or pipeline; only ``feature_names_in_`` is
            read from it.

    Returns:
        A DataFrame whose columns match what the model expects (or
        ``raw_df`` itself in situation 1).

    Raises:
        ValueError: In situation 3, when the model expects a column that is
            neither present after encoding nor a possible dummy of one of
            the encoded input columns. Zero-filling such a column would
            silently feed a made-up feature value to the model.
    """
    required = getattr(model, "feature_names_in_", None)

    # The model does not advertise its columns: nothing to align against.
    if required is None:
        return raw_df

    required = list(required)

    # Raw columns already satisfy the model: just select and reorder.
    if all(col in raw_df.columns for col in required):
        return raw_df[required]

    # Encode WITHOUT drop_first. With only a row or two of input, each
    # categorical column usually has a single level, and drop_first would
    # discard exactly that level, leaving the active dummy at 0. Levels that
    # were dropped as the baseline at training time are simply absent from
    # `required` and get removed by the reindex below.
    encoded = pd.get_dummies(raw_df)

    # Columns consumed by get_dummies are the categorical ones; their dummy
    # columns are named "<column>_<value>" (pandas' default separator).
    dummy_prefixes = tuple(
        f"{col}_" for col in raw_df.columns if col not in encoded.columns
    )

    # A required column that is missing and cannot be a dummy of any encoded
    # input column is a genuinely absent feature, not an "off" category.
    unresolved = [
        col
        for col in required
        if col not in encoded.columns
        and not str(col).startswith(dummy_prefixes)
    ]
    if unresolved:
        raise ValueError(
            f"Model expects feature column(s) {unresolved} that cannot be derived "
            f"from the input columns {list(raw_df.columns)}."
        )

    # Add absent dummy columns as 0, drop extras, and order as the model expects.
    return encoded.reindex(columns=required, fill_value=0)

def safe_predict(model, X):
    """Predict for the first row of ``X`` and, when possible, its probabilities.

    Args:
        model: Object exposing ``predict`` and optionally ``predict_proba``.
        X: Feature input passed unchanged to the model.

    Returns:
        A ``(prediction, probabilities)`` tuple holding the first element of
        ``model.predict(X)`` and the first element of
        ``model.predict_proba(X)``. ``probabilities`` is ``None`` when the
        model has no ``predict_proba`` or when calling it raises.
    """
    label = model.predict(X)[0]

    # Models without probability support only yield the label.
    if not hasattr(model, "predict_proba"):
        return label, None

    try:
        probabilities = model.predict_proba(X)[0]
    except Exception:
        # Probabilities are optional; a failure here must not hide the label.
        probabilities = None
    return label, probabilities

# -----------------------------
# Load model + reference
# -----------------------------
try:
    model = load_model()
except Exception as e:
    # Report the failure in the page, then halt this script run: nothing
    # below can work without a model.
    load_error_text = f"Could not load {MODEL_PATH}. Make sure best_engine_failure_model_v1.joblib is uploaded to the Space.\n\nError: {e}"
    st.error(load_error_text)
    st.stop()

# Optional reference data; None when it could not be read.
ref = load_reference_dataset()

# Build dropdown options from dataset if available; otherwise provide simple fallbacks
def get_options(col_name, fallback):
    """Return the sorted distinct non-null values of a reference column.

    Args:
        col_name: Column to look up in the module-level ``ref`` DataFrame.
        fallback: Value returned when ``ref`` is ``None``, the column is
            absent, or the column has no non-null values.

    Returns:
        A sorted list of the column's unique non-null values, or ``fallback``.
    """
    if ref is None or col_name not in ref.columns:
        return fallback

    distinct = ref[col_name].dropna().unique().tolist()
    distinct.sort()
    return distinct if distinct else fallback

# -----------------------------
# UI Inputs (NO SPACES IN VARIABLE NAMES)
# -----------------------------
st.subheader("Enter Engine Sensor Readings")

# Bounds and step shared by several of the numeric inputs below.
_wide_pressure_range = {"min_value": 0.0, "max_value": 20.0, "step": 0.1}
_temperature_range = {"min_value": 0.0, "max_value": 200.0, "step": 0.5}

# The six readings are laid out in two columns of three inputs each.
col1, col2 = st.columns(2)

with col1:
    engine_rpm = st.number_input(
        "Engine RPM (Revolutions Per Minute)",
        min_value=0,
        max_value=10000,
        value=1500,
        step=10,
    )
    lub_oil_pressure = st.number_input(
        "Lubricating Oil Pressure (bar/kPa)",
        value=4.0,
        **_wide_pressure_range,
    )
    fuel_pressure = st.number_input(
        "Fuel Pressure (bar/kPa)",
        value=5.0,
        **_wide_pressure_range,
    )

with col2:
    coolant_pressure = st.number_input(
        "Coolant Pressure (bar/kPa)",
        min_value=0.0,
        max_value=10.0,
        value=1.5,
        step=0.1,
    )
    lub_oil_temperature = st.number_input(
        "Lubricating Oil Temperature (°C)",
        value=85.0,
        **_temperature_range,
    )
    coolant_temperature = st.number_input(
        "Coolant Temperature (°C)",
        value=90.0,
        **_temperature_range,
    )

# -----------------------------
# Assemble the inputs into a single-row DataFrame
# -----------------------------
# One-row frame holding the current widget values. RPM is stored as an
# integer and every other reading as a float.
raw_input_df = pd.DataFrame(
    {
        "Engine_RPM": [int(engine_rpm)],
        "Lub_Oil_Pressure": [float(lub_oil_pressure)],
        "Fuel_Pressure": [float(fuel_pressure)],
        "Coolant_Pressure": [float(coolant_pressure)],
        "Lub_Oil_Temperature": [float(lub_oil_temperature)],
        "Coolant_Temperature": [float(coolant_temperature)],
    }
)

# Let the user inspect exactly what will be sent to the model.
with st.expander("Show input data"):
    st.dataframe(raw_input_df, use_container_width=True)


# -----------------------------
# Predict
# -----------------------------
predict_clicked = st.button("Predict Engine Condition")

if predict_clicked:
    try:
        # Align the raw inputs to the model's expected columns, then predict.
        X = build_feature_vector(raw_input_df, model)
        pred, proba = safe_predict(model, X)

        # A prediction equal to 1 is reported as needing maintenance.
        if int(pred) == 1:
            result = "Maintenance Required"
        else:
            result = "Engine Operating Normally"

        st.subheader("Prediction Result:")
        st.success(f"The model predicts: **{result}**")

    except Exception as e:
        # Any failure in alignment, prediction or rendering is shown in the page.
        failure_text = (
            "Prediction failed. This usually happens when the model expects different feature columns "
            "(e.g., one-hot columns) than the app is sending.\n\n"
            f"Error: {e}"
        )
        st.error(failure_text)