import streamlit as st
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd

# Load data
def load_data():
    """Read the dashboard dataset from ``./processed_data.csv``.

    The path is relative, so it is resolved against the current working
    directory of the running process.

    Returns:
        The parsed CSV contents as a pandas DataFrame.
    """
    csv_path = "./processed_data.csv"  # Replace with your dataset
    return pd.read_csv(csv_path)

# Create Streamlit app
def app():
    """Render the Machine Tool Failure dashboard.

    Configures the page, loads the dataset through ``load_data``, adds the
    derived columns, shows summary metrics in the sidebar and then draws one
    chart per section. Every section is skipped when the columns it needs are
    not present in the loaded data.
    """
    page_title = "Machine Tool Failure Dashboard"
    st.set_page_config(page_title=page_title, layout="wide")

    # --- Custom CSS to adjust sidebar width ---
    st.markdown(
        """
        <style>
            /* Sidebar width */
            [data-testid="stSidebar"] {
                width: 600px;
                min-width: 600px;
            }
        </style>
        """,
        unsafe_allow_html=True
    )

    # --- Page content ---
    st.title(page_title)

    data = load_data()
    _add_derived_columns(data)
    _render_sidebar_metrics(data)

    st.markdown("### Data preview")
    st.dataframe(data.head())

    sns.set_style("whitegrid", {'grid.color': 'lightgrey', 'grid.linestyle': '--'})

    _plot_failure_distribution(data)
    _plot_failure_rate_by_type(data)
    _plot_rotational_speed_distribution(data)
    _plot_torque_vs_speed(data)
    _plot_tool_wear_by_failure(data)
    _plot_correlation_heatmap(data)


def _add_derived_columns(data):
    """Add Celsius, temperature-delta and speed-category columns in place."""
    # Kelvin -> Celsius for readability (if temps exist)
    for source in ("Air temperature", "Process temperature"):
        if source in data.columns:
            data[f"{source} (°C)"] = data[source] - 273.15

    # Temperature delta (process - air); a difference of two Kelvin values
    # has the same magnitude in °C, so no offset is applied here.
    if {"Air temperature", "Process temperature"}.issubset(data.columns):
        data["Temperature delta (°C)"] = (
            data["Process temperature"] - data["Air temperature"]
        )

    # Rotational speed categories (quartiles)
    if "Rotational speed" in data.columns:
        speed_labels = ["Very Low", "Low", "High", "Very High"]
        try:
            data["Rotational speed category"] = pd.qcut(
                data["Rotational speed"], q=4, labels=speed_labels
            )
        except ValueError:
            # qcut can fail if there are too many identical values (duplicate
            # bin edges); fall back to four equal-width bins.
            data["Rotational speed category"] = pd.cut(
                data["Rotational speed"], bins=4, labels=speed_labels
            )


def _rounded_mean(data, column):
    """Return the mean of *column* rounded to 2 decimals, or None if absent."""
    if column not in data.columns:
        return None
    return round(data[column].mean(), 2)


def _render_sidebar_metrics(data):
    """Compute the headline figures and show them as sidebar metrics."""
    total_obs = len(data)
    n_failures = int(data["Failure"].sum()) if "Failure" in data.columns else 0
    failure_rate = (n_failures / total_obs * 100) if total_obs > 0 else 0

    n_types = 0
    most_common_type = "N/A"
    if "Type" in data.columns:
        n_types = data["Type"].nunique()
        type_counts = data["Type"].value_counts()
        # idxmax raises on an empty Series (no rows, or every value missing),
        # so keep the "N/A" placeholder in that case.
        if not type_counts.empty:
            most_common_type = type_counts.idxmax()

    st.sidebar.header("Key Metrics")
    st.sidebar.metric("Total observations", total_obs)
    st.sidebar.metric("Number of failures", n_failures)
    st.sidebar.metric("Failure rate (%)", f"{failure_rate:.2f}")

    st.sidebar.metric("Number of machine types", n_types)
    st.sidebar.metric("Most common type", most_common_type)

    # (metric label, source column); a metric is shown only if its column exists.
    optional_means = (
        ("Avg rotational speed (RPM)", "Rotational speed"),
        ("Avg torque (Nm)", "Torque"),
        ("Avg tool wear (min)", "Tool wear"),
        ("Avg air temperature (°C)", "Air temperature (°C)"),
        ("Avg process temperature (°C)", "Process temperature (°C)"),
        ("Avg temp delta (°C)", "Temperature delta (°C)"),
    )
    for label, column in optional_means:
        value = _rounded_mean(data, column)
        if value is not None:
            st.sidebar.metric(label, value)


def _show_figure(fig):
    """Send *fig* to Streamlit, then close it so pyplot stops tracking it."""
    st.pyplot(fig)
    # Figures created through plt.subplots() stay registered with pyplot until
    # closed; without this they pile up on every rerun of the script.
    plt.close(fig)


def _plot_failure_distribution(data):
    """Draw a bar chart with the number of rows per ``Failure`` value."""
    if "Failure" not in data.columns:
        return
    st.header("Failure distribution")
    fig, ax = plt.subplots()
    failure_counts = data["Failure"].value_counts().sort_index()
    sns.barplot(x=failure_counts.index, y=failure_counts.values, ax=ax)
    ax.set_xlabel("Failure (0 = No, 1 = Yes)")
    ax.set_ylabel("Count")
    ax.set_title("Failure Count")
    # Lift each label 1% of the tallest bar above its own bar.
    label_offset = failure_counts.max() * 0.01
    for position, count in enumerate(failure_counts.values):
        ax.text(
            position,
            count + label_offset,
            str(count),
            ha="center",
            va="bottom",
            fontsize=9,
        )
    _show_figure(fig)


def _plot_failure_rate_by_type(data):
    """Draw a bar chart of the failure rate (%) for each machine ``Type``."""
    if not {"Type", "Failure"}.issubset(data.columns):
        return
    st.header("Failure rate by machine type")
    fig, ax = plt.subplots()
    failure_by_type = data.groupby("Type")["Failure"].mean().reset_index()
    failure_by_type["Failure_rate_%"] = failure_by_type["Failure"] * 100
    sns.barplot(data=failure_by_type, x="Type", y="Failure_rate_%", ax=ax)
    ax.set_ylabel("Failure rate (%)")
    ax.set_title("Failure rate by machine type")
    # reset_index() gives a 0..n-1 index, so the enumeration position matches
    # the bar position on the x axis.
    for position, rate in enumerate(failure_by_type["Failure_rate_%"]):
        ax.text(position, rate + 0.5, f"{rate:.1f}%", ha="center", fontsize=9)
    _show_figure(fig)


def _plot_rotational_speed_distribution(data):
    """Draw a stacked histogram of rotational speed split by ``Failure``."""
    if not {"Rotational speed", "Failure"}.issubset(data.columns):
        return
    st.header("Rotational speed distribution by failure")
    fig, ax = plt.subplots()
    sns.histplot(
        data=data,
        x="Rotational speed",
        hue="Failure",
        multiple="stack",
        bins=30,
        ax=ax,
    )
    ax.set_xlabel("Rotational speed (RPM)")
    ax.set_title("Rotational speed distribution (Failed vs Not Failed)")
    _show_figure(fig)


def _plot_torque_vs_speed(data):
    """Draw a torque-vs-speed scatter, coloured by ``Failure`` when present."""
    if not {"Torque", "Rotational speed"}.issubset(data.columns):
        return
    st.header("Torque vs Rotational speed")
    fig, ax = plt.subplots()
    sns.scatterplot(
        data=data,
        x="Rotational speed",
        y="Torque",
        # Colour by failure only when that column exists.
        hue="Failure" if "Failure" in data.columns else None,
        alpha=0.6,
        ax=ax,
    )
    ax.set_xlabel("Rotational speed (RPM)")
    ax.set_ylabel("Torque (Nm)")
    ax.set_title("Torque vs Rotational speed")
    _show_figure(fig)


def _plot_tool_wear_by_failure(data):
    """Draw a box plot of tool wear for each ``Failure`` value."""
    if not {"Tool wear", "Failure"}.issubset(data.columns):
        return
    st.header("Tool wear distribution by failure")
    fig, ax = plt.subplots()
    sns.boxplot(data=data, x="Failure", y="Tool wear", ax=ax)
    ax.set_xlabel("Failure (0 = No, 1 = Yes)")
    ax.set_ylabel("Tool wear (min)")
    ax.set_title("Tool wear vs Failure")
    _show_figure(fig)


def _plot_correlation_heatmap(data):
    """Draw an annotated correlation heatmap over all numeric columns."""
    st.header("Correlation heatmap (numeric features)")
    numeric_cols = data.select_dtypes(include=["number"]).columns
    if len(numeric_cols) <= 1:
        st.write("Not enough numeric columns to compute correlations.")
        return
    fig, ax = plt.subplots(figsize=(8, 6))
    corr = data[numeric_cols].corr()
    sns.heatmap(corr, annot=True, fmt=".2f", cmap="coolwarm", ax=ax)
    ax.set_title("Correlation heatmap")
    _show_figure(fig)

# Build the dashboard only when this file is executed as the main module,
# not when it is imported.
if __name__ == "__main__":
    app()