import streamlit as st
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd

# Labels for the four rotational-speed bins, ordered from slowest to fastest.
_SPEED_LABELS = ["Very Low", "Low", "High", "Very High"]

# (sidebar label, source column) pairs for the "average" metrics, in the
# order they are displayed. A metric is shown only if its column exists.
_AVERAGE_METRICS = [
    ("Avg rotational speed (RPM)", "Rotational speed"),
    ("Avg torque (Nm)", "Torque"),
    ("Avg tool wear (min)", "Tool wear"),
    ("Avg air temperature (°C)", "Air temperature (°C)"),
    ("Avg process temperature (°C)", "Process temperature (°C)"),
    ("Avg temp delta (°C)", "Temperature delta (°C)"),
]


def load_data(path: str = "./processed_data.csv") -> pd.DataFrame:
    """Read the dashboard dataset from a CSV file.

    Args:
        path: Location of the CSV file. Defaults to the file the dashboard
            has always read, so existing ``load_data()`` calls are unchanged.

    Returns:
        The CSV contents as a DataFrame.
    """
    return pd.read_csv(path)


def _add_derived_columns(data: pd.DataFrame) -> pd.DataFrame:
    """Add helper columns to *data* in place and return it.

    Each derived column is only created when its source column(s) exist.
    """
    # Kelvin -> Celsius for readability.
    if "Air temperature" in data.columns:
        data["Air temperature (°C)"] = data["Air temperature"] - 273.15
    if "Process temperature" in data.columns:
        data["Process temperature (°C)"] = data["Process temperature"] - 273.15

    # Temperature delta (process - air); a difference is the same in K and °C.
    if {"Air temperature", "Process temperature"}.issubset(data.columns):
        data["Temperature delta (°C)"] = (
            data["Process temperature"] - data["Air temperature"]
        )

    # Rotational speed categories (quartiles). Skipped when the column holds
    # no values at all, because there is nothing to bin in that case.
    if "Rotational speed" in data.columns:
        speed = data["Rotational speed"]
        if speed.notna().any():
            try:
                data["Rotational speed category"] = pd.qcut(
                    speed, q=4, labels=_SPEED_LABELS
                )
            except ValueError:
                # qcut can fail if there are too many identical values
                # (duplicate bin edges); fall back to equal-width bins.
                data["Rotational speed category"] = pd.cut(
                    speed, bins=4, labels=_SPEED_LABELS
                )
    return data


def _column_mean(data: pd.DataFrame, column: str):
    """Return the mean of *column* rounded to 2 decimals, or None if absent."""
    if column not in data.columns:
        return None
    return round(data[column].mean(), 2)


def _render_sidebar_metrics(data: pd.DataFrame) -> None:
    """Compute the headline numbers and show them in the sidebar."""
    total_obs = len(data)
    n_failures = int(data["Failure"].sum()) if "Failure" in data.columns else 0
    failure_rate = (n_failures / total_obs * 100) if total_obs > 0 else 0

    n_types = 0
    most_common_type = "N/A"
    if "Type" in data.columns:
        n_types = data["Type"].nunique()
        type_counts = data["Type"].value_counts()
        # idxmax() raises on an empty Series (e.g. an all-NaN column), so
        # keep the "N/A" placeholder in that case.
        if not type_counts.empty:
            most_common_type = type_counts.idxmax()

    # Compute every average before rendering anything.
    averages = [
        (label, _column_mean(data, column))
        for label, column in _AVERAGE_METRICS
    ]

    st.sidebar.header("Key Metrics")
    st.sidebar.metric("Total observations", total_obs)
    st.sidebar.metric("Number of failures", n_failures)
    st.sidebar.metric("Failure rate (%)", f"{failure_rate:.2f}")
    st.sidebar.metric("Number of machine types", n_types)
    st.sidebar.metric("Most common type", most_common_type)
    for label, value in averages:
        if value is not None:
            st.sidebar.metric(label, value)


def _show_figure(fig) -> None:
    """Render *fig* in the Streamlit page and then release it.

    Figures created through pyplot stay registered until closed, so each one
    is closed once Streamlit has rendered it.
    """
    st.pyplot(fig)
    plt.close(fig)


def _plot_failure_distribution(data: pd.DataFrame) -> None:
    """Bar chart of how many rows fall under each ``Failure`` value."""
    st.header("Failure distribution")
    fig, ax = plt.subplots()
    failure_counts = data["Failure"].value_counts().sort_index()
    sns.barplot(x=failure_counts.index, y=failure_counts.values, ax=ax)
    ax.set_xlabel("Failure (0 = No, 1 = Yes)")
    ax.set_ylabel("Count")
    ax.set_title("Failure Count")
    # Place each count slightly above its bar; the offset is 1% of the
    # tallest bar and is computed once rather than per label.
    label_offset = failure_counts.max() * 0.01
    for i, v in enumerate(failure_counts.values):
        ax.text(i, v + label_offset, str(v),
                ha="center", va="bottom", fontsize=9)
    _show_figure(fig)


def _plot_failure_rate_by_type(data: pd.DataFrame) -> None:
    """Bar chart of the mean ``Failure`` value (as a percentage) per ``Type``."""
    st.header("Failure rate by machine type")
    fig, ax = plt.subplots()
    failure_by_type = data.groupby("Type")["Failure"].mean().reset_index()
    failure_by_type["Failure_rate_%"] = failure_by_type["Failure"] * 100
    sns.barplot(data=failure_by_type, x="Type", y="Failure_rate_%", ax=ax)
    ax.set_ylabel("Failure rate (%)")
    ax.set_title("Failure rate by machine type")
    # reset_index() yields a 0..n-1 index, so the enumeration position is the
    # bar position.
    for i, rate in enumerate(failure_by_type["Failure_rate_%"]):
        ax.text(i, rate + 0.5, f"{rate:.1f}%", ha="center", fontsize=9)
    _show_figure(fig)


def _plot_rotational_speed_hist(data: pd.DataFrame) -> None:
    """Stacked histogram of rotational speed, coloured by ``Failure``."""
    st.header("Rotational speed distribution by failure")
    fig, ax = plt.subplots()
    sns.histplot(
        data=data,
        x="Rotational speed",
        hue="Failure",
        multiple="stack",
        bins=30,
        ax=ax,
    )
    ax.set_xlabel("Rotational speed (RPM)")
    ax.set_title("Rotational speed distribution (Failed vs Not Failed)")
    _show_figure(fig)


def _plot_torque_vs_speed(data: pd.DataFrame) -> None:
    """Scatter of torque against rotational speed.

    Points are coloured by ``Failure`` when that column is available.
    """
    st.header("Torque vs Rotational speed")
    fig, ax = plt.subplots()
    scatter_kwargs = {
        "data": data,
        "x": "Rotational speed",
        "y": "Torque",
        "alpha": 0.6,
        "ax": ax,
    }
    if "Failure" in data.columns:
        scatter_kwargs["hue"] = "Failure"
    sns.scatterplot(**scatter_kwargs)
    ax.set_xlabel("Rotational speed (RPM)")
    ax.set_ylabel("Torque (Nm)")
    ax.set_title("Torque vs Rotational speed")
    _show_figure(fig)


def _plot_tool_wear_box(data: pd.DataFrame) -> None:
    """Box plot of tool wear grouped by ``Failure``."""
    st.header("Tool wear distribution by failure")
    fig, ax = plt.subplots()
    sns.boxplot(data=data, x="Failure", y="Tool wear", ax=ax)
    ax.set_xlabel("Failure (0 = No, 1 = Yes)")
    ax.set_ylabel("Tool wear (min)")
    ax.set_title("Tool wear vs Failure")
    _show_figure(fig)


def _plot_correlation_heatmap(data: pd.DataFrame) -> None:
    """Annotated correlation heatmap over all numeric columns.

    Shows a short message instead when fewer than two numeric columns exist.
    """
    st.header("Correlation heatmap (numeric features)")
    numeric_cols = data.select_dtypes(include=["number"]).columns
    if len(numeric_cols) <= 1:
        st.write("Not enough numeric columns to compute correlations.")
        return
    fig, ax = plt.subplots(figsize=(8, 6))
    corr = data[numeric_cols].corr()
    sns.heatmap(corr, annot=True, fmt=".2f", cmap="coolwarm", ax=ax)
    ax.set_title("Correlation heatmap")
    _show_figure(fig)


def app():
    """Build the Machine Tool Failure dashboard page.

    Loads the dataset, adds derived columns, shows key metrics in the
    sidebar, previews the data, and renders each chart whose required
    columns are present.
    """
    huggingface_page_title = "Machine Tool Failure Dashboard"
    # Page configuration is issued before any other Streamlit call.
    st.set_page_config(page_title=huggingface_page_title, layout="wide")

    # --- Custom CSS to adjust sidebar width ---
    # NOTE(review): the markup string is blank here; the intended CSS
    # appears to be missing -- confirm.
    st.markdown(
        """ """,
        unsafe_allow_html=True
    )

    # --- Page content ---
    st.title(huggingface_page_title)

    data = load_data()
    if data.empty:
        # Nothing to summarise or plot; stop instead of failing further down.
        st.warning("The dataset is empty; nothing to display.")
        return

    _add_derived_columns(data)
    _render_sidebar_metrics(data)

    st.markdown("### Data preview")
    st.dataframe(data.head())

    sns.set_style("whitegrid", {'grid.color': 'lightgrey', 'grid.linestyle': '--'})

    columns = set(data.columns)
    if "Failure" in columns:
        _plot_failure_distribution(data)
    if {"Type", "Failure"} <= columns:
        _plot_failure_rate_by_type(data)
    if {"Rotational speed", "Failure"} <= columns:
        _plot_rotational_speed_hist(data)
    if {"Torque", "Rotational speed"} <= columns:
        _plot_torque_vs_speed(data)
    if {"Tool wear", "Failure"} <= columns:
        _plot_tool_wear_box(data)
    _plot_correlation_heatmap(data)


if __name__ == "__main__":
    app()