# Machine_tool_reporting / src / streamlit_app.py
# AbdramaneB's picture
# Upload folder using huggingface_hub
# 1733b93 verified
import streamlit as st
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
# Load data
def load_data(path: str = "./processed_data.csv") -> pd.DataFrame:
    """Read the dashboard dataset from a CSV file.

    Args:
        path: Location of the CSV file to read. Defaults to
            ``./processed_data.csv`` (a relative path, so it is resolved
            against the current working directory), which keeps existing
            zero-argument callers working unchanged.

    Returns:
        The file's contents as parsed by ``pandas.read_csv`` with its
        default options.
    """
    return pd.read_csv(path)
# Create Streamlit app
# Raw HTML injected into the page to widen the sidebar.
_SIDEBAR_CSS = """
<style>
/* Sidebar width */
[data-testid="stSidebar"] {
width: 600px;
min-width: 600px;
}
</style>
"""

# Labels for the four rotational-speed bins, lowest to highest.
_SPEED_LABELS = ["Very Low", "Low", "High", "Very High"]


def _add_derived_columns(data: pd.DataFrame) -> None:
    """Add the helper columns used by the dashboard to *data*, in place.

    Each column is only added when the source column(s) it needs exist.
    """
    # Source temperatures are treated as Kelvin and converted to Celsius.
    if "Air temperature" in data.columns:
        data["Air temperature (°C)"] = data["Air temperature"] - 273.15
    if "Process temperature" in data.columns:
        data["Process temperature (°C)"] = data["Process temperature"] - 273.15

    # Process minus air; a difference is the same in Kelvin and Celsius.
    if {"Air temperature", "Process temperature"}.issubset(data.columns):
        data["Temperature delta (°C)"] = (
            data["Process temperature"] - data["Air temperature"]
        )

    if "Rotational speed" in data.columns:
        try:
            # Quartile bins: roughly the same number of rows per category.
            data["Rotational speed category"] = pd.qcut(
                data["Rotational speed"], q=4, labels=_SPEED_LABELS
            )
        except ValueError:
            # qcut can fail if there are too many identical values;
            # fall back to four equal-width bins.
            data["Rotational speed category"] = pd.cut(
                data["Rotational speed"], bins=4, labels=_SPEED_LABELS
            )


def _rounded_mean(data: pd.DataFrame, column: str):
    """Return the mean of *column* rounded to 2 decimals, or None if absent."""
    if column not in data.columns:
        return None
    return round(data[column].mean(), 2)


def _render_sidebar_metrics(data: pd.DataFrame) -> None:
    """Compute the headline figures from *data* and show them in the sidebar."""
    total_obs = len(data)
    n_failures = int(data["Failure"].sum()) if "Failure" in data.columns else 0
    failure_rate = (n_failures / total_obs * 100) if total_obs > 0 else 0

    n_types = 0
    most_common_type = "N/A"
    if "Type" in data.columns:
        n_types = data["Type"].nunique()
        type_counts = data["Type"].value_counts()
        # idxmax raises ValueError on an empty Series, which happens when the
        # column has no non-null values; keep the "N/A" placeholder instead.
        if not type_counts.empty:
            most_common_type = type_counts.idxmax()

    st.sidebar.header("Key Metrics")
    st.sidebar.metric("Total observations", total_obs)
    st.sidebar.metric("Number of failures", n_failures)
    st.sidebar.metric("Failure rate (%)", f"{failure_rate:.2f}")
    st.sidebar.metric("Number of machine types", n_types)
    st.sidebar.metric("Most common type", most_common_type)

    # (sidebar label, source column) pairs, in display order. An average is
    # only shown when its column exists.
    averages = (
        ("Avg rotational speed (RPM)", "Rotational speed"),
        ("Avg torque (Nm)", "Torque"),
        ("Avg tool wear (min)", "Tool wear"),
        ("Avg air temperature (°C)", "Air temperature (°C)"),
        ("Avg process temperature (°C)", "Process temperature (°C)"),
        ("Avg temp delta (°C)", "Temperature delta (°C)"),
    )
    for label, column in averages:
        value = _rounded_mean(data, column)
        if value is not None:
            st.sidebar.metric(label, value)


def _show_figure(fig) -> None:
    """Render *fig* on the Streamlit page, then release it."""
    st.pyplot(fig)
    # Figures created through pyplot stay registered until they are closed.
    # Closing here (instead of calling plt.show(), which has nothing to
    # display in a Streamlit script) keeps them from accumulating.
    plt.close(fig)


def _plot_failure_distribution(data: pd.DataFrame) -> None:
    """Bar chart of the number of rows per ``Failure`` value."""
    st.header("Failure distribution")
    fig, ax = plt.subplots()
    failure_counts = data["Failure"].value_counts().sort_index()
    sns.barplot(x=failure_counts.index, y=failure_counts.values, ax=ax)
    ax.set_xlabel("Failure (0 = No, 1 = Yes)")
    ax.set_ylabel("Count")
    ax.set_title("Failure Count")
    # Lift each count label 1% of the tallest bar above its own bar.
    label_offset = 0 if failure_counts.empty else failure_counts.max() * 0.01
    for i, v in enumerate(failure_counts.values):
        ax.text(i, v + label_offset, str(v), ha="center", va="bottom", fontsize=9)
    _show_figure(fig)


def _plot_failure_rate_by_type(data: pd.DataFrame) -> None:
    """Bar chart of the mean of ``Failure`` per ``Type``, as a percentage."""
    st.header("Failure rate by machine type")
    fig, ax = plt.subplots()
    failure_by_type = data.groupby("Type")["Failure"].mean().reset_index()
    failure_by_type["Failure_rate_%"] = failure_by_type["Failure"] * 100
    sns.barplot(data=failure_by_type, x="Type", y="Failure_rate_%", ax=ax)
    ax.set_ylabel("Failure rate (%)")
    ax.set_title("Failure rate by machine type")
    # After reset_index the row label doubles as the bar's x position.
    for i, row in failure_by_type.iterrows():
        ax.text(
            i,
            row["Failure_rate_%"] + 0.5,
            f"{row['Failure_rate_%']:.1f}%",
            ha="center",
            fontsize=9,
        )
    _show_figure(fig)


def _plot_speed_distribution(data: pd.DataFrame) -> None:
    """Stacked histogram of rotational speed, coloured by ``Failure``."""
    st.header("Rotational speed distribution by failure")
    fig, ax = plt.subplots()
    sns.histplot(
        data=data,
        x="Rotational speed",
        hue="Failure",
        multiple="stack",
        bins=30,
        ax=ax,
    )
    ax.set_xlabel("Rotational speed (RPM)")
    ax.set_title("Rotational speed distribution (Failed vs Not Failed)")
    _show_figure(fig)


def _plot_torque_vs_speed(data: pd.DataFrame) -> None:
    """Scatter of torque against rotational speed."""
    st.header("Torque vs Rotational speed")
    fig, ax = plt.subplots()
    # Colour the points by failure only when that column is available.
    hue = "Failure" if "Failure" in data.columns else None
    sns.scatterplot(
        data=data,
        x="Rotational speed",
        y="Torque",
        hue=hue,
        alpha=0.6,
        ax=ax,
    )
    ax.set_xlabel("Rotational speed (RPM)")
    ax.set_ylabel("Torque (Nm)")
    ax.set_title("Torque vs Rotational speed")
    _show_figure(fig)


def _plot_tool_wear(data: pd.DataFrame) -> None:
    """Box plot of tool wear for each ``Failure`` value."""
    st.header("Tool wear distribution by failure")
    fig, ax = plt.subplots()
    sns.boxplot(data=data, x="Failure", y="Tool wear", ax=ax)
    ax.set_xlabel("Failure (0 = No, 1 = Yes)")
    ax.set_ylabel("Tool wear (min)")
    ax.set_title("Tool wear vs Failure")
    _show_figure(fig)


def _plot_correlation_heatmap(data: pd.DataFrame) -> None:
    """Annotated correlation heatmap over every numeric column of *data*."""
    st.header("Correlation heatmap (numeric features)")
    numeric_cols = data.select_dtypes(include=["number"]).columns
    if len(numeric_cols) <= 1:
        st.write("Not enough numeric columns to compute correlations.")
        return
    fig, ax = plt.subplots(figsize=(8, 6))
    corr = data[numeric_cols].corr()
    sns.heatmap(corr, annot=True, fmt=".2f", cmap="coolwarm", ax=ax)
    ax.set_title("Correlation heatmap")
    _show_figure(fig)


def app():
    """Build the "Machine Tool Failure Dashboard" Streamlit page.

    Configures the page, loads the dataset through ``load_data()``, adds the
    derived columns, shows the key metrics in the sidebar and a preview of
    the data, then renders each chart whose required columns are present in
    the dataset. Charts with missing columns are skipped silently; the
    correlation section always shows its header and falls back to a short
    message when fewer than two numeric columns exist.
    """
    huggingface_page_title = "Machine Tool Failure Dashboard"
    # Page configuration is issued before any other Streamlit call.
    st.set_page_config(page_title=huggingface_page_title, layout="wide")
    st.markdown(_SIDEBAR_CSS, unsafe_allow_html=True)
    st.title(huggingface_page_title)

    data = load_data()
    _add_derived_columns(data)
    _render_sidebar_metrics(data)

    st.markdown("### Data preview")
    st.dataframe(data.head())

    sns.set_style("whitegrid", {'grid.color': 'lightgrey', 'grid.linestyle': '--'})

    if "Failure" in data.columns:
        _plot_failure_distribution(data)
    if {"Type", "Failure"}.issubset(data.columns):
        _plot_failure_rate_by_type(data)
    if {"Rotational speed", "Failure"}.issubset(data.columns):
        _plot_speed_distribution(data)
    if {"Torque", "Rotational speed"}.issubset(data.columns):
        _plot_torque_vs_speed(data)
    if {"Tool wear", "Failure"}.issubset(data.columns):
        _plot_tool_wear(data)
    _plot_correlation_heatmap(data)
# Script entry point: build the dashboard only when this file is executed
# as the main script, not when it is imported as a module.
if __name__ == "__main__":
    app()