AbdramaneB commited on
Commit
1733b93
·
verified ·
1 Parent(s): 9811692

Upload folder using huggingface_hub

Browse files
Files changed (5) hide show
  1. .amlignore +6 -0
  2. Dockerfile +32 -12
  3. requirements.txt +6 -3
  4. src/.amlignore +6 -0
  5. src/streamlit_app.py +231 -38
.amlignore ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ ## This file was auto generated by the Azure Machine Learning Studio. Please do not remove.
2
+ ## Read more about the .amlignore file here: https://docs.microsoft.com/azure/machine-learning/how-to-save-write-experiment-files#storage-limits-of-experiment-snapshots
3
+
4
+ .ipynb_aml_checkpoints/
5
+ *.amltmp
6
+ *.amltemp
Dockerfile CHANGED
@@ -1,20 +1,40 @@
1
- FROM python:3.13.5-slim
 
2
 
3
- WORKDIR /app
4
-
5
- RUN apt-get update && apt-get install -y \
6
- build-essential \
7
- curl \
8
- git \
9
- && rm -rf /var/lib/apt/lists/*
10
 
 
11
  COPY requirements.txt ./
12
- COPY src/ ./src/
13
 
14
- RUN pip3 install -r requirements.txt
 
 
 
 
 
 
 
15
 
16
- EXPOSE 8501
 
 
 
 
 
 
17
 
 
 
 
 
 
18
  HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
19
 
20
- ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]
 
 
1
# Use Python 3.10 to match your Azure env
FROM python:3.10-slim

# --- Create non-root user -----------------------------------------------------
# Hugging Face Spaces convention: run as uid 1000, keep pip installs in $HOME.
RUN useradd -m -u 1000 user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1
WORKDIR $HOME/app

# --- Copy requirements first for caching -------------------------------------
# Copying only requirements.txt here lets Docker reuse the dependency layer
# when application code changes but dependencies do not.
COPY requirements.txt ./

# --- Install OS dependencies --------------------------------------------------
USER root
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get install -y --no-install-recommends \
    curl \
    git \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# --- Install Python dependencies ---------------------------------------------
# `python -m streamlit --version` acts as a cheap smoke test that the install
# actually produced a runnable streamlit.
RUN python -m pip install --upgrade pip \
    && python -m pip install -r requirements.txt \
    && python -m streamlit --version

# --- Copy application code ----------------------------------------------------
COPY --chown=user . $HOME/app

# --- Switch to non-root user --------------------------------------------------
USER user

# --- Configure Streamlit server ----------------------------------------------
EXPOSE 8501
HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health

# --- Launch the Streamlit app -------------------------------------------------
# NOTE(review): --server.enableXsrfProtection=false disables CSRF protection;
# presumably needed for the hosting proxy — confirm this is intentional before
# exposing the container outside a trusted environment.
ENTRYPOINT ["python", "-m", "streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0", "--server.headless=true", "--server.enableXsrfProtection=false"]
requirements.txt CHANGED
@@ -1,3 +1,6 @@
1
- altair
2
- pandas
3
- streamlit
 
 
 
 
1
+ pandas>=2.1,<3
2
+ matplotlib>=3.8
3
+ seaborn>=0.13
4
+ scipy>=1.10,<2
5
+ numpy>=1.23,<2
6
+ streamlit>=1.30
src/.amlignore ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ ## This file was auto generated by the Azure Machine Learning Studio. Please do not remove.
2
+ ## Read more about the .amlignore file here: https://docs.microsoft.com/azure/machine-learning/how-to-save-write-experiment-files#storage-limits-of-experiment-snapshots
3
+
4
+ .ipynb_aml_checkpoints/
5
+ *.amltmp
6
+ *.amltemp
src/streamlit_app.py CHANGED
@@ -1,40 +1,233 @@
1
- import altair as alt
2
- import numpy as np
3
- import pandas as pd
4
  import streamlit as st
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
- """
7
- # Welcome to Streamlit!
8
-
9
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
- forums](https://discuss.streamlit.io).
12
-
13
- In the meantime, below is an example of what you can do with just a few lines of code:
14
- """
15
-
16
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
17
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
18
-
19
- indices = np.linspace(0, 1, num_points)
20
- theta = 2 * np.pi * num_turns * indices
21
- radius = indices
22
-
23
- x = radius * np.cos(theta)
24
- y = radius * np.sin(theta)
25
-
26
- df = pd.DataFrame({
27
- "x": x,
28
- "y": y,
29
- "idx": indices,
30
- "rand": np.random.randn(num_points),
31
- })
32
-
33
- st.altair_chart(alt.Chart(df, height=700, width=700)
34
- .mark_point(filled=True)
35
- .encode(
36
- x=alt.X("x", axis=None),
37
- y=alt.Y("y", axis=None),
38
- color=alt.Color("idx", legend=None, scale=alt.Scale()),
39
- size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
40
- ))
 
 
 
 
1
  import streamlit as st
2
+ import seaborn as sns
3
+ import matplotlib.pyplot as plt
4
+ import matplotlib.dates as mdates
5
+ import pandas as pd
6
+
7
# Load data
def load_data(path: str = "./processed_data.csv") -> pd.DataFrame:
    """Read the processed machine-tool dataset into a DataFrame.

    Parameters
    ----------
    path : str, optional
        Location of the CSV file. Defaults to ``./processed_data.csv`` so
        existing callers (``app()`` calls this with no arguments) keep the
        original behavior; tests and alternative deployments can pass a
        different file without editing this function.

    Returns
    -------
    pd.DataFrame
        The raw dataset exactly as stored in the CSV.
    """
    df = pd.read_csv(path)  # Replace with your dataset
    return df
11
+
12
# Create Streamlit app
def _display_figure(fig):
    """Render a Matplotlib figure in the page, then close it.

    The previous code called ``plt.show()`` after every ``st.pyplot``; on a
    headless Streamlit server that is a no-op at best, and it leaves each
    figure registered with pyplot, so memory grows on every rerun. Closing
    the figure after handing it to Streamlit fixes both problems.
    """
    st.pyplot(fig)
    plt.close(fig)


def app():
    """Build the Machine Tool Failure dashboard.

    Loads the processed dataset via :func:`load_data`, derives helper columns
    (Celsius temperatures, process-air temperature delta, rotational-speed
    quartile categories), shows key metrics in the sidebar and a set of
    seaborn charts in the main pane. Every derived column and chart is
    guarded by a column-existence check, so the page degrades gracefully
    when a column is absent from the CSV.
    """
    # Title for the app
    huggingface_page_title = "Machine Tool Failure Dashboard"
    st.set_page_config(page_title=huggingface_page_title, layout="wide")

    # --- Custom CSS to adjust sidebar width ---
    st.markdown(
        """
        <style>
        /* Sidebar width */
        [data-testid="stSidebar"] {
            width: 600px;
            min-width: 600px;
        }
        </style>
        """,
        unsafe_allow_html=True
    )

    # --- Page content ---
    st.title(huggingface_page_title)

    # Load data
    data = load_data()

    # --- Create some helper/derived columns ---
    # Kelvin -> Celsius for readability (if temps exist).
    # NOTE(review): assumes the raw temperature columns are in Kelvin —
    # confirm against the dataset documentation.
    if "Air temperature" in data.columns:
        data["Air temperature (°C)"] = data["Air temperature"] - 273.15
    if "Process temperature" in data.columns:
        data["Process temperature (°C)"] = data["Process temperature"] - 273.15

    # Temperature delta (process - air)
    if {"Air temperature", "Process temperature"}.issubset(data.columns):
        data["Temperature delta (°C)"] = data["Process temperature"] - data["Air temperature"]

    # Rotational speed categories (quartiles)
    if "Rotational speed" in data.columns:
        try:
            data["Rotational speed category"] = pd.qcut(
                data["Rotational speed"],
                q=4,
                labels=["Very Low", "Low", "High", "Very High"]
            )
        except ValueError:
            # qcut can fail if there are too many identical values; fallback to cut
            data["Rotational speed category"] = pd.cut(
                data["Rotational speed"],
                bins=4,
                labels=["Very Low", "Low", "High", "Very High"]
            )

    # --- Key Metrics from the data ---
    total_obs = len(data)
    n_failures = int(data["Failure"].sum()) if "Failure" in data.columns else 0
    failure_rate = (n_failures / total_obs * 100) if total_obs > 0 else 0

    n_types = data["Type"].nunique() if "Type" in data.columns else 0
    # Guard: value_counts().idxmax() raises ValueError on an empty/all-NaN
    # column, which would crash the whole page for an empty dataset.
    if "Type" in data.columns and not data["Type"].dropna().empty:
        most_common_type = data["Type"].value_counts().idxmax()
    else:
        most_common_type = "N/A"

    avg_rot_speed = round(data["Rotational speed"].mean(), 2) if "Rotational speed" in data.columns else None
    avg_torque = round(data["Torque"].mean(), 2) if "Torque" in data.columns else None
    avg_tool_wear = round(data["Tool wear"].mean(), 2) if "Tool wear" in data.columns else None

    avg_air_temp_c = (
        round(data["Air temperature (°C)"].mean(), 2)
        if "Air temperature (°C)" in data.columns
        else None
    )
    avg_proc_temp_c = (
        round(data["Process temperature (°C)"].mean(), 2)
        if "Process temperature (°C)" in data.columns
        else None
    )
    avg_temp_delta = (
        round(data["Temperature delta (°C)"].mean(), 2)
        if "Temperature delta (°C)" in data.columns
        else None
    )

    # Display metrics in the sidebar
    st.sidebar.header("Key Metrics")
    st.sidebar.metric("Total observations", total_obs)
    st.sidebar.metric("Number of failures", n_failures)
    st.sidebar.metric("Failure rate (%)", f"{failure_rate:.2f}")

    st.sidebar.metric("Number of machine types", n_types)
    st.sidebar.metric("Most common type", most_common_type)

    # Averages are only shown when the corresponding column exists.
    if avg_rot_speed is not None:
        st.sidebar.metric("Avg rotational speed (RPM)", avg_rot_speed)
    if avg_torque is not None:
        st.sidebar.metric("Avg torque (Nm)", avg_torque)
    if avg_tool_wear is not None:
        st.sidebar.metric("Avg tool wear (min)", avg_tool_wear)
    if avg_air_temp_c is not None:
        st.sidebar.metric("Avg air temperature (°C)", avg_air_temp_c)
    if avg_proc_temp_c is not None:
        st.sidebar.metric("Avg process temperature (°C)", avg_proc_temp_c)
    if avg_temp_delta is not None:
        st.sidebar.metric("Avg temp delta (°C)", avg_temp_delta)

    st.markdown("### Data preview")
    st.dataframe(data.head())

    sns.set_style("whitegrid", {'grid.color': 'lightgrey', 'grid.linestyle': '--'})

    #////////////////////////////////////////////////////////////////////////////
    # Failure distribution
    #////////////////////////////////////////////////////////////////////////////
    if "Failure" in data.columns:
        st.header("Failure distribution")
        fig, ax = plt.subplots()
        failure_counts = data["Failure"].value_counts().sort_index()
        sns.barplot(x=failure_counts.index, y=failure_counts.values, ax=ax)
        ax.set_xlabel("Failure (0 = No, 1 = Yes)")
        ax.set_ylabel("Count")
        ax.set_title("Failure Count")
        if not failure_counts.empty:
            # Hoist the max once; also guards the empty-frame case where
            # max() of an empty sequence would raise.
            top = failure_counts.values.max()
            for i, v in enumerate(failure_counts.values):
                ax.text(i, v + top * 0.01, str(v), ha="center", va="bottom", fontsize=9)
        _display_figure(fig)

    #////////////////////////////////////////////////////////////////////////////
    # Failure rate by machine Type
    #////////////////////////////////////////////////////////////////////////////
    if {"Type", "Failure"}.issubset(data.columns):
        st.header("Failure rate by machine type")
        fig, ax = plt.subplots()
        failure_by_type = data.groupby("Type")["Failure"].mean().reset_index()
        failure_by_type["Failure_rate_%"] = failure_by_type["Failure"] * 100
        sns.barplot(data=failure_by_type, x="Type", y="Failure_rate_%", ax=ax)
        ax.set_ylabel("Failure rate (%)")
        ax.set_title("Failure rate by machine type")
        for i, row in failure_by_type.iterrows():
            ax.text(i, row["Failure_rate_%"] + 0.5, f"{row['Failure_rate_%']:.1f}%", ha="center", fontsize=9)
        _display_figure(fig)

    #////////////////////////////////////////////////////////////////////////////
    # Rotational speed distribution by failure
    #////////////////////////////////////////////////////////////////////////////
    if {"Rotational speed", "Failure"}.issubset(data.columns):
        st.header("Rotational speed distribution by failure")
        fig, ax = plt.subplots()
        sns.histplot(
            data=data,
            x="Rotational speed",
            hue="Failure",
            multiple="stack",
            bins=30,
            ax=ax
        )
        ax.set_xlabel("Rotational speed (RPM)")
        ax.set_title("Rotational speed distribution (Failed vs Not Failed)")
        _display_figure(fig)

    #////////////////////////////////////////////////////////////////////////////
    # Torque vs Rotational speed (scatter)
    #////////////////////////////////////////////////////////////////////////////
    if {"Torque", "Rotational speed"}.issubset(data.columns):
        st.header("Torque vs Rotational speed")
        fig, ax = plt.subplots()
        # Color by failure status only when the column is available.
        hue = "Failure" if "Failure" in data.columns else None
        sns.scatterplot(
            data=data,
            x="Rotational speed",
            y="Torque",
            hue=hue,
            alpha=0.6,
            ax=ax
        )
        ax.set_xlabel("Rotational speed (RPM)")
        ax.set_ylabel("Torque (Nm)")
        ax.set_title("Torque vs Rotational speed")
        _display_figure(fig)

    #////////////////////////////////////////////////////////////////////////////
    # Tool wear distribution by failure
    #////////////////////////////////////////////////////////////////////////////
    if {"Tool wear", "Failure"}.issubset(data.columns):
        st.header("Tool wear distribution by failure")
        fig, ax = plt.subplots()
        sns.boxplot(
            data=data,
            x="Failure",
            y="Tool wear",
            ax=ax
        )
        ax.set_xlabel("Failure (0 = No, 1 = Yes)")
        ax.set_ylabel("Tool wear (min)")
        ax.set_title("Tool wear vs Failure")
        _display_figure(fig)

    #////////////////////////////////////////////////////////////////////////////
    # Correlation heatmap of numeric variables
    #////////////////////////////////////////////////////////////////////////////
    st.header("Correlation heatmap (numeric features)")
    numeric_cols = data.select_dtypes(include=["number"]).columns
    if len(numeric_cols) > 1:
        fig, ax = plt.subplots(figsize=(8, 6))
        corr = data[numeric_cols].corr()
        sns.heatmap(corr, annot=True, fmt=".2f", cmap="coolwarm", ax=ax)
        ax.set_title("Correlation heatmap")
        _display_figure(fig)
    else:
        st.write("Not enough numeric columns to compute correlations.")
231
 
232
# Script entry point: build the dashboard when this file is executed directly
# (e.g. via `streamlit run src/streamlit_app.py`).
if __name__ == "__main__":
    app()