File size: 9,039 Bytes
323ef01
1733b93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
323ef01
1733b93
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
import streamlit as st
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd

# Load data
def load_data() -> pd.DataFrame:
    """Read the dashboard dataset from ``./processed_data.csv``.

    The path is relative to the current working directory.

    Returns:
        The CSV contents as a pandas DataFrame.
    """
    csv_path = "./processed_data.csv"  # Replace with your dataset
    return pd.read_csv(csv_path)

# Create Streamlit app
def app() -> None:
    """Render the Machine Tool Failure dashboard.

    Configures the page, loads the dataset with ``load_data()``, adds a few
    derived columns, shows summary metrics in the sidebar and then draws each
    chart whose required columns are present in the data. If the dataset has
    no rows, a warning is shown and nothing else is rendered.
    """
    # Title for the app
    huggingface_page_title = "Machine Tool Failure Dashboard"
    st.set_page_config(page_title=huggingface_page_title, layout="wide")

    # --- Custom CSS to adjust sidebar width ---
    st.markdown(
        """
        <style>
            /* Sidebar width */
            [data-testid="stSidebar"] {
                width: 600px;
                min-width: 600px;
            }
        </style>
        """,
        unsafe_allow_html=True
    )

    # --- Page content ---
    st.title(huggingface_page_title)

    # Load data
    data = load_data()
    if data.empty:
        # Binning and the "most common" lookup cannot work without rows,
        # so stop with a readable message instead of a traceback.
        st.warning("The dataset is empty; nothing to display.")
        return

    _add_derived_columns(data)
    _render_sidebar_metrics(data)

    st.markdown("### Data preview")
    st.dataframe(data.head())

    sns.set_style("whitegrid", {'grid.color': 'lightgrey', 'grid.linestyle': '--'})

    _plot_failure_distribution(data)
    _plot_failure_rate_by_type(data)
    _plot_rotational_speed_distribution(data)
    _plot_torque_vs_speed(data)
    _plot_tool_wear_by_failure(data)
    _plot_correlation_heatmap(data)


def _show_figure(fig) -> None:
    """Send ``fig`` to the Streamlit page, then release it.

    Figures created with ``plt.subplots()`` stay registered in pyplot until
    they are closed; closing here keeps them from piling up every time the
    script is re-run. ``plt.show()`` is not needed because ``st.pyplot``
    already renders the figure.
    """
    st.pyplot(fig)
    plt.close(fig)


def _add_derived_columns(data: pd.DataFrame) -> None:
    """Add helper/derived columns to ``data`` in place (only where inputs exist)."""
    # Kelvin -> Celsius for readability (if temps exist)
    for source in ("Air temperature", "Process temperature"):
        if source in data.columns:
            data[f"{source} (°C)"] = data[source] - 273.15

    # Temperature delta (process - air); a difference has the same value in
    # Kelvin and Celsius, so the original columns are subtracted directly.
    if {"Air temperature", "Process temperature"}.issubset(data.columns):
        data["Temperature delta (°C)"] = data["Process temperature"] - data["Air temperature"]

    # Rotational speed categories (quartiles)
    if "Rotational speed" in data.columns:
        speed_labels = ["Very Low", "Low", "High", "Very High"]
        try:
            data["Rotational speed category"] = pd.qcut(
                data["Rotational speed"],
                q=4,
                labels=speed_labels
            )
        except ValueError:
            # qcut can fail if there are too many identical values; fallback to cut
            data["Rotational speed category"] = pd.cut(
                data["Rotational speed"],
                bins=4,
                labels=speed_labels
            )


def _render_sidebar_metrics(data: pd.DataFrame) -> None:
    """Compute the key metrics from ``data`` and display them in the sidebar."""
    total_obs = len(data)
    n_failures = int(data["Failure"].sum()) if "Failure" in data.columns else 0
    failure_rate = (n_failures / total_obs * 100) if total_obs > 0 else 0

    n_types = 0
    most_common_type = "N/A"
    if "Type" in data.columns:
        n_types = data["Type"].nunique()
        type_counts = data["Type"].value_counts()
        # idxmax raises on an empty Series (e.g. a Type column with no
        # non-null values), so keep the "N/A" placeholder in that case.
        if not type_counts.empty:
            most_common_type = type_counts.idxmax()

    st.sidebar.header("Key Metrics")
    st.sidebar.metric("Total observations", total_obs)
    st.sidebar.metric("Number of failures", n_failures)
    st.sidebar.metric("Failure rate (%)", f"{failure_rate:.2f}")

    st.sidebar.metric("Number of machine types", n_types)
    st.sidebar.metric("Most common type", most_common_type)

    # (metric label, source column) pairs; each is shown only if the column exists.
    optional_averages = [
        ("Avg rotational speed (RPM)", "Rotational speed"),
        ("Avg torque (Nm)", "Torque"),
        ("Avg tool wear (min)", "Tool wear"),
        ("Avg air temperature (°C)", "Air temperature (°C)"),
        ("Avg process temperature (°C)", "Process temperature (°C)"),
        ("Avg temp delta (°C)", "Temperature delta (°C)"),
    ]
    for label, column in optional_averages:
        if column in data.columns:
            st.sidebar.metric(label, round(data[column].mean(), 2))


def _plot_failure_distribution(data: pd.DataFrame) -> None:
    """Bar chart of how many rows fall under each ``Failure`` value."""
    if "Failure" not in data.columns:
        return
    st.header("Failure distribution")
    fig, ax = plt.subplots()
    failure_counts = data["Failure"].value_counts().sort_index()
    sns.barplot(x=failure_counts.index, y=failure_counts.values, ax=ax)
    ax.set_xlabel("Failure (0 = No, 1 = Yes)")
    ax.set_ylabel("Count")
    ax.set_title("Failure Count")
    # Lift each count label slightly above its bar (1% of the tallest bar).
    label_offset = failure_counts.max() * 0.01
    for i, v in enumerate(failure_counts.values):
        ax.text(i, v + label_offset, str(v), ha="center", va="bottom", fontsize=9)
    _show_figure(fig)


def _plot_failure_rate_by_type(data: pd.DataFrame) -> None:
    """Bar chart of the mean ``Failure`` value (as a percentage) per ``Type``."""
    if not {"Type", "Failure"}.issubset(data.columns):
        return
    st.header("Failure rate by machine type")
    fig, ax = plt.subplots()
    failure_by_type = data.groupby("Type")["Failure"].mean().reset_index()
    failure_by_type["Failure_rate_%"] = failure_by_type["Failure"] * 100
    sns.barplot(data=failure_by_type, x="Type", y="Failure_rate_%", ax=ax)
    ax.set_ylabel("Failure rate (%)")
    ax.set_title("Failure rate by machine type")
    # reset_index() gives a 0..n-1 index, so the enumerate position is the
    # same x coordinate as the bar for that row.
    for i, rate in enumerate(failure_by_type["Failure_rate_%"]):
        ax.text(i, rate + 0.5, f"{rate:.1f}%", ha="center", fontsize=9)
    _show_figure(fig)


def _plot_rotational_speed_distribution(data: pd.DataFrame) -> None:
    """Stacked histogram of ``Rotational speed`` split by ``Failure``."""
    if not {"Rotational speed", "Failure"}.issubset(data.columns):
        return
    st.header("Rotational speed distribution by failure")
    fig, ax = plt.subplots()
    sns.histplot(
        data=data,
        x="Rotational speed",
        hue="Failure",
        multiple="stack",
        bins=30,
        ax=ax
    )
    ax.set_xlabel("Rotational speed (RPM)")
    ax.set_title("Rotational speed distribution (Failed vs Not Failed)")
    _show_figure(fig)


def _plot_torque_vs_speed(data: pd.DataFrame) -> None:
    """Scatter plot of ``Torque`` against ``Rotational speed``.

    Points are coloured by ``Failure`` when that column is available.
    """
    if not {"Torque", "Rotational speed"}.issubset(data.columns):
        return
    st.header("Torque vs Rotational speed")
    fig, ax = plt.subplots()
    sns.scatterplot(
        data=data,
        x="Rotational speed",
        y="Torque",
        hue="Failure" if "Failure" in data.columns else None,
        alpha=0.6,
        ax=ax
    )
    ax.set_xlabel("Rotational speed (RPM)")
    ax.set_ylabel("Torque (Nm)")
    ax.set_title("Torque vs Rotational speed")
    _show_figure(fig)


def _plot_tool_wear_by_failure(data: pd.DataFrame) -> None:
    """Box plot of ``Tool wear`` for each ``Failure`` value."""
    if not {"Tool wear", "Failure"}.issubset(data.columns):
        return
    st.header("Tool wear distribution by failure")
    fig, ax = plt.subplots()
    sns.boxplot(
        data=data,
        x="Failure",
        y="Tool wear",
        ax=ax
    )
    ax.set_xlabel("Failure (0 = No, 1 = Yes)")
    ax.set_ylabel("Tool wear (min)")
    ax.set_title("Tool wear vs Failure")
    _show_figure(fig)


def _plot_correlation_heatmap(data: pd.DataFrame) -> None:
    """Annotated correlation heatmap over all numeric columns of ``data``."""
    st.header("Correlation heatmap (numeric features)")
    numeric_cols = data.select_dtypes(include=["number"]).columns
    if len(numeric_cols) <= 1:
        st.write("Not enough numeric columns to compute correlations.")
        return
    fig, ax = plt.subplots(figsize=(8, 6))
    corr = data[numeric_cols].corr()
    sns.heatmap(corr, annot=True, fmt=".2f", cmap="coolwarm", ax=ax)
    ax.set_title("Correlation heatmap")
    _show_figure(fig)

# Build the dashboard only when this file is executed as the main module.
if __name__ == "__main__":
    app()