import os
from datetime import datetime, timedelta

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
import streamlit as st
from sklearn.ensemble import RandomForestClassifier, GradientBoostingRegressor
from sklearn.feature_selection import SelectFromModel
from sklearn.impute import SimpleImputer
from sklearn.metrics import classification_report, mean_squared_error, precision_recall_curve, roc_curve, auc
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Browser-tab title/icon and overall page layout for the dashboard.
# The icon literal had been corrupted by an encoding round-trip; it is the
# wrench emoji again so Streamlit receives a valid emoji icon.
st.set_page_config(
    page_title="Predictive Maintenance Dashboard",
    page_icon="🔧",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Custom CSS for page padding, alert spacing and the metric-card style.
# unsafe_allow_html is needed so the <style> tag is emitted as HTML.
st.markdown("""
<style>
.main {
padding: 0rem 1rem;
}
.stAlert {
padding: 1rem;
margin: 1rem 0;
}
.metric-card {
background-color: #f0f2f6;
padding: 1rem;
border-radius: 0.5rem;
}
</style>
""", unsafe_allow_html=True)

def load_and_prepare_data(path='./train.csv', seed=42):
    """
    ETL pipeline for data preparation.

    Reads the sensor CSV, fills missing values, derives the failure label
    and the engineered features, and attaches simulated maintenance history.

    Args:
        path: CSV file to load. Defaults to ``./train.csv``.
        seed: Seed for the simulated maintenance columns. A fixed seed keeps
            those columns identical between calls, so maintenance decisions
            do not change every time the Streamlit script reruns. Pass
            ``None`` to draw fresh random values.

    Returns:
        pandas.DataFrame with the original columns plus ``Failure``, the
        engineered feature columns, ``Last_Maintenance`` and
        ``Maintenance_Count``.
    """
    data = pd.read_csv(path)

    # Forward-fill gaps, then back-fill whatever is still missing at the top.
    data = data.ffill().bfill()

    # A row counts as a failure when any of the individual failure flags is set.
    data['Failure'] = data[['TWF', 'HDF', 'PWF', 'OSF', 'RNF']].sum(axis=1) > 0

    # Rolling statistics over the last 10 rows (fewer at the start of the file).
    data['Torque_RollingMean'] = data['Torque [Nm]'].rolling(window=10, min_periods=1).mean()
    data['RPM_Variance'] = data['Rotational speed [rpm]'].rolling(window=10, min_periods=1).var()

    data['Temperature_Difference'] = data['Process temperature [K]'] - data['Air temperature [K]']
    data['Power'] = data['Torque [Nm]'] * data['Rotational speed [rpm]'] / 9550

    # Row-to-row changes; the first row has no predecessor, so it gets 0.
    data['Temperature_Rate'] = data['Process temperature [K]'].diff().fillna(0)
    data['Wear_Rate'] = data['Tool wear [min]'].diff().fillna(0)

    # +1 keeps the ratio finite when tool wear is 0.
    data['Power_to_Wear_Ratio'] = data['Power'] / (data['Tool wear [min]'] + 1)

    # Simulated maintenance history (random placeholders, not real records).
    rng = np.random.default_rng(seed)
    data['Last_Maintenance'] = rng.integers(0, 1000, size=len(data))
    data['Maintenance_Count'] = rng.integers(0, 5, size=len(data))

    return data

@st.cache_data
def get_failure_patterns(data):
    """
    Count failure rows whose key metrics sit above the failure-row average.

    Only rows with ``Failure == 1`` are considered; each metric is compared
    against its mean over those same rows.

    Returns:
        dict with the counts under ``high_temp``, ``high_wear`` and
        ``high_power``.
    """
    failed_rows = data[data['Failure'] == 1]

    def _rows_above_mean(column):
        # Number of failure rows strictly above that column's failure-row mean.
        values = failed_rows[column]
        return failed_rows[values > values.mean()].shape[0]

    return {
        'high_temp': _rows_above_mean('Temperature_Difference'),
        'high_wear': _rows_above_mean('Tool wear [min]'),
        'high_power': _rows_above_mean('Power'),
    }

def create_pipelines(model_params=None):
    """
    Create the (unfitted) classification and regression pipelines.

    Both pipelines share the same shape: mean imputation, standard scaling,
    model-based feature selection, then the final estimator.

    Args:
        model_params: Optional dict with any of ``n_estimators_clf``,
            ``max_depth_clf``, ``n_estimators_reg`` and ``max_depth_reg``.
            Keys that are left out fall back to the defaults below.

    Returns:
        Tuple ``(clf_pipeline, reg_pipeline)``.
    """
    defaults = {
        'n_estimators_clf': 200,
        'max_depth_clf': 15,
        'n_estimators_reg': 150,
        'max_depth_reg': 7
    }
    # Merge instead of replace so a partial override does not raise KeyError.
    settings = {**defaults, **(model_params or {})}

    clf_pipeline = Pipeline([
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler()),
        ('feature_selection', SelectFromModel(RandomForestClassifier(n_estimators=100, random_state=42))),
        ('classifier', RandomForestClassifier(
            n_estimators=settings['n_estimators_clf'],
            max_depth=settings['max_depth_clf'],
            class_weight='balanced',
            random_state=42
        ))
    ])

    reg_pipeline = Pipeline([
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler()),
        ('feature_selection', SelectFromModel(GradientBoostingRegressor(n_estimators=100, random_state=42))),
        ('regressor', GradientBoostingRegressor(
            n_estimators=settings['n_estimators_reg'],
            max_depth=settings['max_depth_reg'],
            learning_rate=0.1,
            random_state=42
        ))
    ])

    return clf_pipeline, reg_pipeline

def calculate_maintenance_metrics(failure_prob, tool_wear, last_maintenance, thresholds):
    """
    Calculate maintenance recommendations based on predictions and customizable thresholds.

    The three per-unit inputs are paired up by position. They are converted
    to NumPy arrays first, so lists work and pandas objects with different
    indexes are not re-aligned by label.

    Args:
        failure_prob: Predicted failure probability per unit.
        tool_wear: Predicted tool wear per unit.
        last_maintenance: Time since last maintenance per unit.
        thresholds: Dict with ``risk``, ``wear`` and ``maintenance_age``.
            Optional ``high_priority`` / ``medium_priority`` keys override
            the probability cut-offs for the priority label (0.7 / 0.4).

    Returns:
        Tuple of arrays ``(maintenance_due, priority, next_maintenance,
        estimated_days)``, one entry per unit.
    """
    failure_prob = np.asarray(failure_prob, dtype=float)
    tool_wear = np.asarray(tool_wear, dtype=float)
    last_maintenance = np.asarray(last_maintenance, dtype=float)

    risk_threshold = thresholds['risk']
    wear_threshold = thresholds['wear']
    maintenance_age_threshold = thresholds['maintenance_age']
    high_cutoff = thresholds.get('high_priority', 0.7)
    medium_cutoff = thresholds.get('medium_priority', 0.4)

    # Maintenance is due as soon as any single trigger fires.
    maintenance_due = (
        (failure_prob > risk_threshold) |
        (tool_wear > wear_threshold) |
        (last_maintenance > maintenance_age_threshold)
    )

    priority = np.where(
        failure_prob > high_cutoff, 'High',
        np.where(failure_prob > medium_cutoff, 'Medium', 'Low')
    )

    # Remaining wear budget divided by the mean predicted wear; the 0.1
    # floor avoids dividing by zero. An empty input has no mean, so use 0.
    mean_wear = tool_wear.mean() if tool_wear.size else 0.0
    estimated_days = np.where(
        maintenance_due,
        0,
        np.ceil((wear_threshold - tool_wear) / np.maximum(0.1, mean_wear))
    )

    next_maintenance = np.where(
        maintenance_due,
        'Immediate',
        np.where(
            estimated_days <= 7,
            'Within 1 week',
            np.where(
                estimated_days <= 30,
                'Within 1 month',
                'No immediate action needed'
            )
        )
    )

    return maintenance_due, priority, next_maintenance, estimated_days

def create_failure_analysis_plots(data, X_train, y_train, X_test, y_test, predictions):
    """
    Create various failure analysis visualizations.

    Args:
        data: DataFrame providing ``Tool wear [min]``,
            ``Temperature_Difference`` and ``Power`` for the metrics plot.
        X_train, y_train: Training split used to fit a helper
            RandomForestClassifier for the ROC curve.
        X_test, y_test: Held-out split the ROC curve is computed on.
        predictions: Failure probabilities shown in the histogram.

    Returns:
        Tuple ``(fig1, fig2, fig3)``: key metrics over observations,
        histogram of ``predictions``, and the ROC curve of the helper model.
    """
    # Helper model used only for the ROC curve; seeded so the curve does
    # not change between calls.
    model = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42)
    model.fit(X_train, y_train)

    # Figure 1: the three key metrics plotted against observation order.
    fig1 = go.Figure()
    fig1.add_trace(go.Scatter(
        y=data['Tool wear [min]'],
        name='Tool Wear',
        line=dict(color='blue')
    ))
    fig1.add_trace(go.Scatter(
        y=data['Temperature_Difference'],
        name='Temperature Difference',
        line=dict(color='red')
    ))
    fig1.add_trace(go.Scatter(
        y=data['Power'],
        name='Power',
        line=dict(color='green')
    ))
    fig1.update_layout(title='Key Metrics Over Time', xaxis_title='Observation')

    # Figure 2: distribution of the supplied failure probabilities.
    fig2 = px.histogram(
        predictions,
        nbins=50,
        title='Distribution of Failure Probabilities'
    )

    # Figure 3: ROC curve from the helper model's positive-class probability.
    y_pred_proba = model.predict_proba(X_test)[:, 1]
    fpr, tpr, _ = roc_curve(y_test, y_pred_proba)
    roc_auc = auc(fpr, tpr)
    fig3 = go.Figure()
    fig3.add_trace(go.Scatter(
        x=fpr, y=tpr,
        mode='lines',
        name=f'ROC Curve (AUC = {roc_auc:.2f})'
    ))
    # plot_bgcolor is a layout property, so it is set through update_layout
    # rather than assigned as an attribute of the Figure object.
    fig3.update_layout(
        title='Receiver Operating Characteristic (ROC) Curve',
        xaxis_title='False Positive Rate',
        yaxis_title='True Positive Rate',
        xaxis_range=[0, 1],
        yaxis_range=[0, 1],
        plot_bgcolor='white'
    )

    return fig1, fig2, fig3

def plot_maintenance_calendar(schedule_df):
    """
    Build the interactive maintenance timeline.

    Each row of ``schedule_df`` becomes one bar running from
    ``Scheduled_Date`` to ``Due_Date`` on the ``Equipment_ID`` axis,
    coloured by ``Priority``.
    """
    timeline_options = {
        'x_start': 'Scheduled_Date',
        'x_end': 'Due_Date',
        'y': 'Equipment_ID',
        'color': 'Priority',
        'title': 'Maintenance Schedule Timeline',
    }
    timeline = px.timeline(schedule_df, **timeline_options)

    # Axis titles; the y axis is reversed so the first unit is at the top.
    timeline.update_xaxes(title="Date")
    timeline.update_yaxes(autorange="reversed", title="Equipment ID")
    return timeline

def sidebar_controls():
    """
    Render the sidebar widgets and collect their current values.

    Returns:
        dict with three sub-dicts: ``model_params``, ``thresholds`` and
        ``viz_params``.
    """
    sidebar = st.sidebar
    sidebar.header('Dashboard Controls')

    # Widgets are created in display order; dict literals evaluate their
    # values top to bottom, so the sidebar layout is unchanged.
    sidebar.subheader('Model Parameters')
    model_params = {
        'n_estimators_clf': sidebar.slider('Number of Trees (Classification)', 50, 300, 200),
        'max_depth_clf': sidebar.slider('Max Tree Depth (Classification)', 5, 30, 15),
        'n_estimators_reg': sidebar.slider('Number of Trees (Regression)', 50, 300, 150),
        'max_depth_reg': sidebar.slider('Max Tree Depth (Regression)', 5, 30, 7),
    }

    sidebar.subheader('Maintenance Thresholds')
    thresholds = {
        'risk': sidebar.slider('Risk Threshold', 0.0, 1.0, 0.3),
        'wear': sidebar.slider('Wear Threshold', 100, 300, 200),
        'maintenance_age': sidebar.slider('Maintenance Age Threshold', 500, 1000, 800),
    }

    sidebar.subheader('Visualization Settings')
    viz_params = {
        'plot_height': sidebar.slider('Plot Height', 400, 800, 600),
        'color_theme': sidebar.selectbox('Color Theme', ['blues', 'reds', 'greens']),
    }

    return {
        'model_params': model_params,
        'thresholds': thresholds,
        'viz_params': viz_params,
    }

def main():
    """
    Build and render the predictive maintenance dashboard.

    Loads the prepared dataset, loads the fitted pipelines from ``./models``
    (or trains and saves them when the files are missing), scores the
    held-out split, and renders the summary metrics, the four analysis tabs
    and the CSV report download.

    NOTE(review): several headings still contain a bare "π". That is
    residue of emoji whose trailing bytes were lost in an encoding
    round-trip; the original characters cannot be recovered from this file,
    so those strings are left untouched.
    """
    st.title("🔧 Advanced Predictive Maintenance Dashboard")

    params = sidebar_controls()

    with st.expander("ℹ️ Dashboard Overview", expanded=True):
        st.markdown("""
This dashboard provides comprehensive predictive maintenance analytics for manufacturing equipment:

1. *Real-time Monitoring*: Track equipment health metrics and failure predictions
2. *Maintenance Planning*: Get AI-powered maintenance recommendations
3. *Performance Analysis*: Analyze historical data and model performance
4. *Interactive Features*: Customize thresholds and visualization parameters

Use the sidebar controls to adjust model parameters and thresholds.
""")

    with st.spinner("Loading and preparing data..."):
        data = load_and_prepare_data()

    feature_columns = [
        'Air temperature [K]', 'Process temperature [K]', 'Rotational speed [rpm]',
        'Torque [Nm]', 'Tool wear [min]', 'Torque_RollingMean', 'RPM_Variance',
        'Temperature_Difference', 'Power', 'Temperature_Rate', 'Wear_Rate',
        'Power_to_Wear_Ratio'
    ]

    X = data[feature_columns]
    y_classification = data['Failure']
    # NOTE(review): 'Tool wear [min]' is both a feature and the regression
    # target, so the regressor can read its answer from the input - confirm
    # whether that is intended.
    y_regression = data['Tool wear [min]']

    # One split for both targets. Splitting twice (only once with stratify)
    # produced different row selections, so the regression targets did not
    # belong to the rows in X_train / X_test.
    (X_train, X_test,
     y_train_cls, y_test_cls,
     y_train_reg, y_test_reg) = train_test_split(
        X, y_classification, y_regression,
        test_size=0.2, random_state=42, stratify=y_classification
    )

    model_dir = './models'
    os.makedirs(model_dir, exist_ok=True)

    clf_pipeline_file = os.path.join(model_dir, 'clf_pipeline.pkl')
    reg_pipeline_file = os.path.join(model_dir, 'reg_pipeline.pkl')

    if os.path.exists(clf_pipeline_file) and os.path.exists(reg_pipeline_file):
        # joblib files are pickles: only load files this app wrote itself.
        # NOTE(review): saved models ignore the sidebar model parameters and
        # any change to the training code; delete ./models to retrain.
        clf_pipeline = joblib.load(clf_pipeline_file)
        reg_pipeline = joblib.load(reg_pipeline_file)
    else:
        with st.spinner("Training models with selected parameters..."):
            clf_pipeline, reg_pipeline = create_pipelines(params['model_params'])

            clf_pipeline.fit(X_train, y_train_cls)
            reg_pipeline.fit(X_train, y_train_reg)

            joblib.dump(clf_pipeline, clf_pipeline_file)
            joblib.dump(reg_pipeline, reg_pipeline_file)
            st.write("Trained and saved new models to ./models folder.")

    y_pred_cls = clf_pipeline.predict(X_test)
    y_pred_proba = clf_pipeline.predict_proba(X_test)[:, 1]
    y_pred_reg = reg_pipeline.predict(X_test)

    # Take the maintenance age of the test rows themselves (by index), not
    # of the last N rows of the file, so it matches the predictions.
    last_maintenance = data.loc[X_test.index, 'Last_Maintenance'].to_numpy()

    maintenance_due, priority, next_maintenance, estimated_days = calculate_maintenance_metrics(
        y_pred_proba,
        y_pred_reg,
        last_maintenance,
        params['thresholds']
    )

    avg_risk = y_pred_proba.mean()
    avg_wear = y_pred_reg.mean()
    due_count = int(maintenance_due.sum())

    st.header("π Equipment Health Overview")

    metric_cols = st.columns(4)
    with metric_cols[0]:
        st.metric(
            "Overall Health Index",
            f"{(1 - avg_risk):.1%}",
            delta=f"{-avg_risk:.1%}",
            delta_color="inverse"
        )

    with metric_cols[1]:
        st.metric(
            "Average Failure Risk",
            f"{avg_risk:.1%}",
            delta=f"{(avg_risk - 0.3):.1%}" if avg_risk > 0.3 else "Normal",
            delta_color="inverse"
        )

    with metric_cols[2]:
        st.metric(
            "Equipment Requiring Maintenance",
            f"{due_count}",
            delta=f"{due_count - 10}" if due_count > 10 else "Within limits"
        )

    with metric_cols[3]:
        st.metric(
            "Average Tool Wear",
            f"{avg_wear:.1f} min",
            delta=f"{avg_wear - params['thresholds']['wear']:.1f}"
        )

    tabs = st.tabs([
        "π Real-time Monitoring",
        "π Performance Analysis",
        "🔧 Maintenance Planning",
        "π Historical Analysis"
    ])

    # --- Tab 1: current status and metric explorer ---
    with tabs[0]:
        status_df = pd.DataFrame({
            'Status': ['Healthy', 'Warning', 'Critical'],
            'Count': [
                (y_pred_proba < 0.3).sum(),
                ((y_pred_proba >= 0.3) & (y_pred_proba < 0.7)).sum(),
                (y_pred_proba >= 0.7).sum()
            ]
        })
        fig = px.pie(
            status_df,
            values='Count',
            names='Status',
            title='Equipment Status Distribution',
            color='Status',
            color_discrete_map={
                'Healthy': 'green',
                'Warning': 'yellow',
                'Critical': 'red'
            }
        )
        st.plotly_chart(fig, use_container_width=True)

        if due_count > 0:
            st.warning(f"⚠️ {due_count} equipment units require immediate attention!")

        st.subheader("Equipment Explorer")
        selected_metric = st.selectbox(
            "Select Metric to Monitor:",
            options=['Temperature_Difference', 'Tool wear [min]', 'Power', 'Torque [Nm]', 'Rotational speed [rpm]']
        )

        # Clamp the default so it never exceeds the slider maximum when the
        # dataset has fewer than 100 rows.
        time_window = st.slider(
            "Time Window (last N observations)",
            min_value=10,
            max_value=len(data),
            value=min(100, len(data))
        )

        fig = px.line(
            data.tail(time_window),
            y=selected_metric,
            title=f'{selected_metric} - Last {time_window} Observations'
        )
        fig.add_hline(
            y=data[selected_metric].mean(),
            line_dash="dash",
            annotation_text="Average"
        )
        st.plotly_chart(fig, use_container_width=True)

    # --- Tab 2: model quality and feature diagnostics ---
    with tabs[1]:
        st.subheader("Model Performance Analysis")

        col1, col2 = st.columns(2)

        with col1:
            st.markdown("### Failure Prediction Performance")
            st.text("Classification Report:")
            st.code(classification_report(y_test_cls, y_pred_cls))

            precision, recall, _ = precision_recall_curve(y_test_cls, y_pred_proba)
            fig = go.Figure()
            fig.add_trace(go.Scatter(
                x=recall, y=precision,
                mode='lines',
                name='Precision-Recall curve',
                fill='tozeroy'
            ))
            fig.update_layout(
                title='Precision-Recall Curve',
                xaxis_title='Recall',
                yaxis_title='Precision'
            )
            st.plotly_chart(fig, use_container_width=True)

        with col2:
            st.markdown("### Tool Wear Prediction Performance")
            mse = mean_squared_error(y_test_reg, y_pred_reg)
            rmse = np.sqrt(mse)
            st.metric("Root Mean Squared Error", f"{rmse:.2f}")

            # The classifier only sees the columns kept by the selection
            # step, so label its importances with exactly those columns
            # (truncating the name list attached wrong names).
            selected_mask = clf_pipeline.named_steps['feature_selection'].get_support()
            feature_names = [
                name for name, kept in zip(feature_columns, selected_mask) if kept
            ]
            feature_importances = clf_pipeline.named_steps['classifier'].feature_importances_

            feature_imp = pd.DataFrame({
                'Feature': feature_names,
                'Importance': feature_importances
            }).sort_values('Importance', ascending=True)

            fig = px.bar(
                feature_imp,
                x='Importance',
                y='Feature',
                orientation='h',
                title='Feature Importance Analysis'
            )
            st.plotly_chart(fig, use_container_width=True)

        st.subheader("Feature Correlation Analysis")

        correlation_matrix = data[feature_columns].corr()

        correlation_fig = px.imshow(correlation_matrix,
                                    text_auto=True,
                                    color_continuous_scale='Viridis',
                                    title="Feature Correlation Heatmap")

        correlation_fig.update_layout(
            width=800,
            height=600,
            xaxis_title="Features",
            yaxis_title="Features",
            xaxis={'tickangle': 45},
            yaxis={'tickangle': -45}
        )

        st.plotly_chart(correlation_fig, use_container_width=True)

    # --- Tab 3: schedule, priorities and cost ---
    with tabs[2]:
        st.subheader("Maintenance Schedule and Recommendations")

        schedule_df = pd.DataFrame({
            'Equipment_ID': range(1, len(maintenance_due) + 1),
            'Failure_Probability': y_pred_proba,
            'Tool_Wear': y_pred_reg,
            'Priority': priority,
            'Next_Maintenance': next_maintenance,
            'Estimated_Days': estimated_days
        })

        # Each task starts after its estimated days and is due a week later.
        today = datetime.now()
        schedule_df['Scheduled_Date'] = [
            today + timedelta(days=int(d)) for d in schedule_df['Estimated_Days']
        ]
        schedule_df['Due_Date'] = [
            d + timedelta(days=7) for d in schedule_df['Scheduled_Date']
        ]

        # This literal had been split across two lines by the corrupted
        # calendar emoji, which made the file unparseable.
        st.markdown("### 📅 Maintenance Calendar")
        calendar_fig = plot_maintenance_calendar(schedule_df)
        st.plotly_chart(calendar_fig, use_container_width=True)

        st.markdown("### 🔧 Priority Maintenance Tasks")
        priority_df = schedule_df[schedule_df['Priority'] == 'High'].sort_values(
            'Failure_Probability', ascending=False
        )

        if not priority_df.empty:
            st.dataframe(
                priority_df[['Equipment_ID', 'Failure_Probability', 'Tool_Wear', 'Next_Maintenance']],
                use_container_width=True
            )
        else:
            st.success("No high-priority maintenance tasks at the moment!")

        st.markdown("### 💰 Maintenance Cost Projection")
        est_cost_per_maintenance = st.number_input(
            "Estimated cost per maintenance (USD):",
            value=1000,
            step=100
        )

        projected_cost = due_count * est_cost_per_maintenance

        cost_col1, cost_col2 = st.columns(2)
        with cost_col1:
            st.metric(
                "Projected Maintenance Cost",
                f"${projected_cost:,.2f}",
                delta=f"${projected_cost - 10000:,.2f}" if projected_cost > 10000 else "Within budget"
            )

        with cost_col2:
            st.metric(
                "Average Cost per Equipment",
                f"${projected_cost/len(maintenance_due):,.2f}"
            )

    # --- Tab 4: historical trends and failure patterns ---
    with tabs[3]:
        st.subheader("Historical Performance Analysis")

        st.markdown("### π Historical Trends")
        metric_for_history = st.selectbox(
            "Select metric for historical analysis:",
            options=['Tool wear [min]', 'Temperature_Difference', 'Power', 'Failure']
        )

        fig = go.Figure()
        fig.add_trace(go.Scatter(
            y=data[metric_for_history],
            mode='lines',
            name=metric_for_history
        ))

        # Straight-line (degree 1) fit over observation order as the trend.
        positions = np.arange(len(data))
        trend = np.poly1d(np.polyfit(positions, data[metric_for_history], 1))
        fig.add_trace(go.Scatter(
            y=trend(positions),
            mode='lines',
            name='Trend',
            line=dict(dash='dash')
        ))

        st.plotly_chart(fig, use_container_width=True)

        st.markdown("### π Failure Patterns")
        patterns = get_failure_patterns(data)

        pattern_cols = st.columns(3)
        for column, (pattern, count) in zip(pattern_cols, patterns.items()):
            with column:
                st.metric(
                    f"Failures due to {pattern.replace('_', ' ').title()}",
                    count,
                    delta=f"{count/len(data['Failure'])*100:.1f}% of total"
                )

    st.markdown("---")
    st.markdown("""
### π Notes and Recommendations
- Adjust thresholds in the sidebar to customize maintenance triggers
- Regular model retraining is recommended for optimal performance
- Contact maintenance team for immediate issues
""")

    if st.button("Generate Maintenance Report"):
        report_df = pd.DataFrame({
            'Equipment_ID': range(1, len(maintenance_due) + 1),
            'Failure_Risk': y_pred_proba,
            'Tool_Wear': y_pred_reg,
            'Maintenance_Priority': priority,
            'Next_Maintenance': next_maintenance,
            'Days_Until_Maintenance': estimated_days
        })

        csv = report_df.to_csv(index=False)
        st.download_button(
            label="Download Maintenance Report",
            data=csv,
            file_name="maintenance_report.csv",
            mime="text/csv"
        )

# Run the dashboard only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()