Enter sensor readings
and run a prediction
to see results here.
""" FaultSense — LightGBM Fault Prediction App Fixes applied: 1. Model loads at module level so gunicorn workers pick it up 2. Select dropdown works cross-platform """ import os import warnings import numpy as np import pandas as pd warnings.filterwarnings("ignore") from sklearn.model_selection import train_test_split from sklearn.metrics import ( roc_auc_score, accuracy_score, precision_score, recall_score, f1_score, log_loss, confusion_matrix ) from sklearn.preprocessing import OneHotEncoder from sklearn.compose import ColumnTransformer from sklearn.pipeline import Pipeline import joblib from lightgbm import LGBMClassifier from flask import Flask, request, jsonify, render_template_string # ───────────────────────────────────────────── # CONFIG # ───────────────────────────────────────────── DATA_PATH = "synthetic_nim_parallel_10000.csv" MODEL_PATH = "/tmp/faultsense_model.joblib" DROP_COLS = ["location"] TARGET = "faulty" CAT_COLS = ["equipment"] NUM_COLS = ["temperature", "pressure", "vibration", "humidity"] RANDOM_STATE = 42 THRESHOLD = 0.5 FIXED_PARAMS = dict( max_depth=8, num_leaves=50, min_child_samples=20, subsample=0.8, colsample_bytree=0.8, class_weight="balanced", random_state=RANDOM_STATE, verbose=-1, ) BEST_CONFIG = { "learning_rate": 0.05, "n_estimators": 165, "train_ratio": 0.80, "val_ratio": 0.10, "test_ratio": 0.10, } EQUIPMENT_OPTIONS = ["pump", "compressor", "motor", "valve", "sensor"] # ───────────────────────────────────────────── # MODEL TRAINING / LOADING # ───────────────────────────────────────────── def make_preprocessor(): return ColumnTransformer([ ("cat", OneHotEncoder(handle_unknown="ignore", sparse_output=False), CAT_COLS), ("num", "passthrough", NUM_COLS), ]) def train_model(cfg: dict): print(f"Training: lr={cfg['learning_rate']}, n_est={cfg['n_estimators']}") df_raw = pd.read_csv(DATA_PATH) df_raw = df_raw.drop(columns=DROP_COLS, errors="ignore") X = df_raw.drop(columns=[TARGET]) y = df_raw[TARGET] train_r, val_r, test_r = cfg["train_ratio"], cfg["val_ratio"], cfg["test_ratio"] X_trainval, X_test, y_trainval, y_test = train_test_split( X, y, test_size=test_r, stratify=y, random_state=RANDOM_STATE ) val_relative = val_r / (train_r + val_r) X_train, X_val, y_train, y_val = train_test_split( X_trainval, y_trainval, test_size=val_relative, stratify=y_trainval, random_state=RANDOM_STATE ) pipeline = Pipeline([ ("pre", make_preprocessor()), ("clf", LGBMClassifier( n_estimators=cfg["n_estimators"], learning_rate=cfg["learning_rate"], **FIXED_PARAMS )) ]) pipeline.fit(X_train, y_train) y_prob = pipeline.predict_proba(X_test)[:, 1] y_pred = (y_prob >= THRESHOLD).astype(int) test_metrics = { "test_auc": round(roc_auc_score(y_test, y_prob), 4), "test_accuracy": round(accuracy_score(y_test, y_pred), 4), "test_precision": round(precision_score(y_test, y_pred, zero_division=0), 4), "test_recall": round(recall_score(y_test, y_pred, zero_division=0), 4), "test_f1": round(f1_score(y_test, y_pred, zero_division=0), 4), "test_logloss": round(log_loss(y_test, y_prob), 4), } cm = confusion_matrix(y_test, y_pred).tolist() artifact = {"pipeline": pipeline, "config": cfg, "test_metrics": test_metrics, "cm": cm} print(f"Model saved → {MODEL_PATH} AUC={test_metrics['test_auc']} F1={test_metrics['test_f1']}") return artifact def load_or_train(): if os.path.exists(MODEL_PATH): print(f"Loading saved model from {MODEL_PATH}") return joblib.load(MODEL_PATH) return train_model(BEST_CONFIG) # ───────────────────────────────────────────── # LOAD MODEL AT MODULE LEVEL (runs under gunicorn) # ───────────────────────────────────────────── ARTIFACT = load_or_train() # ───────────────────────────────────────────── # FLASK APP # ───────────────────────────────────────────── app = Flask(__name__) HTML = r"""
LightGBM Equipment Fault Predictor
Enter sensor readings
and run a prediction
to see results here.