""" FaultSense — LightGBM + Random Forest Fault Prediction App Both models trained at startup; UI lets user switch between them. """ import os import warnings import numpy as np import pandas as pd warnings.filterwarnings("ignore") from sklearn.model_selection import train_test_split from sklearn.metrics import ( roc_auc_score, accuracy_score, precision_score, recall_score, f1_score, log_loss, confusion_matrix ) from sklearn.preprocessing import OneHotEncoder from sklearn.compose import ColumnTransformer from sklearn.pipeline import Pipeline from sklearn.ensemble import RandomForestClassifier import joblib from lightgbm import LGBMClassifier from flask import Flask, request, jsonify, render_template_string # ───────────────────────────────────────────── # CONFIG # ───────────────────────────────────────────── DATA_PATH = "synthetic_nim_parallel_10000.csv" LGBM_PATH = "/tmp/faultsense_lgbm.joblib" RF_PATH = "/tmp/faultsense_rf.joblib" DROP_COLS = ["location"] TARGET = "faulty" CAT_COLS = ["equipment"] NUM_COLS = ["temperature", "pressure", "vibration", "humidity"] RANDOM_STATE = 42 THRESHOLD = 0.5 LGBM_PARAMS = dict( max_depth=8, num_leaves=50, min_child_samples=20, subsample=0.8, colsample_bytree=0.8, class_weight="balanced", random_state=RANDOM_STATE, verbose=-1, learning_rate=0.05, n_estimators=165, ) RF_PARAMS = dict( n_estimators=165, max_depth=10, min_samples_split=10, min_samples_leaf=5, class_weight="balanced", random_state=RANDOM_STATE, n_jobs=-1, ) BEST_CONFIG = { "train_ratio": 0.80, "val_ratio": 0.10, "test_ratio": 0.10, } EQUIPMENT_OPTIONS = ["pump", "compressor", "motor", "valve", "sensor"] # ───────────────────────────────────────────── # MODEL TRAINING / LOADING # ───────────────────────────────────────────── def make_preprocessor(): return ColumnTransformer([ ("cat", OneHotEncoder(handle_unknown="ignore", sparse_output=False), CAT_COLS), ("num", "passthrough", NUM_COLS), ]) def load_data(cfg): df_raw = pd.read_csv(DATA_PATH) df_raw = df_raw.drop(columns=DROP_COLS, errors="ignore") X = df_raw.drop(columns=[TARGET]) y = df_raw[TARGET] train_r, val_r, test_r = cfg["train_ratio"], cfg["val_ratio"], cfg["test_ratio"] X_trainval, X_test, y_trainval, y_test = train_test_split( X, y, test_size=test_r, stratify=y, random_state=RANDOM_STATE ) val_relative = val_r / (train_r + val_r) X_train, X_val, y_train, y_val = train_test_split( X_trainval, y_trainval, test_size=val_relative, stratify=y_trainval, random_state=RANDOM_STATE ) return X_train, X_val, X_test, y_train, y_val, y_test def compute_metrics(pipeline, X_test, y_test): y_prob = pipeline.predict_proba(X_test)[:, 1] y_pred = (y_prob >= THRESHOLD).astype(int) return { "test_auc": round(roc_auc_score(y_test, y_prob), 4), "test_accuracy": round(accuracy_score(y_test, y_pred), 4), "test_precision": round(precision_score(y_test, y_pred, zero_division=0), 4), "test_recall": round(recall_score(y_test, y_pred, zero_division=0), 4), "test_f1": round(f1_score(y_test, y_pred, zero_division=0), 4), "test_logloss": round(log_loss(y_test, y_prob), 4), }, confusion_matrix(y_test, y_pred).tolist() def train_lgbm(X_train, X_test, y_train, y_test): print("Training LightGBM...") pipeline = Pipeline([ ("pre", make_preprocessor()), ("clf", LGBMClassifier(**LGBM_PARAMS)) ]) pipeline.fit(X_train, y_train) metrics, cm = compute_metrics(pipeline, X_test, y_test) print(f"LGBM AUC={metrics['test_auc']} F1={metrics['test_f1']}") return {"pipeline": pipeline, "test_metrics": metrics, "cm": cm, "config": {**BEST_CONFIG, "model": "LightGBM", "learning_rate": LGBM_PARAMS["learning_rate"], "n_estimators": LGBM_PARAMS["n_estimators"]}} def train_rf(X_train, X_test, y_train, y_test): print("Training Random Forest...") pipeline = Pipeline([ ("pre", make_preprocessor()), ("clf", RandomForestClassifier(**RF_PARAMS)) ]) pipeline.fit(X_train, y_train) metrics, cm = compute_metrics(pipeline, X_test, y_test) print(f"RF AUC={metrics['test_auc']} F1={metrics['test_f1']}") return {"pipeline": pipeline, "test_metrics": metrics, "cm": cm, "config": {**BEST_CONFIG, "model": "Random Forest", "n_estimators": RF_PARAMS["n_estimators"], "max_depth": RF_PARAMS["max_depth"]}} def load_or_train_all(): X_train, X_val, X_test, y_train, y_val, y_test = load_data(BEST_CONFIG) if os.path.exists(LGBM_PATH): print(f"Loading LGBM from {LGBM_PATH}") lgbm_artifact = joblib.load(LGBM_PATH) else: lgbm_artifact = train_lgbm(X_train, X_test, y_train, y_test) joblib.dump(lgbm_artifact, LGBM_PATH) if os.path.exists(RF_PATH): print(f"Loading RF from {RF_PATH}") rf_artifact = joblib.load(RF_PATH) else: rf_artifact = train_rf(X_train, X_test, y_train, y_test) joblib.dump(rf_artifact, RF_PATH) return {"lgbm": lgbm_artifact, "rf": rf_artifact} # ───────────────────────────────────────────── # LOAD MODELS AT MODULE LEVEL # ───────────────────────────────────────────── ARTIFACTS = load_or_train_all() # ───────────────────────────────────────────── # FLASK APP # ───────────────────────────────────────────── app = Flask(__name__) HTML = r""" FaultSense — Equipment Fault Predictor
FS

FaultSense

Multi-Model Equipment Fault Predictor

Loading Models…
⚡ LightGBM Gradient Boosting Loading…
🌲 Random Forest Ensemble Trees Loading…
Sensor Readings
Pump
Pump
Compressor
Motor
Valve
Sensor
40.0°C
5.0 bar
5.0 mm/s
50%
Model Comparison
Loading…
Prediction History
No predictions yet
🔬

Select a model, enter sensor
readings, and run a prediction
to see results here.

Active Model Config
Loading…
""" # ───────────────────────────────────────────── # ROUTES # ───────────────────────────────────────────── @app.route("/") def index(): return render_template_string(HTML) @app.route("/model_info") def model_info(): return jsonify({ "lgbm": { "config": ARTIFACTS["lgbm"]["config"], "test_metrics": ARTIFACTS["lgbm"]["test_metrics"], "cm": ARTIFACTS["lgbm"]["cm"], }, "rf": { "config": ARTIFACTS["rf"]["config"], "test_metrics": ARTIFACTS["rf"]["test_metrics"], "cm": ARTIFACTS["rf"]["cm"], }, }) @app.route("/predict", methods=["POST"]) def predict(): body = request.get_json(force=True) model_key = body.get("model", "lgbm") if model_key not in ARTIFACTS: return jsonify({"error": f"Unknown model '{model_key}'. Use 'lgbm' or 'rf'."}), 400 try: row = pd.DataFrame([{ "equipment": body["equipment"], "temperature": float(body["temperature"]), "pressure": float(body["pressure"]), "vibration": float(body["vibration"]), "humidity": float(body["humidity"]), }]) except (KeyError, ValueError) as e: return jsonify({"error": f"Bad input: {e}"}), 400 artifact = ARTIFACTS[model_key] prob = float(artifact["pipeline"].predict_proba(row)[0, 1]) pred = int(prob >= THRESHOLD) confidence = "HIGH" if prob > 0.85 or prob < 0.15 else "MEDIUM" if prob > 0.65 or prob < 0.35 else "LOW" return jsonify({ "model": model_key, "prediction": pred, "probability": round(prob, 4), "confidence": confidence, "threshold": THRESHOLD, "label": "FAULTY" if pred == 1 else "HEALTHY", }) if __name__ == "__main__": app.run(debug=False, host="0.0.0.0", port=7860)