"""FaultSense — ULTRA FAST dataset test script.

Optimized with:
  ✔ requests.Session (connection reuse)
  ✔ ThreadPoolExecutor (parallel requests)
  ✔ itertuples (fast iteration)
  ✔ Same features, no removal
"""

import argparse
import time
from concurrent.futures import ThreadPoolExecutor, as_completed

import numpy as np
import pandas as pd
import requests
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
    roc_auc_score,
)

# ─────────────────────────────────────────────
# CONFIG
# ─────────────────────────────────────────────
MODELS = ["lgbm", "rf"]   # model names sent in the "model" field of each request
THRESHOLD = 0.5           # NOTE(review): not referenced by the code in this file section
BATCH_PRINT = 500         # print a progress line every BATCH_PRINT completed requests
MAX_WORKERS = 50          # concurrent request threads; lower it if the server cannot keep up
REQUEST_DELAY = 0.0       # NOTE(review): not referenced by the code in this file section


# ─────────────────────────────────────────────
# FAST REQUEST FUNCTION
# ─────────────────────────────────────────────
def send_predict(session, base_url, model, row):
    """POST one dataset row to ``{base_url}/predict`` and return the decoded JSON.

    Args:
        session: Object with a ``requests.Session``-compatible ``post`` method.
        base_url: Server root URL, without a trailing ``/predict``.
        model: Model name placed in the payload's ``"model"`` field.
        row: Record exposing ``equipment``, ``temperature``, ``pressure``,
            ``vibration`` and ``humidity`` attributes (e.g. an ``itertuples`` row).

    Returns:
        The decoded JSON body, or ``None`` when the request fails, the server
        answers with an HTTP error status, or the body is not valid JSON.
    """
    payload = {
        "model": model,
        "equipment": row.equipment,
        "temperature": float(row.temperature),
        "pressure": float(row.pressure),
        "vibration": float(row.vibration),
        "humidity": float(row.humidity),
    }
    try:
        resp = session.post(f"{base_url}/predict", json=payload, timeout=10)
        resp.raise_for_status()
        return resp.json()
    except (requests.RequestException, ValueError):
        # Network/HTTP failures and undecodable bodies are expected during a
        # load test and are reported as a failed row.  Anything else (including
        # KeyboardInterrupt) is deliberately allowed to propagate.
        return None


# ─────────────────────────────────────────────
# PARALLEL PROCESSING
# ─────────────────────────────────────────────
def process_row(session, base_url, model, i, row):
    """Run one prediction and turn the outcome into a flat result record.

    Args:
        session: Passed through to ``send_predict``.
        base_url: Passed through to ``send_predict``.
        model: Passed through to ``send_predict``.
        i: Position of ``row`` in the dataset; stored under ``"index"``.
        row: Record with the sensor attributes plus ``faulty`` (the true label).

    Returns:
        A dict with keys ``index``, ``equipment``, ``true_label``,
        ``pred_label``, ``probability``, ``confidence`` and ``correct``.
        When the call failed, the response carried an ``"error"`` key, or the
        response did not contain both ``"prediction"`` and ``"probability"``,
        the record is an error row: ``pred_label`` is ``-1``, ``probability``
        is ``None``, ``confidence`` is ``"ERROR"`` and ``correct`` is ``False``.
    """
    true = int(row.faulty)
    result = send_predict(session, base_url, model, row)

    # A malformed-but-2xx response must not raise here: the exception would
    # surface through future.result() and abort the entire test run.
    unusable = (
        not isinstance(result, dict)
        or "error" in result
        or "prediction" not in result
        or "probability" not in result
    )
    if unusable:
        return {
            "index": i,
            "equipment": row.equipment,
            "true_label": true,
            "pred_label": -1,
            "probability": None,
            "confidence": "ERROR",
            "correct": False,
        }

    pred = result["prediction"]
    prob = result["probability"]
    return {
        "index": i,
        "equipment": row.equipment,
        "true_label": true,
        "pred_label": pred,
        "probability": prob,
        "confidence": result.get("confidence", ""),
        "correct": pred == true,
    }
# ─────────────────────────────────────────────
# METRICS
# ─────────────────────────────────────────────
def evaluate(y_true, y_pred, y_prob, model_name):
    """Compute, print and return binary-classification metrics for one model.

    Args:
        y_true: True labels. Assumed to be 0/1 (the confusion matrix is
            computed over labels ``[0, 1]``) — confirm against the dataset.
        y_pred: Predicted labels, same length as ``y_true``.
        y_prob: Predicted scores used for ROC AUC.
        model_name: Name shown (upper-cased) in the printed report.

    Returns:
        Dict with keys ``accuracy``, ``precision``, ``recall``, ``f1``,
        ``auc``, ``tp``, ``tn``, ``fp`` and ``fn``.  ``auc`` is NaN when
        ``roc_auc_score`` rejects the input with ``ValueError``.
    """
    acc = accuracy_score(y_true, y_pred)
    prec = precision_score(y_true, y_pred, zero_division=0)
    rec = recall_score(y_true, y_pred, zero_division=0)
    f1 = f1_score(y_true, y_pred, zero_division=0)

    try:
        auc = roc_auc_score(y_true, y_prob)
    except ValueError:
        # Raised e.g. when y_true holds a single class or y_prob contains NaN;
        # the remaining metrics are still worth reporting.
        auc = float("nan")

    # Fixing the label set keeps the matrix 2x2 even when one class is absent,
    # so the four-way unpack below cannot fail.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    tn, fp, fn, tp = cm.ravel()

    print("\n" + "═"*50)
    print(f"MODEL: {model_name.upper()}")
    print("═"*50)
    print(f"Accuracy : {acc:.4f}")
    print(f"Precision: {prec:.4f}")
    print(f"Recall : {rec:.4f}")
    print(f"F1 Score : {f1:.4f}")
    print(f"AUC : {auc:.4f}")
    print(f"TP={tp} TN={tn} FP={fp} FN={fn}")
    print("═"*50)

    return {
        "accuracy": acc,
        "precision": prec,
        "recall": rec,
        "f1": f1,
        "auc": auc,
        "tp": tp,
        "tn": tn,
        "fp": fp,
        "fn": fn,
    }


# ─────────────────────────────────────────────
# MAIN
# ─────────────────────────────────────────────
def _run_model(base_url, model, df):
    """Send every row of ``df`` to the server for ``model``; return the records.

    Records are returned in completion order, not dataset order.
    """
    total = len(df)
    records = []
    start = time.perf_counter()

    with requests.Session() as session:
        # The default adapter keeps only 10 pooled connections per host, fewer
        # than MAX_WORKERS; size the pool to the worker count so connections
        # are actually reused.
        adapter = requests.adapters.HTTPAdapter(pool_maxsize=MAX_WORKERS)
        session.mount("http://", adapter)
        session.mount("https://", adapter)

        # NOTE(review): one Session is shared by all worker threads, as in the
        # original script; requests does not formally document this as safe.
        with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
            futures = [
                executor.submit(process_row, session, base_url, model, i, row)
                for i, row in enumerate(df.itertuples(index=False))
            ]
            for done, future in enumerate(as_completed(futures), 1):
                records.append(future.result())
                if done % BATCH_PRINT == 0 or done == total:
                    # Guard against a zero interval on very small runs.
                    elapsed = max(time.perf_counter() - start, 1e-9)
                    print(f"[{done}/{total}] Speed: {done / elapsed:.0f} req/sec")

    elapsed = max(time.perf_counter() - start, 1e-9)
    print(f"\n⏱ Finished in {elapsed:.2f}s ({total/elapsed:.0f} req/sec)\n")
    return records


def _print_comparison(all_metrics):
    """Print a per-metric comparison of every model present in ``all_metrics``."""
    print("\n🏁 FINAL COMPARISON\n")
    if not all_metrics:
        print("No metrics available")
        return

    labels = {"lgbm": "LGBM ⚡", "rf": "RF 🌲"}
    for metric in ["accuracy", "precision", "recall", "f1", "auc"]:
        scores = {name: m[metric] for name, m in all_metrics.items()}
        shown = " ".join(f"{name.upper()}={val:.4f}" for name, val in scores.items())

        # NaN scores (undefined AUC) cannot win and must not poison max().
        ranked = {name: val for name, val in scores.items() if not pd.isna(val)}
        if not ranked:
            winner = "n/a"
        else:
            best = max(ranked.values())
            leaders = [name for name, val in ranked.items() if val == best]
            if len(leaders) == 1:
                winner = labels.get(leaders[0], leaders[0].upper())
            else:
                winner = "TIE"
        print(f"{metric.upper():<10} {shown} → {winner}")


def main():
    """Command-line entry point.

    Flags: ``--url`` (server root), ``--csv`` (dataset path) and ``--limit``
    (use only the first N rows).  For each model in ``MODELS`` the dataset is
    sent to the server, metrics are printed, and two files are written:
    ``test_results_<model>.csv`` and ``test_mismatches_<model>.csv``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--url", default="http://localhost:7860")
    parser.add_argument("--csv", default="synthetic_nim_parallel_10000.csv")
    parser.add_argument("--limit", type=int, default=None)
    args = parser.parse_args()

    print("\n🚀 FAST TEST STARTING...\n")

    df = pd.read_csv(args.csv)
    if args.limit is not None:
        df = df.head(args.limit)

    total = len(df)
    print(f"Rows: {total}")
    if total == 0:
        print("❌ No rows to test")
        return

    # check server
    try:
        requests.get(f"{args.url}/model_info", timeout=5)
        print("✅ Server reachable\n")
    except requests.RequestException:
        print("❌ Server not reachable")
        return

    all_metrics = {}
    for model in MODELS:
        print(f"\n🔥 Testing {model.upper()}...\n")
        records = _run_model(args.url, model, df)

        # Completion order is nondeterministic; restore dataset order so the
        # saved files are reproducible between runs.
        df_results = (
            pd.DataFrame(records).sort_values("index").reset_index(drop=True)
        )
        valid = df_results[df_results["pred_label"] != -1]

        # SAVE FILES first, so raw results survive a metrics failure.
        df_results.to_csv(f"test_results_{model}.csv", index=False)
        valid[~valid["correct"].astype(bool)].to_csv(
            f"test_mismatches_{model}.csv", index=False
        )

        if valid.empty:
            print(f"⚠️ No successful predictions for {model.upper()} — skipping metrics")
            continue

        y_true = valid["true_label"].values
        y_pred = valid["pred_label"].values
        y_prob = valid["probability"].astype(float).values
        all_metrics[model] = evaluate(y_true, y_pred, y_prob, model)

    _print_comparison(all_metrics)


if __name__ == "__main__":
    main()