# techavenger123
# Changes after Testing
# f919f67
"""
FaultSense — ULTRA FAST Dataset Test Script
Optimized with:
✔ requests.Session (connection reuse)
✔ ThreadPoolExecutor (parallel requests)
✔ itertuples (fast iteration)
✔ Same features, no removal
"""
import argparse
import time
import requests
import pandas as pd
import numpy as np
from concurrent.futures import ThreadPoolExecutor, as_completed
from sklearn.metrics import (
accuracy_score, precision_score, recall_score,
f1_score, roc_auc_score, confusion_matrix
)
# ─────────────────────────────────────────────
# CONFIG
# ─────────────────────────────────────────────
# Model identifiers; each one is sent in the "model" field of every /predict
# request and gets its own pass over the dataset.
MODELS = ["lgbm", "rf"]
# NOTE(review): not read anywhere in the visible code — presumably a decision
# threshold; confirm before relying on it.
THRESHOLD = 0.5
# A progress line is printed every BATCH_PRINT completed requests.
BATCH_PRINT = 500
# Size of the thread pool used to send requests concurrently.
MAX_WORKERS = 50  # lower this if the client or server cannot keep up
# NOTE(review): not read anywhere in the visible code.
REQUEST_DELAY = 0.0
# ─────────────────────────────────────────────
# FAST REQUEST FUNCTION
# ─────────────────────────────────────────────
def send_predict(session, base_url, model, row):
    """POST one row's sensor readings to ``{base_url}/predict``.

    Args:
        session: Object with a ``requests.Session``-compatible ``post``
            method; sharing one session lets connections be reused.
        base_url: Server root URL (``/predict`` is appended to it).
        model: Model identifier placed in the payload's ``"model"`` field.
        row: Record exposing ``equipment``, ``temperature``, ``pressure``,
            ``vibration`` and ``humidity`` attributes.

    Returns:
        The decoded JSON response body, or ``None`` when the request failed,
        the server answered with an HTTP error status, or the body could not
        be decoded.
    """
    payload = {
        "model": model,
        "equipment": row.equipment,
        "temperature": float(row.temperature),
        "pressure": float(row.pressure),
        "vibration": float(row.vibration),
        "humidity": float(row.humidity),
    }
    try:
        resp = session.post(f"{base_url}/predict", json=payload, timeout=10)
        resp.raise_for_status()
        return resp.json()
    except Exception:
        # Deliberately best-effort: any request/decoding failure is reported
        # to the caller as None. Unlike a bare ``except:``, this still lets
        # KeyboardInterrupt and SystemExit propagate so the run can be stopped.
        return None
# ─────────────────────────────────────────────
# PARALLEL PROCESSING
# ─────────────────────────────────────────────
def process_row(session, base_url, model, i, row):
    """Request a prediction for one dataset row and build its result record.

    Args:
        session: HTTP session forwarded to ``send_predict``.
        base_url: Server root URL forwarded to ``send_predict``.
        model: Model identifier forwarded to ``send_predict``.
        i: Position of the row in the dataset; stored under ``"index"``.
        row: Record exposing the sensor attributes ``send_predict`` reads,
            plus ``equipment`` and ``faulty``.

    Returns:
        A dict with keys ``index``, ``equipment``, ``true_label``,
        ``pred_label``, ``probability``, ``confidence`` and ``correct``.
        When the request failed or the response is unusable, ``pred_label``
        is ``-1``, ``probability`` is ``None``, ``confidence`` is ``"ERROR"``
        and ``correct`` is ``False``.
    """
    result = send_predict(session, base_url, model, row)
    true = int(row.faulty)

    # Read the fields defensively: a response that is not a JSON object, that
    # reports an error, or that lacks either field is treated as a failed
    # request instead of raising inside the worker thread (which would abort
    # the whole run when the future's result is collected).
    pred = prob = None
    if isinstance(result, dict) and "error" not in result:
        pred = result.get("prediction")
        prob = result.get("probability")
    failed = pred is None or prob is None

    return {
        "index": i,
        "equipment": row.equipment,
        "true_label": true,
        "pred_label": -1 if failed else pred,
        "probability": None if failed else prob,
        "confidence": "ERROR" if failed else result.get("confidence", ""),
        "correct": False if failed else pred == true,
    }
# ─────────────────────────────────────────────
# METRICS
# ─────────────────────────────────────────────
def evaluate(y_true, y_pred, y_prob, model_name):
    """Compute, print and return binary-classification metrics for one model.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, aligned with ``y_true``.
        y_prob: Predicted scores used for ROC AUC, aligned with ``y_true``.
        model_name: Name shown (upper-cased) in the printed report.

    Returns:
        Dict with ``accuracy``, ``precision``, ``recall``, ``f1``, ``auc`` and
        the confusion-matrix counts ``tp``, ``tn``, ``fp``, ``fn``. ``auc`` is
        NaN when it cannot be computed (e.g. only one class in ``y_true``).
    """
    acc = accuracy_score(y_true, y_pred)
    prec = precision_score(y_true, y_pred, zero_division=0)
    rec = recall_score(y_true, y_pred, zero_division=0)
    f1 = f1_score(y_true, y_pred, zero_division=0)

    # ROC AUC is undefined when y_true holds a single class; report NaN
    # rather than letting the whole evaluation fail.
    try:
        auc = roc_auc_score(y_true, y_prob)
    except ValueError:
        auc = float("nan")

    # Pin the label set so the matrix is always 2x2; without it a sample
    # containing only one class yields a 1x1 matrix and the unpacking fails.
    # NOTE(review): assumes labels are encoded as 0 (healthy) / 1 (faulty) —
    # confirm against the dataset.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    tn, fp, fn, tp = cm.ravel()

    print("\n" + "═"*50)
    print(f"MODEL: {model_name.upper()}")
    print("═"*50)
    print(f"Accuracy : {acc:.4f}")
    print(f"Precision: {prec:.4f}")
    print(f"Recall : {rec:.4f}")
    print(f"F1 Score : {f1:.4f}")
    print(f"AUC : {auc:.4f}")
    print(f"TP={tp} TN={tn} FP={fp} FN={fn}")
    print("═"*50)

    return {"accuracy": acc, "precision": prec, "recall": rec,
            "f1": f1, "auc": auc, "tp": tp, "tn": tn, "fp": fp, "fn": fn}
# ─────────────────────────────────────────────
# MAIN
# ─────────────────────────────────────────────
def _collect_predictions(session, base_url, model, df):
    """Send every row of *df* to *model* concurrently; return the result records."""
    total = len(df)
    start = time.perf_counter()
    records = []
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        futures = [
            executor.submit(process_row, session, base_url, model, i, row)
            for i, row in enumerate(df.itertuples(index=False))
        ]
        for done, future in enumerate(as_completed(futures), 1):
            records.append(future.result())
            if done % BATCH_PRINT == 0 or done == total:
                # Clamp so a coarse timer can never cause a division by zero.
                elapsed = max(time.perf_counter() - start, 1e-9)
                print(f"[{done}/{total}] Speed: {done / elapsed:.0f} req/sec")
    elapsed = max(time.perf_counter() - start, 1e-9)
    print(f"\n⏱ Finished in {elapsed:.2f}s ({total/elapsed:.0f} req/sec)\n")
    return records


def _print_comparison(all_metrics):
    """Print the LGBM-vs-RF table, or a notice if either model has no metrics."""
    print("\n🏁 FINAL COMPARISON\n")
    if "lgbm" not in all_metrics or "rf" not in all_metrics:
        print("⚠️ Comparison skipped: metrics are not available for both models")
        return
    for metric in ["accuracy", "precision", "recall", "f1", "auc"]:
        lv = all_metrics["lgbm"][metric]
        rv = all_metrics["rf"][metric]
        winner = "LGBM ⚡" if lv > rv else "RF 🌲"
        print(f"{metric.upper():<10} LGBM={lv:.4f} RF={rv:.4f} {winner}")


def main():
    """Run the dataset against every model in ``MODELS`` and report metrics.

    Command-line options:
        --url:   server root URL (default ``http://localhost:7860``).
        --csv:   dataset path (default ``synthetic_nim_parallel_10000.csv``).
        --limit: use only the first N rows of the dataset.

    For each model this writes ``test_results_<model>.csv`` (every row) and
    ``test_mismatches_<model>.csv`` (successful requests whose prediction
    differs from the true label), then prints a final comparison.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--url", default="http://localhost:7860")
    parser.add_argument("--csv", default="synthetic_nim_parallel_10000.csv")
    parser.add_argument("--limit", type=int, default=None)
    args = parser.parse_args()

    print("\n🚀 FAST TEST STARTING...\n")
    df = pd.read_csv(args.csv)
    if args.limit is not None:
        df = df.head(args.limit)
    total = len(df)
    print(f"Rows: {total}")

    # Fail early with a clear message instead of an opaque error later on.
    if total == 0:
        print("❌ No rows to test")
        return
    required = ("equipment", "temperature", "pressure",
                "vibration", "humidity", "faulty")
    missing = [col for col in required if col not in df.columns]
    if missing:
        print(f"❌ CSV is missing required columns: {', '.join(missing)}")
        return

    # check server
    try:
        requests.get(f"{args.url}/model_info", timeout=5)
    except requests.RequestException:
        print("❌ Server not reachable")
        return
    print("✅ Server reachable\n")

    all_metrics = {}
    for model in MODELS:
        print(f"\n🔥 Testing {model.upper()}...\n")
        # The context manager closes pooled connections when the model is done.
        with requests.Session() as session:
            # The default pool keeps only 10 connections per host; size it to
            # the worker count so every thread can reuse a connection.
            adapter = requests.adapters.HTTPAdapter(pool_maxsize=MAX_WORKERS)
            session.mount("http://", adapter)
            session.mount("https://", adapter)
            records = _collect_predictions(session, args.url, model, df)

        # Futures complete in arbitrary order; sort for reproducible files.
        df_results = pd.DataFrame(records).sort_values("index")
        valid = df_results[df_results["pred_label"] != -1]

        # SAVE FILES (before metrics, so results survive a metrics failure)
        df_results.to_csv(f"test_results_{model}.csv", index=False)
        mismatches = valid[~valid["correct"].astype(bool)]
        mismatches.to_csv(f"test_mismatches_{model}.csv", index=False)

        if valid.empty:
            print(f"⚠️ No successful predictions for {model.upper()}, skipping metrics")
            continue

        y_true = valid["true_label"].values
        y_pred = valid["pred_label"].values
        y_prob = valid["probability"].astype(float).values
        all_metrics[model] = evaluate(y_true, y_pred, y_prob, model)

    _print_comparison(all_metrics)


if __name__ == "__main__":
    main()