from urllib.parse import urlparse, parse_qs

import joblib
import numpy as np
import pandas as pd
from flask import Flask, request, jsonify

# Load model artifacts.
# NOTE: joblib.load unpickles its input and can execute arbitrary code; these
# paths must only ever point at trusted artifact files.
model = joblib.load("model.joblib")
encoders = joblib.load("encoders.joblib")
scaler = joblib.load("scaler.joblib")
if_model = joblib.load("best_if_model.joblib")
svm_model = joblib.load("best_svm_model.joblib")
feature_names = joblib.load("feature_names.joblib")

# Initialize the Flask app
app = Flask("Bot Detection API")

# Features that are computed at request time from the anomaly detectors
# instead of being read from the request payload.
_DERIVED_FEATURES = ("iso_anomaly_prob", "svm_anomaly_prob")


def parse_url_params(url):
    """Return the query-string parameters of *url* as a flat dict.

    Only the first value of a repeated parameter is kept. Parameters with
    blank values are dropped (``parse_qs`` default behaviour).

    Args:
        url: URL (or bare string) whose query component should be parsed.

    Returns:
        dict mapping parameter name to its first value; an empty dict when
        the URL cannot be parsed.
    """
    try:
        query = urlparse(url).query
        # parse_qs always maps each key to a non-empty list of values.
        return {key: values[0] for key, values in parse_qs(query).items()}
    except (ValueError, TypeError, AttributeError):
        # Best effort: malformed URLs (ValueError) or non-string input
        # (TypeError / AttributeError) simply contribute no parameters.
        return {}


def _scaler_columns():
    """Return the column names, in order, that the scaler should receive."""
    fitted = getattr(scaler, "feature_names_in_", None)
    if fitted is not None:
        return list(fitted)
    # NOTE(review): without recorded names we assume the scaler was fit on
    # feature_names minus the derived anomaly columns, in that order --
    # confirm against the training pipeline.
    return [col for col in feature_names if col not in _DERIVED_FEATURES]


def _anomaly_prob(scores):
    """Min-max normalise *scores* and return ``1 - normalised`` for row 0."""
    lowest = scores.min()
    span = scores.max() - lowest + 1e-9  # epsilon avoids division by zero
    normalised = np.clip((scores - lowest) / span, 0, 1)
    # Index explicitly: calling float() on a non-0-d array is deprecated
    # in recent NumPy releases.
    return 1 - float(np.ravel(normalised)[0])


def prepare_features(row_dict):
    """Build the single-row model input frame for one request payload.

    Steps: merge the base fields with the query parameters parsed from the
    ``d`` URL, fill missing expected features with ``"unknown"``, encode
    categorical columns, scale, and append the two anomaly-probability
    columns.

    Args:
        row_dict: mapping with optional keys ``region``, ``browser``,
            ``device`` and ``d`` (a URL whose query parameters become
            additional features).

    Returns:
        A one-row ``pandas.DataFrame`` with columns ``feature_names``.
    """
    base = {
        'region': row_dict.get('region', 'unknown'),
        'browser': row_dict.get('browser', 'unknown'),
        'device': row_dict.get('device', 'unknown'),
        'd': row_dict.get('d', ''),
    }

    # Query parameters are merged last, so they win over same-named base keys.
    combined = {**base, **parse_url_params(base['d'])}
    combined.pop('d', None)

    # Fill any missing expected features
    for col in feature_names:
        if col not in _DERIVED_FEATURES:
            combined.setdefault(col, "unknown")

    df = pd.DataFrame([combined])

    # Encode categoricals; everything else is coerced to a number (0 on failure)
    for col in df.columns:
        if col in encoders:
            try:
                df[col] = encoders[col].transform(df[col].astype(str))
            except ValueError:
                # Label not seen during fitting: fall back to "unknown".
                df[col] = encoders[col].transform(["unknown"])[0]
        else:
            df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)

    # Scale and compute anomaly scores. Only the fitted columns, in their
    # fitted order, are passed on: the request controls both which query
    # parameters exist and their order, and must not change the matrix the
    # scaler sees.
    df_scaled = scaler.transform(df[_scaler_columns()])
    iso_score = if_model.decision_function(df_scaled)
    svm_score = svm_model.decision_function(df_scaled)

    # NOTE(review): the frame has exactly one row, so min == max and the
    # normalised score is always 0, i.e. both probabilities are always 1.0.
    # Presumably training normalised over the whole dataset; a real fix
    # needs the training-time score bounds to be persisted and loaded.
    df['iso_anomaly_prob'] = _anomaly_prob(iso_score)
    df['svm_anomaly_prob'] = _anomaly_prob(svm_score)

    return df[feature_names]


@app.get('/')
def home():
    """Liveness endpoint: return a plain-text status message."""
    return "✅ Bot Detection API is live."

@app.post('/v1/predict')
def predict():
    """Classify one request payload as bot traffic or legitimate traffic.

    Expects a JSON object in the request body, which is passed to
    ``prepare_features``. The second column of ``model.predict_proba`` is
    used as the bot probability, with a 0.5 decision threshold.

    Returns:
        JSON with ``"Prediction"`` (``"Bot Attack"`` or ``"Legitimate"``) and
        ``"Bot Probability"`` (rounded to 4 decimals), or a 400 response with
        an ``"error"`` message when the body is not a JSON object.
    """
    row = request.get_json()
    # Valid JSON that is not an object (null, list, string, number) would
    # otherwise fail inside prepare_features and surface as a 500.
    if not isinstance(row, dict):
        return jsonify({"error": "Request body must be a JSON object."}), 400

    X = prepare_features(row)
    pred_prob = float(model.predict_proba(X)[0][1])
    is_bot = pred_prob >= 0.5

    return jsonify({
        "Prediction": "Bot Attack" if is_bot else "Legitimate",
        "Bot Probability": round(pred_prob, 4),
    })