Spaces:
Sleeping
Sleeping
| import numpy as np | |
| import joblib | |
| import pandas as pd | |
| from flask import Flask, request, jsonify | |
| from urllib.parse import urlparse, parse_qs | |
# Deserialize every trained artifact once, at import time, so request handlers
# reuse the same in-memory objects. The paths are relative to the working
# directory. The generator is consumed left to right, so the files are loaded
# in the order listed.
(
    model,
    encoders,
    scaler,
    if_model,
    svm_model,
    feature_names,
) = (
    joblib.load(artifact_path)
    for artifact_path in (
        "model.joblib",
        "encoders.joblib",
        "scaler.joblib",
        "best_if_model.joblib",
        "best_svm_model.joblib",
        "feature_names.joblib",
    )
)

# Flask application object for the API.
app = Flask("Bot Detection API")
def parse_url_params(url):
    """Return the query-string parameters of ``url`` as a flat dict.

    Each parameter maps to its first value only. Parsing is best-effort:
    any exception raised while parsing yields an empty dict instead of
    propagating to the caller.
    """
    try:
        parsed_query = parse_qs(urlparse(url).query)
    except Exception:
        return {}

    flattened = {}
    for name, values in parsed_query.items():
        # parse_qs yields a list per key; keep only the first occurrence.
        flattened[name] = values[0] if isinstance(values, list) else values
    return flattened
def _anomaly_prob(scores):
    """Min-max normalise ``scores`` over the batch and return ``1 - value`` for row 0.

    NOTE(review): when ``scores`` holds a single row, min == max, so the
    normalised value is 0 and this returns 1.0 whatever the raw score was.
    A meaningful probability would need the score bounds seen at training
    time, which are not available here. The behaviour is left unchanged.
    """
    lowest, highest = scores.min(), scores.max()
    normalised = np.clip((scores - lowest) / (highest - lowest + 1e-9), 0, 1)
    # Pull the element out explicitly: calling float() on a 1-element ndarray
    # is deprecated in recent NumPy releases.
    return 1 - float(np.ravel(normalised)[0])


def prepare_features(row_dict):
    """Turn one raw request record into the single-row frame the classifier expects.

    Steps:
      1. Read ``region``, ``browser``, ``device`` and ``d`` from ``row_dict``.
         ``d`` is treated as a URL, and its query parameters are merged in as
         extra fields. A query parameter overrides a same-named base field.
      2. Keep only the columns the scaler expects, in that order, filling any
         missing one with ``"unknown"``.
      3. Encode columns that have an entry in ``encoders``; coerce every other
         column to numeric, with 0 where a value cannot be parsed.
      4. Scale the frame, score it with the isolation-forest and SVM models,
         and append ``iso_anomaly_prob`` / ``svm_anomaly_prob``.

    Args:
        row_dict: mapping with optional keys ``region``, ``browser``,
            ``device`` and ``d``.

    Returns:
        A one-row ``pandas.DataFrame`` with columns ordered as ``feature_names``.
    """
    anomaly_cols = ("iso_anomaly_prob", "svm_anomaly_prob")

    base = {
        'region': row_dict.get('region', 'unknown'),
        'browser': row_dict.get('browser', 'unknown'),
        'device': row_dict.get('device', 'unknown'),
        'd': row_dict.get('d', ''),
    }
    query_params = parse_url_params(base['d'])
    combined = {**base, **query_params}
    combined.pop('d', None)

    # Build the frame from exactly the columns the scaler expects, in that
    # order. Otherwise any unrecognised query parameter becomes an extra
    # column, and column order follows dict insertion order, so
    # scaler.transform() sees the wrong feature count or order.
    # Prefer the names recorded on the fitted scaler. Fall back to
    # feature_names minus the two derived anomaly columns (assumes the scaler
    # was fit on those columns in that order -- TODO confirm against training).
    input_cols = getattr(scaler, "feature_names_in_", None)
    if input_cols is None:
        input_cols = [col for col in feature_names if col not in anomaly_cols]
    else:
        input_cols = list(input_cols)

    df = pd.DataFrame([{col: combined.get(col, "unknown") for col in input_cols}])

    # Encode categoricals; everything else is coerced to a number.
    for col in df.columns:
        if col in encoders:
            encoder = encoders[col]
            try:
                df[col] = encoder.transform(df[col].astype(str))
            except Exception:
                # Best-effort fallback (typically a label unseen at fit time):
                # use the code for "unknown". This was a bare ``except:``,
                # which also swallowed KeyboardInterrupt and SystemExit.
                df[col] = encoder.transform(["unknown"])[0]
        else:
            df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)

    # Scale and compute anomaly scores.
    df_scaled = scaler.transform(df)
    iso_score = if_model.decision_function(df_scaled)
    svm_score = svm_model.decision_function(df_scaled)

    df['iso_anomaly_prob'] = _anomaly_prob(iso_score)
    df['svm_anomaly_prob'] = _anomaly_prob(svm_score)

    # The returned frame holds the encoded (unscaled) values plus the two
    # anomaly columns. The scaled array is used only for anomaly scoring.
    return df[feature_names]
def home():
    """Return the plain-text liveness message for the API.

    NOTE(review): no route registration for this view is visible in this
    part of the file -- confirm it is registered on ``app`` elsewhere.
    """
    status_message = "✅ Bot Detection API is live."
    return status_message
def predict():
    """Score the JSON request body and return the bot / legitimate verdict.

    The body must be a JSON object; it is passed to ``prepare_features`` and
    the resulting row is scored with ``model.predict_proba``. Column 1 of the
    result is reported as the bot probability, and a value of 0.5 or more is
    labelled "Bot Attack".

    Returns:
        A JSON response with keys ``"Prediction"`` and ``"Bot Probability"``
        (rounded to 4 decimals), or a 400 JSON error when the body is valid
        JSON but not an object.

    NOTE(review): no route registration for this view is visible in this
    part of the file -- confirm it is registered on ``app`` elsewhere.
    """
    row = request.get_json()

    # prepare_features() calls row.get(...). A body such as ``null``, a list
    # or a bare scalar parses fine but would raise there and surface as a 500.
    if not isinstance(row, dict):
        return jsonify({"error": "Request body must be a JSON object."}), 400

    X = prepare_features(row)
    pred_prob = float(model.predict_proba(X)[0][1])
    pred_label = int(pred_prob >= 0.5)
    return jsonify({
        "Prediction": "Bot Attack" if pred_label else "Legitimate",
        "Bot Probability": round(pred_prob, 4),
    })