Spaces:
Sleeping
Sleeping
File size: 5,033 Bytes
411bb17 0d5ae83 e858d0c 24d1985 70dd390 411bb17 24d1985 70dd390 5015d4c 64c96d4 5015d4c 64c96d4 24d1985 70dd390 24d1985 50f30ea 70dd390 a891df0 50f30ea 6ce8828 411bb17 50f30ea 411bb17 085c136 411bb17 70dd390 411bb17 e858d0c 70dd390 411bb17 488e534 70dd390 085c136 9f83410 411bb17 70dd390 488e534 70dd390 488e534 70dd390 085c136 48a11d0 6ce8828 48a11d0 24d1985 085c136 48a11d0 6ce8828 48a11d0 e858d0c 48a11d0 24d1985 70dd390 5c8c5f9 e2c2f53 70dd390 24d1985 70dd390 488e534 70dd390 48a11d0 70dd390 a891df0 488e534 50f30ea 411bb17 6ce8828 50f30ea 70dd390 488e534 70dd390 6ce8828 70dd390 1a5ec58 6ce8828 70dd390 6ce8828 e858d0c 6ce8828 70dd390 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 |
import numpy as np
import pandas as pd
import joblib
import shap
import traceback
from flask import Flask, request, jsonify
from urllib.parse import urlparse, parse_qs
# Initialize Flask application instance; route handlers below are registered on it.
app = Flask("Bot detector")
@app.get('/')
def home():
    """Root endpoint: simple liveness/landing message for the API."""
    return "✅ Welcome to the Bot Prediction API!"
# Load models and utilities (trained artifacts serialized with joblib).
model = joblib.load("model.joblib")  # primary supervised classifier (tree-based: see TreeExplainer below)
encoders = joblib.load("encoders.joblib")  # per-column label encoders used by prepare_features
scaler = joblib.load("scaler.joblib")  # scaler applied before the anomaly models
if_model = joblib.load("best_if_model.joblib")  # anomaly detector ("if" = isolation forest, per filename — confirm)
svm_model = joblib.load("best_svm_model.joblib")  # anomaly detector (one-class SVM, per filename — confirm)
iso_scaler = joblib.load("iso_scaler.joblib")  # rescales if_model decision scores into [0, 1]
svm_scaler = joblib.load("svm_scaler.joblib")  # rescales svm_model decision scores into [0, 1]
feature_names = joblib.load("feature_names.joblib")  # column order expected by `model`
explainer = shap.TreeExplainer(model)  # SHAP explainer over the tree model
def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)).

    Implemented as exp(-logaddexp(0, -x)), which is algebraically
    identical but numerically stable: the naive form overflows in
    np.exp(-x) for large negative x (RuntimeWarning, inf intermediate),
    while logaddexp is well-behaved over the whole real line.

    Accepts scalars or arrays; broadcasts like NumPy ufuncs.
    """
    return np.exp(-np.logaddexp(0.0, -x))
def parse_url_params(url):
    """Extract query-string parameters from *url* as a flat dict.

    Multi-valued parameters are collapsed to their first value.
    Any failure while parsing (e.g. a non-string argument) yields
    an empty dict rather than raising.
    """
    try:
        raw_params = parse_qs(urlparse(url).query)
        flat = {}
        for key, value in raw_params.items():
            flat[key] = value[0] if isinstance(value, list) else value
        return flat
    except Exception:
        return {}
def prepare_features(row_dict):
    """Build the single-row, model-ready feature frame for one request.

    Combines the base request fields with query-string parameters parsed
    from the 'd' (URL) field, label-encodes categorical columns, and
    appends anomaly probabilities derived from the two anomaly models.

    Parameters
    ----------
    row_dict : dict
        Raw JSON payload. Recognized keys: 'region', 'browser', 'device',
        'd' (a URL whose query params become extra features). Missing
        keys default to "unknown" (or "" for 'd').

    Returns
    -------
    pandas.DataFrame
        One-row frame with columns ordered exactly as `feature_names`.
    """
    base = {
        'region': row_dict.get('region', 'unknown'),
        'browser': row_dict.get('browser', 'unknown'),
        'device': row_dict.get('device', 'unknown'),
        'd': row_dict.get('d', '')
    }
    query_params = parse_url_params(base['d'])
    combined = {**base, **query_params}
    combined.pop('d', None)  # the raw URL itself is not a model feature
    # Ensure every expected column exists; the two anomaly probabilities
    # are computed below, so they are deliberately not back-filled here.
    for col in feature_names:
        if col not in combined and col not in ["iso_anomaly_prob", "svm_anomaly_prob"]:
            combined[col] = "unknown"
    df = pd.DataFrame([combined])
    for col in df.columns:
        if col in encoders:
            try:
                df[col] = encoders[col].transform(df[col].astype(str))
            except ValueError:
                # Unseen category: LabelEncoder.transform raises ValueError;
                # fall back to the encoding of the "unknown" sentinel.
                df[col] = encoders[col].transform(["unknown"])[0]
        else:
            # Non-categorical column: coerce to numeric, defaulting to 0.
            df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)
    df_scaled = scaler.transform(df)
    # decision_function scores are min-max scaled then inverted so that
    # higher values mean "more anomalous".
    iso_score = if_model.decision_function(df_scaled).reshape(-1, 1)
    svm_score = svm_model.decision_function(df_scaled).reshape(-1, 1)
    iso_prob = float(1 - iso_scaler.transform(iso_score)[0][0])
    svm_prob = float(1 - svm_scaler.transform(svm_score)[0][0])
    df['iso_anomaly_prob'] = iso_prob
    df['svm_anomaly_prob'] = svm_prob
    return df[feature_names]
def generate_shap_bot_attack_paragraph(index, shap_values, X, encoders=None, class_index=1, top_n=10):
    """Render a human-readable SHAP explanation for one prediction.

    Parameters
    ----------
    index : int
        Row of `X` / `shap_values` to explain.
    shap_values : list | numpy.ndarray
        Output of explainer.shap_values(X); a list of per-class arrays
        for multi-output models, a single array otherwise.
    X : pandas.DataFrame
        Encoded feature frame that was passed to the model.
    encoders : dict, optional
        Per-column label encoders used to decode encoded feature values
        back into readable strings.
    class_index : int
        Class whose log-odds contributions are reported (1 = bot).
    top_n : int
        Number of highest-|SHAP| features to include.

    Returns
    -------
    str
        Multi-line explanation text.
    """
    # SHAP returns a list (one array per class) for multi-output models;
    # the expected_value follows the same shape.
    if isinstance(shap_values, list):
        shap_vals = shap_values[class_index][index]
        base_val = explainer.expected_value[class_index]
    else:
        shap_vals = shap_values[index]
        base_val = explainer.expected_value
    # expected_value may still be array-like depending on shap version;
    # reduce it to a scalar for the chosen class.
    if not np.isscalar(base_val) and len(np.shape(base_val)) > 0:
        base_val = base_val[class_index] if len(base_val) > class_index else base_val[0]
    # Per-feature SHAP entries may themselves be 1-element arrays.
    shap_scalar_vals = [float(s[0]) if isinstance(s, np.ndarray) else float(s) for s in shap_vals]
    x_vals = X.iloc[index]
    feature_names = X.columns
    decoded_vals = {}
    for col in feature_names:
        val = x_vals[col]
        try:
            if encoders and col in encoders:
                decoded_vals[col] = encoders[col].inverse_transform([int(val)])[0]
            else:
                decoded_vals[col] = val
        except (ValueError, TypeError):
            # Decoding failed (non-integer code or label unknown to the
            # encoder): show the raw encoded value instead.
            decoded_vals[col] = val
    feature_contribs = list(zip(feature_names, decoded_vals.values(), shap_scalar_vals))
    feature_contribs = sorted(feature_contribs, key=lambda x: abs(x[2]), reverse=True)[:top_n]
    positive_impacts = []
    negative_impacts = []
    for fname, fval, sval in feature_contribs:
        line = f" - {fname:20} = {str(fval):<20} contributed {sval:.4f}"
        if sval > 0:
            positive_impacts.append(line)
        elif sval < 0:
            negative_impacts.append(line)
    final_log_odds = base_val + np.sum(shap_scalar_vals)
    explanation = f"\n==== SHAP Explanation for Bot Attack Classification ====\n"
    explanation += f"Base value (log-odds for class 1) : {base_val:.4f}\n"
    explanation += f"Predicted log-odds (class 1) : {final_log_odds:.4f}\n\n"
    if positive_impacts:
        explanation += "🔺 Factors that INCREASED Bot Likelihood:\n" + "\n".join(positive_impacts) + "\n\n"
    if negative_impacts:
        explanation += "🔻 Factors that DECREASED Bot Likelihood:\n" + "\n".join(negative_impacts) + "\n\n"
    explanation += "📝 These features collectively explain the model's decision.\n"
    return explanation
@app.post('/v1/predict')
def predict():
    """Classify one JSON request payload and return prediction + SHAP text.

    Expects a JSON body with the raw request fields (region, browser,
    device, d). Responds with the predicted label and a human-readable
    SHAP explanation; any failure is logged and returned as a 500 with
    the error message.
    """
    try:
        payload = request.get_json()
        features = prepare_features(payload)
        class_probs = model.predict_proba(features)[0]
        predicted_class = int(model.classes_[np.argmax(class_probs)])
        shap_vals = explainer.shap_values(features)
        shap_text = generate_shap_bot_attack_paragraph(0, shap_vals, features, encoders)
        label = "Bot Attack" if predicted_class == 1 else "Legitimate"
        return jsonify({
            "Prediction": label,
            "SHAP Explanation": shap_text
        })
    except Exception as e:
        traceback.print_exc()
        return jsonify({"error": str(e)}), 500
|