File size: 2,593 Bytes
a30886e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import numpy as np
import joblib
import pandas as pd
from flask import Flask, request, jsonify
from urllib.parse import urlparse, parse_qs

# Load model artifacts
# Every path is relative, so it resolves against the process's working
# directory; loading happens once, when this module is imported.
# NOTE(review): joblib.load unpickles its input, so these files must come
# from a trusted source.
model = joblib.load("model.joblib")  # final estimator; predict() calls its predict_proba()
encoders = joblib.load("encoders.joblib")  # looked up by column name; entries expose transform()
scaler = joblib.load("scaler.joblib")  # applied via transform() before the anomaly detectors
if_model = joblib.load("best_if_model.joblib")  # queried via decision_function()
svm_model = joblib.load("best_svm_model.joblib")  # queried via decision_function()
feature_names = joblib.load("feature_names.joblib")  # columns (and order) handed to `model`

# Initialize the Flask app
# NOTE(review): Flask's first argument is the import name (usually __name__);
# a display string is passed here instead — confirm this is intended.
app = Flask("Bot Detection API")

def parse_url_params(url):
    """Extract the query-string parameters of *url* as a flat dict.

    Each parameter maps to its first value only; repeated keys keep the
    first occurrence. Parameters with blank values are omitted because
    ``parse_qs`` drops them by default.

    Any failure while parsing (malformed URL, unsupported input type) is
    treated as "no parameters" and yields an empty dict.
    """
    try:
        query_string = urlparse(url).query
        first_values = {}
        for key, values in parse_qs(query_string).items():
            if isinstance(values, list):
                first_values[key] = values[0]
            else:
                first_values[key] = values
        return first_values
    except Exception:
        return {}

# Model inputs that are computed in prepare_features from the anomaly
# detectors instead of being read from the request.
_ANOMALY_FEATURES = ("iso_anomaly_prob", "svm_anomaly_prob")


def _anomaly_probability(scores):
    """Return ``1 - minmax(scores)`` for the first entry of *scores*.

    NOTE(review): the min and max are taken over *scores* itself. With the
    single-row batch that ``prepare_features`` passes in, min == max, so the
    normalised value is 0 and the result is always 1.0 for finite scores.
    A meaningful value would need bounds captured from a reference set,
    which are not among the artifacts this file loads, so the original
    arithmetic is kept unchanged.
    """
    scores = np.asarray(scores, dtype=float).ravel()
    low = scores.min()
    high = scores.max()
    normalised = np.clip((scores - low) / (high - low + 1e-9), 0, 1)
    # Index explicitly: calling float() on a 1-D array is deprecated since
    # NumPy 1.25.
    return 1 - float(normalised[0])


def _scaler_input(df):
    """Return *df* restricted and ordered to the columns the scaler was fitted on.

    Unknown query parameters in the request become extra columns, and the
    column order follows the request, either of which can make
    ``scaler.transform`` reject or misread the frame. When the scaler
    recorded its fit-time column names and *df* has all of them, select
    exactly those; otherwise pass *df* through untouched.
    """
    fitted_columns = getattr(scaler, "feature_names_in_", None)
    if fitted_columns is None:
        return df
    fitted_columns = list(fitted_columns)
    if not set(fitted_columns).issubset(df.columns):
        return df
    return df[fitted_columns]


def prepare_features(row_dict):
    """Build the single-row feature frame that ``model`` consumes.

    Args:
        row_dict: Mapping read with ``.get``. Recognised keys are
            ``region``, ``browser``, ``device`` (each defaulting to
            ``"unknown"``) and ``d``, a URL whose query parameters are
            merged in as additional features. A query parameter with the
            same name as one of the base keys overrides it.

    Returns:
        A one-row ``pandas.DataFrame`` with the columns of ``feature_names``,
        in that order, including the two computed anomaly columns.

    Raises:
        Whatever the fallback ``transform(["unknown"])``, the scaler or the
        anomaly detectors raise; a ``KeyError`` if a name in
        ``feature_names`` is still missing from the frame.
    """
    base = {
        'region': row_dict.get('region', 'unknown'),
        'browser': row_dict.get('browser', 'unknown'),
        'device': row_dict.get('device', 'unknown'),
        'd': row_dict.get('d', '')
    }
    query_params = parse_url_params(base['d'])
    combined = {**base, **query_params}
    combined.pop('d', None)

    # Fill any missing expected features; the anomaly columns are added
    # further down, after scoring.
    for col in feature_names:
        if col not in _ANOMALY_FEATURES:
            combined.setdefault(col, "unknown")

    df = pd.DataFrame([combined])

    # Encode categoricals; everything else is coerced to a number (0 when
    # the value is not numeric).
    for col in df.columns:
        if col in encoders:
            try:
                df[col] = encoders[col].transform(df[col].astype(str))
            except Exception:
                # Best-effort fallback (e.g. a label the encoder has not
                # seen): encode the row as "unknown" instead of failing.
                df[col] = encoders[col].transform(["unknown"])[0]
        else:
            df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)

    # Scale and compute anomaly scores
    df_scaled = scaler.transform(_scaler_input(df))
    iso_score = if_model.decision_function(df_scaled)
    svm_score = svm_model.decision_function(df_scaled)

    df['iso_anomaly_prob'] = _anomaly_probability(iso_score)
    df['svm_anomaly_prob'] = _anomaly_probability(svm_score)

    return df[feature_names]

@app.get('/')
def home():
    """Liveness endpoint: answer GET / with a fixed plain-text status line."""
    status_message = "✅ Bot Detection API is live."
    return status_message

@app.post('/v1/predict')
def predict():
    """Score one request and report whether it looks like a bot.

    Expects a JSON object in the request body; it is passed to
    ``prepare_features`` and the resulting frame to ``model.predict_proba``.

    Returns:
        A JSON response with ``"Prediction"`` (``"Bot Attack"`` when the
        probability of class 1 is at least 0.5, otherwise ``"Legitimate"``)
        and ``"Bot Probability"`` rounded to four decimals. If the body is
        missing, is not valid JSON, or is not a JSON object, a JSON error
        with HTTP status 400 is returned instead.
    """
    # silent=True makes Flask return None for a missing or unparsable body
    # instead of aborting, so all bad payloads get the same JSON error.
    row = request.get_json(silent=True)
    if not isinstance(row, dict):
        # Lists, strings, numbers and null have no .get and would otherwise
        # crash inside prepare_features with a 500.
        return jsonify({"error": "Request body must be a JSON object."}), 400

    X = prepare_features(row)
    pred_prob = model.predict_proba(X)[0][1]
    pred_label = int(pred_prob >= 0.5)

    return jsonify({
        "Prediction": "Bot Attack" if pred_label else "Legitimate",
        "Bot Probability": round(float(pred_prob), 4)
    })