Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files
- Dockerfile +19 -0
- app.py +82 -0
- requirements.txt +11 -0
Dockerfile
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Use a slim Python base image
FROM python:3.10-slim

# Set working directory
WORKDIR /app

# Install dependencies first so Docker caches this layer across code-only changes
COPY requirements.txt /app/
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt

# Copy the application code into the container
COPY . /app

# Expose the serving port
EXPOSE 7860

# app.py defines a Flask (WSGI) application, not a Streamlit script, so it
# must be served by a WSGI server. gunicorn is already pinned in
# requirements.txt; `streamlit run app.py` would not start the API correctly.
CMD ["gunicorn", "--bind", "0.0.0.0:7860", "app:app"]
|
app.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
import joblib
import pandas as pd
from flask import Flask, request, jsonify
from urllib.parse import urlparse, parse_qs

# Load model artifacts (expected to sit next to app.py in the image/workdir).
model = joblib.load("model.joblib")                   # final bot-vs-legitimate classifier (exposes predict_proba)
encoders = joblib.load("encoders.joblib")             # mapping of column name -> fitted label encoder
scaler = joblib.load("scaler.joblib")                 # feature scaler fitted on the training matrix
if_model = joblib.load("best_if_model.joblib")        # anomaly detector — presumably an Isolation Forest ("if"); TODO confirm
svm_model = joblib.load("best_svm_model.joblib")      # anomaly detector — presumably a One-Class SVM; TODO confirm
feature_names = joblib.load("feature_names.joblib")   # column order the classifier expects at predict time

# Initialize the Flask app
app = Flask("Bot Detection API")
|
| 17 |
+
|
| 18 |
+
def parse_url_params(url):
    """Extract the query-string parameters of *url* as a flat dict.

    ``parse_qs`` maps each key to a list of values; only the first value
    of each key is kept. Returns an empty dict when the URL cannot be
    parsed at all.
    """
    try:
        raw_params = parse_qs(urlparse(url).query)
    except Exception:
        return {}
    flattened = {}
    for key, values in raw_params.items():
        flattened[key] = values[0] if isinstance(values, list) else values
    return flattened
|
| 24 |
+
|
| 25 |
+
def prepare_features(row_dict):
    """Build the model-ready single-row DataFrame for one request payload.

    Merges the top-level categorical fields with any query-string
    parameters embedded in the 'd' URL, label-encodes/scales them,
    appends the two anomaly-probability features, and returns the columns
    in the order the classifier was trained on (``feature_names``).
    """
    base = {
        'region': row_dict.get('region', 'unknown'),
        'browser': row_dict.get('browser', 'unknown'),
        'device': row_dict.get('device', 'unknown'),
        'd': row_dict.get('d', '')
    }
    # Query params override base keys on collision; 'd' itself is not a feature.
    combined = {**base, **parse_url_params(base['d'])}
    combined.pop('d', None)

    # Fill any expected feature the request did not supply (the two anomaly
    # probabilities are computed below, not taken from the payload).
    for col in feature_names:
        if col not in combined and col not in ["iso_anomaly_prob", "svm_anomaly_prob"]:
            combined[col] = "unknown"

    df = pd.DataFrame([combined])

    # Encode categoricals with the training-time encoders; unseen labels fall
    # back to the encoding of the literal "unknown" label.
    for col in df.columns:
        if col in encoders:
            try:
                df[col] = encoders[col].transform(df[col].astype(str))
            except ValueError:
                # Narrowed from a bare except: LabelEncoder raises ValueError
                # on unseen labels. NOTE(review): this fallback itself raises
                # if "unknown" was not in the training classes — TODO confirm
                # the encoders were fitted with an "unknown" category.
                df[col] = encoders[col].transform(["unknown"])[0]
        else:
            df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)

    # Scale and derive anomaly probabilities from the two detectors.
    df_scaled = scaler.transform(df)
    iso_score = if_model.decision_function(df_scaled)
    svm_score = svm_model.decision_function(df_scaled)
    # NOTE(review): min-max normalising over a single-row batch is degenerate:
    # score.min() == score.max(), so (score - min)/(max - min + 1e-9) is 0 and
    # both probabilities are always 1.0. The normalisation bounds should come
    # from the training-set score distribution instead — TODO confirm intent;
    # behaviour preserved here pending that decision.
    iso_anomaly_prob = 1 - float(np.clip((iso_score - iso_score.min()) / (iso_score.max() - iso_score.min() + 1e-9), 0, 1))
    svm_anomaly_prob = 1 - float(np.clip((svm_score - svm_score.min()) / (svm_score.max() - svm_score.min() + 1e-9), 0, 1))

    df['iso_anomaly_prob'] = iso_anomaly_prob
    df['svm_anomaly_prob'] = svm_anomaly_prob

    # Reorder to the training-time column order.
    return df[feature_names]
|
| 64 |
+
|
| 65 |
+
@app.get('/')
def home():
    """Liveness endpoint: report that the API is serving."""
    status_message = "✅ Bot Detection API is live."
    return status_message
|
| 68 |
+
|
| 69 |
+
@app.post('/v1/predict')
def predict():
    """Score one JSON payload and return the bot verdict.

    Expects a JSON body with the raw request fields (region, browser,
    device, d). Responds with the predicted label and the bot-class
    probability rounded to four decimal places.
    """
    payload = request.get_json()
    features = prepare_features(payload)
    bot_probability = float(model.predict_proba(features)[0][1])
    verdict = "Bot Attack" if bot_probability >= 0.5 else "Legitimate"
    return jsonify({
        "Prediction": verdict,
        "Bot Probability": round(bot_probability, 4)
    })
|
| 80 |
+
|
| 81 |
+
if __name__ == '__main__':
    # Local development entry point (the container serves via gunicorn/streamlit
    # per the Dockerfile). Bind explicitly and default to the port the image
    # exposes (7860) rather than Flask's default 5000; keep the Werkzeug
    # debugger off — debug=True allows arbitrary code execution if reachable.
    import os
    app.run(host="0.0.0.0", port=int(os.environ.get("PORT", "7860")), debug=False)
|
requirements.txt
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
pandas==2.2.2
|
| 2 |
+
numpy==2.0.2
|
| 3 |
+
scikit-learn==1.6.1
|
| 4 |
+
xgboost==2.1.4
|
| 5 |
+
joblib==1.4.2
|
| 6 |
+
Werkzeug==2.2.2
|
| 7 |
+
flask==2.2.2
|
| 8 |
+
gunicorn==20.1.0
|
| 9 |
+
requests==2.28.1
|
| 10 |
+
streamlit==1.43.2
|
| 11 |
+
uvicorn[standard]
|