# ids-backend / app.py
# Uploaded by saidimn, commit fb761f3: "Deploy IDS backend"
from flask import Flask, request, jsonify
from flask_cors import CORS
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import joblib
from collections import Counter
import os
from pathlib import Path
from huggingface_hub import hf_hub_download
# Flask application object; the routes and hooks below are registered on it.
app = Flask(__name__)
# Enable CORS through flask_cors, accepting requests from any origin.
CORS(app, origins="*")
@app.after_request
def after_request(response):
    """Stamp permissive CORS headers onto every outgoing response.

    Registered as a Flask ``after_request`` hook. Each header is assigned
    (overwriting any value already present under that name) and the same
    response object is returned.
    """
    cors_headers = {
        'Access-Control-Allow-Origin': '*',
        'Access-Control-Allow-Headers': 'Content-Type',
        'Access-Control-Allow-Methods': 'GET, POST, OPTIONS',
    }
    for header_name, header_value in cors_headers.items():
        response.headers[header_name] = header_value
    return response
# ══════════════════════════════════════════════════════════════════
# CONFIGURATION
# ══════════════════════════════════════════════════════════════════
# Hugging Face repository the model artefacts are downloaded from.
HF_REPO_ID = "saidimn/ids-cnn-cicids2017"
# Local directory (next to this file) where downloaded models are stored.
CACHE_DIR = Path(__file__).parent / "model_cache"
# Create the directory at import time; a pre-existing directory is fine.
CACHE_DIR.mkdir(exist_ok=True)
# ══════════════════════════════════════════════════════════════════
# ARCHITECTURES CNN-1D
# ══════════════════════════════════════════════════════════════════
class CNN1D_Binary(nn.Module):
    """1-D convolutional network ending in a two-logit linear head.

    The feature extractor is five Conv1d/BatchNorm1d/ReLU units with two
    max-pool stages, closed by ``AdaptiveAvgPool1d(1)``; the classifier maps
    the resulting 256 channels to 2 outputs.

    Args:
        num_features: accepted for interface compatibility but not used by
            any layer -- the adaptive pooling collapses the length axis, so
            no layer size depends on it.
    """

    def __init__(self, num_features):
        super().__init__()

        def conv_unit(in_channels, out_channels):
            # One Conv1d -> BatchNorm1d -> ReLU group (kernel 3, padding 1).
            return [
                nn.Conv1d(in_channels, out_channels, kernel_size=3, padding=1),
                nn.BatchNorm1d(out_channels),
                nn.ReLU(),
            ]

        # The module order must stay exactly as is: nn.Sequential names its
        # children by position, and saved state_dict keys depend on it.
        extractor_layers = [
            *conv_unit(1, 64),
            *conv_unit(64, 64),
            nn.MaxPool1d(2),
            nn.Dropout(0.2),
            *conv_unit(64, 128),
            *conv_unit(128, 128),
            nn.MaxPool1d(2),
            nn.Dropout(0.3),
            *conv_unit(128, 256),
            nn.AdaptiveAvgPool1d(1),
            nn.Dropout(0.3),
        ]
        head_layers = [
            nn.Flatten(),
            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 2),
        ]
        self.features = nn.Sequential(*extractor_layers)
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """Return the two class logits for each row of ``x``.

        A channel axis is inserted at position 1 before the convolutions
        (presumably ``x`` is ``(batch, num_features)`` -- confirm with callers).
        """
        with_channel = x.unsqueeze(1)
        pooled = self.features(with_channel)
        return self.classifier(pooled)
class CNN1D_Attack(nn.Module):
    """1-D convolutional network with a ``num_classes``-logit linear head.

    The feature extractor is six Conv1d/BatchNorm1d/ReLU units with two
    max-pool stages, closed by ``AdaptiveAvgPool1d(1)``; the classifier is a
    three-layer MLP (256 -> 256 -> 128 -> ``num_classes``).

    Args:
        num_features: accepted for interface compatibility but not used by
            any layer -- the adaptive pooling collapses the length axis, so
            no layer size depends on it.
        num_classes: width of the final linear layer.
    """

    def __init__(self, num_features, num_classes):
        super().__init__()

        def conv_unit(in_channels, out_channels):
            # One Conv1d -> BatchNorm1d -> ReLU group (kernel 3, padding 1).
            return [
                nn.Conv1d(in_channels, out_channels, kernel_size=3, padding=1),
                nn.BatchNorm1d(out_channels),
                nn.ReLU(),
            ]

        def dense_unit(in_width, out_width, drop_prob):
            # One Linear -> BatchNorm1d -> ReLU -> Dropout group.
            return [
                nn.Linear(in_width, out_width),
                nn.BatchNorm1d(out_width),
                nn.ReLU(),
                nn.Dropout(drop_prob),
            ]

        # The module order must stay exactly as is: nn.Sequential names its
        # children by position, and saved state_dict keys depend on it.
        extractor_layers = [
            *conv_unit(1, 64),
            *conv_unit(64, 64),
            nn.MaxPool1d(2),
            nn.Dropout(0.2),
            *conv_unit(64, 128),
            *conv_unit(128, 128),
            nn.MaxPool1d(2),
            nn.Dropout(0.3),
            *conv_unit(128, 256),
            *conv_unit(256, 256),
            nn.AdaptiveAvgPool1d(1),
            nn.Dropout(0.3),
        ]
        head_layers = [
            nn.Flatten(),
            *dense_unit(256, 256, 0.4),
            *dense_unit(256, 128, 0.3),
            nn.Linear(128, num_classes),
        ]
        self.features = nn.Sequential(*extractor_layers)
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """Return ``num_classes`` logits for each row of ``x``.

        A channel axis is inserted at position 1 before the convolutions
        (presumably ``x`` is ``(batch, num_features)`` -- confirm with callers).
        """
        with_channel = x.unsqueeze(1)
        pooled = self.features(with_channel)
        return self.classifier(pooled)
# ══════════════════════════════════════════════════════════════════
# DOWNLOADING THE MODELS FROM HUGGING FACE
# ══════════════════════════════════════════════════════════════════
def download_models():
    """Download the model artefacts from the Hugging Face Hub.

    Fetches the four files listed below from ``HF_REPO_ID`` into
    ``CACHE_DIR``, printing progress to stdout.

    Returns:
        dict: maps ``"binary"``, ``"attack"``, ``"scaler"`` and ``"encoder"``
        to the local path returned by ``hf_hub_download`` for that file.

    Raises:
        Whatever ``hf_hub_download`` raises; nothing is caught here.
    """
    files = {
        "binary": "cnn1d_binary.pth",
        "attack": "cnn1d_attacks_only.pth",
        "scaler": "scaler.pkl",
        "encoder": "label_encoder_attacks.pkl",
    }
    banner = "=" * 50
    print(banner)
    print("Téléchargement des modèles depuis Hugging Face...")
    print(banner)

    paths = {}
    for key, filename in files.items():
        print(" ↓ " + filename)
        # NOTE(review): `local_dir_use_symlinks` is reported as deprecated in
        # recent huggingface_hub releases -- confirm against the pinned
        # version before removing it.
        paths[key] = hf_hub_download(
            repo_id=HF_REPO_ID,
            filename=filename,
            cache_dir=CACHE_DIR,
            local_dir=CACHE_DIR,
            local_dir_use_symlinks=False,
        )
        # The check mark was previously mis-encoded (UTF-8 bytes read through
        # a Greek code page) and printed as garbage.
        print(" ✓ " + paths[key])
    return paths
# Download the artefacts when the server starts (i.e. at import time).
paths = download_models()
print("=" * 50)

# ══════════════════════════════════════════════════════════════════
# LOADING THE MODELS INTO MEMORY
# ══════════════════════════════════════════════════════════════════
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Device: {device}")

# SECURITY NOTE: joblib.load unpickles its input, and unpickling can execute
# arbitrary code. These files are downloaded from HF_REPO_ID, so the repo's
# contents must be trusted.
scaler = joblib.load(paths["scaler"])
le = joblib.load(paths["encoder"])

# Both sizes are read from the fitted preprocessing objects.
num_features = scaler.n_features_in_
num_attack_classes = len(le.classes_)
print(f"Features: {num_features}")
print(f"Classes: {list(le.classes_)}")

# Binary model. weights_only=True restricts torch.load to tensor data.
binary_model = CNN1D_Binary(num_features).to(device)
binary_model.load_state_dict(
    torch.load(paths["binary"], map_location=device, weights_only=True)
)
# eval() puts BatchNorm/Dropout into inference mode.
binary_model.eval()

# Attack-type model.
attack_model = CNN1D_Attack(num_features, num_attack_classes).to(device)
attack_model.load_state_dict(
    torch.load(paths["attack"], map_location=device, weights_only=True)
)
attack_model.eval()

# The accents and check mark in this message were previously mis-encoded.
print("Tous les modèles sont chargés ✓\n")
# ══════════════════════════════════════════════════════════════════
# PRÉTRAITEMENT
# ══════════════════════════════════════════════════════════════════
# Identifier / label columns that are never fed to the models.
_NON_FEATURE_COLUMNS = [
    'Flow ID', 'Src IP', 'Src Port', 'Dst IP',
    'Dst Port', 'Protocol', 'Timestamp', 'Label',
]

# Abbreviated column names -> the long names used by the rest of the pipeline.
_COLUMN_RENAMES = {
    'Tot Fwd Pkts': 'Total Fwd Packets',
    'Tot Bwd Pkts': 'Total Backward Packets',
    'TotLen Fwd Pkts': 'Total Length of Fwd Packets',
    'TotLen Bwd Pkts': 'Total Length of Bwd Packets',
    'Fwd Pkt Len Max': 'Fwd Packet Length Max',
    'Fwd Pkt Len Min': 'Fwd Packet Length Min',
    'Fwd Pkt Len Mean': 'Fwd Packet Length Mean',
    'Fwd Pkt Len Std': 'Fwd Packet Length Std',
    'Bwd Pkt Len Max': 'Bwd Packet Length Max',
    'Fwd Header Len': 'Fwd Header Length',
    'Bwd Header Len': 'Bwd Header Length',
    'Fwd Pkts/s': 'Fwd Packets/s',
    'Bwd Pkts/s': 'Bwd Packets/s',
    'Pkt Len Min': 'Min Packet Length',
    'Pkt Len Max': 'Max Packet Length',
    'Pkt Len Mean': 'Packet Length Mean',
    'Pkt Len Std': 'Packet Length Std',
    'Pkt Len Var': 'Packet Length Variance',
    'FIN Flag Cnt': 'FIN Flag Count',
    'SYN Flag Cnt': 'SYN Flag Count',
    'RST Flag Cnt': 'RST Flag Count',
    'PSH Flag Cnt': 'PSH Flag Count',
    'ACK Flag Cnt': 'ACK Flag Count',
    'URG Flag Cnt': 'URG Flag Count',
    'Pkt Size Avg': 'Average Packet Size',
    'Fwd Seg Size Avg': 'Avg Fwd Segment Size',
    'Bwd Seg Size Avg': 'Avg Bwd Segment Size',
    'Fwd Byts/b Avg': 'Fwd Avg Bytes/Bulk',
    'Fwd Pkts/b Avg': 'Fwd Avg Packets/Bulk',
    'Fwd Blk Rate Avg': 'Fwd Avg Bulk Rate',
    'Bwd Byts/b Avg': 'Bwd Avg Bytes/Bulk',
    'Bwd Pkts/b Avg': 'Bwd Avg Packets/Bulk',
    'Bwd Blk Rate Avg': 'Bwd Avg Bulk Rate',
    'Subflow Fwd Pkts': 'Subflow Fwd Packets',
    'Subflow Bwd Pkts': 'Subflow Bwd Packets',
    'Init Fwd Win Byts': 'Init_Win_bytes_forward',
    'Init Bwd Win Byts': 'Init_Win_bytes_backward',
    'Fwd Act Data Pkts': 'act_data_pkt_fwd',
    'Fwd Seg Size Min': 'min_seg_size_forward',
}

# Feature count used only when the scaler exposes neither
# `feature_names_in_` nor `n_features_in_`.
_DEFAULT_FEATURE_COUNT = 78


def preprocess(df, fitted_scaler=None):
    """Turn a raw flow DataFrame into the scaled matrix the models consume.

    Steps: strip whitespace from column names, drop identifier/label columns,
    map abbreviated column names to their long form, keep numeric columns
    only, replace +/-inf and NaN with 0, align the columns with what the
    scaler expects, then apply ``transform``.

    The caller's DataFrame is left untouched (earlier versions rewrote its
    column names in place).

    Args:
        df: DataFrame of flows, one row per flow.
        fitted_scaler: optional object with a ``transform`` method and,
            ideally, ``feature_names_in_`` / ``n_features_in_`` attributes.
            Defaults to the module-level ``scaler``.

    Returns:
        Whatever ``fitted_scaler.transform`` returns for the aligned matrix.
    """
    if fitted_scaler is None:
        fitted_scaler = scaler

    # rename() returns a new frame, so `df` itself is never modified.
    frame = df.rename(columns=lambda name: str(name).strip())
    frame = frame.drop(columns=_NON_FEATURE_COLUMNS, errors='ignore')
    frame = frame.rename(columns=_COLUMN_RENAMES)
    frame = frame.select_dtypes(include=[np.number])
    # Renaming can produce duplicate labels (short and long name both
    # present); keep the first so the column alignment below stays valid.
    frame = frame.loc[:, ~frame.columns.duplicated()]
    frame = frame.replace([np.inf, -np.inf], np.nan).fillna(0)

    expected_names = getattr(fitted_scaler, 'feature_names_in_', None)
    if expected_names is not None:
        # One reindex both adds missing features (as 0) and enforces the
        # scaler's column order, instead of inserting columns one by one.
        frame = frame.reindex(columns=list(expected_names), fill_value=0)
        matrix = frame.to_numpy()
    else:
        # No names available: pad with zero columns / truncate positionally
        # to the width the scaler was fitted with.
        width = getattr(fitted_scaler, 'n_features_in_', _DEFAULT_FEATURE_COUNT)
        matrix = frame.to_numpy(dtype=float)[:, :width]
        shortfall = width - matrix.shape[1]
        if shortfall > 0:
            padding = np.zeros((matrix.shape[0], shortfall))
            matrix = np.hstack([matrix, padding])

    return fitted_scaler.transform(matrix)
# ══════════════════════════════════════════════════════════════════
# ROUTES API
# ══════════════════════════════════════════════════════════════════
# Upper bound on rows pushed through the networks at once, so memory use
# does not grow with the size of the uploaded file.
_INFERENCE_BATCH_SIZE = 4096


def _classify_flows(features, batch_size=_INFERENCE_BATCH_SIZE):
    """Label every row of ``features`` as 'BENIGN' or an attack class name.

    The binary model screens each chunk; only rows whose predicted class is
    not 0 are passed, together, to the attack model. Both models were put in
    eval mode when loaded, so scoring rows in a batch gives the same
    prediction as scoring them one at a time.
    """
    labels = []
    with torch.no_grad():
        for start in range(0, len(features), batch_size):
            chunk = features[start:start + batch_size]
            batch = torch.as_tensor(chunk, dtype=torch.float32).to(device)
            is_attack = torch.argmax(binary_model(batch), dim=1) != 0
            chunk_labels = ['BENIGN'] * len(batch)
            if is_attack.any():
                rows = torch.nonzero(is_attack, as_tuple=True)[0].tolist()
                attack_ids = torch.argmax(
                    attack_model(batch[is_attack]), dim=1
                ).tolist()
                for row, attack_id in zip(rows, attack_ids):
                    chunk_labels[row] = str(le.classes_[attack_id])
            labels.extend(chunk_labels)
    return labels


@app.route('/analyze', methods=['POST'])
def analyze():
    """Classify every flow of an uploaded CSV and return summary statistics.

    Expects a multipart upload under the form field ``file``.

    Returns a JSON object with ``total_flows``, ``benign_count``,
    ``attack_count``, parallel ``labels`` / ``values`` / ``percentages``
    lists, an ``attack_types`` count mapping and the first 100 per-flow
    ``results``.

    Status codes:
        400 -- no file, unreadable/malformed CSV, or CSV without rows.
        500 -- any other failure (logged with traceback); body is
               ``{'error': <message>}``.
    """
    if 'file' not in request.files:
        return jsonify({'error': 'No file uploaded'}), 400
    try:
        df = pd.read_csv(request.files['file'])
        if df.empty:
            return jsonify({'error': 'CSV file is empty'}), 400

        results = _classify_flows(preprocess(df))

        counts = Counter(results)
        total = len(results)
        benign = counts.get('BENIGN', 0)
        values = list(counts.values())
        return jsonify({
            'total_flows': total,
            'benign_count': benign,
            'attack_count': total - benign,
            'labels': list(counts),
            'values': values,
            'percentages': [round(v / total * 100, 2) for v in values],
            'attack_types': {
                name: n for name, n in counts.items() if name != 'BENIGN'
            },
            # Cap the per-flow list so the response stays small.
            'results': results[:100],
        })
    except (pd.errors.EmptyDataError, pd.errors.ParserError,
            UnicodeDecodeError) as exc:
        # The upload itself is bad: that is a client error, not a server one.
        return jsonify({'error': str(exc)}), 400
    except Exception as exc:
        # Top-level boundary: keep the JSON error contract, but record the
        # traceback instead of silently discarding it.
        app.logger.exception("Unhandled error in /analyze")
        return jsonify({'error': str(exc)}), 500
@app.route('/health', methods=['GET'])
def health():
    """Report service status as JSON.

    The payload carries a fixed ``status`` of ``'ok'``, the torch device in
    use, the Hugging Face repo id and the label encoder's class list.
    """
    payload = dict(
        status='ok',
        device=str(device),
        repo=HF_REPO_ID,
        attack_classes=le.classes_.tolist(),
    )
    return jsonify(payload)
if __name__ == '__main__':
    # Script entry point: listen on every interface, on the port given by
    # the PORT environment variable (5000 when unset), with debug mode off.
    listen_port = int(os.getenv('PORT', 5000))
    app.run(host='0.0.0.0', port=listen_port, debug=False)