Spaces:
Running
Running
| from flask import Flask, request, jsonify | |
| from flask_cors import CORS | |
| import pandas as pd | |
| import numpy as np | |
| import torch | |
| import torch.nn as nn | |
| import joblib | |
| from collections import Counter | |
| import os | |
| from pathlib import Path | |
| from huggingface_hub import hf_hub_download | |
# Flask application; flask-cors is configured to accept requests from any origin.
app = Flask(__name__)
CORS(app, origins="*")


# Registration matters: without the decorator Flask never invokes this hook
# and the function is dead code.
@app.after_request
def after_request(response):
    """Set permissive CORS headers on an outgoing response.

    Args:
        response: The response object Flask is about to send.

    Returns:
        The same response object, with the ``Access-Control-Allow-Origin``,
        ``Access-Control-Allow-Headers`` and ``Access-Control-Allow-Methods``
        headers overwritten.
    """
    response.headers['Access-Control-Allow-Origin'] = '*'
    response.headers['Access-Control-Allow-Headers'] = 'Content-Type'
    response.headers['Access-Control-Allow-Methods'] = 'GET, POST, OPTIONS'
    return response
| # ══════════════════════════════════════════════════════════════════ | |
| # CONFIGURATION | |
| # ══════════════════════════════════════════════════════════════════ | |
# Hugging Face Hub repository that the model files are downloaded from.
HF_REPO_ID = "saidimn/ids-cnn-cicids2017"

# Local directory (next to this file) that receives the downloaded models.
CACHE_DIR = Path(__file__).parent.joinpath("model_cache")
CACHE_DIR.mkdir(exist_ok=True)
| # ══════════════════════════════════════════════════════════════════ | |
| # ARCHITECTURES CNN-1D | |
| # ══════════════════════════════════════════════════════════════════ | |
class CNN1D_Binary(nn.Module):
    """1-D convolutional network that emits two scores per input row.

    The layers are kept in two flat ``nn.Sequential`` containers
    (``features`` and ``classifier``) so that parameter names such as
    ``features.0.weight`` stay exactly as a saved ``state_dict`` expects.
    """

    @staticmethod
    def _conv_unit(in_channels, out_channels):
        """Return ``[Conv1d(k=3, pad=1), BatchNorm1d, ReLU]`` as a flat list."""
        return [
            nn.Conv1d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm1d(out_channels),
            nn.ReLU(),
        ]

    def __init__(self, num_features):
        """Build the network.

        Args:
            num_features: Accepted for interface compatibility; it is not
                referenced while building the layers (the adaptive average
                pool reduces the length axis to 1 before the linear layers).
        """
        super().__init__()
        unit = self._conv_unit

        # Arguments are evaluated left to right, so modules are created in
        # the same order (and land at the same indices) as a literal listing.
        self.features = nn.Sequential(
            *unit(1, 64),
            *unit(64, 64),
            nn.MaxPool1d(2),
            nn.Dropout(0.2),
            *unit(64, 128),
            *unit(128, 128),
            nn.MaxPool1d(2),
            nn.Dropout(0.3),
            *unit(128, 256),
            nn.AdaptiveAvgPool1d(1),
            nn.Dropout(0.3),
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 2),
        )

    def forward(self, x):
        """Run the network on ``x`` and return the classifier output.

        A size-1 axis is inserted at position 1 to serve as the single input
        channel of the first ``Conv1d``.
        assumes x is (batch, num_features) -- TODO confirm against callers.
        """
        with_channel = x.unsqueeze(1)
        pooled = self.features(with_channel)
        return self.classifier(pooled)
class CNN1D_Attack(nn.Module):
    """1-D convolutional network that emits ``num_classes`` scores per row.

    The layers are kept in two flat ``nn.Sequential`` containers
    (``features`` and ``classifier``) so that parameter names such as
    ``features.0.weight`` stay exactly as a saved ``state_dict`` expects.
    """

    @staticmethod
    def _conv_unit(in_channels, out_channels):
        """Return ``[Conv1d(k=3, pad=1), BatchNorm1d, ReLU]`` as a flat list."""
        return [
            nn.Conv1d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm1d(out_channels),
            nn.ReLU(),
        ]

    @staticmethod
    def _dense_unit(in_features, out_features, drop):
        """Return ``[Linear, BatchNorm1d, ReLU, Dropout(drop)]`` as a flat list."""
        return [
            nn.Linear(in_features, out_features),
            nn.BatchNorm1d(out_features),
            nn.ReLU(),
            nn.Dropout(drop),
        ]

    def __init__(self, num_features, num_classes):
        """Build the network.

        Args:
            num_features: Accepted for interface compatibility; it is not
                referenced while building the layers (the adaptive average
                pool reduces the length axis to 1 before the linear layers).
            num_classes: Width of the final linear layer.
        """
        super().__init__()
        conv = self._conv_unit
        dense = self._dense_unit

        # Arguments are evaluated left to right, so modules are created in
        # the same order (and land at the same indices) as a literal listing.
        self.features = nn.Sequential(
            *conv(1, 64),
            *conv(64, 64),
            nn.MaxPool1d(2),
            nn.Dropout(0.2),
            *conv(64, 128),
            *conv(128, 128),
            nn.MaxPool1d(2),
            nn.Dropout(0.3),
            *conv(128, 256),
            *conv(256, 256),
            nn.AdaptiveAvgPool1d(1),
            nn.Dropout(0.3),
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            *dense(256, 256, 0.4),
            *dense(256, 128, 0.3),
            nn.Linear(128, num_classes),
        )

    def forward(self, x):
        """Run the network on ``x`` and return the classifier output.

        A size-1 axis is inserted at position 1 to serve as the single input
        channel of the first ``Conv1d``.
        assumes x is (batch, num_features) -- TODO confirm against callers.
        """
        with_channel = x.unsqueeze(1)
        pooled = self.features(with_channel)
        return self.classifier(pooled)
| # ══════════════════════════════════════════════════════════════════ | |
| # TÉLÉCHARGEMENT DES MODÈLES DEPUIS HUGGING FACE | |
| # ══════════════════════════════════════════════════════════════════ | |
def download_models():
    """Fetch the model artifacts from the Hugging Face Hub.

    Every file is requested from ``HF_REPO_ID`` with ``CACHE_DIR`` passed as
    both ``cache_dir`` and ``local_dir``; progress lines are printed to stdout.

    Returns:
        dict: Maps ``"binary"``, ``"attack"``, ``"scaler"`` and ``"encoder"``
        to the value returned by ``hf_hub_download`` for that file.
    """
    wanted = (
        ("binary", "cnn1d_binary.pth"),
        ("attack", "cnn1d_attacks_only.pth"),
        ("scaler", "scaler.pkl"),
        ("encoder", "label_encoder_attacks.pkl"),
    )

    rule = "=" * 50
    print(rule)
    print("Téléchargement des modèles depuis Hugging Face...")
    print(rule)

    resolved = {}
    for key, filename in wanted:
        print(" β " + filename)
        local_path = hf_hub_download(
            repo_id=HF_REPO_ID,
            filename=filename,
            cache_dir=CACHE_DIR,
            local_dir=CACHE_DIR,
            local_dir_use_symlinks=False,
        )
        resolved[key] = local_path
        print(" β " + local_path)
    return resolved
# Download the artifacts at import time, i.e. when the server starts.
paths = download_models()
print("=" * 50)

# ══════════════════════════════════════════════════════════════════
# LOAD MODELS INTO MEMORY
# ══════════════════════════════════════════════════════════════════
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Device: {device}")

# NOTE(review): joblib.load unpickles these files, and unpickling can execute
# arbitrary code -- this is only safe while the Hub repository is trusted.
scaler = joblib.load(paths["scaler"])
le = joblib.load(paths["encoder"])

# Sizes are taken from the fitted preprocessing objects.
num_features = scaler.n_features_in_
num_attack_classes = len(le.classes_)
print(f"Features: {num_features}")
print(f"Classes: {list(le.classes_)}")

# Binary model: move to the device, restore the weights, switch to eval mode.
binary_model = CNN1D_Binary(num_features).to(device)
binary_model.load_state_dict(
    torch.load(paths["binary"], map_location=device, weights_only=True)
)
binary_model.eval()

# Attack model: same sequence, with one output per label-encoder class.
attack_model = CNN1D_Attack(num_features, num_attack_classes).to(device)
attack_model.load_state_dict(
    torch.load(paths["attack"], map_location=device, weights_only=True)
)
attack_model.eval()

print("Tous les modΓ¨les sont chargΓ©s β\n")
| # ══════════════════════════════════════════════════════════════════ | |
| # PRÉTRAITEMENT | |
| # ══════════════════════════════════════════════════════════════════ | |
# Columns removed before any feature selection takes place.
_NON_FEATURE_COLUMNS = ['Flow ID', 'Src IP', 'Src Port', 'Dst IP',
                        'Dst Port', 'Protocol', 'Timestamp', 'Label']

# Short column names -> long column names, applied before feature alignment.
# presumably the long names are the ones the scaler was fitted on -- confirm.
_COLUMN_RENAMES = {
    'Tot Fwd Pkts': 'Total Fwd Packets',
    'Tot Bwd Pkts': 'Total Backward Packets',
    'TotLen Fwd Pkts': 'Total Length of Fwd Packets',
    'TotLen Bwd Pkts': 'Total Length of Bwd Packets',
    'Fwd Pkt Len Max': 'Fwd Packet Length Max',
    'Fwd Pkt Len Min': 'Fwd Packet Length Min',
    'Fwd Pkt Len Mean': 'Fwd Packet Length Mean',
    'Fwd Pkt Len Std': 'Fwd Packet Length Std',
    'Bwd Pkt Len Max': 'Bwd Packet Length Max',
    'Fwd Header Len': 'Fwd Header Length',
    'Bwd Header Len': 'Bwd Header Length',
    'Fwd Pkts/s': 'Fwd Packets/s',
    'Bwd Pkts/s': 'Bwd Packets/s',
    'Pkt Len Min': 'Min Packet Length',
    'Pkt Len Max': 'Max Packet Length',
    'Pkt Len Mean': 'Packet Length Mean',
    'Pkt Len Std': 'Packet Length Std',
    'Pkt Len Var': 'Packet Length Variance',
    'FIN Flag Cnt': 'FIN Flag Count',
    'SYN Flag Cnt': 'SYN Flag Count',
    'RST Flag Cnt': 'RST Flag Count',
    'PSH Flag Cnt': 'PSH Flag Count',
    'ACK Flag Cnt': 'ACK Flag Count',
    'URG Flag Cnt': 'URG Flag Count',
    'Pkt Size Avg': 'Average Packet Size',
    'Fwd Seg Size Avg': 'Avg Fwd Segment Size',
    'Bwd Seg Size Avg': 'Avg Bwd Segment Size',
    'Fwd Byts/b Avg': 'Fwd Avg Bytes/Bulk',
    'Fwd Pkts/b Avg': 'Fwd Avg Packets/Bulk',
    'Fwd Blk Rate Avg': 'Fwd Avg Bulk Rate',
    'Bwd Byts/b Avg': 'Bwd Avg Bytes/Bulk',
    'Bwd Pkts/b Avg': 'Bwd Avg Packets/Bulk',
    'Bwd Blk Rate Avg': 'Bwd Avg Bulk Rate',
    'Subflow Fwd Pkts': 'Subflow Fwd Packets',
    'Subflow Bwd Pkts': 'Subflow Bwd Packets',
    'Init Fwd Win Byts': 'Init_Win_bytes_forward',
    'Init Bwd Win Byts': 'Init_Win_bytes_backward',
    'Fwd Act Data Pkts': 'act_data_pkt_fwd',
    'Fwd Seg Size Min': 'min_seg_size_forward',
}


def preprocess(df):
    """Turn an uploaded flow table into the scaled matrix the models consume.

    Steps, in order: strip whitespace from column names, drop the columns in
    ``_NON_FEATURE_COLUMNS``, apply ``_COLUMN_RENAMES``, keep numeric columns
    only, replace +/-inf and NaN with 0, align the columns with what the
    module-level ``scaler`` expects, and apply ``scaler.transform``.

    The input frame is not modified; every step works on a new frame.

    Args:
        df: ``pandas.DataFrame`` with string column labels.

    Returns:
        Whatever ``scaler.transform`` returns for the aligned values.
    """
    df = df.rename(columns=str.strip)
    df = df.drop(columns=_NON_FEATURE_COLUMNS, errors='ignore')
    df = df.rename(columns=_COLUMN_RENAMES)
    df = df.select_dtypes(include=[np.number])
    df = df.replace([np.inf, -np.inf], np.nan).fillna(0)

    feature_names = getattr(scaler, 'feature_names_in_', None)
    if feature_names is not None:
        # Select the fitted features in fitted order; absent ones become 0.
        df = df.reindex(columns=feature_names, fill_value=0)
    else:
        # No names recorded on the scaler: pad with zero columns or truncate
        # to the width the scaler was fitted with (not a hard-coded count).
        expected = scaler.n_features_in_
        for position in range(df.shape[1], expected):
            df['missing_' + str(position)] = 0
        df = df.iloc[:, :expected]

    return scaler.transform(df.values)
| # ══════════════════════════════════════════════════════════════════ | |
| # ROUTES API | |
| # ══════════════════════════════════════════════════════════════════ | |
# NOTE(review): the view had no route registration, so Flask could never
# reach it. The URL path is inferred from the function name -- confirm it
# matches what the front end calls.
@app.route('/analyze', methods=['POST'])
def analyze():
    """Classify every row of an uploaded CSV and return summary statistics.

    The CSV is read from the ``file`` field of the multipart upload. Each row
    is scored by ``binary_model``; rows whose predicted class is not 0 are then
    scored by ``attack_model`` and labelled through ``le.classes_``. All other
    rows are labelled ``'BENIGN'``.

    Returns:
        A JSON response with ``total_flows``, ``benign_count``,
        ``attack_count``, ``labels``, ``values``, ``percentages``,
        ``attack_types`` and the first 100 per-row ``results``.
        On failure a JSON body ``{'error': ...}`` is returned with status 400
        (missing, empty or unparsable upload) or 500 (any other failure).
    """
    if 'file' not in request.files:
        return jsonify({'error': 'No file uploaded'}), 400

    # A bad upload is the client's fault: answer 400 rather than 500.
    try:
        df = pd.read_csv(request.files['file'])
    except pd.errors.EmptyDataError:
        return jsonify({'error': 'CSV file is empty'}), 400
    except (pd.errors.ParserError, UnicodeDecodeError) as exc:
        return jsonify({'error': 'Invalid CSV file: ' + str(exc)}), 400
    if df.empty:
        return jsonify({'error': 'CSV file is empty'}), 400

    try:
        X_scaled = preprocess(df)
        X = torch.tensor(X_scaled, dtype=torch.float32).to(device)

        results = ['BENIGN'] * len(X)
        with torch.no_grad():
            binary_pred = torch.argmax(binary_model(X), dim=1)
            attack_mask = binary_pred != 0
            if attack_mask.any():
                # One batched call for all flagged rows instead of one model
                # call per row; boolean indexing keeps the row order.
                attack_pred = torch.argmax(attack_model(X[attack_mask]), dim=1).tolist()
                attack_rows = [
                    row for row, flagged in enumerate(attack_mask.tolist()) if flagged
                ]
                for row, class_index in zip(attack_rows, attack_pred):
                    results[row] = le.classes_[class_index]

        counts = Counter(results)
        total = len(results)
        labels = list(counts.keys())
        values = list(counts.values())
        percentages = [round(v / total * 100, 2) for v in values]
        attacks = {k: v for k, v in counts.items() if k != 'BENIGN'}
        benign = counts.get('BENIGN', 0)
        return jsonify({
            'total_flows': total,
            'benign_count': benign,
            'attack_count': total - benign,
            'labels': labels,
            'values': values,
            'percentages': percentages,
            'attack_types': attacks,
            'results': results[:100],
        })
    except Exception as e:
        # Top-level boundary: keep the traceback in the server log and still
        # answer with a JSON error body.
        app.logger.exception('Analysis failed')
        return jsonify({'error': str(e)}), 500
# NOTE(review): the view had no route registration, so Flask could never
# reach it. The URL path is inferred from the function name -- confirm it
# matches what clients call.
@app.route('/health', methods=['GET'])
def health():
    """Report service status and basic model metadata.

    Returns:
        A JSON response with ``status`` (always ``'ok'``), the torch
        ``device`` as a string, the Hub ``repo`` id and the list of
        ``attack_classes`` taken from ``le.classes_``.
    """
    return jsonify({
        'status': 'ok',
        'device': str(device),
        'repo': HF_REPO_ID,
        'attack_classes': le.classes_.tolist()
    })
if __name__ == '__main__':
    # `os` is already imported at the top of the file; no local re-import.
    # Listen on all interfaces; the port comes from $PORT and defaults to 5000.
    port = int(os.environ.get('PORT', 5000))
    app.run(debug=False, port=port, host='0.0.0.0')