Spaces:
Runtime error
Runtime error
| import pandas as pd | |
| import numpy as np | |
def preprocess_features(data: pd.DataFrame) -> pd.DataFrame:
    """Clean the input frame and append derived traffic features.

    Infinite values are first converted to NaN, then every row holding a NaN
    in any column is dropped. Fourteen derived columns are then written onto
    that cleaned frame, which is returned. Surviving rows keep their original
    index labels.

    Required input columns: ``Number``, ``IAT``, ``Min``, ``Max``,
    ``Tot size``, ``Rate``, ``Std``, ``AVG``, ``TCP``, ``UDP`` and the four
    ``syn_count`` / ``ack_count`` / ``fin_count`` / ``rst_count`` counters.
    A missing column surfaces as the ``KeyError`` raised by pandas.
    """
    print("start data preprocessing")

    # replace() and dropna() each return a new frame, so all writes below
    # land on ``frame`` rather than on the object passed in by the caller.
    frame = data.replace([np.inf, -np.inf], np.nan).dropna()

    # Small offset keeps these denominators away from zero when the raw
    # value is 0.
    epsilon = 1e-6
    packet_count = frame["Number"] + epsilon
    inter_arrival = frame["IAT"] + epsilon
    min_plus_one = frame["Min"] + 1

    # Flag ratios: <flag>_count / packet count. The running sum is built in
    # the order syn, ack, fin, rst.
    flag_total = None
    for flag in ("syn", "ack", "fin", "rst"):
        ratio = frame[f"{flag}_count"] / packet_count
        frame[f"{flag}_ratio"] = ratio
        flag_total = ratio if flag_total is None else flag_total + ratio

    # Packet sizes.
    total_size = frame["Tot size"]
    largest = frame["Max"]
    frame["mean_pkt_size"] = total_size / packet_count
    frame["pkt_size_range"] = largest - frame["Min"]
    frame["pkt_size_ratio"] = largest / min_plus_one

    # Timing.
    frame["mean_iat"] = frame["IAT"] / packet_count
    frame["pkt_rate"] = frame["Number"] / inter_arrival

    # Throughput.
    frame["throughput"] = total_size / inter_arrival
    frame["bytes_per_sec"] = frame["Rate"] * total_size

    # Variation: Std divided by the offset AVG.
    std_dev = frame["Std"]
    frame["coef_var"] = std_dev / (frame["AVG"] + epsilon)

    # Combined indicators.
    tcp = frame["TCP"]
    frame["tcp_udp_ratio"] = tcp / (frame["UDP"] + 1)
    # Despite its name, this column is the plain sum of the four flag ratios.
    frame["flag_entropy"] = flag_total

    print("finish data preprocessing")
    return frame