Spaces:
Sleeping
Sleeping
Commit ·
10d6e33
1
Parent(s): 70a72e6
Detection
Browse files
- routes/offline_detection.py +48 -33
routes/offline_detection.py
CHANGED
|
@@ -23,22 +23,23 @@ os.makedirs(SAMPLE_DIR, exist_ok=True)
|
|
| 23 |
ALLOWED_EXT = {"csv", "pcap"}
|
| 24 |
|
| 25 |
# --- FEATURE DEFINITIONS (As per your Model Logs) ---
|
|
|
|
| 26 |
BCC_FEATURES = [
|
| 27 |
-
"protocol",
|
| 28 |
-
"src_port",
|
| 29 |
-
"dst_port",
|
| 30 |
-
"duration",
|
| 31 |
-
"
|
| 32 |
-
"
|
| 33 |
-
"
|
| 34 |
-
"
|
| 35 |
-
"
|
| 36 |
-
"bytes_rate",
|
| 37 |
-
"
|
| 38 |
-
"syn_flag_counts",
|
| 39 |
-
"ack_flag_counts",
|
| 40 |
-
"rst_flag_counts",
|
| 41 |
-
"fin_flag_counts"
|
| 42 |
]
|
| 43 |
|
| 44 |
CICIDS_FEATURES = [
|
|
@@ -96,6 +97,7 @@ def offline_predict():
|
|
| 96 |
# 2. Flexible Feature Mapping & Flag Extraction
|
| 97 |
# Renames common CSV headers to the specific technical names the model expects
|
| 98 |
# 2. Flexible Feature Mapping (Translate to EXACT fit-time names)
|
|
|
|
| 99 |
mapping = {
|
| 100 |
'Protocol': 'protocol', 'proto': 'protocol',
|
| 101 |
'Source Port': 'src_port',
|
|
@@ -103,28 +105,38 @@ def offline_predict():
|
|
| 103 |
'Flow Duration': 'duration', 'flow_duration': 'duration',
|
| 104 |
'Total Fwd Packets': 'fwd_packets_count', 'total_fwd_pkts': 'fwd_packets_count',
|
| 105 |
'Total Bwd Packets': 'bwd_packets_count', 'total_bwd_pkts': 'bwd_packets_count',
|
| 106 |
-
'
|
|
|
|
|
|
|
|
|
|
| 107 |
'syn': 'syn_flag_counts', 'ack': 'ack_flag_counts',
|
| 108 |
'rst': 'rst_flag_counts', 'fin': 'fin_flag_counts'
|
| 109 |
}
|
| 110 |
df = df.rename(columns=mapping)
|
| 111 |
|
| 112 |
-
#
|
| 113 |
-
if '
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
|
| 129 |
# 3. Model Loading & Feature Alignment
|
| 130 |
try:
|
|
@@ -153,7 +165,7 @@ def offline_predict():
|
|
| 153 |
encoder = model_data.get('encoder')
|
| 154 |
|
| 155 |
# Scale features (Critical for BCC/MLP models)
|
| 156 |
-
scaled_data = scaler.transform(input_data)
|
| 157 |
preds = model.predict(scaled_data)
|
| 158 |
|
| 159 |
# Convert numeric 0/1 to "Normal"/"DDoS"
|
|
@@ -168,6 +180,9 @@ def offline_predict():
|
|
| 168 |
|
| 169 |
# Convert all labels to strings for JSON serializability
|
| 170 |
results = [{"index": i, "class": str(lbl)} for i, lbl in enumerate(labels)]
|
|
|
|
|
|
|
|
|
|
| 171 |
|
| 172 |
return jsonify({
|
| 173 |
"success": True,
|
|
|
|
| 23 |
ALLOWED_EXT = {"csv", "pcap"}
|
| 24 |
|
| 25 |
# --- FEATURE DEFINITIONS (As per your Model Logs) ---
|
| 26 |
+
# --- UPDATED FEATURE DEFINITIONS ---
|
| 27 |
BCC_FEATURES = [
|
| 28 |
+
"protocol",
|
| 29 |
+
"src_port",
|
| 30 |
+
"dst_port",
|
| 31 |
+
"duration",
|
| 32 |
+
"packets_count",
|
| 33 |
+
"fwd_packets_count",
|
| 34 |
+
"bwd_packets_count",
|
| 35 |
+
"total_payload_bytes",
|
| 36 |
+
"total_header_bytes",
|
| 37 |
+
"bytes_rate",
|
| 38 |
+
"packets_rate",
|
| 39 |
+
"syn_flag_counts",
|
| 40 |
+
"ack_flag_counts",
|
| 41 |
+
"rst_flag_counts",
|
| 42 |
+
"fin_flag_counts",
|
| 43 |
]
|
| 44 |
|
| 45 |
CICIDS_FEATURES = [
|
|
|
|
| 97 |
# 2. Flexible Feature Mapping & Flag Extraction
|
| 98 |
# Renames common CSV headers to the specific technical names the model expects
|
| 99 |
# 2. Flexible Feature Mapping (Translate to EXACT fit-time names)
|
| 100 |
+
# 2. Flexible Feature Mapping
|
| 101 |
mapping = {
|
| 102 |
'Protocol': 'protocol', 'proto': 'protocol',
|
| 103 |
'Source Port': 'src_port',
|
|
|
|
| 105 |
'Flow Duration': 'duration', 'flow_duration': 'duration',
|
| 106 |
'Total Fwd Packets': 'fwd_packets_count', 'total_fwd_pkts': 'fwd_packets_count',
|
| 107 |
'Total Bwd Packets': 'bwd_packets_count', 'total_bwd_pkts': 'bwd_packets_count',
|
| 108 |
+
'Total Length of Fwd Packets': 'total_payload_bytes', 'payload_len': 'total_payload_bytes',
|
| 109 |
+
'fwd_header_len': 'total_header_bytes', 'header_len': 'total_header_bytes',
|
| 110 |
+
'Flow Bytes/s': 'bytes_rate', 'rate': 'bytes_rate',
|
| 111 |
+
'Flow Pkts/s': 'packets_rate',
|
| 112 |
'syn': 'syn_flag_counts', 'ack': 'ack_flag_counts',
|
| 113 |
'rst': 'rst_flag_counts', 'fin': 'fin_flag_counts'
|
| 114 |
}
|
| 115 |
df = df.rename(columns=mapping)
|
| 116 |
|
| 117 |
+
# Calculate packets_count if missing
|
| 118 |
+
if 'packets_count' not in df.columns and 'fwd_packets_count' in df.columns:
|
| 119 |
+
df['packets_count'] = df['fwd_packets_count'] + df.get('bwd_packets_count', 0)
|
| 120 |
+
|
| 121 |
+
# --- FLAG EXTRACTION LOGIC ---
|
| 122 |
+
flag_map = {
|
| 123 |
+
'syn_flag_counts': 'syn',
|
| 124 |
+
'ack_flag_counts': 'ack',
|
| 125 |
+
'rst_flag_counts': 'rst',
|
| 126 |
+
'fin_flag_counts': 'fin'
|
| 127 |
+
}
|
| 128 |
+
|
| 129 |
+
for model_name, csv_name in flag_map.items():
|
| 130 |
+
if model_name not in df.columns:
|
| 131 |
+
if 'flags' in df.columns:
|
| 132 |
+
# Handle String flags safely
|
| 133 |
+
if df['flags'].dtype == object:
|
| 134 |
+
df[model_name] = df['flags'].str.lower().str.contains(csv_name).astype(int)
|
| 135 |
+
else:
|
| 136 |
+
# Fallback for numeric or missing flag data
|
| 137 |
+
df[model_name] = 0
|
| 138 |
+
else:
|
| 139 |
+
df[model_name] = 0
|
| 140 |
|
| 141 |
# 3. Model Loading & Feature Alignment
|
| 142 |
try:
|
|
|
|
| 165 |
encoder = model_data.get('encoder')
|
| 166 |
|
| 167 |
# Scale features (Critical for BCC/MLP models)
|
| 168 |
+
scaled_data = scaler.transform(input_data.values)
|
| 169 |
preds = model.predict(scaled_data)
|
| 170 |
|
| 171 |
# Convert numeric 0/1 to "Normal"/"DDoS"
|
|
|
|
| 180 |
|
| 181 |
# Convert all labels to strings for JSON serializability
|
| 182 |
results = [{"index": i, "class": str(lbl)} for i, lbl in enumerate(labels)]
|
| 183 |
+
|
| 184 |
+
# Save results for the PDF report generator
|
| 185 |
+
df.to_csv(os.path.join(UPLOAD_DIR, "last_results.csv"), index=False)
|
| 186 |
|
| 187 |
return jsonify({
|
| 188 |
"success": True,
|