Spaces:
Sleeping
Sleeping
File size: 11,399 Bytes
0f8fe33 978b2fe 63b7948 0f8fe33 63b7948 59edcea 978b2fe 0f8fe33 978b2fe 0f8fe33 26a71ad 978b2fe 0f8fe33 935d233 10d6e33 0f8fe33 10d6e33 0f8fe33 935d233 0f8fe33 978b2fe 0f8fe33 59edcea 978b2fe 0f8fe33 935d233 63b7948 935d233 978b2fe 935d233 978b2fe 935d233 70a72e6 10d6e33 935d233 70a72e6 10d6e33 70a72e6 935d233 10d6e33 935d233 978b2fe 935d233 63b7948 978b2fe 935d233 978b2fe 935d233 978b2fe 935d233 c2e4dd0 0f8fe33 935d233 63b7948 c2e4dd0 935d233 c2e4dd0 63b7948 935d233 c2e4dd0 935d233 c2e4dd0 63b7948 0f8fe33 935d233 63b7948 935d233 10d6e33 0f8fe33 935d233 0f8fe33 63b7948 935d233 63b7948 0f8fe33 63b7948 0f8fe33 978b2fe 0f8fe33 978b2fe 0f8fe33 978b2fe 0f8fe33 63b7948 0f8fe33 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 | import os
import pandas as pd
import joblib
from flask import Blueprint, request, jsonify, send_file, make_response, after_this_request
from werkzeug.utils import secure_filename
from datetime import datetime
from fpdf import FPDF
from io import BytesIO
import time
import requests
# --- IMPORT UTILS ---
from utils.pcap_to_csv import convert_pcap_to_csv
from utils.model_selector import load_model
offline_bp = Blueprint("offline_bp", __name__)
# --- CONFIGURATION ---
# Directories are created eagerly at import time so every route below can
# assume they exist.
UPLOAD_DIR = "uploads"
SAMPLE_DIR = "sample"
os.makedirs(UPLOAD_DIR, exist_ok=True)
os.makedirs(SAMPLE_DIR, exist_ok=True)
# Accepted upload types: pre-extracted flow tables (.csv) and raw captures
# (.pcap). Membership is checked by allowed() below.
ALLOWED_EXT = {"csv", "pcap"}
# --- FEATURE DEFINITIONS ---
# Order matters: these lists are used to reorder DataFrame columns into the
# exact fit-time order before scaling/prediction.
# Feature names for the "bcc" model (snake_case flow statistics).
BCC_FEATURES = [
    "protocol",
    "src_port",
    "dst_port",
    "duration",
    "packets_count",
    "fwd_packets_count",
    "bwd_packets_count",
    "total_payload_bytes",
    "total_header_bytes",
    "bytes_rate",
    "packets_rate",
    "syn_flag_counts",
    "ack_flag_counts",
    "rst_flag_counts",
    "fin_flag_counts",
]
# Feature names for the CICIDS-style model (note the different
# capitalisation/spacing versus the BCC names above).
CICIDS_FEATURES = [
    "Protocol", "Dst Port", "Flow Duration", "Tot Fwd Pkts", "Tot Bwd Pkts",
    "TotLen Fwd Pkts", "TotLen Bwd Pkts", "Fwd Pkt Len Mean", "Bwd Pkt Len Mean",
    "Flow IAT Mean", "Fwd PSH Flags", "Fwd URG Flags", "Fwd IAT Mean"
]
def allowed(filename):
    """Return True when *filename* has an extension listed in ALLOWED_EXT."""
    pieces = filename.rsplit(".", 1)
    return len(pieces) == 2 and pieces[1].lower() in ALLOWED_EXT
# --- ROUTE: DOWNLOAD SAMPLE ---
@offline_bp.route("/sample/<model_type>", methods=["GET"])
def download_sample(model_type):
    """Serve the bundled sample CSV for the requested model type.

    Returns 400 for an unsafe name, 404 when no sample file exists.
    """
    # secure_filename strips path separators and ".." so a crafted
    # model_type cannot escape SAMPLE_DIR (path-traversal hardening).
    safe_name = secure_filename(f"{model_type}_sample.csv")
    if not safe_name:
        return jsonify(success=False, message="Invalid model type"), 400
    file_path = os.path.join(SAMPLE_DIR, safe_name)
    if not os.path.exists(file_path):
        return jsonify(success=False, message="Sample file missing"), 404
    return send_file(file_path, as_attachment=True)
# --- ROUTE: URL LIVE PROBE ---
@offline_bp.route("/analyze-url", methods=["POST"])
def analyze_url():
    """Probe a URL with a timed HTTP GET and classify the synthetic flow.

    Builds one BCC-format feature row from response timing/size metadata,
    runs it through the "bcc" model pipeline and returns the label.
    Expects JSON body {"url": "..."}; returns 400 on missing URL, 500 on
    any probe/prediction failure.
    """
    target_url = request.json.get("url")
    if not target_url:
        return jsonify(success=False, message="No URL provided"), 400
    # Only prepend a scheme when one is genuinely absent. The old
    # startswith("http") check wrongly accepted hosts like "httpserver.local".
    if not target_url.startswith(("http://", "https://")):
        target_url = "https://" + target_url
    # 1. "Synthetic capture": time the full request/response exchange.
    start_ts = time.time()
    try:
        # Real-looking User-Agent to avoid trivial bot blocking.
        headers_ua = {'User-Agent': 'Mozilla/5.0 (NIDS-Intelligence-Probe/1.0)'}
        # Context manager guarantees the streamed connection is released.
        with requests.get(target_url, timeout=10, headers=headers_ua, stream=True) as response:
            # Force the body download INSIDE the timing window: with
            # stream=True, accessing .content after end_ts would transfer
            # the body outside the measured duration.
            body = response.content
            end_ts = time.time()
            header_bytes = len(str(response.headers))
        # 2. Extract metadata for the synthetic features.
        duration = end_ts - start_ts
        payload_bytes = len(body)
        # 3. Map measurements onto the BCC feature schema. Packet counts are
        # synthetic estimates (~10 packets for a small TCP exchange).
        synthetic_row = {
            "protocol": 6,  # TCP
            "src_port": 443,
            "dst_port": 443,
            "duration": duration,
            "packets_count": 10,
            "fwd_packets_count": 5,
            "bwd_packets_count": 5,
            "total_payload_bytes": payload_bytes,
            "total_header_bytes": header_bytes,
            "bytes_rate": payload_bytes / duration if duration > 0 else 0,
            "packets_rate": 10 / duration if duration > 0 else 0,
            "syn_flag_counts": 1,
            "ack_flag_counts": 1,
            "rst_flag_counts": 0,
            "fin_flag_counts": 1
        }
        # 4. Single-row DataFrame through the same scaler/encoder/model
        # pipeline used by offline_predict.
        df_url = pd.DataFrame([synthetic_row])
        model_data = load_model("bcc")
        scaler = model_data.get('scaler')
        encoder = model_data.get('encoder')
        model = model_data['model']
        numeric_input = df_url[BCC_FEATURES].apply(pd.to_numeric).fillna(0)
        scaled_data = scaler.transform(numeric_input.values)
        preds = model.predict(scaled_data)
        label = encoder.inverse_transform(preds)[0]
        return jsonify({
            "success": True,
            "prediction": str(label),
            "details": synthetic_row,
            "url": target_url
        })
    except Exception as e:
        return jsonify(success=False, message=f"URL Probe Failed: {str(e)}"), 500
# --- ROUTE: PREDICT ---
@offline_bp.route("/predict", methods=["POST"])
def offline_predict():
    """Classify every flow in an uploaded CSV/PCAP with the chosen model.

    Form fields: "file" (csv or pcap upload) and "model" (default "bcc").
    Returns per-class counts and per-row labels as JSON, and writes
    uploads/last_results.csv for the /report route. 400 on bad input,
    500 on model/prediction failure.
    """
    if "file" not in request.files:
        return jsonify(success=False, message="No file uploaded"), 400
    file = request.files["file"]
    model_type = request.form.get("model", "bcc")
    if not allowed(file.filename):
        return jsonify(success=False, message="Unsupported file type"), 400
    filename = secure_filename(file.filename)
    saved_path = os.path.join(UPLOAD_DIR, filename)
    file.save(saved_path)
    # Track every temp file we create so cleanup removes all of them
    # (the upload itself plus any pcap->csv conversion output).
    temp_paths = [saved_path]

    @after_this_request
    def cleanup(response):
        # Best-effort removal after the response is sent; never fail the
        # request because of a cleanup problem.
        for path in temp_paths:
            try:
                if os.path.exists(path):
                    os.remove(path)
            except Exception as e:
                print(f"Cleanup Error: {e}")
        return response

    # 1. Load Data. PCAP uploads must be converted first — the old code fed
    # raw pcap bytes to pd.read_csv, which always failed with a parse error.
    try:
        data_path = saved_path
        if filename.lower().endswith(".pcap"):
            # NOTE(review): assumes convert_pcap_to_csv(path) returns the
            # generated CSV's path — confirm against utils/pcap_to_csv.
            data_path = convert_pcap_to_csv(saved_path)
            temp_paths.append(data_path)
        df = pd.read_csv(data_path)
        if df.empty:
            return jsonify(success=False, message="CSV has no data!"), 400
    except Exception as e:
        return jsonify(success=False, message=f"Error reading CSV: {str(e)}"), 400

    # 2. Flexible feature mapping: translate common CSV header spellings to
    # the exact fit-time names the model expects.
    mapping = {
        'Protocol': 'protocol', 'proto': 'protocol',
        'Source Port': 'src_port',
        'Destination Port': 'dst_port',
        'Flow Duration': 'duration', 'flow_duration': 'duration',
        'Total Fwd Packets': 'fwd_packets_count', 'total_fwd_pkts': 'fwd_packets_count',
        'Total Bwd Packets': 'bwd_packets_count', 'total_bwd_pkts': 'bwd_packets_count',
        'Total Length of Fwd Packets': 'total_payload_bytes', 'payload_len': 'total_payload_bytes',
        'fwd_header_len': 'total_header_bytes', 'header_len': 'total_header_bytes',
        'Flow Bytes/s': 'bytes_rate', 'rate': 'bytes_rate',
        'Flow Pkts/s': 'packets_rate',
        'syn': 'syn_flag_counts', 'ack': 'ack_flag_counts',
        'rst': 'rst_flag_counts', 'fin': 'fin_flag_counts'
    }
    df = df.rename(columns=mapping)
    # Derive total packet count when only directional counts are present.
    if 'packets_count' not in df.columns and 'fwd_packets_count' in df.columns:
        df['packets_count'] = df['fwd_packets_count'] + df.get('bwd_packets_count', 0)

    # --- FLAG EXTRACTION ---
    # If per-flag count columns are absent, derive them from a combined
    # string 'flags' column; otherwise default to 0.
    flag_map = {
        'syn_flag_counts': 'syn',
        'ack_flag_counts': 'ack',
        'rst_flag_counts': 'rst',
        'fin_flag_counts': 'fin'
    }
    for model_name, csv_name in flag_map.items():
        if model_name not in df.columns:
            if 'flags' in df.columns and df['flags'].dtype == object:
                df[model_name] = df['flags'].str.lower().str.contains(csv_name).astype(int)
            else:
                # Numeric or missing flag data: fall back to zero counts.
                df[model_name] = 0

    # 3. Model loading & feature alignment.
    try:
        model_data = load_model(model_type)
        if not model_data or model_data.get('model') is None:
            return jsonify(success=False, message="Model failed to load. Check Hub connection."), 500
        model = model_data['model']
        expected = BCC_FEATURES if model_type == "bcc" else CICIDS_FEATURES
        # Safety padding: zero-fill missing features so column selection
        # below never raises on partial CSVs.
        for col in expected:
            if col not in df.columns:
                df[col] = 0
    except Exception as e:
        return jsonify(success=False, message=f"Model Initialization Error: {str(e)}"), 500

    # 4. Prediction.
    try:
        # Map textual protocol names to IANA numbers before numeric coercion.
        if 'protocol' in df.columns:
            proto_map = {'TCP': 6, 'UDP': 17, 'ICMP': 1, 'tcp': 6, 'udp': 17, 'icmp': 1}
            df['protocol'] = df['protocol'].apply(lambda x: proto_map.get(x, x) if isinstance(x, str) else x)
        # Reorder columns into training order.
        input_data = df[expected]
        # Scaler/encoder are optional so that EVERY model type predicts.
        # (Previously only "bcc" reached model.predict; any other model_type
        # crashed with "name 'labels' is not defined".)
        scaler = model_data.get('scaler')
        encoder = model_data.get('encoder')
        numeric_input = input_data.apply(pd.to_numeric, errors='coerce').fillna(0)
        matrix = scaler.transform(numeric_input.values) if scaler is not None else numeric_input.values
        preds = model.predict(matrix)
        labels = encoder.inverse_transform(preds) if encoder is not None else preds

        # 5. Result formatting for the React frontend.
        df["prediction"] = labels
        class_counts = df["prediction"].value_counts().to_dict()
        # Stringify labels for JSON serializability.
        results = [{"index": i, "class": str(lbl)} for i, lbl in enumerate(labels)]
        # Persist results for the PDF report generator.
        df.to_csv(os.path.join(UPLOAD_DIR, "last_results.csv"), index=False)
        return jsonify({
            "success": True,
            "classCounts": class_counts,
            "results": results,
            "total_processed": len(df)
        })
    except Exception as e:
        import traceback
        print(traceback.format_exc())
        return jsonify(success=False, message=f"Prediction Engine Failure: {str(e)}"), 500
# --- ROUTE: PDF REPORT (MEMORY SAFE) ---
@offline_bp.route("/report", methods=["GET"])
def offline_report():
    """Render the most recent prediction results as an in-memory PDF.

    Requires /predict to have run first (it writes last_results.csv);
    otherwise responds 400.
    """
    results_path = os.path.join(UPLOAD_DIR, "last_results.csv")
    if not os.path.exists(results_path):
        return jsonify(success=False, message="Run prediction first"), 400

    # Per-class occurrence counts from the persisted prediction column.
    summary = pd.read_csv(results_path)["prediction"].value_counts().to_dict()

    # Assemble the document entirely in memory — no temp file is written.
    report = FPDF()
    report.add_page()
    report.set_font("Arial", "B", 16)
    report.cell(0, 10, "AI-NIDS Offline Threat Analysis Report", ln=True, align='C')
    report.ln(10)
    report.set_font("Arial", size=12)
    report.cell(0, 10, f"Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", ln=True)
    report.ln(5)
    report.set_font("Arial", "B", 12)
    report.cell(0, 10, "Classification Summary:", ln=True)
    report.set_font("Arial", size=12)
    for label, occurrences in summary.items():
        report.cell(0, 8, f"- {label}: {occurrences} occurrences", ln=True)

    # dest='S' yields a latin-1 string; encode it for the HTTP body.
    pdf_response = make_response(report.output(dest='S').encode('latin-1'))
    pdf_response.headers.set('Content-Disposition', 'attachment', filename='offline_report.pdf')
    pdf_response.headers.set('Content-Type', 'application/pdf')
    return pdf_response
|