# ai-nids-backend/routes/offline_detection.py
import os
import time
from datetime import datetime

import pandas as pd
import requests
from flask import Blueprint, request, jsonify, send_file, make_response, after_this_request
from fpdf import FPDF
from werkzeug.utils import secure_filename
# --- IMPORT UTILS ---
from utils.pcap_to_csv import convert_pcap_to_csv
from utils.model_selector import load_model
offline_bp = Blueprint("offline_bp", __name__)
# --- CONFIGURATION ---
UPLOAD_DIR = "uploads"
SAMPLE_DIR = "sample"
os.makedirs(UPLOAD_DIR, exist_ok=True)
os.makedirs(SAMPLE_DIR, exist_ok=True)
ALLOWED_EXT = {"csv", "pcap"}
# --- FEATURE DEFINITIONS (must match the models' fit-time feature names) ---
BCC_FEATURES = [
"protocol",
"src_port",
"dst_port",
"duration",
"packets_count",
"fwd_packets_count",
"bwd_packets_count",
"total_payload_bytes",
"total_header_bytes",
"bytes_rate",
"packets_rate",
"syn_flag_counts",
"ack_flag_counts",
"rst_flag_counts",
"fin_flag_counts",
]
CICIDS_FEATURES = [
"Protocol", "Dst Port", "Flow Duration", "Tot Fwd Pkts", "Tot Bwd Pkts",
"TotLen Fwd Pkts", "TotLen Bwd Pkts", "Fwd Pkt Len Mean", "Bwd Pkt Len Mean",
"Flow IAT Mean", "Fwd PSH Flags", "Fwd URG Flags", "Fwd IAT Mean"
]
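# --- OPTIONAL SHARED INFERENCE HELPER (sketch) ---
# Both /analyze-url and /predict run the same scale -> predict -> decode steps
# for the BCC model. This is a minimal sketch of a shared helper, assuming
# load_model("bcc") returns a dict with "model", "scaler" and "encoder" keys
# (as both routes below already rely on).
def run_bcc_inference(df):
    """Scale a DataFrame of BCC features and return decoded prediction labels."""
    model_data = load_model("bcc")
    model = model_data['model']
    scaler = model_data.get('scaler')
    encoder = model_data.get('encoder')
    # Coerce everything to numeric, padding unparseable values with 0
    numeric_input = df[BCC_FEATURES].apply(pd.to_numeric, errors='coerce').fillna(0)
    scaled = scaler.transform(numeric_input.values) if scaler is not None else numeric_input.values
    preds = model.predict(scaled)
    return encoder.inverse_transform(preds) if encoder is not None else preds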
def allowed(filename):
return "." in filename and filename.rsplit(".", 1)[1].lower() in ALLOWED_EXT
# --- ROUTE: DOWNLOAD SAMPLE ---
@offline_bp.route("/sample/<model_type>", methods=["GET"])
def download_sample(model_type):
file_path = os.path.join(SAMPLE_DIR, f"{model_type}_sample.csv")
if not os.path.exists(file_path):
return jsonify(success=False, message="Sample file missing"), 404
return send_file(file_path, as_attachment=True)
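# Example client call (a sketch; assumes the blueprint is registered at the
# app root and served on localhost:5000 -- adjust the prefix to your setup):
#   import requests
#   r = requests.get("http://localhost:5000/sample/bcc")
#   open("bcc_sample.csv", "wb").write(r.content)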
# --- ROUTE: URL LIVE PROBE ---
@offline_bp.route("/analyze-url", methods=["POST"])
def analyze_url():
target_url = request.json.get("url")
if not target_url:
return jsonify(success=False, message="No URL provided"), 400
    # Default bare hostnames to HTTPS
    if not target_url.startswith("http"):
        target_url = "https://" + target_url
    # 1. "Synthetic capture": time the full request/response round trip
    start_ts = time.time()
    try:
        # Send an identifiable User-Agent so the probe is not silently blocked
        headers_ua = {'User-Agent': 'Mozilla/5.0 (NIDS-Intelligence-Probe/1.0)'}
        response = requests.get(target_url, timeout=10, headers=headers_ua)
        end_ts = time.time()
        # 2. Extract metadata for the synthetic features
        duration = end_ts - start_ts
        # Response body size and an approximate header size
        payload_bytes = len(response.content)
        header_bytes = len(str(response.headers))
        # 3. Map to the BCC model's features. Packet counts are rough
        # estimates: a small HTTPS exchange is typically 8-10 packets
        # including the TCP handshake.
        synthetic_row = {
            "protocol": 6,    # TCP
            "src_port": 443,  # synthetic; a real client port would be ephemeral
            "dst_port": 443,
"duration": duration,
"packets_count": 10,
"fwd_packets_count": 5,
"bwd_packets_count": 5,
"total_payload_bytes": payload_bytes,
"total_header_bytes": header_bytes,
"bytes_rate": payload_bytes / duration if duration > 0 else 0,
"packets_rate": 10 / duration if duration > 0 else 0,
"syn_flag_counts": 1,
"ack_flag_counts": 1,
"rst_flag_counts": 0,
"fin_flag_counts": 1
}
# 4. Convert to DataFrame for Prediction
df_url = pd.DataFrame([synthetic_row])
        # --- Reuse the shared prediction logic ---
        # The run_bcc_inference helper sketched above could replace the inline
        # steps below, which mirror the BCC branch of /predict.
        model_data = load_model("bcc")
        scaler = model_data.get('scaler')
        encoder = model_data.get('encoder')
        model = model_data['model']
        # Scale and predict
        numeric_input = df_url[BCC_FEATURES].apply(pd.to_numeric, errors='coerce').fillna(0)
scaled_data = scaler.transform(numeric_input.values)
preds = model.predict(scaled_data)
label = encoder.inverse_transform(preds)[0]
return jsonify({
"success": True,
"prediction": str(label),
"details": synthetic_row,
"url": target_url
})
except Exception as e:
return jsonify(success=False, message=f"URL Probe Failed: {str(e)}"), 500
# --- ROUTE: PREDICT ---
@offline_bp.route("/predict", methods=["POST"])
def offline_predict():
if "file" not in request.files:
return jsonify(success=False, message="No file uploaded"), 400
file = request.files["file"]
model_type = request.form.get("model", "bcc")
if not allowed(file.filename):
return jsonify(success=False, message="Unsupported file type"), 400
filename = secure_filename(file.filename)
saved_path = os.path.join(UPLOAD_DIR, filename)
file.save(saved_path)
    # Remove the uploaded file once the response has been sent
@after_this_request
def cleanup(response):
try:
if os.path.exists(saved_path):
os.remove(saved_path)
except Exception as e:
print(f"Cleanup Error: {e}")
return response
    # 1. Load data (converting PCAP uploads to CSV first)
    try:
        csv_path = saved_path
        if filename.lower().endswith(".pcap"):
            # Assumption: convert_pcap_to_csv(path) writes a CSV and returns
            # its path; adjust if utils.pcap_to_csv uses a different signature.
            csv_path = convert_pcap_to_csv(saved_path)
        df = pd.read_csv(csv_path)
        if csv_path != saved_path and os.path.exists(csv_path):
            os.remove(csv_path)  # the converted CSV is no longer needed
        if df.empty:
            return jsonify(success=False, message="CSV has no data!"), 400
    except Exception as e:
        return jsonify(success=False, message=f"Error reading file: {str(e)}"), 400
    # 2. Flexible feature mapping: rename common CSV headers to the exact
    # feature names the model saw at fit time
mapping = {
'Protocol': 'protocol', 'proto': 'protocol',
'Source Port': 'src_port',
'Destination Port': 'dst_port',
'Flow Duration': 'duration', 'flow_duration': 'duration',
'Total Fwd Packets': 'fwd_packets_count', 'total_fwd_pkts': 'fwd_packets_count',
'Total Bwd Packets': 'bwd_packets_count', 'total_bwd_pkts': 'bwd_packets_count',
'Total Length of Fwd Packets': 'total_payload_bytes', 'payload_len': 'total_payload_bytes',
'fwd_header_len': 'total_header_bytes', 'header_len': 'total_header_bytes',
'Flow Bytes/s': 'bytes_rate', 'rate': 'bytes_rate',
'Flow Pkts/s': 'packets_rate',
'syn': 'syn_flag_counts', 'ack': 'ack_flag_counts',
'rst': 'rst_flag_counts', 'fin': 'fin_flag_counts'
}
df = df.rename(columns=mapping)
# Calculate packets_count if missing
if 'packets_count' not in df.columns and 'fwd_packets_count' in df.columns:
df['packets_count'] = df['fwd_packets_count'] + df.get('bwd_packets_count', 0)
# --- FLAG EXTRACTION LOGIC ---
flag_map = {
'syn_flag_counts': 'syn',
'ack_flag_counts': 'ack',
'rst_flag_counts': 'rst',
'fin_flag_counts': 'fin'
}
    for model_name, csv_name in flag_map.items():
        if model_name not in df.columns:
            if 'flags' in df.columns and df['flags'].dtype == object:
                # Parse string flag fields (e.g. "SYN,ACK" sets both
                # syn_flag_counts and ack_flag_counts to 1); na=False keeps
                # missing values from breaking the int cast
                df[model_name] = df['flags'].str.lower().str.contains(csv_name, na=False).astype(int)
            else:
                # Fallback for numeric or missing flag data
                df[model_name] = 0
# 3. Model Loading & Feature Alignment
try:
model_data = load_model(model_type)
if not model_data or model_data.get('model') is None:
return jsonify(success=False, message="Model failed to load. Check Hub connection."), 500
model = model_data['model']
expected = BCC_FEATURES if model_type == "bcc" else CICIDS_FEATURES
        # Safety padding: fill any missing expected feature with 0 so the
        # column selection below cannot raise a KeyError
for col in expected:
if col not in df.columns:
df[col] = 0
except Exception as e:
return jsonify(success=False, message=f"Model Initialization Error: {str(e)}"), 500
    # 4. Prediction logic
    try:
        # Map protocol names (e.g. "TCP") to IANA numbers before numeric coercion
        proto_map = {'tcp': 6, 'udp': 17, 'icmp': 1}
        for proto_col in ('protocol', 'Protocol'):
            if proto_col in df.columns:
                df[proto_col] = df[proto_col].apply(
                    lambda x: proto_map.get(x.lower(), x) if isinstance(x, str) else x
                )
        # Reorder columns to the model's expected layout and force numeric dtypes
        input_data = df[expected]
        numeric_input = input_data.apply(pd.to_numeric, errors='coerce').fillna(0)
        if model_type == "bcc":
            scaler = model_data.get('scaler')
            encoder = model_data.get('encoder')
            # Scale, predict, then decode integer predictions back to labels
            scaled_data = scaler.transform(numeric_input.values)
            preds = model.predict(scaled_data)
            labels = encoder.inverse_transform(preds)
        else:
            # Non-BCC path: assumes the CICIDS model was trained on raw,
            # unscaled features and predicts labels directly; adjust if it
            # also ships a scaler/encoder.
            labels = model.predict(numeric_input.values)
# 5. Result Formatting for React Frontend
df["prediction"] = labels
class_counts = df["prediction"].value_counts().to_dict()
# Convert all labels to strings for JSON serializability
results = [{"index": i, "class": str(lbl)} for i, lbl in enumerate(labels)]
# Save results for the PDF report generator
df.to_csv(os.path.join(UPLOAD_DIR, "last_results.csv"), index=False)
return jsonify({
"success": True,
"classCounts": class_counts,
"results": results,
"total_processed": len(df)
})
except Exception as e:
import traceback
print(traceback.format_exc())
return jsonify(success=False, message=f"Prediction Engine Failure: {str(e)}"), 500
# --- ROUTE: PDF REPORT (GENERATED IN MEMORY) ---
@offline_bp.route("/report", methods=["GET"])
def offline_report():
result_file = os.path.join(UPLOAD_DIR, "last_results.csv")
if not os.path.exists(result_file):
return jsonify(success=False, message="Run prediction first"), 400
df = pd.read_csv(result_file)
class_counts = df["prediction"].value_counts().to_dict()
# Generate PDF in memory
pdf = FPDF()
pdf.add_page()
pdf.set_font("Arial", "B", 16)
pdf.cell(0, 10, "AI-NIDS Offline Threat Analysis Report", ln=True, align='C')
pdf.ln(10)
pdf.set_font("Arial", size=12)
pdf.cell(0, 10, f"Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", ln=True)
pdf.ln(5)
pdf.set_font("Arial", "B", 12)
pdf.cell(0, 10, "Classification Summary:", ln=True)
pdf.set_font("Arial", size=12)
for cls, count in class_counts.items():
pdf.cell(0, 8, f"- {cls}: {count} occurrences", ln=True)
    # Convert to bytes for the response (no file written to disk).
    # PyFPDF 1.x returns a str from output(dest='S'); fpdf2 returns a
    # bytearray, so handle both.
    pdf_bytes = pdf.output(dest='S')
    if isinstance(pdf_bytes, str):
        pdf_bytes = pdf_bytes.encode('latin-1')
    response = make_response(bytes(pdf_bytes))
response.headers.set('Content-Disposition', 'attachment', filename='offline_report.pdf')
response.headers.set('Content-Type', 'application/pdf')
return response
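# Example client call (same prefix assumptions as above):
#   r = requests.get("http://localhost:5000/report")
#   open("offline_report.pdf", "wb").write(r.content)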