# Offline-analysis blueprint: file-upload prediction, live URL probing,
# sample download and in-memory PDF reporting for the AI-NIDS Flask app.
| import os | |
| import pandas as pd | |
| import joblib | |
| from flask import Blueprint, request, jsonify, send_file, make_response, after_this_request | |
| from werkzeug.utils import secure_filename | |
| from datetime import datetime | |
| from fpdf import FPDF | |
| from io import BytesIO | |
| import time | |
| import requests | |
| # --- IMPORT UTILS --- | |
| from utils.pcap_to_csv import convert_pcap_to_csv | |
| from utils.model_selector import load_model | |
# Blueprint registered by the app factory; hosts all offline-analysis routes.
offline_bp = Blueprint("offline_bp", __name__)

# --- CONFIGURATION ---
UPLOAD_DIR = "uploads"   # incoming user files + cached prediction results
SAMPLE_DIR = "sample"    # bundled per-model sample CSVs

# Create both directories eagerly so route handlers can assume they exist.
for _required_dir in (UPLOAD_DIR, SAMPLE_DIR):
    os.makedirs(_required_dir, exist_ok=True)

# File extensions accepted by the upload endpoint.
ALLOWED_EXT = {"csv", "pcap"}
| # --- FEATURE DEFINITIONS (As per your Model Logs) --- | |
| # --- UPDATED FEATURE DEFINITIONS --- | |
# Feature columns, in the exact order each model was fitted on.
# BCC model: flow-level features produced by the pcap/CSV pipeline.
BCC_FEATURES = [
    "protocol", "src_port", "dst_port", "duration",
    "packets_count", "fwd_packets_count", "bwd_packets_count",
    "total_payload_bytes", "total_header_bytes",
    "bytes_rate", "packets_rate",
    "syn_flag_counts", "ack_flag_counts", "rst_flag_counts", "fin_flag_counts",
]

# CICIDS model: column names follow the CICIDS dataset naming convention
# (note the capitalised, space-separated headers).
CICIDS_FEATURES = [
    "Protocol", "Dst Port", "Flow Duration", "Tot Fwd Pkts", "Tot Bwd Pkts",
    "TotLen Fwd Pkts", "TotLen Bwd Pkts", "Fwd Pkt Len Mean", "Bwd Pkt Len Mean",
    "Flow IAT Mean", "Fwd PSH Flags", "Fwd URG Flags", "Fwd IAT Mean",
]
def allowed(filename):
    """Return True if *filename* carries an extension listed in ALLOWED_EXT."""
    if "." not in filename:
        return False
    extension = filename.rsplit(".", 1)[1].lower()
    return extension in ALLOWED_EXT
| # --- ROUTE: DOWNLOAD SAMPLE --- | |
def download_sample(model_type):
    """Serve the bundled sample CSV for *model_type* as an attachment.

    Returns a 404 JSON payload when no sample exists for the requested model.
    """
    # SECURITY: model_type comes from the client. secure_filename() strips
    # path separators so a crafted value (e.g. "../../x") cannot escape
    # SAMPLE_DIR via path traversal. Legitimate names ("bcc", "cicids")
    # pass through unchanged.
    safe_name = secure_filename(f"{model_type}_sample.csv")
    file_path = os.path.join(SAMPLE_DIR, safe_name)
    if not os.path.exists(file_path):
        return jsonify(success=False, message="Sample file missing"), 404
    return send_file(file_path, as_attachment=True)
| # --- ROUTE: URL LIVE PROBE --- | |
def analyze_url():
    """Live-probe a user-supplied URL and classify it with the BCC model.

    Expects JSON body {"url": "<target>"}. Times an HTTP(S) GET to the
    target, maps the observed timing/size metadata onto a synthetic BCC
    feature row, and returns the model's prediction plus that row.
    """
    target_url = request.json.get("url")
    if not target_url:
        return jsonify(success=False, message="No URL provided"), 400
    # Ensure URL is properly formatted (default scheme: HTTPS).
    if not target_url.startswith("http"):
        target_url = "https://" + target_url
    # 1. Start "synthetic capture": time the full request/response exchange.
    start_ts = time.time()
    try:
        # Use a real user-agent to avoid being blocked by the site.
        headers_ua = {'User-Agent': 'Mozilla/5.0 (NIDS-Intelligence-Probe/1.0)'}
        response = requests.get(target_url, timeout=10, headers=headers_ua, stream=True)
        # BUGFIX: with stream=True the body is not downloaded until
        # response.content is accessed. Read it BEFORE taking the end
        # timestamp; previously the duration excluded the body transfer
        # while payload_bytes included it, inflating bytes_rate/packets_rate.
        payload_bytes = len(response.content)
        header_bytes = len(str(response.headers))
        response.close()  # release the streamed connection back to the pool
        end_ts = time.time()
        # 2. Extract metadata for the synthetic features.
        duration = end_ts - start_ts
        # 3. Map onto the model's feature schema (BCC format). Packet counts
        # are simulated: a small HTTPS exchange is roughly 8-10 packets.
        synthetic_row = {
            "protocol": 6,  # TCP
            "src_port": 443,
            "dst_port": 443,
            "duration": duration,
            "packets_count": 10,
            "fwd_packets_count": 5,
            "bwd_packets_count": 5,
            "total_payload_bytes": payload_bytes,
            "total_header_bytes": header_bytes,
            "bytes_rate": payload_bytes / duration if duration > 0 else 0,
            "packets_rate": 10 / duration if duration > 0 else 0,
            "syn_flag_counts": 1,
            "ack_flag_counts": 1,
            "rst_flag_counts": 0,
            "fin_flag_counts": 1
        }
        # 4. Single-row frame through the same scaler/encoder/model pipeline
        # used by the offline CSV route.
        df_url = pd.DataFrame([synthetic_row])
        model_data = load_model("bcc")
        scaler = model_data.get('scaler')
        encoder = model_data.get('encoder')
        model = model_data['model']
        # Scale and predict.
        numeric_input = df_url[BCC_FEATURES].apply(pd.to_numeric).fillna(0)
        scaled_data = scaler.transform(numeric_input.values)
        preds = model.predict(scaled_data)
        label = encoder.inverse_transform(preds)[0]
        return jsonify({
            "success": True,
            "prediction": str(label),
            "details": synthetic_row,
            "url": target_url
        })
    except Exception as e:
        return jsonify(success=False, message=f"URL Probe Failed: {str(e)}"), 500
| # --- ROUTE: PREDICT --- | |
def offline_predict():
    """Classify an uploaded CSV/PCAP with the selected model.

    Form fields: "file" (the capture/flow file) and "model" (default "bcc").
    Returns JSON with per-row predictions and class counts; the labelled
    frame is cached to uploads/last_results.csv for the PDF report route.
    """
    if "file" not in request.files:
        return jsonify(success=False, message="No file uploaded"), 400
    file = request.files["file"]
    model_type = request.form.get("model", "bcc")
    if not allowed(file.filename):
        return jsonify(success=False, message="Unsupported file type"), 400
    filename = secure_filename(file.filename)
    saved_path = os.path.join(UPLOAD_DIR, filename)
    file.save(saved_path)

    # Cleanup logic to keep the server clean.
    # BUGFIX: register the hook with @after_this_request (imported at the top
    # of this module for exactly this purpose). Previously the function was
    # only defined, never registered, so uploads accumulated on disk forever.
    @after_this_request
    def cleanup(response):
        try:
            if os.path.exists(saved_path):
                os.remove(saved_path)
        except Exception as e:
            print(f"Cleanup Error: {e}")
        return response

    # 1. Load data.
    try:
        # NOTE(review): convert_pcap_to_csv is imported but unused — .pcap
        # uploads are currently parsed as CSV and will fail here. TODO: route
        # *.pcap files through the converter once its signature is confirmed.
        df = pd.read_csv(saved_path)
        if df.empty:
            return jsonify(success=False, message="CSV has no data!"), 400
    except Exception as e:
        return jsonify(success=False, message=f"Error reading CSV: {str(e)}"), 400

    # 2. Flexible feature mapping: translate common CSV headers to the exact
    # column names the model was fitted on.
    mapping = {
        'Protocol': 'protocol', 'proto': 'protocol',
        'Source Port': 'src_port',
        'Destination Port': 'dst_port',
        'Flow Duration': 'duration', 'flow_duration': 'duration',
        'Total Fwd Packets': 'fwd_packets_count', 'total_fwd_pkts': 'fwd_packets_count',
        'Total Bwd Packets': 'bwd_packets_count', 'total_bwd_pkts': 'bwd_packets_count',
        'Total Length of Fwd Packets': 'total_payload_bytes', 'payload_len': 'total_payload_bytes',
        'fwd_header_len': 'total_header_bytes', 'header_len': 'total_header_bytes',
        'Flow Bytes/s': 'bytes_rate', 'rate': 'bytes_rate',
        'Flow Pkts/s': 'packets_rate',
        'syn': 'syn_flag_counts', 'ack': 'ack_flag_counts',
        'rst': 'rst_flag_counts', 'fin': 'fin_flag_counts'
    }
    df = df.rename(columns=mapping)

    # Derive packets_count from the directional counts when absent.
    if 'packets_count' not in df.columns and 'fwd_packets_count' in df.columns:
        df['packets_count'] = df['fwd_packets_count'] + df.get('bwd_packets_count', 0)

    # --- FLAG EXTRACTION LOGIC ---
    # If per-flag count columns are missing, derive them from a textual
    # 'flags' column (substring match); otherwise fall back to zero.
    flag_map = {
        'syn_flag_counts': 'syn',
        'ack_flag_counts': 'ack',
        'rst_flag_counts': 'rst',
        'fin_flag_counts': 'fin'
    }
    for model_name, csv_name in flag_map.items():
        if model_name not in df.columns:
            if 'flags' in df.columns:
                # Handle string flags safely.
                if df['flags'].dtype == object:
                    df[model_name] = df['flags'].str.lower().str.contains(csv_name).astype(int)
                else:
                    # Fallback for numeric or missing flag data.
                    df[model_name] = 0
            else:
                df[model_name] = 0

    # 3. Model loading & feature alignment.
    try:
        model_data = load_model(model_type)
        if not model_data or model_data.get('model') is None:
            return jsonify(success=False, message="Model failed to load. Check Hub connection."), 500
        model = model_data['model']
        expected = BCC_FEATURES if model_type == "bcc" else CICIDS_FEATURES
        # Safety padding: fill missing features with 0 so column selection
        # below never raises on an incomplete CSV.
        for col in expected:
            if col not in df.columns:
                df[col] = 0
    except Exception as e:
        return jsonify(success=False, message=f"Model Initialization Error: {str(e)}"), 500

    # 4. Prediction.
    try:
        # Map textual protocol names to IANA numbers before numeric coercion.
        proto_map = {'TCP': 6, 'UDP': 17, 'ICMP': 1, 'tcp': 6, 'udp': 17, 'icmp': 1}
        # BUGFIX: guard the access — non-BCC schemas may not have a
        # 'protocol' column at all (padding above adds 'Protocol' instead),
        # which previously raised KeyError and surfaced as a 500.
        if 'protocol' in df.columns:
            df['protocol'] = df['protocol'].apply(
                lambda x: proto_map.get(x, x) if isinstance(x, str) else x)

        # Reorder columns to the model's fit-time order.
        input_data = df[expected]
        if model_type == "bcc":
            scaler = model_data.get('scaler')
            encoder = model_data.get('encoder')
            # Ensure all columns are numeric before scaling.
            numeric_input = input_data.apply(pd.to_numeric, errors='coerce').fillna(0)
            scaled_data = scaler.transform(numeric_input.values)
            preds = model.predict(scaled_data)
            labels = encoder.inverse_transform(preds)
        else:
            # BUGFIX: this branch previously fell through with no return,
            # making Flask raise "view function did not return a valid
            # response". Surface an explicit, actionable error instead.
            return jsonify(
                success=False,
                message=f"Prediction pipeline for model '{model_type}' is not implemented yet."
            ), 501

        # 5. Result formatting for the React frontend.
        df["prediction"] = labels
        class_counts = df["prediction"].value_counts().to_dict()
        # Convert all labels to strings for JSON serializability.
        results = [{"index": i, "class": str(lbl)} for i, lbl in enumerate(labels)]
        # Save results for the PDF report generator.
        df.to_csv(os.path.join(UPLOAD_DIR, "last_results.csv"), index=False)
        return jsonify({
            "success": True,
            "classCounts": class_counts,
            "results": results,
            "total_processed": len(df)
        })
    except Exception as e:
        import traceback
        print(traceback.format_exc())
        return jsonify(success=False, message=f"Prediction Engine Failure: {str(e)}"), 500
| # --- ROUTE: PDF REPORT (MEMORY SAFE) --- | |
def offline_report():
    """Build and return a PDF summary of the most recent prediction run.

    Reads uploads/last_results.csv (written by offline_predict) and responds
    400 if no prediction has been run yet. The PDF is assembled entirely in
    memory -- nothing extra is written to disk.
    """
    result_file = os.path.join(UPLOAD_DIR, "last_results.csv")
    if not os.path.exists(result_file):
        return jsonify(success=False, message="Run prediction first"), 400

    summary = pd.read_csv(result_file)["prediction"].value_counts().to_dict()

    report = FPDF()
    report.add_page()

    # Title block.
    report.set_font("Arial", "B", 16)
    report.cell(0, 10, "AI-NIDS Offline Threat Analysis Report", ln=True, align='C')
    report.ln(10)

    # Generation timestamp.
    report.set_font("Arial", size=12)
    report.cell(0, 10, f"Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", ln=True)
    report.ln(5)

    # Per-class occurrence counts.
    report.set_font("Arial", "B", 12)
    report.cell(0, 10, "Classification Summary:", ln=True)
    report.set_font("Arial", size=12)
    for label, hits in summary.items():
        report.cell(0, 8, f"- {label}: {hits} occurrences", ln=True)

    # Serialize straight to a download response (no temp file on disk).
    pdf_bytes = report.output(dest='S').encode('latin-1')
    response = make_response(pdf_bytes)
    response.headers.set('Content-Disposition', 'attachment', filename='offline_report.pdf')
    response.headers.set('Content-Type', 'application/pdf')
    return response