# Offline-analysis blueprint: file-upload prediction, live URL probing,
# sample download and in-memory PDF reporting for the AI-NIDS Flask app.
| import os | |
| import pandas as pd | |
| import joblib | |
| from flask import Blueprint, request, jsonify, send_file, make_response, after_this_request | |
| from werkzeug.utils import secure_filename | |
| from datetime import datetime | |
| from fpdf import FPDF | |
| from io import BytesIO | |
| import time | |
| import requests | |
| # --- IMPORT UTILS --- | |
| from utils.pcap_to_csv import convert_pcap_to_csv | |
| from utils.model_selector import load_model | |
# Blueprint registered by the app factory; hosts all offline-analysis routes.
offline_bp = Blueprint("offline_bp", __name__)

# --- CONFIGURATION ---
UPLOAD_DIR = "uploads"   # incoming user files + cached prediction results
SAMPLE_DIR = "sample"    # bundled per-model sample CSVs

# Create both directories eagerly so route handlers can assume they exist.
for _required_dir in (UPLOAD_DIR, SAMPLE_DIR):
    os.makedirs(_required_dir, exist_ok=True)

# File extensions accepted by the upload endpoint.
ALLOWED_EXT = {"csv", "pcap"}
| # --- FEATURE DEFINITIONS (As per your Model Logs) --- | |
| # --- UPDATED FEATURE DEFINITIONS --- | |
# Feature columns, in the exact order each model was fitted on.
# BCC model: flow-level features produced by the pcap/CSV pipeline.
BCC_FEATURES = [
    "protocol", "src_port", "dst_port", "duration",
    "packets_count", "fwd_packets_count", "bwd_packets_count",
    "total_payload_bytes", "total_header_bytes",
    "bytes_rate", "packets_rate",
    "syn_flag_counts", "ack_flag_counts", "rst_flag_counts", "fin_flag_counts",
]

# CICIDS model: column names follow the CICIDS dataset naming convention
# (note the capitalised, space-separated headers).
CICIDS_FEATURES = [
    "Protocol", "Dst Port", "Flow Duration", "Tot Fwd Pkts", "Tot Bwd Pkts",
    "TotLen Fwd Pkts", "TotLen Bwd Pkts", "Fwd Pkt Len Mean", "Bwd Pkt Len Mean",
    "Flow IAT Mean", "Fwd PSH Flags", "Fwd URG Flags", "Fwd IAT Mean",
]
def allowed(filename):
    """Return True if *filename* carries an extension listed in ALLOWED_EXT."""
    if "." not in filename:
        return False
    extension = filename.rsplit(".", 1)[1].lower()
    return extension in ALLOWED_EXT
| # --- ROUTE: DOWNLOAD SAMPLE --- | |
def download_sample(model_type):
    """Serve the bundled sample CSV for *model_type* as an attachment.

    Returns a 404 JSON payload when no sample exists for the requested model.
    """
    # SECURITY: model_type comes from the client. secure_filename() strips
    # path separators so a crafted value (e.g. "../../x") cannot escape
    # SAMPLE_DIR via path traversal. Legitimate names ("bcc", "cicids")
    # pass through unchanged.
    safe_name = secure_filename(f"{model_type}_sample.csv")
    file_path = os.path.join(SAMPLE_DIR, safe_name)
    if not os.path.exists(file_path):
        return jsonify(success=False, message="Sample file missing"), 404
    return send_file(file_path, as_attachment=True)
| # --- ROUTE: URL LIVE PROBE --- | |
def analyze_url():
    """Live-probe a user-supplied URL and classify it with the BCC model.

    Expects JSON body {"url": "<target>"}. Times an HTTP(S) GET to the
    target, maps the observed timing/size metadata onto a synthetic BCC
    feature row, and returns the model's prediction plus that row.
    """
    target_url = request.json.get("url")
    if not target_url:
        return jsonify(success=False, message="No URL provided"), 400
    # Ensure URL is properly formatted (default scheme: HTTPS).
    if not target_url.startswith("http"):
        target_url = "https://" + target_url
    # 1. Start "synthetic capture": time the full request/response exchange.
    start_ts = time.time()
    try:
        # Use a real user-agent to avoid being blocked by the site.
        headers_ua = {'User-Agent': 'Mozilla/5.0 (NIDS-Intelligence-Probe/1.0)'}
        response = requests.get(target_url, timeout=10, headers=headers_ua, stream=True)
        # BUGFIX: with stream=True the body is not downloaded until
        # response.content is accessed. Read it BEFORE taking the end
        # timestamp; previously the duration excluded the body transfer
        # while payload_bytes included it, inflating bytes_rate/packets_rate.
        payload_bytes = len(response.content)
        header_bytes = len(str(response.headers))
        response.close()  # release the streamed connection back to the pool
        end_ts = time.time()
        # 2. Extract metadata for the synthetic features.
        duration = end_ts - start_ts
        # 3. Map onto the model's feature schema (BCC format). Packet counts
        # are simulated: a small HTTPS exchange is roughly 8-10 packets.
        synthetic_row = {
            "protocol": 6,  # TCP
            "src_port": 443,
            "dst_port": 443,
            "duration": duration,
            "packets_count": 10,
            "fwd_packets_count": 5,
            "bwd_packets_count": 5,
            "total_payload_bytes": payload_bytes,
            "total_header_bytes": header_bytes,
            "bytes_rate": payload_bytes / duration if duration > 0 else 0,
            "packets_rate": 10 / duration if duration > 0 else 0,
            "syn_flag_counts": 1,
            "ack_flag_counts": 1,
            "rst_flag_counts": 0,
            "fin_flag_counts": 1
        }
        # 4. Single-row frame through the same scaler/encoder/model pipeline
        # used by the offline CSV route.
        df_url = pd.DataFrame([synthetic_row])
        model_data = load_model("bcc")
        scaler = model_data.get('scaler')
        encoder = model_data.get('encoder')
        model = model_data['model']
        # Scale and predict.
        numeric_input = df_url[BCC_FEATURES].apply(pd.to_numeric).fillna(0)
        scaled_data = scaler.transform(numeric_input.values)
        preds = model.predict(scaled_data)
        label = encoder.inverse_transform(preds)[0]
        return jsonify({
            "success": True,
            "prediction": str(label),
            "details": synthetic_row,
            "url": target_url
        })
    except Exception as e:
        return jsonify(success=False, message=f"URL Probe Failed: {str(e)}"), 500
| # --- ROUTE: PREDICT --- | |
def offline_predict():
    """Classify an uploaded CSV/PCAP with the selected model.

    Form fields: "file" (the capture/flow file) and "model" (default "bcc").
    Returns JSON with per-row predictions and class counts; the labelled
    frame is cached to uploads/last_results.csv for the PDF report route.
    """
    if "file" not in request.files:
        return jsonify(success=False, message="No file uploaded"), 400
    file = request.files["file"]
    model_type = request.form.get("model", "bcc")
    if not allowed(file.filename):
        return jsonify(success=False, message="Unsupported file type"), 400
    filename = secure_filename(file.filename)
    saved_path = os.path.join(UPLOAD_DIR, filename)
    file.save(saved_path)

    # Cleanup logic to keep the server clean.
    # BUGFIX: register the hook with @after_this_request (imported at the top
    # of this module for exactly this purpose). Previously the function was
    # only defined, never registered, so uploads accumulated on disk forever.
    @after_this_request
    def cleanup(response):
        try:
            if os.path.exists(saved_path):
                os.remove(saved_path)
        except Exception as e:
            print(f"Cleanup Error: {e}")
        return response

    # 1. Load data.
    try:
        # NOTE(review): convert_pcap_to_csv is imported but unused — .pcap
        # uploads are currently parsed as CSV and will fail here. TODO: route
        # *.pcap files through the converter once its signature is confirmed.
        df = pd.read_csv(saved_path)
        if df.empty:
            return jsonify(success=False, message="CSV has no data!"), 400
    except Exception as e:
        return jsonify(success=False, message=f"Error reading CSV: {str(e)}"), 400

    # 2. Flexible feature mapping: translate common CSV headers to the exact
    # column names the model was fitted on.
    mapping = {
        'Protocol': 'protocol', 'proto': 'protocol',
        'Source Port': 'src_port',
        'Destination Port': 'dst_port',
        'Flow Duration': 'duration', 'flow_duration': 'duration',
        'Total Fwd Packets': 'fwd_packets_count', 'total_fwd_pkts': 'fwd_packets_count',
        'Total Bwd Packets': 'bwd_packets_count', 'total_bwd_pkts': 'bwd_packets_count',
        'Total Length of Fwd Packets': 'total_payload_bytes', 'payload_len': 'total_payload_bytes',
        'fwd_header_len': 'total_header_bytes', 'header_len': 'total_header_bytes',
        'Flow Bytes/s': 'bytes_rate', 'rate': 'bytes_rate',
        'Flow Pkts/s': 'packets_rate',
        'syn': 'syn_flag_counts', 'ack': 'ack_flag_counts',
        'rst': 'rst_flag_counts', 'fin': 'fin_flag_counts'
    }
    df = df.rename(columns=mapping)

    # Derive packets_count from the directional counts when absent.
    if 'packets_count' not in df.columns and 'fwd_packets_count' in df.columns:
        df['packets_count'] = df['fwd_packets_count'] + df.get('bwd_packets_count', 0)

    # --- FLAG EXTRACTION LOGIC ---
    # If per-flag count columns are missing, derive them from a textual
    # 'flags' column (substring match); otherwise fall back to zero.
    flag_map = {
        'syn_flag_counts': 'syn',
        'ack_flag_counts': 'ack',
        'rst_flag_counts': 'rst',
        'fin_flag_counts': 'fin'
    }
    for model_name, csv_name in flag_map.items():
        if model_name not in df.columns:
            if 'flags' in df.columns:
                # Handle string flags safely.
                if df['flags'].dtype == object:
                    df[model_name] = df['flags'].str.lower().str.contains(csv_name).astype(int)
                else:
                    # Fallback for numeric or missing flag data.
                    df[model_name] = 0
            else:
                df[model_name] = 0

    # 3. Model loading & feature alignment.
    try:
        model_data = load_model(model_type)
        if not model_data or model_data.get('model') is None:
            return jsonify(success=False, message="Model failed to load. Check Hub connection."), 500
        model = model_data['model']
        expected = BCC_FEATURES if model_type == "bcc" else CICIDS_FEATURES
        # Safety padding: fill missing features with 0 so column selection
        # below never raises on an incomplete CSV.
        for col in expected:
            if col not in df.columns:
                df[col] = 0
    except Exception as e:
        return jsonify(success=False, message=f"Model Initialization Error: {str(e)}"), 500

    # 4. Prediction.
    try:
        # Map textual protocol names to IANA numbers before numeric coercion.
        proto_map = {'TCP': 6, 'UDP': 17, 'ICMP': 1, 'tcp': 6, 'udp': 17, 'icmp': 1}
        # BUGFIX: guard the access — non-BCC schemas may not have a
        # 'protocol' column at all (padding above adds 'Protocol' instead),
        # which previously raised KeyError and surfaced as a 500.
        if 'protocol' in df.columns:
            df['protocol'] = df['protocol'].apply(
                lambda x: proto_map.get(x, x) if isinstance(x, str) else x)

        # Reorder columns to the model's fit-time order.
        input_data = df[expected]
        if model_type == "bcc":
            scaler = model_data.get('scaler')
            encoder = model_data.get('encoder')
            # Ensure all columns are numeric before scaling.
            numeric_input = input_data.apply(pd.to_numeric, errors='coerce').fillna(0)
            scaled_data = scaler.transform(numeric_input.values)
            preds = model.predict(scaled_data)
            labels = encoder.inverse_transform(preds)
        else:
            # BUGFIX: this branch previously fell through with no return,
            # making Flask raise "view function did not return a valid
            # response". Surface an explicit, actionable error instead.
            return jsonify(
                success=False,
                message=f"Prediction pipeline for model '{model_type}' is not implemented yet."
            ), 501

        # 5. Result formatting for the React frontend.
        df["prediction"] = labels
        class_counts = df["prediction"].value_counts().to_dict()
        # Convert all labels to strings for JSON serializability.
        results = [{"index": i, "class": str(lbl)} for i, lbl in enumerate(labels)]
        # Save results for the PDF report generator.
        df.to_csv(os.path.join(UPLOAD_DIR, "last_results.csv"), index=False)
        return jsonify({
            "success": True,
            "classCounts": class_counts,
            "results": results,
            "total_processed": len(df)
        })
    except Exception as e:
        import traceback
        print(traceback.format_exc())
        return jsonify(success=False, message=f"Prediction Engine Failure: {str(e)}"), 500
| # --- ROUTE: PDF REPORT (MEMORY SAFE) --- | |
def offline_report():
    """Build and return a PDF summary of the most recent prediction run.

    Reads uploads/last_results.csv (written by offline_predict) and responds
    400 if no prediction has been run yet. The PDF is assembled entirely in
    memory -- nothing extra is written to disk.
    """
    result_file = os.path.join(UPLOAD_DIR, "last_results.csv")
    if not os.path.exists(result_file):
        return jsonify(success=False, message="Run prediction first"), 400

    summary = pd.read_csv(result_file)["prediction"].value_counts().to_dict()

    report = FPDF()
    report.add_page()

    # Title block.
    report.set_font("Arial", "B", 16)
    report.cell(0, 10, "AI-NIDS Offline Threat Analysis Report", ln=True, align='C')
    report.ln(10)

    # Generation timestamp.
    report.set_font("Arial", size=12)
    report.cell(0, 10, f"Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", ln=True)
    report.ln(5)

    # Per-class occurrence counts.
    report.set_font("Arial", "B", 12)
    report.cell(0, 10, "Classification Summary:", ln=True)
    report.set_font("Arial", size=12)
    for label, hits in summary.items():
        report.cell(0, 8, f"- {label}: {hits} occurrences", ln=True)

    # Serialize straight to a download response (no temp file on disk).
    pdf_bytes = report.output(dest='S').encode('latin-1')
    response = make_response(pdf_bytes)
    response.headers.set('Content-Disposition', 'attachment', filename='offline_report.pdf')
    response.headers.set('Content-Type', 'application/pdf')
    return response