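"""Offline analysis blueprint for the AI-NIDS backend.

Routes (paths are relative to wherever this blueprint is registered):
  GET  /sample/<model_type>  - download a sample CSV for the chosen model
  POST /analyze-url          - probe a live URL and classify the synthetic flow
  POST /predict              - classify flows from an uploaded CSV/PCAP file
  GET  /report               - build a PDF summary of the last prediction run
"""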
import os
import pandas as pd
import joblib
from flask import Blueprint, request, jsonify, send_file, make_response, after_this_request
from werkzeug.utils import secure_filename
from datetime import datetime
from fpdf import FPDF
from io import BytesIO
import time
import requests

# --- IMPORT UTILS ---
from utils.pcap_to_csv import convert_pcap_to_csv
from utils.model_selector import load_model

offline_bp = Blueprint("offline_bp", __name__)


# --- CONFIGURATION ---
UPLOAD_DIR = "uploads"
SAMPLE_DIR = "sample"
os.makedirs(UPLOAD_DIR, exist_ok=True)
os.makedirs(SAMPLE_DIR, exist_ok=True)

ALLOWED_EXT = {"csv", "pcap"}

# --- FEATURE DEFINITIONS (must match the feature names used when the models were fitted) ---
BCC_FEATURES = [
    "protocol",
    "src_port",
    "dst_port",
    "duration",
    "packets_count",
    "fwd_packets_count",
    "bwd_packets_count",
    "total_payload_bytes",
    "total_header_bytes",
    "bytes_rate",
    "packets_rate",
    "syn_flag_counts",
    "ack_flag_counts",
    "rst_flag_counts",
    "fin_flag_counts",
]

CICIDS_FEATURES = [
    "Protocol", "Dst Port", "Flow Duration", "Tot Fwd Pkts", "Tot Bwd Pkts",
    "TotLen Fwd Pkts", "TotLen Bwd Pkts", "Fwd Pkt Len Mean", "Bwd Pkt Len Mean",
    "Flow IAT Mean", "Fwd PSH Flags", "Fwd URG Flags", "Fwd IAT Mean"
]
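
# For reference, a BCC-format CSV row that /predict can consume directly might
# look like the sketch below (columns may appear in any order; missing ones are
# zero-padded, and common alternative header names are remapped further down):
#
#   protocol,src_port,dst_port,duration,packets_count,fwd_packets_count,bwd_packets_count,total_payload_bytes,total_header_bytes,bytes_rate,packets_rate,syn_flag_counts,ack_flag_counts,rst_flag_counts,fin_flag_counts
#   6,51432,443,0.42,10,5,5,2048,320,4876.19,23.81,1,1,0,1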

def allowed(filename):
    return "." in filename and filename.rsplit(".", 1)[1].lower() in ALLOWED_EXT

# --- ROUTE: DOWNLOAD SAMPLE ---
@offline_bp.route("/sample/<model_type>", methods=["GET"])
def download_sample(model_type):
    file_path = os.path.join(SAMPLE_DIR, f"{model_type}_sample.csv")
    if not os.path.exists(file_path):
        return jsonify(success=False, message="Sample file missing"), 404
    return send_file(file_path, as_attachment=True)


# --- ROUTE: URL LIVE PROBE ---
@offline_bp.route("/analyze-url", methods=["POST"])
def analyze_url():
    target_url = request.json.get("url")
    if not target_url:
        return jsonify(success=False, message="No URL provided"), 400

    # Ensure URL is properly formatted
    if not target_url.startswith("http"):
        target_url = "https://" + target_url

    # 1. Start "Synthetic Capture" (Timing the request)
    start_ts = time.time()
    
    try:
        # Send an explicit User-Agent so the probe is less likely to be rejected outright
        headers_ua = {'User-Agent': 'Mozilla/5.0 (NIDS-Intelligence-Probe/1.0)'}
        response = requests.get(target_url, timeout=10, headers=headers_ua, stream=True)
        end_ts = time.time()
        
        # 2. Extract Metadata for Synthetic Features
        duration = end_ts - start_ts
        # Measure the downloaded body and (stringified) header sizes
        payload_bytes = len(response.content)
        header_bytes = len(str(response.headers))
        
        # 3. Map the probe metadata onto the model's BCC feature set.
        # Packet counts are approximated from a typical small HTTPS exchange (~10 packets).
        synthetic_row = {
            "protocol": 6, # TCP/HTTPS
            "src_port": 443,
            "dst_port": 443,
            "duration": duration,
            "packets_count": 10,
            "fwd_packets_count": 5,
            "bwd_packets_count": 5,
            "total_payload_bytes": payload_bytes,
            "total_header_bytes": header_bytes,
            "bytes_rate": payload_bytes / duration if duration > 0 else 0,
            "packets_rate": 10 / duration if duration > 0 else 0,
            "syn_flag_counts": 1, 
            "ack_flag_counts": 1,
            "rst_flag_counts": 0,
            "fin_flag_counts": 1
        }
        
        # 4. Convert to DataFrame for Prediction
        df_url = pd.DataFrame([synthetic_row])
        
        # Reuse the same scaler, encoder, and model pipeline as the /predict route
        model_data = load_model("bcc")
        scaler = model_data.get('scaler')
        encoder = model_data.get('encoder')
        model = model_data['model']

        # Scale and Predict
        numeric_input = df_url[BCC_FEATURES].apply(pd.to_numeric).fillna(0)
        scaled_data = scaler.transform(numeric_input.values)
        preds = model.predict(scaled_data)
        label = encoder.inverse_transform(preds)[0]

        return jsonify({
            "success": True,
            "prediction": str(label),
            "details": synthetic_row,
            "url": target_url
        })

    except Exception as e:
        return jsonify(success=False, message=f"URL Probe Failed: {str(e)}"), 500

# --- ROUTE: PREDICT ---
@offline_bp.route("/predict", methods=["POST"])
def offline_predict():
    if "file" not in request.files:
        return jsonify(success=False, message="No file uploaded"), 400

    file = request.files["file"]
    model_type = request.form.get("model", "bcc")

    if not allowed(file.filename):
        return jsonify(success=False, message="Unsupported file type"), 400

    filename = secure_filename(file.filename)
    saved_path = os.path.join(UPLOAD_DIR, filename)
    file.save(saved_path)

    # Cleanup logic to keep the server clean
    @after_this_request
    def cleanup(response):
        try:
            if os.path.exists(saved_path):
                os.remove(saved_path)
        except Exception as e:
            print(f"Cleanup Error: {e}")
        return response

    # 1. Load Data (PCAP uploads are converted to CSV first)
    try:
        if filename.rsplit(".", 1)[1].lower() == "pcap":
            # convert_pcap_to_csv is assumed to take the pcap path and return the
            # path of the generated CSV; adjust if its actual signature differs.
            saved_path = convert_pcap_to_csv(saved_path)
        df = pd.read_csv(saved_path)
        if df.empty:
            return jsonify(success=False, message="CSV has no data!"), 400
    except Exception as e:
        return jsonify(success=False, message=f"Error reading CSV: {str(e)}"), 400

    # 2. Flexible Feature Mapping & Flag Extraction
    # Renames common CSV headers to the specific technical names the model expects
    # 2. Flexible Feature Mapping (Translate to EXACT fit-time names)
    # 2. Flexible Feature Mapping
    mapping = {
        'Protocol': 'protocol', 'proto': 'protocol',
        'Source Port': 'src_port',
        'Destination Port': 'dst_port',
        'Flow Duration': 'duration', 'flow_duration': 'duration',
        'Total Fwd Packets': 'fwd_packets_count', 'total_fwd_pkts': 'fwd_packets_count',
        'Total Bwd Packets': 'bwd_packets_count', 'total_bwd_pkts': 'bwd_packets_count',
        'Total Length of Fwd Packets': 'total_payload_bytes', 'payload_len': 'total_payload_bytes',
        'fwd_header_len': 'total_header_bytes', 'header_len': 'total_header_bytes',
        'Flow Bytes/s': 'bytes_rate', 'rate': 'bytes_rate',
        'Flow Pkts/s': 'packets_rate',
        'syn': 'syn_flag_counts', 'ack': 'ack_flag_counts', 
        'rst': 'rst_flag_counts', 'fin': 'fin_flag_counts'
    }
    # The rename targets the BCC feature names; CICIDS uploads are assumed to keep
    # their original column headers, so they are left untouched.
    if model_type == "bcc":
        df = df.rename(columns=mapping)

    # Calculate packets_count if missing
    if 'packets_count' not in df.columns and 'fwd_packets_count' in df.columns:
        df['packets_count'] = df['fwd_packets_count'] + df.get('bwd_packets_count', 0)

    # --- FLAG EXTRACTION LOGIC ---
    flag_map = {
        'syn_flag_counts': 'syn', 
        'ack_flag_counts': 'ack', 
        'rst_flag_counts': 'rst', 
        'fin_flag_counts': 'fin'
    }

    for model_name, csv_name in flag_map.items():
        if model_name not in df.columns:
            if 'flags' in df.columns:
                # Handle String flags safely
                if df['flags'].dtype == object:
                    df[model_name] = df['flags'].str.lower().str.contains(csv_name, na=False).astype(int)
                else:
                    # Fallback for numeric or missing flag data
                    df[model_name] = 0
            else:
                df[model_name] = 0
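
    # Example: a textual 'flags' value such as "SYN,ACK" yields syn_flag_counts=1
    # and ack_flag_counts=1 (rst/fin stay 0) for that row; numeric or absent flag
    # data falls back to all zeros.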

    # 3. Model Loading & Feature Alignment
    try:
        model_data = load_model(model_type)
        if not model_data or model_data.get('model') is None:
            return jsonify(success=False, message="Model failed to load. Check Hub connection."), 500

        model = model_data['model']
        expected = BCC_FEATURES if model_type == "bcc" else CICIDS_FEATURES
        
        # Safety padding: fill any missing expected feature with 0 so the column
        # reorder below does not fail with a KeyError
        for col in expected:
            if col not in df.columns:
                df[col] = 0 
                
    except Exception as e:
        return jsonify(success=False, message=f"Model Initialization Error: {str(e)}"), 500
    

    # 4. Prediction Logic
    try:
        # Map textual protocol names to IANA protocol numbers before numeric coercion
        if 'protocol' in df.columns:
            proto_map = {'TCP': 6, 'UDP': 17, 'ICMP': 1}
            df['protocol'] = df['protocol'].apply(
                lambda x: proto_map.get(x.upper(), x) if isinstance(x, str) else x
            )

        # Reorder columns to the exact order the model expects
        input_data = df[expected]
    
        # Ensure all columns are numeric before prediction
        numeric_input = input_data.apply(pd.to_numeric, errors='coerce').fillna(0)

        if model_type == "bcc":
            scaler = model_data.get('scaler')
            encoder = model_data.get('encoder')

            # Scale features and decode predicted class indices back to labels
            scaled_data = scaler.transform(numeric_input.values)
            preds = model.predict(scaled_data)
            labels = encoder.inverse_transform(preds)
        else:
            # CICIDS fallback path (an assumption: the artifact may ship without a
            # separate scaler/encoder, so both are applied only when present)
            scaler = model_data.get('scaler')
            encoder = model_data.get('encoder')
            features = scaler.transform(numeric_input.values) if scaler is not None else numeric_input.values
            preds = model.predict(features)
            labels = encoder.inverse_transform(preds) if encoder is not None else preds

        # 5. Result Formatting for React Frontend
        df["prediction"] = labels
        class_counts = df["prediction"].value_counts().to_dict()
        
        # Convert all labels to strings for JSON serializability
        results = [{"index": i, "class": str(lbl)} for i, lbl in enumerate(labels)]
        
        # Save results for the PDF report generator
        df.to_csv(os.path.join(UPLOAD_DIR, "last_results.csv"), index=False)

        return jsonify({
            "success": True, 
            "classCounts": class_counts, 
            "results": results,
            "total_processed": len(df)
        })

    except Exception as e:
        import traceback
        print(traceback.format_exc())
        return jsonify(success=False, message=f"Prediction Engine Failure: {str(e)}"), 500

# --- ROUTE: PDF REPORT (MEMORY SAFE) ---
@offline_bp.route("/report", methods=["GET"])
def offline_report():
    result_file = os.path.join(UPLOAD_DIR, "last_results.csv")
    if not os.path.exists(result_file):
        return jsonify(success=False, message="Run prediction first"), 400

    df = pd.read_csv(result_file)
    class_counts = df["prediction"].value_counts().to_dict()

    # Generate PDF in memory
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", "B", 16)
    pdf.cell(0, 10, "AI-NIDS Offline Threat Analysis Report", ln=True, align='C')
    pdf.ln(10)

    pdf.set_font("Arial", size=12)
    pdf.cell(0, 10, f"Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", ln=True)
    pdf.ln(5)

    pdf.set_font("Arial", "B", 12)
    pdf.cell(0, 10, "Classification Summary:", ln=True)
    pdf.set_font("Arial", size=12)
    
    for cls, count in class_counts.items():
        pdf.cell(0, 8, f"- {cls}: {count} occurrences", ln=True)

    # Convert to bytes for response (no local file saving)
    response = make_response(pdf.output(dest='S').encode('latin-1'))
    response.headers.set('Content-Disposition', 'attachment', filename='offline_report.pdf')
    response.headers.set('Content-Type', 'application/pdf')
    return response
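
# Minimal wiring sketch (the import path and url_prefix below are assumptions;
# adjust them to match the actual project layout and the frontend's API base):
#
#   from flask import Flask
#   from routes.offline import offline_bp  # hypothetical module path
#
#   app = Flask(__name__)
#   app.register_blueprint(offline_bp, url_prefix="/offline")
#
#   if __name__ == "__main__":
#       app.run(debug=True)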