File size: 5,119 Bytes
e851d90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
import gradio as gr
import numpy as np
import pandas as pd
import tensorflow as tf
import joblib
import json

# --- 1. CONFIGURATION AND FILE LOADING ---

# Load everything the app needs before the UI is built: the trained Keras
# model, the fitted scaler, the list of original feature names, and the map of
# categorical feature -> known values.
def _read_json(path):
    """Return the parsed contents of the JSON file at *path*."""
    with open(path, 'r') as handle:
        return json.load(handle)


try:
    # Trained model
    model = tf.keras.models.load_model('improved_intrusion_detection_model.h5')

    # Preprocessing objects
    scaler = joblib.load('kdd_scaler_StandardScaler.joblib')
    FEATURE_NAMES = _read_json('kdd_41_original_feature_names.json')
    CATEGORICAL_MAPPING = _read_json('kdd_categorical_unique_values.json')
except FileNotFoundError as missing:
    # Report which file is missing, then re-raise so startup still fails.
    print(f"Error loading required file: {missing}. Ensure all files are in the same directory.")
    raise

# Split the original feature names into categorical columns (the keys of the
# categorical mapping) and numerical columns (everything else).
CATEGORICAL_COLS = list(CATEGORICAL_MAPPING.keys())
NUMERICAL_COLS = [col for col in FEATURE_NAMES if col not in CATEGORICAL_COLS]

# Final column order fed to the scaler/model: numerical columns first, then one
# '<column>_<value>' one-hot column per known categorical value.
# NOTE(review): this order is assumed to match the layout used at training
# time -- confirm against the training pipeline.
#
# Start from a COPY of NUMERICAL_COLS. Binding FINAL_COLUMNS to the same list
# object would make the appends below leak the one-hot names into
# NUMERICAL_COLS, which must only hold raw numeric feature names.
FINAL_COLUMNS = list(NUMERICAL_COLS)
for col in CATEGORICAL_COLS:
    FINAL_COLUMNS.extend(f'{col}_{value}' for value in CATEGORICAL_MAPPING[col])

# --- 2. PREDICTION FUNCTION ---

def predict_attack(*raw_input_values):
    """
    Classify one network connection as attack or normal traffic.

    Args:
        *raw_input_values: One raw value per entry of FEATURE_NAMES, in the
            same order. Columns listed in CATEGORICAL_MAPPING are treated as
            categorical; every other column must be convertible to a finite
            float.

    Returns:
        str: On success, an HTML ``<h1>`` snippet (red for an attack, green
        for normal traffic) that includes the model confidence. On invalid
        input, a plain-text message starting with ``"Input Error:"``.
    """
    expected_count = len(FEATURE_NAMES)
    if len(raw_input_values) != expected_count:
        return (
            f"Input Error: Expected {expected_count} features, "
            f"received {len(raw_input_values)}."
        )

    raw_row = dict(zip(FEATURE_NAMES, raw_input_values))
    encoded_row = {}

    # 1. Numerical features: coerce to float and reject anything unusable.
    # The numeric columns are derived from FEATURE_NAMES here so that only raw
    # feature names are ever looked up in the input row.
    for col in FEATURE_NAMES:
        if col in CATEGORICAL_MAPPING:
            continue
        try:
            number = float(raw_row[col])
        except (TypeError, ValueError):
            # TypeError covers None (e.g. an emptied numeric field).
            return f"Input Error: Non-numeric value detected in column: {col}"
        if not np.isfinite(number):
            # NaN/inf would propagate through scaling and yield a meaningless
            # "nan" confidence instead of a real prediction.
            return f"Input Error: Non-numeric value detected in column: {col}"
        encoded_row[col] = number

    # 2. One-hot encode categorical features as '<column>_<value>' = 1.
    # A value that is not among the known values leaves all of that column's
    # one-hot entries at 0.
    for col, unique_values in CATEGORICAL_MAPPING.items():
        known_names = {f'{col}_{val}' for val in unique_values}
        chosen_name = f'{col}_{raw_row[col]}'
        if chosen_name in known_names:
            encoded_row[chosen_name] = 1.0

    # 3. Lay the features out in FINAL_COLUMNS order; absent one-hot columns
    # default to 0.
    X_array = np.array(
        [[encoded_row.get(name, 0.0) for name in FINAL_COLUMNS]],
        dtype=np.float32,
    )

    # 4. Standard scaling
    X_scaled = scaler.transform(X_array)

    # 5. Reshape to (1, n_features, 1) for the model
    X_cnn = X_scaled.reshape((1, X_scaled.shape[1], 1))

    # 6. Predict. The first output value is read as the attack probability.
    # NOTE(review): assumes the model emits a single probability in [0, 1]
    # -- confirm against the model definition.
    prediction = model.predict(X_cnn, verbose=0)
    probability = float(prediction[0][0])

    if probability > 0.5:
        result = f"🚨 ATTACK DETECTED! (Confidence: {probability:.2f})"
        color = "red"
    else:
        result = f"✅ Normal Traffic (Confidence: {1 - probability:.2f})"
        color = "green"

    # HTML formatting for colored output in Gradio
    return f'<h1 style="color:{color}; font-size:24px;">{result}</h1>'

# --- 3. GRADIO INTERFACE SETUP ---

# Build one Gradio input widget per feature, in FEATURE_NAMES order.
def _build_input(name):
    """Return the Gradio input widget for the feature called *name*."""
    if name in NUMERICAL_COLS:
        # Numeric features get a Number field defaulting to 0.
        return gr.Number(label=name, value=0)
    if name in CATEGORICAL_COLS:
        # Categorical features get a Dropdown of the known values,
        # preselecting the first one.
        choices = CATEGORICAL_MAPPING[name]
        return gr.Dropdown(label=name, choices=choices, value=choices[0])
    # Fallback for a name that is in neither list.
    return gr.Textbox(label=name, value="0")


input_components = [_build_input(name) for name in FEATURE_NAMES]

# Gradio Interface: predict_attack's string result is rendered by an HTML
# output component so the colored heading is displayed.
iface = gr.Interface(
    fn=predict_attack,
    inputs=input_components,
    outputs=gr.HTML(label="Prediction Result"),
    title="Intrusion Detection System (KDD/NSL-KDD CNN)",
    description="Enter the 41 feature values of a network connection to detect if it is an attack or normal traffic. Use the attack patterns provided (e.g., Neptune DoS) to test the model."
)

# Launch the app only when run as a script.
if __name__ == "__main__":
    iface.launch()