# Snapshot metadata: file size 5,119 bytes, commit e851d90.
import gradio as gr
import numpy as np
import pandas as pd
import tensorflow as tf
import joblib
import json
# --- 1. CONFIGURATION AND FILE LOADING ---
# Load the saved model, scaler, feature names, and categorical map.
# All paths are relative, so they resolve against the current working directory.
try:
    # Trained Keras model stored in HDF5 format.
    model = tf.keras.models.load_model('improved_intrusion_detection_model.h5')
    # Preprocessing scaler persisted with joblib
    # (presumably a fitted StandardScaler, per the file name -- confirm).
    scaler = joblib.load('kdd_scaler_StandardScaler.joblib')
    # JSON is UTF-8 by specification; pass the encoding explicitly so decoding
    # does not depend on the platform's locale default.
    with open('kdd_41_original_feature_names.json', 'r', encoding='utf-8') as f:
        FEATURE_NAMES = json.load(f)
    with open('kdd_categorical_unique_values.json', 'r', encoding='utf-8') as f:
        CATEGORICAL_MAPPING = json.load(f)
except OSError as e:
    # OSError covers FileNotFoundError as well as other I/O failures
    # (e.g. permission errors). Print a hint, then re-raise the original
    # exception unchanged so startup still fails loudly.
    print(f"Error loading required file: {e}. Ensure all files are in the same directory.")
    raise
# Define categorical and numerical feature names based on the mapping.
# Categorical features are the keys of CATEGORICAL_MAPPING; every other
# feature in FEATURE_NAMES is treated as numerical.
CATEGORICAL_COLS = list(CATEGORICAL_MAPPING.keys())
NUMERICAL_COLS = [col for col in FEATURE_NAMES if col not in CATEGORICAL_COLS]
# Determine the final column order after preprocessing: numerical columns
# first, then one '<column>_<value>' entry per known categorical value.
# NOTE(review): this order is assumed to match the training data -- confirm.
#
# FINAL_COLUMNS must be a NEW list. Binding it directly to NUMERICAL_COLS
# would make both names refer to the same object, so adding the one-hot
# names would also add them to NUMERICAL_COLS.
FINAL_COLUMNS = NUMERICAL_COLS + [
    f'{col}_{value}'
    for col in CATEGORICAL_COLS
    for value in CATEGORICAL_MAPPING[col]
]
# --- 2. PREDICTION FUNCTION ---
def predict_attack(*raw_input_values):
    """
    Classify one network connection as attack or normal traffic.

    Args:
        *raw_input_values: One value per entry of FEATURE_NAMES, in the same
            order. Numerical features must be convertible by pd.to_numeric;
            categorical features are matched against CATEGORICAL_MAPPING.

    Returns:
        str: An HTML <h1> snippet with the coloured verdict on success, or a
        plain "Input Error: ..." message when the input is rejected.
    """
    expected_count = len(FEATURE_NAMES)
    if len(raw_input_values) != expected_count:
        return (
            f"Input Error: Expected {expected_count} features, "
            f"received {len(raw_input_values)}."
        )

    # 1. Create a single-row DataFrame from the user input
    raw_df = pd.DataFrame([raw_input_values], columns=FEATURE_NAMES)

    # Derive the numerical columns locally so this function only ever touches
    # columns that exist in raw_df.
    numerical_cols = [col for col in FEATURE_NAMES if col not in CATEGORICAL_MAPPING]
    for col in numerical_cols:
        try:
            raw_df[col] = pd.to_numeric(raw_df[col])
        except (ValueError, TypeError):
            return f"Input Error: Non-numeric value detected in column: {col}"
        # An empty input arrives as None and converts to NaN; reject it here
        # instead of passing NaN on to the scaler and the model.
        if raw_df[col].isna().any():
            return f"Input Error: Missing value in column: {col}"

    # 2. One-hot encode the categorical columns
    encoded_parts = [raw_df[numerical_cols]]
    for col, unique_values in CATEGORICAL_MAPPING.items():
        # One zero-filled column for every known value of this feature
        ohe_names = [f'{col}_{val}' for val in unique_values]
        ohe_frame = pd.DataFrame(0, index=raw_df.index, columns=ohe_names)
        # Flag the submitted value; a value missing from the mapping leaves
        # every column of this feature at 0.
        selected_name = f'{col}_{raw_df[col].iloc[0]}'
        if selected_name in ohe_frame.columns:
            ohe_frame[selected_name] = 1
        encoded_parts.append(ohe_frame)
    df_encoded = pd.concat(encoded_parts, axis=1)

    # 3. Align and reorder features
    # reindex puts the columns in FINAL_COLUMNS order and fills any column
    # that is absent from df_encoded with 0.
    X_processed = df_encoded.reindex(columns=FINAL_COLUMNS, fill_value=0)
    X_array = X_processed.values.astype(np.float32)

    # 4. Scaling with the preloaded scaler
    X_scaled = scaler.transform(X_array)

    # 5. Reshape to (1, n_features, 1)
    # NOTE(review): assumes the model expects this 3-D input -- confirm.
    X_cnn = X_scaled.reshape((1, X_scaled.shape[1], 1))

    # 6. Predict
    prediction = model.predict(X_cnn, verbose=0)

    # NOTE(review): assumes a single output in [0, 1] giving the attack
    # probability -- confirm against the model definition.
    probability = float(prediction[0][0])
    if probability > 0.5:
        result = f"🚨 ATTACK DETECTED! (Confidence: {probability:.2f})"
        color = "red"
    else:
        result = f"✅ Normal Traffic (Confidence: {1 - probability:.2f})"
        color = "green"

    # HTML formatting for colored output in Gradio
    return f'<h1 style="color:{color}; font-size:24px;">{result}</h1>'
# --- 3. GRADIO INTERFACE SETUP ---
# Create one Gradio input component per feature, in FEATURE_NAMES order;
# predict_attack receives the submitted values positionally in that order.
input_components = []
for name in FEATURE_NAMES:
    if name in CATEGORICAL_MAPPING:
        # Dropdown restricts categorical features to the known values
        choices = list(CATEGORICAL_MAPPING[name])
        input_components.append(gr.Dropdown(
            label=name,
            choices=choices,
            # Preselect the first known value; leave unset if there is none
            value=choices[0] if choices else None,
        ))
    else:
        # Every feature that is not a key of the categorical mapping is
        # numerical, so a Number component is used.
        input_components.append(gr.Number(label=name, value=0))

# Gradio Interface
iface = gr.Interface(
    fn=predict_attack,
    inputs=input_components,
    outputs=gr.HTML(label="Prediction Result"),  # HTML component renders the colored text
    title="Intrusion Detection System (KDD/NSL-KDD CNN)",
    description="Enter the 41 feature values of a network connection to detect if it is an attack or normal traffic. Use the attack patterns provided (e.g., Neptune DoS) to test the model."
)

# Launch the app only when this file is executed as a script
if __name__ == "__main__":
    iface.launch()