Spaces:

hari6677
/

it-dect

Runtime error

File size: 5,119 Bytes

e851d90

import gradio as gr
import numpy as np
import pandas as pd
import tensorflow as tf
import joblib
import json

# --- 1. CONFIGURATION AND FILE LOADING ---

# Load the saved model, scaler, feature names, and categorical map
try:
    # Load Model
    model = tf.keras.models.load_model('improved_intrusion_detection_model.h5')

    # Load Preprocessing Objects
    # NOTE(review): joblib.load unpickles the file, so only ship a trusted artifact here.
    scaler = joblib.load('kdd_scaler_StandardScaler.joblib')

    # Raw feature names, used as the column labels of the input frame.
    # The encoding is pinned so JSON decoding does not depend on the host locale.
    with open('kdd_41_original_feature_names.json', 'r', encoding='utf-8') as f:
        FEATURE_NAMES = json.load(f)

    # Maps each categorical feature name to the list of values it can take.
    with open('kdd_categorical_unique_values.json', 'r', encoding='utf-8') as f:
        CATEGORICAL_MAPPING = json.load(f)

except FileNotFoundError as e:
    # Report which artifact is missing, then re-raise so startup still fails loudly.
    print(f"Error loading required file: {e}. Ensure all files are in the same directory.")
    raise

# Define categorical and numerical feature names based on the mapping
CATEGORICAL_COLS = list(CATEGORICAL_MAPPING.keys())
NUMERICAL_COLS = [col for col in FEATURE_NAMES if col not in CATEGORICAL_COLS]

# Determine the final column order after preprocessing
# This order must match the training data: Numerical + One-Hot Encoded
# FINAL_COLUMNS must be a *copy*: binding it to NUMERICAL_COLS directly would
# make both names refer to one list, so the one-hot names appended below would
# also show up in NUMERICAL_COLS and break any code that treats that list as
# raw input columns.
FINAL_COLUMNS = list(NUMERICAL_COLS)
for col in CATEGORICAL_COLS:
    FINAL_COLUMNS.extend(f'{col}_{value}' for value in CATEGORICAL_MAPPING[col])

# --- 2. PREDICTION FUNCTION ---

def predict_attack(*raw_input_values):
    """
    Classify one network connection as attack or normal traffic.

    The raw feature values are validated, one-hot encoded, reordered to
    FINAL_COLUMNS, scaled with the loaded scaler and passed to the model.

    Args:
        *raw_input_values: One value per entry of FEATURE_NAMES, in that order.

    Returns:
        str: An "Input Error: ..." message when the input is rejected,
        otherwise an HTML <h1> snippet with the verdict and its confidence.
    """
    if len(raw_input_values) != len(FEATURE_NAMES):
        return (
            f"Input Error: Expected {len(FEATURE_NAMES)} features, "
            f"received {len(raw_input_values)}."
        )

    # 1. Create a raw DataFrame from the user input
    raw_df = pd.DataFrame([raw_input_values], columns=FEATURE_NAMES)

    # Numeric columns are derived from the raw schema itself so that only
    # columns actually present in raw_df are converted.
    numeric_cols = [col for col in FEATURE_NAMES if col not in CATEGORICAL_MAPPING]
    for col in numeric_cols:
        try:
            raw_df[col] = pd.to_numeric(raw_df[col])
        except (ValueError, TypeError):
            return f"Input Error: Non-numeric value detected in column: {col}"
        # An empty/NaN field would propagate to a NaN probability, which the
        # threshold below would silently report as normal traffic.
        if raw_df[col].isna().any():
            return f"Input Error: Missing value in column: {col}"

    # 2. One-Hot Encoding for Categorical Columns
    # Build the single encoded row in one pass instead of repeatedly
    # dropping/concatenating DataFrame columns.
    row = {col: raw_df[col].iloc[0] for col in numeric_cols}
    for col, unique_values in CATEGORICAL_MAPPING.items():
        selected = f'{col}_{raw_df[col].iloc[0]}'
        for val in unique_values:
            ohe_col_name = f'{col}_{val}'
            # A value outside the known set leaves every indicator at 0.
            row[ohe_col_name] = 1 if ohe_col_name == selected else 0

    # 3. Align and Reorder Features
    # reindex puts the columns in FINAL_COLUMNS order and zero-fills any
    # column that is missing from the row.
    X_processed = pd.DataFrame([row]).reindex(columns=FINAL_COLUMNS, fill_value=0)

    # Convert to NumPy array for scaling
    X_array = X_processed.values.astype(np.float32)

    # 4. Standard Scaling
    X_scaled = scaler.transform(X_array)

    # 5. Reshape to (1, n_features, 1) for the CNN
    X_cnn = X_scaled.reshape((1, X_scaled.shape[1], 1))

    # 6. Predict
    prediction = model.predict(X_cnn, verbose=0)

    # Apply threshold and determine result
    # NOTE(review): assumes the model emits a single attack probability in [0, 1] — confirm.
    probability = prediction[0][0]

    if probability > 0.5:
        result = f"🚨 ATTACK DETECTED! (Confidence: {probability:.2f})"
        color = "red"
    else:
        result = f"✅ Normal Traffic (Confidence: {1 - probability:.2f})"
        color = "green"

    # HTML formatting for colored output in Gradio
    return f'<h1 style="color:{color}; font-size:24px;">{result}</h1>'

# --- 3. GRADIO INTERFACE SETUP ---

# Build one Gradio input widget per raw feature, in FEATURE_NAMES order
def _build_input_component(name):
    """Return the input widget for the feature called *name*."""
    if name in NUMERICAL_COLS:
        # Number widget gives type enforcement for numeric features
        return gr.Number(label=name, value=0)
    if name in CATEGORICAL_COLS:
        # Dropdown restricted to the known values, pre-selecting the first one
        options = CATEGORICAL_MAPPING[name]
        return gr.Dropdown(label=name, choices=options, value=options[0])
    # Neither numeric nor categorical: fall back to free text
    return gr.Textbox(label=name, value="0")


input_components = [_build_input_component(name) for name in FEATURE_NAMES]

# Assemble the Gradio app: the prediction function, the per-feature inputs,
# and an HTML output so the colored verdict markup is rendered.
iface = gr.Interface(
    title="Intrusion Detection System (KDD/NSL-KDD CNN)",
    description=(
        "Enter the 41 feature values of a network connection to detect if it is "
        "an attack or normal traffic. Use the attack patterns provided "
        "(e.g., Neptune DoS) to test the model."
    ),
    fn=predict_attack,
    inputs=input_components,
    outputs=gr.HTML(label="Prediction Result"),
)

# Start the app only when this file is executed as a script
if __name__ == "__main__":
    iface.launch()