File size: 2,440 Bytes
9b77caa
 
95c089b
f6e4e35
ed4ae5b
588821a
ed4ae5b
7a9668f
ed4ae5b
f6e4e35
 
ed4ae5b
 
edd1c24
 
 
 
 
 
ed4ae5b
edd1c24
 
bf47bf0
b3c5d6f
 
 
 
bf47bf0
504624b
b3c5d6f
bf47bf0
b3c5d6f
 
 
 
 
 
 
72996f9
 
b3c5d6f
 
 
 
ed4ae5b
 
f6e4e35
ed4ae5b
f6e4e35
 
 
ed4ae5b
 
 
bf47bf0
b3c5d6f
ed4ae5b
b3c5d6f
 
ed4ae5b
edd1c24
f6e4e35
ed4ae5b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import streamlit as st
import pandas as pd
import joblib
import pickle
import numpy as np

# Load model and preprocessing artifacts
# Streamlit re-executes this whole script on every widget interaction, so the
# artifacts are loaded through a cached helper instead of being deserialized
# again on each rerun. Releases without `st.cache_resource` fall back to an
# identity decorator, i.e. no caching and the original load-every-run behavior.
_cache_artifacts = getattr(st, "cache_resource", lambda func: func)


@_cache_artifacts
def _load_artifacts():
    """Load the trained model and the collection of column names to drop.

    Returns:
        A ``(model, features_to_drop)`` tuple read from
        ``ensemble_voting_model.pkl`` and ``features_to_drop.pkl``, both
        resolved relative to the current working directory.
    """
    # NOTE: joblib/pickle can execute arbitrary code while deserializing;
    # only deploy artifact files that come from a trusted source.
    loaded_model = joblib.load("ensemble_voting_model.pkl")
    with open("features_to_drop.pkl", "rb") as f:
        to_drop = pickle.load(f)
    return loaded_model, to_drop


model, features_to_drop = _load_artifacts()

# Column names from the raw 49-column dataset (before feature engineering)
raw_columns = [
    "srcip", "sport", "dstip", "dsport", "proto",
    "state", "dur", "sbytes", "dbytes", "sttl",
    "dttl", "sloss", "dloss", "service", "Sload",
    "Dload", "Spkts", "Dpkts", "swin", "dwin",
    "stcpb", "dtcpb", "smeansz", "dmeansz", "trans_depth",
    "res_bdy_len", "Sjit", "Djit", "Stime", "Ltime",
    "Sintpkt", "Dintpkt", "tcprtt", "synack", "ackdat",
    "is_sm_ips_ports", "ct_state_ttl", "ct_flw_http_mthd", "is_ftp_login", "ct_ftp_cmd",
    # NOTE(review): "ct_src_ ltm" keeps its embedded space exactly as written;
    # presumably it mirrors the dataset header -- confirm before normalizing.
    "ct_srv_src", "ct_srv_dst", "ct_dst_ltm", "ct_src_ ltm", "ct_src_dport_ltm",
    # The last two entries are the label columns; preprocess_input drops them.
    "ct_dst_sport_ltm", "ct_dst_src_ltm", "attack_cat", "Label",
]

# Function to preprocess a single input row
def preprocess_input(row_values):
    """Turn one raw 49-value record into the single-row frame fed to the model.

    Args:
        row_values: Sequence of 49 raw values, ordered like ``raw_columns``.

    Returns:
        A one-row ``pandas.DataFrame`` holding the numeric-coerced raw columns
        plus the derived ``duration``, ``byte_ratio`` and ``pkt_ratio``
        columns, minus everything named in ``features_to_drop`` and the
        ``attack_cat`` / ``Label`` columns.

    Raises:
        ValueError: If ``row_values`` does not contain exactly 49 items.
    """
    # Guard clause: reject rows of the wrong width before building the frame.
    if len(row_values) != 49:
        raise ValueError(f"❌ Expected 49 values, but got {len(row_values)}.")

    # One-row frame with every column coerced to numeric; values that cannot
    # be parsed as numbers become NaN (errors='coerce').
    frame = pd.DataFrame([row_values], columns=raw_columns).apply(
        pd.to_numeric, errors='coerce'
    )

    # Derived features; the +1 in each denominator avoids division by zero.
    frame['duration'] = frame['Ltime'] - frame['Stime']
    frame['byte_ratio'] = frame['sbytes'] / (frame['dbytes'] + 1)
    frame['pkt_ratio'] = frame['Spkts'] / (frame['Dpkts'] + 1)

    # features_to_drop may not be a list, so unpack it into one alongside the
    # label columns; errors='ignore' skips names that are not in the frame.
    unwanted = [*features_to_drop, 'attack_cat', 'Label']
    return frame.drop(columns=unwanted, errors='ignore')

# Streamlit UI
# Page header and instructions.
# NOTE(review): the title emoji was garbled (mojibake) and its final byte was
# lost; a magnifying glass is assumed here -- confirm the intended glyph.
st.title("🔍 Intrusion Detection In Networks")
st.markdown("Paste a **single row** of raw features from the dataset (49 values, tab-separated):")

user_input = st.text_area("Input Row", height=150)

if st.button("Predict"):
    if not user_input.strip():
        # Empty input would otherwise surface as the misleading
        # "Expected 49 values, but got 1" error.
        st.warning("⚠️ Please paste a row of feature values before predicting.")
    else:
        # Top-level UI boundary: any failure while parsing, preprocessing or
        # predicting is reported to the user rather than crashing the app.
        try:
            # Parse the tab-separated row into its individual raw values
            values = user_input.strip().split("\t")

            # Build the model-ready single-row frame
            processed_df = preprocess_input(values)

            # predict() returns one entry per row; only one row was supplied
            prediction = model.predict(processed_df)[0]
            st.success(f"✅ Predicted Attack Category: **{prediction}**")

        except Exception as e:
            st.error(f"❌ Error processing input: {e}")