MrUtakata committed on
Commit
dd5bf99
·
verified ·
1 Parent(s): 2434a2a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -86
app.py CHANGED
@@ -1,117 +1,95 @@
1
  import streamlit as st
2
  import pandas as pd
3
- import gdown
4
  import pickle
5
  import joblib
6
- import xgboost as xgb
7
-
8
- ##############################################
9
- # Helper Functions
10
- ##############################################
11
 
 
12
  @st.cache_resource
13
  def load_model_artifacts():
14
- """
15
- Loads and returns the artifacts needed for testing the IDS:
16
- - features_to_drop: The set of features dropped during training.
17
- - category_encodings: The mapping for encoding categorical variables.
18
- - xgb_model: The pre-trained XGBoost classifier.
19
- """
20
  with open('features_to_drop.pkl', 'rb') as f:
21
  features_to_drop = pickle.load(f)
22
  with open('category_encodings.pkl', 'rb') as f:
23
  category_encodings = pickle.load(f)
24
- xgb_model = joblib.load('xgb_model.pkl')
25
- return features_to_drop, category_encodings, xgb_model
26
 
 
27
  def preprocess_input(df, features_to_drop, category_encodings):
28
- """
29
- Preprocesses the incoming test DataFrame:
30
- - Converts required columns to numeric.
31
- - Computes engineered features: duration, byte_ratio, and pkt_ratio.
32
- - Drops columns that were removed during model training.
33
- - Encodes categorical variables using pre-saved mappings.
34
- """
35
  df = df.copy()
36
- expected_cols = ["Stime", "Ltime", "sbytes", "dbytes", "Spkts", "Dpkts"]
37
- for col in expected_cols:
38
- if col in df.columns:
39
- df[col] = pd.to_numeric(df[col], errors='coerce')
40
- else:
41
- st.error(f"Missing required column: {col}")
42
- return None
43
 
44
  # Create engineered features
45
- df['duration'] = df['Ltime'] - df['Stime']
46
- df['byte_ratio'] = df['sbytes'] / (df['dbytes'] + 1)
47
- df['pkt_ratio'] = df['Spkts'] / (df['Dpkts'] + 1)
48
-
49
- # Drop features (if present in the input) that were filtered during training
50
  drop_cols = list(features_to_drop.intersection(set(df.columns)))
51
- if drop_cols:
52
- df = df.drop(columns=drop_cols)
53
-
54
- # Encode categorical variables
55
- for col, categories in category_encodings.items():
56
  if col in df.columns:
57
  df[col] = df[col].astype(str)
58
- df[col] = pd.Categorical(df[col], categories=categories)
59
  df[col] = df[col].cat.codes
 
60
  df = df.fillna(0)
61
  return df
62
 
63
- ##############################################
64
- # Streamlit Interface - TextArea Input
65
- ##############################################
66
 
67
- st.set_page_config(page_title="Intrusion Detection System - Test", layout="wide")
68
- st.title("Intrusion Detection System (IDS) - Single Row Input")
69
-
70
- st.markdown("""
71
- Paste a **single row of comma-separated values** below. Include only the relevant features required for prediction.
72
-
73
- **Expected columns (in order):**
74
- `Stime, Ltime, sbytes, dbytes, Spkts, Dpkts`
75
-
76
- You may include additional columns (like `label`) if desired, but they will be ignored.
77
- """)
78
 
79
  with st.form("manual_input_form"):
80
- text_input = st.text_area(
81
- "Paste a single row of data (comma-separated values):",
82
- placeholder="e.g. 1425579984.0,1425579990.0,275,423,10,8"
83
- )
 
 
 
 
 
 
 
 
 
 
84
  submitted = st.form_submit_button("Run IDS Prediction")
85
 
 
86
  if submitted:
87
- try:
88
- # Split and parse the user input into a list of values
89
- input_values = [x.strip() for x in text_input.split(',') if x.strip() != '']
90
-
91
- # Define the column names expected by the model
92
- expected_columns = ["Stime", "Ltime", "sbytes", "dbytes", "Spkts", "Dpkts"]
93
-
94
- if len(input_values) < len(expected_columns):
95
- st.error(f"Not enough values provided. Expected at least {len(expected_columns)} values.")
96
- else:
97
- # Take only the first N values needed
98
- input_data = dict(zip(expected_columns, input_values[:len(expected_columns)]))
99
- user_input = pd.DataFrame([input_data])
100
-
101
- # Load model artifacts
102
- features_to_drop, category_encodings, model = load_model_artifacts()
103
 
104
- # Preprocess and predict
105
- processed_input = preprocess_input(user_input, features_to_drop, category_encodings)
106
 
107
- if processed_input is not None:
108
- prediction = model.predict(processed_input)[0]
109
- st.success(f"Prediction: {prediction}")
110
- st.markdown("""
111
- - **13** → Normal Traffic
112
- - **Other values** → Intrusion Category (refer to model documentation for exact mappings)
113
- """)
114
- else:
115
- st.error("Preprocessing failed. Please check your input values.")
116
- except Exception as e:
117
- st.error(f"An error occurred while processing your input: {e}")
 
1
  import streamlit as st
2
  import pandas as pd
 
3
  import pickle
4
  import joblib
 
 
 
 
 
5
 
6
# ------------------- Load Artifacts -------------------
@st.cache_resource
def load_model_artifacts():
    """Load and cache the artifacts required for IDS inference.

    Cached with ``st.cache_resource`` so the pickles and the model are
    read from disk only once per Streamlit session.

    Returns:
        tuple: ``(features_to_drop, category_encodings, model)`` where
            - ``features_to_drop`` is the set of feature names removed
              during training,
            - ``category_encodings`` maps each categorical column name to
              its training-time category list,
            - ``model`` is the trained classifier loaded via joblib
              (saved as ``xgb_model.pkl``).
    """
    with open('features_to_drop.pkl', 'rb') as f:
        features_to_drop = pickle.load(f)
    with open('category_encodings.pkl', 'rb') as f:
        category_encodings = pickle.load(f)
    model = joblib.load('xgb_model.pkl')
    return features_to_drop, category_encodings, model
 
16
# ------------------- Preprocess Function -------------------
def preprocess_input(df, features_to_drop, category_encodings):
    """Preprocess a raw input DataFrame for the IDS model.

    Steps:
        1. Validate that the required numeric columns are present and
           coerce them to numeric (invalid entries become NaN).
        2. Compute engineered features: duration, byte_ratio, pkt_ratio.
        3. Drop features that were removed during model training.
        4. Encode categoricals using the training-time category lists.
        5. Fill remaining NaNs with 0.

    Args:
        df: Input rows; must contain Stime, Ltime, sbytes, dbytes,
            Spkts, Dpkts. Extra columns are tolerated.
        features_to_drop: Set of column names removed during training.
        category_encodings: Mapping of categorical column -> category list.

    Returns:
        The processed DataFrame, or None if a required column is missing
        (callers check for None and surface a user-facing error).
    """
    df = df.copy()

    # Required numeric inputs. Return None (instead of letting df[col]
    # raise KeyError) so the caller's `is not None` check still works —
    # this validation existed before and was lost in a refactor.
    required = ["Stime", "Ltime", "sbytes", "dbytes", "Spkts", "Dpkts"]
    if any(col not in df.columns for col in required):
        return None
    for col in required:
        df[col] = pd.to_numeric(df[col], errors='coerce')

    # Create engineered features; the +1 denominators avoid division by zero.
    df["duration"] = df["Ltime"] - df["Stime"]
    df["byte_ratio"] = df["sbytes"] / (df["dbytes"] + 1)
    df["pkt_ratio"] = df["Spkts"] / (df["Dpkts"] + 1)

    # Drop features removed during training (only those actually present).
    drop_cols = list(features_to_drop.intersection(set(df.columns)))
    df.drop(columns=drop_cols, inplace=True, errors='ignore')

    # Encode categoricals with the saved category order; values unseen
    # at training time map to category NaN and therefore code -1.
    for col, cats in category_encodings.items():
        if col in df.columns:
            df[col] = df[col].astype(str)
            df[col] = pd.Categorical(df[col], categories=cats)
            df[col] = df[col].cat.codes

    df = df.fillna(0)
    return df
42
 
43
# ------------------- Streamlit UI -------------------
st.set_page_config("Intrusion Detection - Manual Test", layout="wide")
st.title("Intrusion Detection System (IDS) - Manual Row Test")

st.markdown("Enter a single row of network traffic data below for intrusion detection:")

with st.form("manual_input_form"):
    col1, col2 = st.columns(2)
    with col1:
        Stime = st.number_input("Stime (Start Time)", min_value=0.0, step=1.0)
        sbytes = st.number_input("sbytes (Source Bytes)", min_value=0, step=1)
        Spkts = st.number_input("Spkts (Source Packets)", min_value=0, step=1)
        proto = st.selectbox("Protocol (proto)", ["tcp", "udp", "icmp", "-"])
        service = st.text_input("Service (e.g., http, dns, -)", value="-")
    with col2:
        Ltime = st.number_input("Ltime (End Time)", min_value=0.0, step=1.0)
        dbytes = st.number_input("dbytes (Destination Bytes)", min_value=0, step=1)
        Dpkts = st.number_input("Dpkts (Destination Packets)", min_value=0, step=1)
        state = st.selectbox("State", ["CON", "INT", "FIN", "RST", "REQ", "-"])
        attack_cat = st.text_input("Optional: Known Attack Category", value="")

    submitted = st.form_submit_button("Run IDS Prediction")

# ------------------- Prediction Logic -------------------
if submitted:
    # Guard the whole pipeline: artifact loading or prediction can fail
    # (missing pickle files, feature mismatch), and without this the app
    # surfaces a raw traceback. This handler existed before a refactor
    # dropped it; restored here.
    try:
        # Assemble a single-row DataFrame from the form fields.
        user_input = pd.DataFrame([{
            "Stime": Stime,
            "Ltime": Ltime,
            "sbytes": sbytes,
            "dbytes": dbytes,
            "Spkts": Spkts,
            "Dpkts": Dpkts,
            "proto": proto,
            "service": service,
            "state": state,
            "attack_cat": attack_cat,  # optional; dropped if filtered at training
        }])

        # Load model artifacts (cached across reruns).
        features_to_drop, category_encodings, model = load_model_artifacts()

        # Preprocess input; returns None when required columns are missing.
        processed_input = preprocess_input(user_input, features_to_drop, category_encodings)

        if processed_input is not None:
            prediction = model.predict(processed_input)[0]
            st.success(f"Prediction: {prediction}")
            st.markdown("""
- **13** → Normal Traffic
- **Other values** → Intrusion Category (refer to model documentation)
""")
        else:
            st.error("Preprocessing failed. Please check your input fields.")
    except Exception as e:
        st.error(f"An error occurred while processing your input: {e}")