Update app.py
Browse files
app.py
CHANGED
|
@@ -1,117 +1,95 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
import pandas as pd
|
| 3 |
-
import gdown
|
| 4 |
import pickle
|
| 5 |
import joblib
|
| 6 |
-
import xgboost as xgb
|
| 7 |
-
|
| 8 |
-
##############################################
|
| 9 |
-
# Helper Functions
|
| 10 |
-
##############################################
|
| 11 |
|
|
|
|
| 12 |
@st.cache_resource
|
| 13 |
def load_model_artifacts():
|
| 14 |
-
"""
|
| 15 |
-
Loads and returns the artifacts needed for testing the IDS:
|
| 16 |
-
- features_to_drop: The set of features dropped during training.
|
| 17 |
-
- category_encodings: The mapping for encoding categorical variables.
|
| 18 |
-
- xgb_model: The pre-trained XGBoost classifier.
|
| 19 |
-
"""
|
| 20 |
with open('features_to_drop.pkl', 'rb') as f:
|
| 21 |
features_to_drop = pickle.load(f)
|
| 22 |
with open('category_encodings.pkl', 'rb') as f:
|
| 23 |
category_encodings = pickle.load(f)
|
| 24 |
-
|
| 25 |
-
return features_to_drop, category_encodings,
|
| 26 |
|
|
|
|
| 27 |
def preprocess_input(df, features_to_drop, category_encodings):
|
| 28 |
-
"""
|
| 29 |
-
Preprocesses the incoming test DataFrame:
|
| 30 |
-
- Converts required columns to numeric.
|
| 31 |
-
- Computes engineered features: duration, byte_ratio, and pkt_ratio.
|
| 32 |
-
- Drops columns that were removed during model training.
|
| 33 |
-
- Encodes categorical variables using pre-saved mappings.
|
| 34 |
-
"""
|
| 35 |
df = df.copy()
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
else:
|
| 41 |
-
st.error(f"Missing required column: {col}")
|
| 42 |
-
return None
|
| 43 |
|
| 44 |
# Create engineered features
|
| 45 |
-
df[
|
| 46 |
-
df[
|
| 47 |
-
df[
|
| 48 |
-
|
| 49 |
-
# Drop features
|
| 50 |
drop_cols = list(features_to_drop.intersection(set(df.columns)))
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
for col, categories in category_encodings.items():
|
| 56 |
if col in df.columns:
|
| 57 |
df[col] = df[col].astype(str)
|
| 58 |
-
df[col] = pd.Categorical(df[col], categories=
|
| 59 |
df[col] = df[col].cat.codes
|
|
|
|
| 60 |
df = df.fillna(0)
|
| 61 |
return df
|
| 62 |
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
|
| 67 |
-
st.
|
| 68 |
-
st.title("Intrusion Detection System (IDS) - Single Row Input")
|
| 69 |
-
|
| 70 |
-
st.markdown("""
|
| 71 |
-
Paste a **single row of comma-separated values** below. Include only the relevant features required for prediction.
|
| 72 |
-
|
| 73 |
-
**Expected columns (in order):**
|
| 74 |
-
`Stime, Ltime, sbytes, dbytes, Spkts, Dpkts`
|
| 75 |
-
|
| 76 |
-
You may include additional columns (like `label`) if desired, but they will be ignored.
|
| 77 |
-
""")
|
| 78 |
|
| 79 |
with st.form("manual_input_form"):
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
submitted = st.form_submit_button("Run IDS Prediction")
|
| 85 |
|
|
|
|
| 86 |
if submitted:
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
features_to_drop, category_encodings, model = load_model_artifacts()
|
| 103 |
|
| 104 |
-
|
| 105 |
-
|
| 106 |
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
except Exception as e:
|
| 117 |
-
st.error(f"An error occurred while processing your input: {e}")
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
import pandas as pd
|
|
|
|
| 3 |
import pickle
|
| 4 |
import joblib
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
+
# ------------------- Load Artifacts -------------------
@st.cache_resource
def load_model_artifacts():
    """
    Load and return the artifacts needed to run the IDS.

    Returns:
        features_to_drop: set of feature names that were dropped during training.
        category_encodings: mapping of categorical column -> ordered category list,
            used to encode categoricals exactly as at training time.
        model: the pre-trained XGBoost classifier (serialized with joblib).

    Cached with st.cache_resource so the pickles are only read once per session.
    NOTE(review): pickle/joblib deserialization executes arbitrary code — these
    artifact files must be trusted.
    """
    with open('features_to_drop.pkl', 'rb') as f:
        features_to_drop = pickle.load(f)
    with open('category_encodings.pkl', 'rb') as f:
        category_encodings = pickle.load(f)
    model = joblib.load('xgb_model.pkl')
    return features_to_drop, category_encodings, model
| 15 |
|
| 16 |
+
# ------------------- Preprocess Function -------------------
def preprocess_input(df, features_to_drop, category_encodings):
    """
    Preprocess an incoming test DataFrame for the IDS model.

    Steps:
        - Validates and converts the required columns to numeric.
        - Computes engineered features: duration, byte_ratio, pkt_ratio.
        - Drops columns that were removed during model training.
        - Encodes categorical variables using the pre-saved category mappings.

    Args:
        df: input DataFrame (a single row or many).
        features_to_drop: set of column names removed during training.
        category_encodings: mapping of column -> ordered category list.

    Returns:
        The preprocessed DataFrame, or None if a required column is missing
        (an error is shown in the UI via st.error in that case). Callers rely
        on the None contract — see the `if processed_input is not None` check.
    """
    df = df.copy()

    # Validate presence first: pd.to_numeric on a missing column would raise
    # KeyError, and the caller expects None (not an exception) on bad input.
    for col in ["Stime", "Ltime", "sbytes", "dbytes", "Spkts", "Dpkts"]:
        if col not in df.columns:
            st.error(f"Missing required column: {col}")
            return None
        df[col] = pd.to_numeric(df[col], errors='coerce')

    # Create engineered features (the +1 denominators avoid division by zero)
    df["duration"] = df["Ltime"] - df["Stime"]
    df["byte_ratio"] = df["sbytes"] / (df["dbytes"] + 1)
    df["pkt_ratio"] = df["Spkts"] / (df["Dpkts"] + 1)

    # Drop features removed during training
    drop_cols = list(features_to_drop.intersection(set(df.columns)))
    df.drop(columns=drop_cols, inplace=True, errors='ignore')

    # Encode categoricals with the training-time category order; unseen
    # values become NaN codes (-1 via cat.codes) and are zeroed by fillna.
    for col, cats in category_encodings.items():
        if col in df.columns:
            df[col] = df[col].astype(str)
            df[col] = pd.Categorical(df[col], categories=cats)
            df[col] = df[col].cat.codes

    df = df.fillna(0)
    return df
|
| 42 |
|
| 43 |
+
# ------------------- Streamlit UI -------------------
st.set_page_config("Intrusion Detection - Manual Test", layout="wide")
st.title("Intrusion Detection System (IDS) - Manual Row Test")

st.markdown("Enter a single row of network traffic data below for intrusion detection:")

with st.form("manual_input_form"):
    col1, col2 = st.columns(2)
    with col1:
        Stime = st.number_input("Stime (Start Time)", min_value=0.0, step=1.0)
        sbytes = st.number_input("sbytes (Source Bytes)", min_value=0, step=1)
        Spkts = st.number_input("Spkts (Source Packets)", min_value=0, step=1)
        proto = st.selectbox("Protocol (proto)", ["tcp", "udp", "icmp", "-"])
        service = st.text_input("Service (e.g., http, dns, -)", value="-")
    with col2:
        Ltime = st.number_input("Ltime (End Time)", min_value=0.0, step=1.0)
        dbytes = st.number_input("dbytes (Destination Bytes)", min_value=0, step=1)
        Dpkts = st.number_input("Dpkts (Destination Packets)", min_value=0, step=1)
        state = st.selectbox("State", ["CON", "INT", "FIN", "RST", "REQ", "-"])
        attack_cat = st.text_input("Optional: Known Attack Category", value="")

    submitted = st.form_submit_button("Run IDS Prediction")

# ------------------- Prediction Logic -------------------
if submitted:
    # Wrap the whole pipeline so artifact-load or predict failures surface
    # in the UI instead of crashing the script run.
    try:
        user_input = pd.DataFrame([{
            "Stime": Stime,
            "Ltime": Ltime,
            "sbytes": sbytes,
            "dbytes": dbytes,
            "Spkts": Spkts,
            "Dpkts": Dpkts,
            "proto": proto,
            "service": service,
            "state": state,
            "attack_cat": attack_cat  # optional
        }])

        # Load model artifacts (cached across reruns)
        features_to_drop, category_encodings, model = load_model_artifacts()

        # Preprocess input; returns None when required columns are missing
        processed_input = preprocess_input(user_input, features_to_drop, category_encodings)

        if processed_input is not None:
            prediction = model.predict(processed_input)[0]
            st.success(f"Prediction: {prediction}")
            st.markdown("""
            - **13** → Normal Traffic
            - **Other values** → Intrusion Category (refer to model documentation)
            """)
        else:
            st.error("Preprocessing failed. Please check your input fields.")
    except Exception as e:
        st.error(f"An error occurred while processing your input: {e}")
|
|
|
|
|
|