Update app.py
Browse files
app.py
CHANGED
|
@@ -1,117 +1,95 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
import pandas as pd
|
| 3 |
-
import gdown
|
| 4 |
import pickle
|
| 5 |
import joblib
|
| 6 |
-
import xgboost as xgb
|
| 7 |
-
|
| 8 |
-
##############################################
|
| 9 |
-
# Helper Functions
|
| 10 |
-
##############################################
|
| 11 |
|
|
|
|
| 12 |
@st.cache_resource
|
| 13 |
def load_model_artifacts():
|
| 14 |
-
"""
|
| 15 |
-
Loads and returns the artifacts needed for testing the IDS:
|
| 16 |
-
- features_to_drop: The set of features dropped during training.
|
| 17 |
-
- category_encodings: The mapping for encoding categorical variables.
|
| 18 |
-
- xgb_model: The pre-trained XGBoost classifier.
|
| 19 |
-
"""
|
| 20 |
with open('features_to_drop.pkl', 'rb') as f:
|
| 21 |
features_to_drop = pickle.load(f)
|
| 22 |
with open('category_encodings.pkl', 'rb') as f:
|
| 23 |
category_encodings = pickle.load(f)
|
| 24 |
-
|
| 25 |
-
return features_to_drop, category_encodings,
|
| 26 |
|
|
|
|
| 27 |
def preprocess_input(df, features_to_drop, category_encodings):
|
| 28 |
-
"""
|
| 29 |
-
Preprocesses the incoming test DataFrame:
|
| 30 |
-
- Converts required columns to numeric.
|
| 31 |
-
- Computes engineered features: duration, byte_ratio, and pkt_ratio.
|
| 32 |
-
- Drops columns that were removed during model training.
|
| 33 |
-
- Encodes categorical variables using pre-saved mappings.
|
| 34 |
-
"""
|
| 35 |
df = df.copy()
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
else:
|
| 41 |
-
st.error(f"Missing required column: {col}")
|
| 42 |
-
return None
|
| 43 |
|
| 44 |
# Create engineered features
|
| 45 |
-
df[
|
| 46 |
-
df[
|
| 47 |
-
df[
|
| 48 |
-
|
| 49 |
-
# Drop features
|
| 50 |
drop_cols = list(features_to_drop.intersection(set(df.columns)))
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
for col, categories in category_encodings.items():
|
| 56 |
if col in df.columns:
|
| 57 |
df[col] = df[col].astype(str)
|
| 58 |
-
df[col] = pd.Categorical(df[col], categories=
|
| 59 |
df[col] = df[col].cat.codes
|
|
|
|
| 60 |
df = df.fillna(0)
|
| 61 |
return df
|
| 62 |
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
|
| 67 |
-
st.
|
| 68 |
-
st.title("Intrusion Detection System (IDS) - Single Row Input")
|
| 69 |
-
|
| 70 |
-
st.markdown("""
|
| 71 |
-
Paste a **single row of comma-separated values** below. Include only the relevant features required for prediction.
|
| 72 |
-
|
| 73 |
-
**Expected columns (in order):**
|
| 74 |
-
`Stime, Ltime, sbytes, dbytes, Spkts, Dpkts`
|
| 75 |
-
|
| 76 |
-
You may include additional columns (like `label`) if desired, but they will be ignored.
|
| 77 |
-
""")
|
| 78 |
|
| 79 |
with st.form("manual_input_form"):
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
submitted = st.form_submit_button("Run IDS Prediction")
|
| 85 |
|
|
|
|
| 86 |
if submitted:
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
features_to_drop, category_encodings, model = load_model_artifacts()
|
| 103 |
|
| 104 |
-
|
| 105 |
-
|
| 106 |
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
except Exception as e:
|
| 117 |
-
st.error(f"An error occurred while processing your input: {e}")
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
import pandas as pd
|
|
|
|
| 3 |
import pickle
|
| 4 |
import joblib
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
+
# ------------------- Load Artifacts -------------------
@st.cache_resource
def load_model_artifacts():
    """
    Load and return the artifacts needed to run the IDS.

    Returns:
        features_to_drop: set of feature names that were dropped during training.
        category_encodings: mapping of categorical column -> ordered category list,
            used to encode categoricals exactly as at training time.
        model: the pre-trained XGBoost classifier (serialized with joblib).

    Cached with st.cache_resource so the pickles are only read once per session.
    NOTE(review): pickle/joblib deserialization executes arbitrary code — these
    artifact files must be trusted.
    """
    with open('features_to_drop.pkl', 'rb') as f:
        features_to_drop = pickle.load(f)
    with open('category_encodings.pkl', 'rb') as f:
        category_encodings = pickle.load(f)
    model = joblib.load('xgb_model.pkl')
    return features_to_drop, category_encodings, model
| 15 |
|
| 16 |
+
# ------------------- Preprocess Function -------------------
def preprocess_input(df, features_to_drop, category_encodings):
    """
    Preprocess an incoming test DataFrame for the IDS model.

    Steps:
        - Validates and converts the required columns to numeric.
        - Computes engineered features: duration, byte_ratio, pkt_ratio.
        - Drops columns that were removed during model training.
        - Encodes categorical variables using the pre-saved category mappings.

    Args:
        df: input DataFrame (a single row or many).
        features_to_drop: set of column names removed during training.
        category_encodings: mapping of column -> ordered category list.

    Returns:
        The preprocessed DataFrame, or None if a required column is missing
        (an error is shown in the UI via st.error in that case). Callers rely
        on the None contract — see the `if processed_input is not None` check.
    """
    df = df.copy()

    # Validate presence first: pd.to_numeric on a missing column would raise
    # KeyError, and the caller expects None (not an exception) on bad input.
    for col in ["Stime", "Ltime", "sbytes", "dbytes", "Spkts", "Dpkts"]:
        if col not in df.columns:
            st.error(f"Missing required column: {col}")
            return None
        df[col] = pd.to_numeric(df[col], errors='coerce')

    # Create engineered features (the +1 denominators avoid division by zero)
    df["duration"] = df["Ltime"] - df["Stime"]
    df["byte_ratio"] = df["sbytes"] / (df["dbytes"] + 1)
    df["pkt_ratio"] = df["Spkts"] / (df["Dpkts"] + 1)

    # Drop features removed during training
    drop_cols = list(features_to_drop.intersection(set(df.columns)))
    df.drop(columns=drop_cols, inplace=True, errors='ignore')

    # Encode categoricals with the training-time category order; unseen
    # values become NaN codes (-1 via cat.codes) and are zeroed by fillna.
    for col, cats in category_encodings.items():
        if col in df.columns:
            df[col] = df[col].astype(str)
            df[col] = pd.Categorical(df[col], categories=cats)
            df[col] = df[col].cat.codes

    df = df.fillna(0)
    return df
|
| 42 |
|
| 43 |
+
# ------------------- Streamlit UI -------------------
st.set_page_config("Intrusion Detection - Manual Test", layout="wide")
st.title("Intrusion Detection System (IDS) - Manual Row Test")

st.markdown("Enter a single row of network traffic data below for intrusion detection:")

with st.form("manual_input_form"):
    col1, col2 = st.columns(2)
    with col1:
        Stime = st.number_input("Stime (Start Time)", min_value=0.0, step=1.0)
        sbytes = st.number_input("sbytes (Source Bytes)", min_value=0, step=1)
        Spkts = st.number_input("Spkts (Source Packets)", min_value=0, step=1)
        proto = st.selectbox("Protocol (proto)", ["tcp", "udp", "icmp", "-"])
        service = st.text_input("Service (e.g., http, dns, -)", value="-")
    with col2:
        Ltime = st.number_input("Ltime (End Time)", min_value=0.0, step=1.0)
        dbytes = st.number_input("dbytes (Destination Bytes)", min_value=0, step=1)
        Dpkts = st.number_input("Dpkts (Destination Packets)", min_value=0, step=1)
        state = st.selectbox("State", ["CON", "INT", "FIN", "RST", "REQ", "-"])
        attack_cat = st.text_input("Optional: Known Attack Category", value="")

    submitted = st.form_submit_button("Run IDS Prediction")

# ------------------- Prediction Logic -------------------
if submitted:
    # Wrap the whole pipeline so artifact-load or predict failures surface
    # in the UI instead of crashing the script run.
    try:
        user_input = pd.DataFrame([{
            "Stime": Stime,
            "Ltime": Ltime,
            "sbytes": sbytes,
            "dbytes": dbytes,
            "Spkts": Spkts,
            "Dpkts": Dpkts,
            "proto": proto,
            "service": service,
            "state": state,
            "attack_cat": attack_cat  # optional
        }])

        # Load model artifacts (cached across reruns)
        features_to_drop, category_encodings, model = load_model_artifacts()

        # Preprocess input; returns None when required columns are missing
        processed_input = preprocess_input(user_input, features_to_drop, category_encodings)

        if processed_input is not None:
            prediction = model.predict(processed_input)[0]
            st.success(f"Prediction: {prediction}")
            st.markdown("""
            - **13** → Normal Traffic
            - **Other values** → Intrusion Category (refer to model documentation)
            """)
        else:
            st.error("Preprocessing failed. Please check your input fields.")
    except Exception as e:
        st.error(f"An error occurred while processing your input: {e}")
|
|
|
|
|
|