MrUtakata committed on
Commit
bf47bf0
·
verified ·
1 Parent(s): 504624b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -17
app.py CHANGED
@@ -20,26 +20,15 @@ raw_columns = [
20
  'ct_dst_sport_ltm', 'ct_dst_src_ltm', 'attack_cat', 'Label'
21
  ]
22
 
23
- # Define the columns that will be added with placeholder values
24
- missing_columns = ['srcip', 'dstip']
25
-
26
- # Function to create a dataframe with missing columns filled with placeholder values
27
  def preprocess_input(row_values):
28
- # Ensure the row has 49 values
29
  if len(row_values) != 49:
30
  raise ValueError(f"❌ Expected 49 values, but got {len(row_values)}.")
31
 
32
- # Create the DataFrame with the input values
33
  input_df = pd.DataFrame([row_values], columns=raw_columns)
34
 
35
- # Add the missing columns with placeholder values (like NaN)
36
- for col in missing_columns:
37
- input_df[col] = np.nan # Fill missing columns with NaN (or zero if needed)
38
-
39
- # Ensure that all columns are in the same order as the trained model expects
40
- input_df = input_df[raw_columns]
41
-
42
- # Convert columns to numeric where applicable
43
  input_df = input_df.apply(pd.to_numeric, errors='coerce')
44
 
45
  # Feature engineering
@@ -47,8 +36,8 @@ def preprocess_input(row_values):
47
  input_df['byte_ratio'] = input_df['sbytes'] / (input_df['dbytes'] + 1)
48
  input_df['pkt_ratio'] = input_df['Spkts'] / (input_df['Dpkts'] + 1)
49
 
50
- # Drop correlated features (e.g., ones with high correlation)
51
- input_df = input_df.drop(columns=features_to_drop, errors='ignore')
52
 
53
  return input_df
54
 
@@ -63,7 +52,7 @@ if st.button("Predict"):
63
  # Parse the input
64
  values = user_input.strip().split("\t")
65
 
66
- # Preprocess the input (adding missing columns and performing feature engineering)
67
  processed_df = preprocess_input(values)
68
 
69
  # Predict using the preprocessed data
 
20
  'ct_dst_sport_ltm', 'ct_dst_src_ltm', 'attack_cat', 'Label'
21
  ]
22
 
23
+ # Function to preprocess a single input row
 
 
 
24
  def preprocess_input(row_values):
 
25
  if len(row_values) != 49:
26
  raise ValueError(f"❌ Expected 49 values, but got {len(row_values)}.")
27
 
28
+ # Create DataFrame from input
29
  input_df = pd.DataFrame([row_values], columns=raw_columns)
30
 
31
+ # Convert all columns to numeric
 
 
 
 
 
 
 
32
  input_df = input_df.apply(pd.to_numeric, errors='coerce')
33
 
34
  # Feature engineering
 
36
  input_df['byte_ratio'] = input_df['sbytes'] / (input_df['dbytes'] + 1)
37
  input_df['pkt_ratio'] = input_df['Spkts'] / (input_df['Dpkts'] + 1)
38
 
39
+ # Drop unused or label columns
40
+ input_df = input_df.drop(columns=features_to_drop + ['attack_cat', 'Label'], errors='ignore')
41
 
42
  return input_df
43
 
 
52
  # Parse the input
53
  values = user_input.strip().split("\t")
54
 
55
+ # Preprocess the input row
56
  processed_df = preprocess_input(values)
57
 
58
  # Predict using the preprocessed data