hieu3636 committed on
Commit
0479565
·
verified ·
1 Parent(s): 02cbf7b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -21
app.py CHANGED
@@ -4,39 +4,82 @@ import numpy as np
4
  import joblib
5
  import tensorflow as tf
6
 
7
- # Load model & scaler
8
- model = tf.keras.models.load_model("mlp_model.keras")
 
 
9
  scaler = joblib.load("scaler.pkl")
10
 
11
- N_FEATURES = model.input_shape[1]
 
12
 
13
- def predict_csv(file):
14
- df = pd.read_csv(file)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
- # Drop label column if it exists
17
- df = df.drop(columns=["Label", "label"], errors="ignore")
18
 
19
- # Add row index for display only
20
- df.insert(0, "row_id", range(1, len(df) + 1))
 
 
 
21
 
22
- # Separate features for model
23
- feature_df = df.drop(columns=["row_id"])
24
 
25
- # Check feature count
26
- if feature_df.shape[1] != N_FEATURES:
 
27
  return (
28
- f"Expected {N_FEATURES} features, "
29
- f"but got {feature_df.shape[1]} columns."
30
  )
31
 
 
 
 
 
32
  X = feature_df.values.astype(float)
 
 
33
  X_scaled = scaler.transform(X)
34
 
 
35
  probs = model.predict(X_scaled).reshape(-1)
36
  preds = (probs > 0.5).astype(int)
37
 
38
- # Build result table (row_id kept)
39
  result = df.copy()
 
40
  result["probability_malware"] = probs
41
  result["prediction"] = preds
42
  result["prediction_label"] = result["prediction"].map(
@@ -45,14 +88,17 @@ def predict_csv(file):
45
 
46
  return result
47
 
48
-
49
-
 
50
  demo = gr.Interface(
51
  fn=predict_csv,
52
  inputs=gr.File(label="Upload CSV file"),
53
  outputs=gr.Dataframe(label="Prediction Result"),
54
- title="Malware Detection MLP Model",
55
- description="Upload a CSV file with features to predict malware or benign."
 
 
56
  )
57
 
58
- demo.launch()
 
4
  import joblib
5
  import tensorflow as tf
6
 
7
+ # =========================
8
+ # LOAD MODEL & SCALER
9
+ # =========================
10
+ model = tf.keras.models.load_model("mlp_malware.keras")
11
  scaler = joblib.load("scaler.pkl")
12
 
13
+ # =========================
14
+ # 30 SELECTED FEATURES
15
 
16
+ SELECTED_FEATURES = [
17
+ "filesize",
18
+ "E_file",
19
+ "E_text",
20
+ "E_data",
21
+ "AddressOfEntryPoint",
22
+ "NumberOfSections",
23
+ "SizeOfInitializedData",
24
+ "SizeOfImage",
25
+ "SizeOfOptionalHeader",
26
+ "SizeOfCode",
27
+ "DirectoryEntryImportSize",
28
+ "ImageBase",
29
+ "CheckSum",
30
+ "Magic",
31
+ "MinorLinkerVersion",
32
+ "MajorSubsystemVersion",
33
+ "e_lfanew",
34
+ "sus_sections",
35
+ "PointerToSymbolTable",
36
+ "SectionsLength",
37
+ "SizeOfStackReverse",
38
+ "MajorOperatingSystemVersion",
39
+ "non_sus_sections",
40
+ "Characteristics",
41
+ "NumberOfSymbols",
42
+ "BaseOfData",
43
+ "MajorImageVersion",
44
+ "FH_char5",
45
+ "FH_char8",
46
+ "OH_DLLchar5"
47
+ ]
48
 
49
+ N_FEATURES = len(SELECTED_FEATURES)
 
50
 
51
+ # =========================
52
+ # PREDICTION FUNCTION
53
+ # =========================
54
+ def predict_csv(file):
55
+ df = pd.read_csv(file)
56
 
57
+ # Drop label columns if exist
58
+ df = df.drop(columns=["Label", "label", "class", "Class"], errors="ignore")
59
 
60
+ # Check missing features
61
+ missing_features = [f for f in SELECTED_FEATURES if f not in df.columns]
62
+ if missing_features:
63
  return (
64
+ f"Missing required features: {missing_features}"
 
65
  )
66
 
67
+ # Keep only selected features & correct order
68
+ feature_df = df[SELECTED_FEATURES].copy()
69
+
70
+ # Convert to float
71
  X = feature_df.values.astype(float)
72
+
73
+ # Scale
74
  X_scaled = scaler.transform(X)
75
 
76
+ # Predict
77
  probs = model.predict(X_scaled).reshape(-1)
78
  preds = (probs > 0.5).astype(int)
79
 
80
+ # Build output dataframe
81
  result = df.copy()
82
+ result.insert(0, "row_id", range(1, len(df) + 1))
83
  result["probability_malware"] = probs
84
  result["prediction"] = preds
85
  result["prediction_label"] = result["prediction"].map(
 
88
 
89
  return result
90
 
91
+ # =========================
92
+ # GRADIO INTERFACE
93
+ # =========================
94
  demo = gr.Interface(
95
  fn=predict_csv,
96
  inputs=gr.File(label="Upload CSV file"),
97
  outputs=gr.Dataframe(label="Prediction Result"),
98
+ title="Malware Detection",
99
+ description=(
100
+ "Upload a CSV file containing PE features. "
101
+ )
102
  )
103
 
104
+ demo.launch()