hieu3636 committed on
Commit
2c27db2
·
verified ·
1 Parent(s): 7b4cb68

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -84
app.py CHANGED
@@ -1,104 +1,75 @@
1
- import gradio as gr
2
  import pandas as pd
 
3
  import numpy as np
4
- import joblib
5
- import tensorflow as tf
6
 
7
- # =========================
8
- # LOAD MODEL & SCALER
9
- # =========================
10
- model = tf.keras.models.load_model("mlp_malware.keras")
11
- scaler = joblib.load("scaler.pkl")
12
 
13
- # =========================
14
- # 30 SELECTED FEATURES
 
15
 
16
- SELECTED_FEATURES = [
17
- "filesize",
18
- "E_file",
19
- "E_text",
20
- "E_data",
21
- "AddressOfEntryPoint",
22
- "NumberOfSections",
23
- "SizeOfInitializedData",
24
- "SizeOfImage",
25
- "SizeOfOptionalHeader",
26
- "SizeOfCode",
27
- "DirectoryEntryImportSize",
28
- "ImageBase",
29
- "CheckSum",
30
- "Magic",
31
- "MinorLinkerVersion",
32
- "MajorSubsystemVersion",
33
- "e_lfanew",
34
- "sus_sections",
35
- "PointerToSymbolTable",
36
- "SectionsLength",
37
- "SizeOfStackReserve",
38
- "MajorOperatingSystemVersion",
39
- "non_sus_sections",
40
- "Characteristics",
41
- "NumberOfSymbols",
42
- "BaseOfData",
43
- "MajorImageVersion",
44
- "FH_char5",
45
- "FH_char8",
46
- "OH_DLLchar5"
47
- ]
48
 
49
- N_FEATURES = len(SELECTED_FEATURES)
 
 
 
50
 
51
- # =========================
52
- # PREDICTION FUNCTION
53
- # =========================
54
- def predict_csv(file):
55
- df = pd.read_csv(file)
56
 
57
- # Drop label columns if exist
58
- df = df.drop(columns=["Label", "label", "class", "Class"], errors="ignore")
 
 
 
59
 
60
- # Check missing features
61
- missing_features = [f for f in SELECTED_FEATURES if f not in df.columns]
62
- if missing_features:
63
- return (
64
- f"Missing required features: {missing_features}"
65
- )
66
 
67
- # Keep only selected features & correct order
68
- feature_df = df[SELECTED_FEATURES].copy()
 
69
 
70
- # Convert to float
71
- X = feature_df.values.astype(float)
 
 
72
 
73
- # Scale
74
- X_scaled = scaler.transform(X)
75
 
76
- # Predict
77
- probs = model.predict(X_scaled).reshape(-1)
78
- preds = (probs > 0.5).astype(int)
79
 
80
- # Build output dataframe
81
- result = df.copy()
82
- result.insert(0, "row_id", range(1, len(df) + 1))
83
- result["probability_malware"] = probs
84
- result["prediction"] = preds
85
- result["prediction_label"] = result["prediction"].map(
86
- {1: "malware", 0: "benign"}
87
- )
88
 
89
- return result
 
 
 
90
 
91
- # =========================
92
- # GRADIO INTERFACE
93
- # =========================
94
- demo = gr.Interface(
95
- fn=predict_csv,
96
- inputs=gr.File(label="Upload CSV file"),
97
- outputs=gr.Dataframe(label="Prediction Result"),
98
- title="Malware Detection",
99
  description=(
100
- "Upload a CSV file containing PE features. "
 
 
101
  )
102
  )
103
 
104
- demo.launch()
 
 
1
+ import joblib
2
  import pandas as pd
3
+ import gradio as gr
4
  import numpy as np
 
 
5
 
6
+ # ======================
7
+ # LOAD MODEL
8
+ # ======================
9
+ artifact = joblib.load("stacking_model.pkl")
 
10
 
11
+ base_models = artifact["base_models"] # list of (name, model)
12
+ meta_model = artifact["meta_model"]
13
+ feature_names = artifact["features"]
14
 
15
+ # ======================
16
+ # PREDICTION FUNCTION
17
+ # ======================
18
+ def predict_malware_csv(file):
19
+ # Read CSV
20
+ df = pd.read_csv(file.name)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
+ # Check missing features
23
+ missing = set(feature_names) - set(df.columns)
24
+ if missing:
25
+ return f"❌ Missing features: {list(missing)}", None
26
 
27
+ X = df[feature_names]
 
 
 
 
28
 
29
+ # Level-1 predictions
30
+ meta_inputs = []
31
+ for name, model in base_models:
32
+ prob = model.predict_proba(X)[:, 1]
33
+ meta_inputs.append(prob)
34
 
35
+ meta_X = np.column_stack(meta_inputs)
 
 
 
 
 
36
 
37
+ # Meta prediction
38
+ preds = meta_model.predict(meta_X)
39
+ probs = meta_model.predict_proba(meta_X)[:, 1]
40
 
41
+ # Append results
42
+ result_df = df.copy()
43
+ result_df["Prediction"] = np.where(preds == 1, "Malware", "Benign")
44
+ result_df["Malware_Probability"] = probs
45
 
46
+ return "✅ Prediction completed", result_df
 
47
 
 
 
 
48
 
49
+ # ======================
50
+ # UI
51
+ # ======================
52
+ inputs = gr.File(
53
+ label="Upload CSV file (features only)",
54
+ file_types=[".csv"]
55
+ )
 
56
 
57
+ outputs = [
58
+ gr.Textbox(label="Status"),
59
+ gr.Dataframe(label="Prediction Results")
60
+ ]
61
 
62
+ app = gr.Interface(
63
+ fn=predict_malware_csv,
64
+ inputs=inputs,
65
+ outputs=outputs,
66
+ title="Stacking-based Malware Detection",
 
 
 
67
  description=(
68
+ "Upload a CSV file containing malware features.\n\n"
69
+ "Model: ExtraTrees + RandomForest + LightGBM + LogisticRegression → XGBoost\n"
70
+ f"Required features: {', '.join(feature_names)}"
71
  )
72
  )
73
 
74
+ if __name__ == "__main__":
75
+ app.launch()