Spaces:

mallware
/

UI_stacking

Sleeping

App Files Files Community

hieu3636 committed on Jan 31

Commit

2c27db2

verified ·

1 Parent(s): 7b4cb68

Update app.py

Browse files

Files changed (1) hide show

app.py +55 -84

app.py CHANGED Viewed

@@ -1,104 +1,75 @@
-import gradio as gr
 import pandas as pd
 import numpy as np
-import joblib
-import tensorflow as tf
-# =========================
-# LOAD MODEL & SCALER (module-level statements: both artifacts are loaded when the module is imported)
-# =========================
-model = tf.keras.models.load_model("mlp_malware.keras")  # Keras model; predict_csv calls model.predict on the scaled features
-scaler = joblib.load("scaler.pkl")  # predict_csv calls scaler.transform with it before predicting
-# =========================
-# 30 SELECTED FEATURES: CSV columns required by predict_csv; list order is the column order passed to the scaler/model
-SELECTED_FEATURES = [
-    "filesize",
-    "E_file",
-    "E_text",
-    "E_data",
-    "AddressOfEntryPoint",
-    "NumberOfSections",
-    "SizeOfInitializedData",
-    "SizeOfImage",
-    "SizeOfOptionalHeader",
-    "SizeOfCode",
-    "DirectoryEntryImportSize",
-    "ImageBase",
-    "CheckSum",
-    "Magic",
-    "MinorLinkerVersion",
-    "MajorSubsystemVersion",
-    "e_lfanew",
-    "sus_sections",
-    "PointerToSymbolTable",
-    "SectionsLength",
-    "SizeOfStackReserve",
-    "MajorOperatingSystemVersion",
-    "non_sus_sections",
-    "Characteristics",
-    "NumberOfSymbols",
-    "BaseOfData",
-    "MajorImageVersion",
-    "FH_char5",
-    "FH_char8",
-    "OH_DLLchar5"
-]
-N_FEATURES = len(SELECTED_FEATURES)  # not referenced anywhere else in the visible code
-# =========================
-# PREDICTION FUNCTION: read an uploaded CSV, scale the selected features, and label each row with the MLP model
-# =========================
-def predict_csv(file):
-    df = pd.read_csv(file)
-    # Drop label columns if they exist, so they are not echoed back in the result table
-    df = df.drop(columns=["Label", "label", "class", "Class"], errors="ignore")
-    # Check missing features
-    missing_features = [f for f in SELECTED_FEATURES if f not in df.columns]
-    if missing_features:
-        return (  # NOTE(review): this path returns a plain string while the success path returns a DataFrame - confirm the Dataframe output copes with that
-            f"Missing required features: {missing_features}"
-        )
-    # Keep only selected features & correct order
-    feature_df = df[SELECTED_FEATURES].copy()
-    # Convert to float
-    X = feature_df.values.astype(float)
-    # Scale
-    X_scaled = scaler.transform(X)
-    # Predict
-    probs = model.predict(X_scaled).reshape(-1)  # flattened to 1-D; presumably one score per input row - TODO confirm model output shape
-    preds = (probs > 0.5).astype(int)  # strict 0.5 threshold; 1 maps to "malware" and 0 to "benign" below
-    # Build output dataframe: all remaining input columns plus the prediction columns
-    result = df.copy()
-    result.insert(0, "row_id", range(1, len(df) + 1))  # 1-based row number as the first column
-    result["probability_malware"] = probs
-    result["prediction"] = preds
-    result["prediction_label"] = result["prediction"].map(
-        {1: "malware", 0: "benign"}
-    )
-    return result
-# =========================
-# GRADIO INTERFACE: one file upload as input, one dataframe as output, handled by predict_csv
-# =========================
-demo = gr.Interface(
-    fn=predict_csv,
-    inputs=gr.File(label="Upload CSV file"),
-    outputs=gr.Dataframe(label="Prediction Result"),
-    title="Malware Detection",
     description=(
-        "Upload a CSV file containing PE features. "
     )
 )
-demo.launch()  # called unconditionally at module level (no __main__ guard)

+import joblib
 import pandas as pd
+import gradio as gr
 import numpy as np
+# ======================
+# LOAD MODEL (a single joblib artifact holding the base models, the meta model and the feature list)
+# ======================
+artifact = joblib.load("stacking_model.pkl")  # indexed by the three string keys below
+base_models = artifact["base_models"]      # list of (name, model)
+meta_model = artifact["meta_model"]  # receives the stacked base-model probabilities in predict_malware_csv
+feature_names = artifact["features"]  # columns the uploaded CSV must contain; also listed in the UI description
+# ======================
+# PREDICTION FUNCTION
+# ======================
+def predict_malware_csv(file):
+    # Read CSV
+    df = pd.read_csv(file.name)
+    # Check missing features
+    missing = set(feature_names) - set(df.columns)
+    if missing:
+        return f"❌ Missing features: {list(missing)}", None
+    X = df[feature_names]
+    # Level-1 predictions
+    meta_inputs = []
+    for name, model in base_models:
+        prob = model.predict_proba(X)[:, 1]
+        meta_inputs.append(prob)
+    meta_X = np.column_stack(meta_inputs)
+    # Meta prediction
+    preds = meta_model.predict(meta_X)
+    probs = meta_model.predict_proba(meta_X)[:, 1]
+    # Append results
+    result_df = df.copy()
+    result_df["Prediction"] = np.where(preds == 1, "Malware", "Benign")
+    result_df["Malware_Probability"] = probs
+    return "✅ Prediction completed", result_df
+# ======================
+# UI: one CSV upload in; a status textbox and a results table out
+# ======================
+inputs = gr.File(
+    label="Upload CSV file (features only)",
+    file_types=[".csv"]  # only .csv is listed as an accepted file type
+)
+outputs = [  # order matches the (status, dataframe) tuple returned by predict_malware_csv
+    gr.Textbox(label="Status"),
+    gr.Dataframe(label="Prediction Results")
+]
+app = gr.Interface(
+    fn=predict_malware_csv,
+    inputs=inputs,
+    outputs=outputs,
+    title="Stacking-based Malware Detection",
     description=(
+        "Upload a CSV file containing malware features.\n\n"
+        "Model: ExtraTrees + RandomForest + LightGBM + LogisticRegression → XGBoost\n"
+        f"Required features: {', '.join(feature_names)}"  # evaluated once, when this module-level statement runs
     )
 )
+if __name__ == "__main__":
+    app.launch()  # only launches when the file is run as a script, not when imported