Spaces:

malware-USTH
/

mlp

Sleeping

App Files Files Community

hieu3636 committed on Jan 30

Commit

0479565

verified ·

1 Parent(s): 02cbf7b

Update app.py

Browse files

Files changed (1) hide show

app.py +67 -21

app.py CHANGED Viewed

@@ -4,39 +4,82 @@ import numpy as np
 import joblib
 import tensorflow as tf
-# Load model & scaler
-model = tf.keras.models.load_model("mlp_model.keras")
 scaler = joblib.load("scaler.pkl")
-N_FEATURES = model.input_shape[1]
-def predict_csv(file):
-    df = pd.read_csv(file)
-    # Drop label column if it exists
-    df = df.drop(columns=["Label", "label"], errors="ignore")
-    # Add row index for display only
-    df.insert(0, "row_id", range(1, len(df) + 1))
-    # Separate features for model
-    feature_df = df.drop(columns=["row_id"])
-    # Check feature count
-    if feature_df.shape[1] != N_FEATURES:
         return (
-            f"Expected {N_FEATURES} features, "
-            f"but got {feature_df.shape[1]} columns."
         )
     X = feature_df.values.astype(float)
     X_scaled = scaler.transform(X)
     probs = model.predict(X_scaled).reshape(-1)
     preds = (probs > 0.5).astype(int)
-    # Build result table (row_id kept)
     result = df.copy()
     result["probability_malware"] = probs
     result["prediction"] = preds
     result["prediction_label"] = result["prediction"].map(
@@ -45,14 +88,17 @@ def predict_csv(file):
     return result
 demo = gr.Interface(
     fn=predict_csv,
     inputs=gr.File(label="Upload CSV file"),
     outputs=gr.Dataframe(label="Prediction Result"),
-    title="Malware Detection MLP Model",
-    description="Upload a CSV file with features to predict malware or benign."
 )
-demo.launch()

 import joblib
 import tensorflow as tf
+# =========================
+# LOAD MODEL & SCALER
+# =========================
+model = tf.keras.models.load_model("mlp_malware.keras")
 scaler = joblib.load("scaler.pkl")
+# =========================
+# 30 SELECTED FEATURES
+SELECTED_FEATURES = [
+    "filesize",
+    "E_file",
+    "E_text",
+    "E_data",
+    "AddressOfEntryPoint",
+    "NumberOfSections",
+    "SizeOfInitializedData",
+    "SizeOfImage",
+    "SizeOfOptionalHeader",
+    "SizeOfCode",
+    "DirectoryEntryImportSize",
+    "ImageBase",
+    "CheckSum",
+    "Magic",
+    "MinorLinkerVersion",
+    "MajorSubsystemVersion",
+    "e_lfanew",
+    "sus_sections",
+    "PointerToSymbolTable",
+    "SectionsLength",
+    "SizeOfStackReverse",
+    "MajorOperatingSystemVersion",
+    "non_sus_sections",
+    "Characteristics",
+    "NumberOfSymbols",
+    "BaseOfData",
+    "MajorImageVersion",
+    "FH_char5",
+    "FH_char8",
+    "OH_DLLchar5"
+]
+N_FEATURES = len(SELECTED_FEATURES)
+# =========================
+# PREDICTION FUNCTION
+# =========================
+def predict_csv(file):
+    df = pd.read_csv(file)
+    # Drop label columns if they exist
+    df = df.drop(columns=["Label", "label", "class", "Class"], errors="ignore")
+    # Check missing features
+    missing_features = [f for f in SELECTED_FEATURES if f not in df.columns]
+    if missing_features:
         return (
+            f"Missing required features: {missing_features}"
         )
+    # Keep only selected features & correct order
+    feature_df = df[SELECTED_FEATURES].copy()
+    # Convert to float
     X = feature_df.values.astype(float)
+    # Scale
     X_scaled = scaler.transform(X)
+    # Predict
     probs = model.predict(X_scaled).reshape(-1)
     preds = (probs > 0.5).astype(int)
+    # Build output dataframe
     result = df.copy()
+    result.insert(0, "row_id", range(1, len(df) + 1))
     result["probability_malware"] = probs
     result["prediction"] = preds
     result["prediction_label"] = result["prediction"].map(
     return result
+# =========================
+# GRADIO INTERFACE
+# =========================
 demo = gr.Interface(
     fn=predict_csv,
     inputs=gr.File(label="Upload CSV file"),
     outputs=gr.Dataframe(label="Prediction Result"),
+    title="Malware Detection",
+    description=(
+        "Upload a CSV file containing PE features. "
+    )
 )
+demo.launch()