Spaces:
Sleeping
Sleeping
| import joblib | |
| import pandas as pd | |
| import gradio as gr | |
| import numpy as np | |
| import re | |
| # ====================== | |
| # LOAD MODEL | |
| # ====================== | |
# Deserialize the saved stacking bundle once, at import time, and unpack the
# three entries the app reads from it. joblib.load unpickles the file, so only
# a trusted artifact should ever be placed at this path.
artifact = joblib.load("stacking_model.pkl")
base_models, meta_model, feature_names = (
    artifact[key] for key in ("base_models", "meta_model", "features")
)  # base_models is a list of (name, model) pairs
| # ====================== | |
| # CLEAN FUNCTION (same as training) | |
| # ====================== | |
def clean_numeric(val):
    """Parse a loosely formatted numeric cell into a ``float``.

    The value is converted with ``str`` and all whitespace is removed, then
    decimal and thousands separators are normalised before calling ``float``:

    * several dots (``1.234.567``) are thousands separators and are dropped;
    * a single comma with no dot (``3,14``) is a decimal comma;
    * several commas with no dot (``1,234,567``) are thousands separators;
    * when one dot and at least one comma remain, whichever separator comes
      last is the decimal mark, so ``1,234.56`` and ``1.234,56`` both give
      ``1234.56``.

    Scientific notation such as ``1.5e-3`` needs no special case because
    ``float`` parses it directly.

    NOTE(review): this cleaner is labelled "same as training"; if the training
    pipeline keeps its own copy, apply the same separator rules there so both
    sides parse identical text to identical numbers.

    Args:
        val: Raw cell value.

    Returns:
        The parsed ``float``, or ``None`` when the value is missing according
        to ``pd.isna`` or cannot be parsed as a number.
    """
    if pd.isna(val):
        return None

    text = re.sub(r"\s+", "", str(val))

    # More than one dot can only be digit grouping.
    if text.count(".") > 1:
        text = text.replace(".", "")

    if "," in text:
        if "." in text:
            # Both separators present: the right-most one is the decimal mark.
            if text.rfind(",") > text.rfind("."):
                text = text.replace(".", "").replace(",", ".")
            else:
                text = text.replace(",", "")
        elif text.count(",") > 1:
            # Repeated commas without a dot are digit grouping.
            text = text.replace(",", "")
        else:
            # A lone comma is treated as a decimal comma.
            text = text.replace(",", ".")

    try:
        return float(text)
    except ValueError:
        return None
def load_and_clean_csv(file):
    """Read a CSV as text, then normalise its headers and numeric cells.

    Every column is read as ``str`` so that ``clean_numeric`` sees the raw
    text. All whitespace is removed from the column names. Each column other
    than ``Label`` and ``file_name`` is then passed through ``clean_numeric``
    cell by cell.

    Args:
        file: Either a filesystem path (``str`` or path-like object) or an
            upload object that exposes the path as its ``.name`` attribute.

    Returns:
        The cleaned ``pandas.DataFrame``.
    """
    # A str / path-like argument already is the path. Checking this first also
    # avoids reading ``.name`` from a pathlib.Path, which is only the basename.
    if isinstance(file, str) or hasattr(file, "__fspath__"):
        path = file
    else:
        path = file.name

    # sep=None asks the python engine to detect the delimiter itself.
    df = pd.read_csv(path, sep=None, engine="python", dtype=str)

    # Strip every whitespace character from the header names.
    df.columns = (
        df.columns.astype(str).str.strip().str.replace(r"\s+", "", regex=True)
    )

    # Label / file_name are left as text; everything else is parsed to numbers.
    for col in df.columns:
        if col not in ("Label", "file_name"):
            df[col] = df[col].apply(clean_numeric)

    return df
| # ====================== | |
| # PREDICTION FUNCTION | |
| # ====================== | |
def predict_malware_csv(file):
    """Run the two-level stacked classifier on an uploaded CSV.

    The file is loaded with ``load_and_clean_csv``. Each base model's
    ``predict_proba`` column 1 becomes one input column for the meta model,
    whose class prediction and column-1 probability are appended to a copy of
    the cleaned input frame.

    Args:
        file: The uploaded file as accepted by ``load_and_clean_csv``, or
            ``None`` when nothing was uploaded.

    Returns:
        A ``(status, result)`` tuple. ``status`` is a human-readable message.
        ``result`` is the cleaned input frame with added ``Prediction``
        (``"Malware"`` where the meta model predicts ``1``, else ``"Benign"``)
        and ``Malware_Probability`` columns, or ``None`` when prediction could
        not be run.
    """
    if file is None:
        return "❌ No file uploaded", None

    df = load_and_clean_csv(file)

    # Report missing features in the model's own feature order so the message
    # is deterministic.
    missing = [name for name in feature_names if name not in df.columns]
    if missing:
        return f"❌ Missing features: {missing}", None

    if df.empty:
        return "❌ Uploaded CSV contains no rows", None

    # Keep only the model features. They are cleaned again here because the
    # loader leaves columns named Label / file_name as text.
    X = df[feature_names].copy()
    for col in feature_names:
        X[col] = X[col].apply(clean_numeric)
    # clean_numeric yields floats or None; cast so missing cells become NaN
    # and no column is left with object dtype.
    X = X.astype(float)

    # Level 1: one probability column per base model.
    meta_X = np.column_stack(
        [model.predict_proba(X)[:, 1] for _, model in base_models]
    )

    # Level 2: the meta model consumes the stacked base-model probabilities.
    preds = meta_model.predict(meta_X)
    probs = meta_model.predict_proba(meta_X)[:, 1]

    result_df = df.copy()
    result_df["Prediction"] = np.where(preds == 1, "Malware", "Benign")
    result_df["Malware_Probability"] = probs
    return "✅ Prediction completed", result_df
| # ====================== | |
| # UI | |
| # ====================== | |
# Upload widget: a single file, restricted to the .csv extension.
inputs = gr.File(file_types=[".csv"], label="Upload CSV file (features only)")

# Output widgets, in the same order as the (status, table) pair returned by
# predict_malware_csv.
outputs = [gr.Textbox(label="Status"), gr.Dataframe(label="Prediction Results")]

# Wire the prediction function to the widgets.
app = gr.Interface(
    title="Stacking-based Malware Detection",
    description="Upload a CSV file.\n\n",
    fn=predict_malware_csv,
    inputs=inputs,
    outputs=outputs,
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    app.launch()