Spaces:

mallware
/

UI_stacking

Sleeping

App Files Files Community

UI_stacking / app.py

hieu3636

Update app.py

ff5d94a verified 2 months ago

raw

history blame contribute delete

2.93 kB

	import os
	import re

	import gradio as gr
	import joblib
	import numpy as np
	import pandas as pd

	# ======================
	# LOAD MODEL
	# ======================
	# The artifact is read once, at import time, from a path relative to the
	# current working directory.
	# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
	# only ship/load a "stacking_model.pkl" that comes from a trusted source.
	artifact = joblib.load("stacking_model.pkl")

	# Level-1 estimators; iterated as (name, model) pairs and queried with
	# predict_proba in predict_malware_csv.
	base_models = artifact["base_models"] # list of (name, model)
	# Level-2 estimator; fed the stacked level-1 probabilities.
	meta_model = artifact["meta_model"]
	# Column names selected from the uploaded CSV before prediction.
	feature_names = artifact["features"]

	# ======================
	# CLEAN FUNCTION (same as training)
	# ======================
	def clean_numeric(val):
	    """Convert one raw CSV cell to a float, or None if it is not numeric.

	    Normalisation steps, in order:
	      * missing values (anything ``pd.isna`` accepts) yield None;
	      * all whitespace is removed from the string form of the value;
	      * plain scientific notation (e.g. ``1.5e-3``) is parsed untouched;
	      * otherwise, when more than one '.' is present, every dot is taken
	        to be a thousands separator and dropped;
	      * a value containing ',' but no '.' has its commas turned into dots
	        (decimal comma).

	    Anything ``float`` still rejects after that (for example ``1.234,56``
	    or free text) yields None.

	    The section banner above this function states that it matches the
	    cleaning applied at training time, so the rules are kept as they are.
	    """
	    if pd.isna(val):
	        return None

	    text = re.sub(r'\s+', '', str(val).strip())

	    # Scientific notation must bypass the separator rewriting below.
	    is_scientific = re.match(r'^-?\d+(\.\d+)?[eE][+-]?\d+$', text) is not None
	    if not is_scientific:
	        if text.count('.') > 1:
	            text = text.replace('.', '')
	        if ',' in text and '.' not in text:
	            text = text.replace(',', '.')

	    try:
	        number = float(text)
	    except ValueError:
	        number = None
	    return number

	def load_and_clean_csv(file):
	    """Read a CSV into a DataFrame and normalise its header and numeric cells.

	    Args:
	        file: Either a filesystem path (``str`` or ``os.PathLike``) or an
	            upload object whose ``.name`` attribute holds the path of the
	            file on disk.

	    Returns:
	        A DataFrame in which every column name has had all whitespace
	        removed and every column except ``Label`` and ``file_name`` has
	        been passed through ``clean_numeric`` (unparseable cells become
	        missing values). ``Label`` and ``file_name`` are left as strings.
	    """
	    # Paths are used directly: a pathlib.Path also has a ``.name`` attribute,
	    # but it is only the basename, so it must not go through the fallback.
	    if isinstance(file, (str, os.PathLike)):
	        path = file
	    else:
	        path = file.name

	    # sep=None lets the python engine sniff the delimiter; dtype=str keeps
	    # the raw text so clean_numeric sees locale-formatted numbers unchanged.
	    df = pd.read_csv(path, sep=None, engine='python', dtype=str)

	    # Header cleanup: trim, then remove whitespace inside the names as well.
	    df.columns = (
	        df.columns
	        .astype(str)
	        .str.strip()
	        .str.replace(r'\s+', '', regex=True)
	    )

	    # Everything except the two known text columns is treated as numeric.
	    text_columns = ('Label', 'file_name')
	    for col in df.columns:
	        if col not in text_columns:
	            df[col] = df[col].apply(clean_numeric)

	    return df

	# ======================
	# PREDICTION FUNCTION
	# ======================
	def predict_malware_csv(file):
	    """Classify every row of an uploaded CSV with the stacking ensemble.

	    Args:
	        file: The uploaded file as delivered by the Gradio ``File`` input,
	            or None when nothing was uploaded.

	    Returns:
	        A ``(status, table)`` pair. On success ``table`` is the cleaned
	        input DataFrame with two extra columns: ``Prediction``
	        ("Malware" where the meta-model predicts 1, otherwise "Benign")
	        and ``Malware_Probability`` (column 1 of the meta-model's
	        ``predict_proba``). On a validation failure ``status`` carries the
	        error text and ``table`` is None.
	    """
	    # The callback receives None when the form is submitted without a file;
	    # report that instead of failing on the attribute access downstream.
	    if file is None:
	        return "❌ No file uploaded", None

	    df = load_and_clean_csv(file)

	    # Report missing columns in the model's own feature order so the
	    # message is deterministic from run to run.
	    missing = [name for name in feature_names if name not in df.columns]
	    if missing:
	        return f"❌ Missing features: {missing}", None

	    # Keep only the model's features, in the model's column order.
	    X = df[feature_names].copy()

	    # Re-applying clean_numeric leaves already-cleaned columns unchanged and
	    # also covers a feature the loader skipped ('Label' / 'file_name').
	    for col in feature_names:
	        X[col] = X[col].apply(clean_numeric)

	    # Missing values are passed to the models as-is; no imputation is done.

	    # Level 1: one positive-class probability column per base model.
	    meta_X = np.column_stack(
	        [model.predict_proba(X)[:, 1] for _, model in base_models]
	    )

	    # Level 2: the meta-model decides from the stacked probabilities.
	    preds = meta_model.predict(meta_X)
	    probs = meta_model.predict_proba(meta_X)[:, 1]

	    # Results are appended to a copy so the cleaned input stays untouched.
	    result_df = df.copy()
	    result_df["Prediction"] = np.where(preds == 1, "Malware", "Benign")
	    result_df["Malware_Probability"] = probs

	    return "✅ Prediction completed", result_df


	# ======================
	# UI
	# ======================
	# Single file-upload input; the picker is limited to .csv files.
	inputs = gr.File(
	label="Upload CSV file (features only)",
	file_types=[".csv"]
	)

	# Two outputs, in the same order as the (status, table) pair returned by
	# predict_malware_csv.
	outputs = [
	gr.Textbox(label="Status"),
	gr.Dataframe(label="Prediction Results")
	]

	# Wire the prediction function to the input and output components.
	app = gr.Interface(
	fn=predict_malware_csv,
	inputs=inputs,
	outputs=outputs,
	title="Stacking-based Malware Detection",
	description=(
	"Upload a CSV file.\n\n"
	)
	)

	# Start the Gradio server only when this file is run as a script, so that
	# importing the module does not launch the UI.
	if __name__ == "__main__":
	app.launch()