Spaces:

hari6677
/

dect_traff

Sleeping

App Files Files Community

dect_traff / app.py

hari6677

Create app.py

96adf77 verified 3 months ago

raw

history blame contribute delete

3.82 kB

	import joblib
	import numpy as np
	import tensorflow as tf
	import pandas as pd
	import gradio as gr
	import os

	# --- Configuration ---
	# Decision cut-off compared against the model's predicted probability.
	FINAL_THRESHOLD = 0.7

	# Raw input columns that are one-hot encoded before scaling; the UI also
	# renders these as text boxes instead of number fields.
	CATEGORICAL_COLS = ['protocol_type', 'service', 'flag']

	# Serialized artifacts loaded at start-up (paths are relative to the
	# working directory).
	MODEL_PATH = "improved_intrusion_detection_model_SIMPLIFIED.h5"
	SCALER_PATH = "standard_scaler.pkl"
	FEATURES_PATH = "feature_names.pkl"
	LABEL_ENCODER_PATH = "label_encoder.pkl"

	# --- Load Artifacts ---
	# The model and preprocessors are loaded once, when the module is imported,
	# so every prediction reuses the same in-memory objects.
	# NOTE(review): joblib.load unpickles its input, which can execute arbitrary
	# code. Only ship .pkl files that come from a trusted source.
	try:
	    model = tf.keras.models.load_model(MODEL_PATH)
	    scaler = joblib.load(SCALER_PATH)
	    final_features = joblib.load(FEATURES_PATH)
	    label_encoder = joblib.load(LABEL_ENCODER_PATH)
	except Exception as e:
	    # Broad catch is deliberate: this is the start-up boundary and any
	    # failure here (missing file, incompatible format, ...) is fatal.
	    print(f"Error loading model artifacts: {e}")
	    # Terminate with a non-zero status so the failure is visible to whatever
	    # launched the app. The bare exit() helper is injected by the `site`
	    # module for interactive use and reports status 0 when called without
	    # an argument.
	    raise SystemExit(1)
	else:
	    print("Model and preprocessors loaded successfully.")

	def preprocess_and_predict(*raw_input_features):
	    """Classify a single connection record entered through the UI.

	    The positional values are paired, in order, with
	    ``raw_input_features_names``. The categorical columns are one-hot
	    encoded, the result is aligned to the column list in ``final_features``,
	    scaled with ``scaler`` and passed to ``model``. The model output is
	    compared against ``FINAL_THRESHOLD`` to pick class 0 or 1, which
	    ``label_encoder`` turns back into a label.

	    Args:
	        *raw_input_features: One value per entry of
	            ``raw_input_features_names``, in the same order.

	    Returns:
	        A ``(label, details)`` tuple: the decoded label and a string of the
	        form ``"Confidence: 0.1234"`` holding the raw model output.

	    Raises:
	        ValueError: If the number of values does not match the number of
	            feature names.
	        gr.Error: If any field was left empty.
	    """
	    expected = len(raw_input_features_names)
	    if len(raw_input_features) != expected:
	        raise ValueError(
	            f"Expected {expected} feature values, "
	            f"got {len(raw_input_features)}"
	        )

	    # 1. Pair each value with its feature name. A cleared number field
	    # arrives as None and a cleared text field as an empty string; both
	    # would otherwise flow through silently (as NaN or as an all-zero
	    # one-hot group) and still yield a label, so reject them up front.
	    record = {}
	    missing = []
	    for name, value in zip(raw_input_features_names, raw_input_features):
	        if isinstance(value, str):
	            # Stray whitespace would produce a dummy column that matches
	            # no training column.
	            value = value.strip()
	        if value is None or value == "":
	            missing.append(name)
	        record[name] = value
	    if missing:
	        raise gr.Error("Missing value for: " + ", ".join(missing))

	    # Single-row frame with the columns in the declared order.
	    df_raw = pd.DataFrame([record], columns=raw_input_features_names)

	    # 2. One-hot encode the categorical features.
	    df_encoded = pd.get_dummies(df_raw, columns=CATEGORICAL_COLS)

	    # 3. Align with the training columns: columns absent from this sample
	    # are added as 0, columns unknown to final_features are dropped.
	    df_encoded = df_encoded.reindex(columns=final_features, fill_value=0)

	    # 4. Scale with the fitted scaler.
	    X_scaled = scaler.transform(df_encoded)

	    # 5. Add a trailing channel axis: (samples, features, 1).
	    X_cnn = X_scaled.reshape(X_scaled.shape[0], X_scaled.shape[1], 1)

	    # 6. Predicted probability for the single sample.
	    y_pred_proba = float(model.predict(X_cnn, verbose=0)[0][0])

	    # 7. Apply the fixed threshold, then decode the class index.
	    # NOTE(review): which label 0 and 1 map to depends on the fitted
	    # label_encoder; confirm against the training code.
	    prediction_int = int(y_pred_proba >= FINAL_THRESHOLD)
	    final_label = label_encoder.inverse_transform([prediction_int])[0]

	    return final_label, f"Confidence: {y_pred_proba:.4f}"


	# --- Gradio Interface Setup ---

	# Names of the 41 raw input features (the 'label' column is excluded), in
	# the order the UI fields are created and passed to the prediction function.
	raw_input_features_names = [
	    "duration",
	    "protocol_type",
	    "service",
	    "flag",
	    "src_bytes",
	    "dst_bytes",
	    "land",
	    "wrong_fragment",
	    "urgent",
	    "hot",
	    "num_failed_logins",
	    "logged_in",
	    "num_compromised",
	    "root_shell",
	    "su_attempted",
	    "num_root",
	    "num_file_creations",
	    "num_shells",
	    "num_access_files",
	    "num_outbound_cmds",
	    "is_host_login",
	    "is_guest_login",
	    "count",
	    "srv_count",
	    "serror_rate",
	    "srv_serror_rate",
	    "rerror_rate",
	    "srv_rerror_rate",
	    "same_srv_rate",
	    "diff_srv_rate",
	    "srv_diff_host_rate",
	    # Fields prefixed with dst_host_
	    "dst_host_count",
	    "dst_host_srv_count",
	    "dst_host_same_srv_rate",
	    "dst_host_diff_srv_rate",
	    "dst_host_same_src_port_rate",
	    "dst_host_srv_diff_host_rate",
	    "dst_host_serror_rate",
	    "dst_host_srv_serror_rate",
	    "dst_host_rerror_rate",
	    "dst_host_srv_rerror_rate",
	]

	# Example starting value for each categorical field. Previously every
	# categorical box started as 'tcp', which is only meaningful for
	# protocol_type; for service and flag it presumably matches no training
	# category, so the untouched form would one-hot encode to all zeros.
	# NOTE(review): 'http' and 'SF' are values found in the KDD data; confirm
	# they are among the categories recorded in feature_names.pkl.
	_CATEGORICAL_DEFAULTS = {
	    'protocol_type': 'tcp',
	    'service': 'http',
	    'flag': 'SF',
	}

	# One Gradio component per raw feature, in the order the prediction
	# function receives them: a text box for categorical columns, a number
	# field (starting at 0) for everything else.
	inputs = [
	    gr.Textbox(label=name, value=_CATEGORICAL_DEFAULTS.get(name, ""))
	    if name in CATEGORICAL_COLS
	    else gr.Number(label=name, value=0)
	    for name in raw_input_features_names
	]

	# Build the web form: one input per raw feature, a label output for the
	# prediction and a text box for the details string.
	iface = gr.Interface(
	    fn=preprocess_and_predict,
	    inputs=inputs,
	    outputs=[gr.Label(label="Prediction"), gr.Textbox(label="Details")],
	    title="CNN Network Intrusion Detector (KDD)",
	    # The feature count and threshold are taken from the module constants
	    # so the text cannot drift from the values actually used.
	    description=(
	        f"Enter the {len(raw_input_features_names)} raw network traffic "
	        "features to classify the connection as 'normal' or 'attack'. "
	        f"Optimized with {FINAL_THRESHOLD} threshold."
	    ),
	    allow_flagging="never",
	)

	# Start the server only when the file is run as a script, not on import.
	if __name__ == "__main__":
	    iface.launch()