import gradio as gr import tensorflow as tf import numpy as np import pandas as pd import joblib import os from huggingface_hub import hf_hub_download # --- Configuration --- REPO_ID = "netgoat-ai/GoatAI" MODEL_FILENAME = "goatai.keras" SCALER_FILENAME = "scaler.pkl" DEFAULT_THRESHOLD = 0.003 FEATURE_NAMES = [ "Flow Duration", "Total Fwd Packets", "Total Backward Packets", "Packet Length Mean", "Flow IAT Mean", "Fwd Flag Count" ] # --- Load Resources --- def load_file(filename, repo_id): """ Checks for a local file. If not found, attempts to download from HF Hub. """ if os.path.exists(filename): print(f"Found local file: {filename}") return filename print(f"'{filename}' not found locally. Attempting download from {repo_id}...") try: downloaded_path = hf_hub_download(repo_id=repo_id, filename=filename) print(f"Successfully downloaded to: {downloaded_path}") return downloaded_path except Exception as e: print(f"Could not download {filename}: {e}") return None # 1. Load Model model_path = load_file(MODEL_FILENAME, REPO_ID) model = None if model_path: try: model = tf.keras.models.load_model(model_path) print("Model loaded successfully.") except Exception as e: print(f"Error loading model: {e}") # 2. Load Scaler scaler_path = load_file(SCALER_FILENAME, REPO_ID) scaler = None if scaler_path: try: scaler = joblib.load(scaler_path) print("Scaler loaded successfully.") except Exception as e: print(f"Error loading scaler: {e}") else: print("Warning: Scaler not available. Input data must be pre-normalized.") # --- Simulation Logic (Based on the dataset code) --- def generate_benign_sample(): """Generates a realistic BENIGN traffic sample.""" duration = np.random.randint(50000, 60000000) fwd_pkts = np.random.randint(10, 100) bwd_pkts = fwd_pkts + np.random.randint(5, 50) pkt_len = abs(np.random.normal(loc=500, scale=200)) iat = abs(np.random.normal(loc=100000, scale=50000)) syn_flag = np.random.choice([0, 1], p=[0.95, 0.05]) data = [duration, fwd_pkts, bwd_pkts, pkt_len, iat, syn_flag] return ", ".join([f"{x:.2f}" for x in data]) def generate_attack_sample(): """Generates a realistic ATTACK traffic sample.""" duration = np.random.randint(100, 10000) fwd_pkts = np.random.randint(500, 50000) # Huge volume bwd_pkts = np.random.randint(0, 5) # No response pkt_len = np.random.normal(loc=1200, scale=10) # Fixed size (approx) iat = np.random.exponential(scale=100) # Super fast syn_flag = np.random.choice([0, 1], p=[0.1, 0.9]) data = [duration, fwd_pkts, bwd_pkts, pkt_len, iat, syn_flag] return ", ".join([f"{x:.2f}" for x in data]) # --- Prediction Logic --- def predict(csv_text, threshold): if model is None: return "System Error: Model not loaded.", 0.0, f"Could not find {MODEL_FILENAME}." try: # 1. Parse Input data_list = [float(x.strip()) for x in csv_text.split(',') if x.strip()] # 2. Validate Dimensions (Must be 6) if len(data_list) != 6: return f"
{desc}