"""Streamlit app: train a small dense network to classify cyber-attack types.

The script reads the incident CSV, scales the numeric columns and one-hot
encodes the categorical ones, then (on button press) trains a Keras
``Sequential`` classifier and shows the model summary together with the
loss/accuracy curves for the training and validation portions.
"""

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import streamlit as st
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Sequential


@st.cache_data
def load_data():
    """Read the incident CSV from the working directory.

    Wrapped in ``st.cache_data`` so the file is not re-read on every
    Streamlit rerun.

    Returns:
        The CSV contents as a ``pandas.DataFrame``.
    """
    return pd.read_csv("Global_Cybersecurity_Threats_2015-2024.csv")


df = load_data()

st.title("Cybersecurity Attack Type - ANN Summary & Metrics")

# Column roles: `target` is the label to predict; the two feature lists are
# the only columns handed to the preprocessor (anything else in the frame is
# dropped by ColumnTransformer's default remainder handling).
target = 'Attack Type'
cat_features = [
    'Country',
    'Target Industry',
    'Attack Source',
    'Security Vulnerability Type',
    'Defense Mechanism Used',
]
num_features = [
    'Year',
    'Financial Loss (in Million $)',
    'Number of Affected Users',
    'Incident Resolution Time (in Hours)',
]

X = df.drop(columns=[target])
y = df[target]

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), num_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), cat_features),
    ],
    # OneHotEncoder emits sparse output, and ColumnTransformer keeps the
    # stacked result sparse when its density is below the threshold.
    # 0 forces a dense ndarray, which is what Keras' `validation_split`
    # is documented to accept.
    sparse_threshold=0,
)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=27
)

# Fit the feature transforms on the training rows only, then reuse them.
X_train = preprocessor.fit_transform(X_train)
X_test = preprocessor.transform(X_test)

# Fit the label mapping on the full target so a class that happens to land
# only in the test split does not make `transform` raise ValueError.
le = LabelEncoder()
le.fit(y)
y_train = le.transform(y_train)
y_test = le.transform(y_test)
# NOTE(review): X_test / y_test are prepared but not evaluated anywhere in
# this script; only the validation split carved out by `fit` is reported.

st.sidebar.header("Model Parameters")
epochs = st.sidebar.slider("Epochs", 5, 100, 30)
batch_size = st.sidebar.selectbox("Batch Size", [8, 16, 32, 64], index=1)

# Two hidden layers followed by a softmax over the encoded classes. The
# output width comes from the encoder so it always matches the integer
# labels fed to sparse_categorical_crossentropy.
model = Sequential()
model.add(Input(shape=(X_train.shape[1],)))
model.add(Dense(16, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(len(le.classes_), activation='softmax'))
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

if st.button("Train Model"):
    history = model.fit(
        X_train,
        y_train,
        validation_split=0.2,
        epochs=epochs,
        batch_size=batch_size,
        verbose=0,
    )
    st.success("Model training complete.")

    st.subheader("Model Summary")
    # Capture the summary rows instead of letting Keras print to stdout.
    model_summary = []
    model.summary(print_fn=model_summary.append)
    # Join with a real newline so each summary row renders on its own line.
    st.text("\n".join(model_summary))

    st.subheader("Training and Validation Metrics")
    fig, ax = plt.subplots(2, 1, figsize=(8, 6))

    ax[0].plot(history.history['loss'], label='Loss')
    ax[0].plot(history.history['val_loss'], label='Val Loss')
    ax[0].legend()
    ax[0].set_title("Loss vs Val Loss")

    ax[1].plot(history.history['accuracy'], label='Accuracy')
    ax[1].plot(history.history['val_accuracy'], label='Val Accuracy')
    ax[1].legend()
    ax[1].set_title("Accuracy vs Val Accuracy")

    # Give the stacked subplots room so the lower title does not collide
    # with the upper axes.
    fig.tight_layout()
    st.pyplot(fig)
    # pyplot keeps every figure it creates until it is closed; release it so
    # repeated training runs do not accumulate figures.
    plt.close(fig)

    min_val_loss = min(history.history['val_loss'])
    best_val_acc = max(history.history['val_accuracy'])
    st.write(f"**Minimum Validation Loss:** {min_val_loss:.4f}")
    st.write(f"**Best Validation Accuracy:** {best_val_acc:.4f}")