"""Streamlit app: phishing-website prediction (ITI105 team project).

Loads website-feature data (an uploaded CSV or a bundled sample CSV from
GitHub), lets the user pick a URL and one of six pre-trained models, then
shows the model's prediction together with a class-probability bar chart.

Expected CSV layout: first column 'url', last column the target label,
everything in between numeric feature columns.
"""
import base64
import pickle

import numpy as np
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split  # retained from original file (unused here)
from sklearn.ensemble import RandomForestClassifier  # Example model (unused here)
from sklearn.preprocessing import StandardScaler  # retained from original file (unused here)

# Pickled model files keyed by the human-readable selector label.
_MODEL_FILES = {
    'Logistic Regression': 'best_logreg_model.pkl',
    'Decision Tree': 'decision_tree_model.pkl',
    'KNN': 'knn_model.pkl',
    'XGBoost': 'xgboost_model.pkl',
    'Random Forest': 'best_rf_model.pkl',
    'SVM': 'best_svm_model.pkl',
}


def _encode_image(path):
    """Return the base64-encoded contents of *path* as a UTF-8 string."""
    with open(path, "rb") as fh:
        return base64.b64encode(fh.read()).decode("utf-8")


def _load_pickle(path):
    """Unpickle and return the object stored at *path*.

    NOTE(security): ``pickle.load`` can execute arbitrary code from the
    file — only call this on trusted, locally bundled model/scaler files,
    never on user-supplied data.
    """
    with open(path, 'rb') as fh:
        return pickle.load(fh)


# Streamlit app title
st.title('ITI105 Team Project')
st.subheader('Machine Learning Project for Phishing web site prediction App')

if 'clear_output' not in st.session_state:
    st.session_state.clear_output = False


def clear_previous_output():
    """Flag that previously rendered success/error/markdown output is stale."""
    st.session_state.clear_output = True


# Pre-uploaded fallback dataset (used when the visitor uploads nothing).
default_file_path = 'https://raw.githubusercontent.com/JimmyYehtut/ITI105Files/main/test_dataset.csv'
df_new = pd.read_csv(default_file_path)

# Upload the CSV file; fall back to the bundled sample when absent.
uploaded_file = st.file_uploader("Choose a CSV file with website data", type="csv")

row_index = None
if uploaded_file is not None:
    df = pd.read_csv(uploaded_file)
else:
    st.write("Using pre-uploaded sample data:")
    df = df_new

# --- shared UI path (previously duplicated verbatim in both branches) ---
url_list = df['url'].tolist()
selected_url = st.selectbox("Select URL for Prediction", url_list)
selected_model = st.selectbox(
    "Select Model for Prediction",
    ['Logistic Regression', 'Decision Tree', 'KNN', 'XGBoost', 'Random Forest', 'SVM'],
    index=4,  # default to Random Forest
)

# Need at least one feature column between the leading 'url' column and
# the trailing target column.
if df.shape[1] > 2:
    features_df = df.iloc[:, 1:-1]  # drop first (url) and last (target) columns
    row_index = df[df['url'] == selected_url].index[0]

    # Display the selected row's features in a table.
    selected_row = df.iloc[row_index, :]
    st.subheader("List of selected website features:")
    st.table(selected_row.to_frame().T)
else:
    st.write("The dataset does not have enough columns after removing the first and last columns.")

if st.button("Predict"):
    # Mark previous st.success / st.error / st.markdown output as stale.
    clear_previous_output()

    # Result images, embedded inline as base64 data URLs.
    data_url_ok = _encode_image("It'ok.webp")
    data_url_warning = _encode_image("Warning.gif")

    if row_index is not None:
        # Feature vector for the selected URL (numeric columns only).
        single_sample = np.asarray(features_df.iloc[row_index].values)

        # Show progress spinner while making the prediction.
        with st.spinner('Making prediction...'):
            try:
                # Pre-fitted scaler must match the one used at training time.
                scaler = _load_pickle('scaler.pkl')
                X_new_scaled = scaler.transform(single_sample.reshape(1, -1))

                # Load the pre-trained model the user selected.
                model = _load_pickle(_MODEL_FILES[selected_model])

                prediction = model.predict(X_new_scaled)
                y_pred_proba = model.predict_proba(X_new_scaled)

                st.write(f"Prediction for the selected URL ({selected_url}): **{prediction[0]}**")

                # Models may label the negative class as 0 or "legitimate".
                # NOTE(review): the <img> tags below reconstruct markup that
                # was lost from the original (only the alt text "cat gif"
                # survived); the base64 data URLs were otherwise unused.
                if prediction[0] == 0 or prediction[0] == "legitimate":
                    st.success("The website is not a phishing website.")
                    st.markdown(
                        f'<img src="data:image/webp;base64,{data_url_ok}" alt="cat gif">',
                        unsafe_allow_html=True,
                    )
                else:
                    st.error("The website is a phishing website.")
                    st.markdown(
                        f'<img src="data:image/gif;base64,{data_url_warning}" alt="cat gif">',
                        unsafe_allow_html=True,
                    )

                # Visualize prediction confidence scores as a bar chart.
                st.write("Prediction Confidence Scores:")
                fig = plt.figure(figsize=(8, 4))
                sns.barplot(x=model.classes_, y=y_pred_proba[0])
                plt.title("Prediction Confidence Scores")
                plt.xlabel("Class")
                plt.ylabel("Probability")
                st.pyplot(fig)
                plt.close(fig)  # avoid figure accumulation across reruns
            except FileNotFoundError as e:
                st.error(f"Model file for {selected_model} not found: {str(e)}")
            except Exception as e:
                st.error(f"An error occurred while loading the model: {str(e)}")
    else:
        st.error("ERROR!!! Please provide web site information for prediction !!!")

# Reset the stale-output flag after a prediction run.
if st.session_state.clear_output:
    st.session_state.clear_output = False