YEHTUT committed on
Commit
35b93a0
·
verified ·
1 Parent(s): c08cc1b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +164 -154
app.py CHANGED
@@ -1,154 +1,164 @@
import base64
import pickle
import numpy as np
import streamlit as st
import pandas as pd
from sklearn.model_selection import train_test_split  # kept for parity; unused at runtime
from sklearn.ensemble import RandomForestClassifier  # Example model (unused at runtime)
from sklearn.preprocessing import StandardScaler  # unused; scaler is loaded from scaler.pkl

# Streamlit app title
st.title('ITI105 Team Project')
st.subheader('Machine Learning Project for Phishing web site prediction App')

# Session flag that marks stale success/error output for clearing on rerun.
if 'clear_output' not in st.session_state:
    st.session_state.clear_output = False


def clear_previous_output():
    """Flag the previously rendered prediction output to be cleared."""
    st.session_state.clear_output = True


def _encode_image_b64(path):
    """Return the base64-encoded contents of the image file at *path*.

    Uses a context manager so the file handle is always closed (the
    previous code opened the files and could leak the handle on error).
    """
    with open(path, "rb") as fh:
        return base64.b64encode(fh.read()).decode("utf-8")


def _render_selection(df):
    """Render the URL/model pickers for *df* and show the chosen row.

    Returns (features_df, row_index, selected_model); the first two are
    None when *df* does not have at least one feature column between the
    url (first) and target (last) columns.
    """
    url_list = df['url'].tolist()

    # Display the dropdown with URL options
    selected_url = st.selectbox("Select URL for Prediction", url_list)

    # NOTE(review): the selected model is displayed but predictions always
    # use the Random Forest artifacts loaded at predict time — confirm intent.
    selected_model = st.selectbox(
        "Select Model for Prediction",
        ['Random Forest', 'Logistic Regression', 'SVM', 'KNN', 'Decision Tree'],
    )

    if df.shape[1] > 2:  # need at least one feature column to predict on
        features_df = df.iloc[:, 1:-1]  # drop url (first) and target (last)
        row_index = df[df['url'] == selected_url].index[0]
        selected_row = df.iloc[row_index, :]
        st.subheader("List of selected website features:")
        st.table(selected_row.to_frame().T)
        return features_df, row_index, selected_model

    st.write("The dataset does not have enough columns after removing the first and last columns.")
    return None, None, selected_model


# Load the pre-uploaded dataset (used when no CSV is uploaded).
default_file_path = 'https://raw.githubusercontent.com/JimmyYehtut/ITI105Files/main/test_dataset.csv'
df_new = pd.read_csv(default_file_path)

# Upload the CSV file; fall back to the sample data otherwise.
uploaded_file = st.file_uploader("Choose a CSV file with website data", type="csv")
if uploaded_file is not None:
    df = pd.read_csv(uploaded_file)
else:
    st.write("Using pre-uploaded sample data:")
    df = df_new

# The two branches previously duplicated this selection UI verbatim.
features_df, row_index, selected_model = _render_selection(df)

if st.button("Predict"):
    # Clear previous st.success, st.error, and st.markdown elements
    clear_previous_output()
    data_url_ok = _encode_image_b64("It'ok.webp")
    data_url_warning = _encode_image_b64("Warning.gif")

    if row_index is not None:
        single_sample = np.array(features_df.iloc[row_index].values)

        # Show progress spinner while making predictions
        with st.spinner('Making prediction...'):
            # Load the pre-trained scaler and model.
            # SECURITY: pickle.load must only be fed trusted local artifacts.
            with open('scaler.pkl', 'rb') as f:
                scaler = pickle.load(f)
            with open('rf_clf.pkl', 'rb') as f:
                rf_clf = pickle.load(f)

            # Scale the row with the training-time scaler, then predict.
            X_new_scaled = scaler.transform(single_sample.reshape(1, -1))
            prediction = rf_clf.predict(X_new_scaled)

            if prediction[0] == 0:
                st.success("The website is not a phishing website.")
                st.markdown(f'<img src="data:image/gif;base64,{data_url_ok}" alt="cat gif">', unsafe_allow_html=True,)
            else:
                st.error("The website is a phishing website.")
                st.markdown(f'<img src="data:image/gif;base64,{data_url_warning}" alt="cat gif">', unsafe_allow_html=True,)
    else:
        st.error("ERROR!!! Please provide web site information for prediction !!!")

# This block clears the elements only if the prediction button is pressed
if st.session_state.clear_output:
    st.session_state.clear_output = False
 
 
 
 
 
 
 
 
 
 
 
import base64
import pickle
import numpy as np
import streamlit as st
import pandas as pd
from sklearn.model_selection import train_test_split  # kept for parity; unused at runtime
from sklearn.ensemble import RandomForestClassifier  # Example model (unused at runtime)
from sklearn.preprocessing import StandardScaler  # unused; scaler is loaded from scaler.pkl

# Streamlit app title
st.title('ITI105 Team Project')
st.subheader('Machine Learning Project for Phishing web site prediction App')

# Session flag that marks stale success/error output for clearing on rerun.
if 'clear_output' not in st.session_state:
    st.session_state.clear_output = False


def clear_previous_output():
    """Flag the previously rendered prediction output to be cleared."""
    st.session_state.clear_output = True


def _encode_image_b64(path):
    """Return the base64-encoded contents of the image file at *path*.

    Uses a context manager so the file handle is always closed (the
    previous code opened the files and could leak the handle on error).
    """
    with open(path, "rb") as fh:
        return base64.b64encode(fh.read()).decode("utf-8")


def _render_selection(df):
    """Render the URL/model pickers for *df* and show the chosen row.

    Returns (features_df, row_index, selected_model); the first two are
    None when *df* does not have at least one feature column between the
    url (first) and target (last) columns.
    """
    url_list = df['url'].tolist()

    # Display the dropdown with URL options
    selected_url = st.selectbox("Select URL for Prediction", url_list)

    # NOTE(review): the selected model is displayed but predictions always
    # use the Random Forest artifacts loaded at predict time — confirm intent.
    selected_model = st.selectbox(
        "Select Model for Prediction",
        ['Random Forest', 'Logistic Regression', 'SVM', 'KNN', 'Decision Tree'],
    )

    if df.shape[1] > 2:  # need at least one feature column to predict on
        features_df = df.iloc[:, 1:-1]  # drop url (first) and target (last)
        row_index = df[df['url'] == selected_url].index[0]
        selected_row = df.iloc[row_index, :]
        st.subheader("List of selected website features:")
        st.table(selected_row.to_frame().T)
        return features_df, row_index, selected_model

    st.write("The dataset does not have enough columns after removing the first and last columns.")
    return None, None, selected_model


# Load the pre-uploaded dataset (used when no CSV is uploaded).
default_file_path = 'https://raw.githubusercontent.com/JimmyYehtut/ITI105Files/main/test_dataset.csv'
df_new = pd.read_csv(default_file_path)

# Upload the CSV file; fall back to the sample data otherwise.
uploaded_file = st.file_uploader("Choose a CSV file with website data", type="csv")
if uploaded_file is not None:
    df = pd.read_csv(uploaded_file)
else:
    st.write("Using pre-uploaded sample data:")
    df = df_new

# The two branches previously duplicated this selection UI verbatim.
features_df, row_index, selected_model = _render_selection(df)

if st.button("Predict"):
    # Clear previous st.success, st.error, and st.markdown elements
    clear_previous_output()
    data_url_ok = _encode_image_b64("It'ok.webp")
    data_url_warning = _encode_image_b64("Warning.gif")

    if row_index is not None:
        single_sample = np.array(features_df.iloc[row_index].values)

        # Show progress spinner while making predictions
        with st.spinner('Making prediction...'):
            # Load the pre-trained scaler and model.
            # SECURITY: pickle.load must only be fed trusted local artifacts.
            with open('scaler.pkl', 'rb') as f:
                scaler = pickle.load(f)
            with open('rf_clf.pkl', 'rb') as f:
                rf_clf = pickle.load(f)

            # Scale the row with the training-time scaler, then predict.
            X_new_scaled = scaler.transform(single_sample.reshape(1, -1))
            prediction = rf_clf.predict(X_new_scaled)

            if prediction[0] == 0:
                st.success("The website is not a phishing website.")
                st.markdown(f'<img src="data:image/gif;base64,{data_url_ok}" alt="cat gif">', unsafe_allow_html=True,)
            else:
                st.error("The website is a phishing website.")
                st.markdown(f'<img src="data:image/gif;base64,{data_url_warning}" alt="cat gif">', unsafe_allow_html=True,)

            # Visualize prediction confidence scores as a bar chart.
            # BUGFIX: the previous revision referenced undefined names
            # (plt, sns, y_pred_proba) and would raise NameError here;
            # compute the probabilities and render with Streamlit directly,
            # avoiding the missing matplotlib/seaborn imports entirely.
            st.write("Prediction Confidence Scores:")
            y_pred_proba = rf_clf.predict_proba(X_new_scaled)
            proba_df = pd.DataFrame(
                {"Probability": y_pred_proba[0]},
                index=[str(c) for c in rf_clf.classes_],
            )
            st.bar_chart(proba_df)
    else:
        st.error("ERROR!!! Please provide web site information for prediction !!!")

# This block clears the elements only if the prediction button is pressed
if st.session_state.clear_output:
    st.session_state.clear_output = False