YEHTUT committed on
Commit
4bc1edc
·
verified ·
1 Parent(s): d3fdccd

Upload 6 files

Browse files
Files changed (6) hide show
  1. It'ok.webp +0 -0
  2. Warning.gif +0 -0
  3. app.py +121 -0
  4. requirements.txt +1 -0
  5. rf_clf.pkl +3 -0
  6. scaler.pkl +3 -0
It'ok.webp ADDED
Warning.gif ADDED
app.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import pickle
3
+ import numpy as np
4
+ import streamlit as st
5
+ import pandas as pd
6
+ from sklearn.model_selection import train_test_split
7
+ from sklearn.ensemble import RandomForestClassifier # Example model
8
+ from sklearn.preprocessing import StandardScaler
9
+ # Streamlit app title
10
+ st.title('ITI105 Team Project')
11
+ st.subheader('Phishing web site Machine Learning Prediction App')
12
+
13
+ if 'clear_output' not in st.session_state:
14
+ st.session_state.clear_output = False
15
+
16
+ # Function to clear specific elements
17
+ def clear_previous_output():
18
+ st.session_state.clear_output = True
19
+ # Upload the CSV file
20
+ uploaded_file = st.file_uploader("Choose a CSV file with website data", type="csv")
21
+ row_index = None
22
+ if uploaded_file is not None:
23
+ # Read the CSV file
24
+ df = pd.read_csv(uploaded_file)
25
+ # st.write("Original Dataframe:", df)
26
+
27
+ # Extract the URL column to display in the dropdown
28
+ url_list = df['url'].tolist()
29
+
30
+ # Display the dropdown with URL options
31
+ selected_url = st.selectbox("Select URL for Prediction", url_list)
32
+
33
+ # Display the list fo model
34
+ selected_model = st.selectbox("Select Model for Prediction", ['Random Forest', 'Logistic Regression', 'SVM', 'KNN', 'Decision Tree'])
35
+
36
+
37
+ # Remove the first (non-numeric) and last (target) columns
38
+ if df.shape[1] > 2: # Ensure there are enough columns to remove
39
+ features_df = df.iloc[:, 1:-1] # Drop first and last columns
40
+
41
+
42
+ # Select a row for prediction
43
+ # row_index = st.number_input("Select a row index for prediction", min_value=0, max_value=len(features_df)-1, step=1)
44
+ row_index = df[df['url'] == selected_url].index[0]
45
+ # Display the selected row's features in a table
46
+ selected_row = df.iloc[row_index, :]
47
+ st.subheader("List of selected website features:")
48
+ st.table(selected_row.to_frame().T)
49
+
50
+ else:
51
+ st.write("The dataset does not have enough columns after removing the first and last columns.")
52
+ else:
53
+ st.error("ERROR!!! Please upload a CSV file to continue.")
54
+
55
+ if st.button("Predict"):
56
+
57
+ # Clear previous st.success, st.error, and st.markdown elements
58
+ clear_previous_output()
59
+ file_ = open("It'ok.webp", "rb")
60
+ contents = file_.read()
61
+ data_url_ok = base64.b64encode(contents).decode("utf-8")
62
+ file_.close()
63
+
64
+ file = open("Warning.gif", "rb")
65
+ contents = file.read()
66
+ data_url_warning = base64.b64encode(contents).decode("utf-8")
67
+ file.close()
68
+ if row_index is not None:
69
+ input_values = features_df.iloc[row_index].values # Get selected row data
70
+ # st.write("Selected Features Dataframe for predicton:", input_values)
71
+ # st.write("Selected Row Data (Features Only):", input_values)
72
+ single_sample = np.array(input_values)
73
+ # Dummy model for the purpose of this example
74
+ # Normally you would load a pre-trained model or train one
75
+ # X = features_df # Using the processed features data
76
+ # y = [0]*len(df) # Dummy target variable for training the model (since we don't have a real target)
77
+
78
+ # # Train/test split
79
+ # X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
80
+ # model = RandomForestClassifier()
81
+ # model.fit(X_train, y_train)
82
+
83
+ # Show progress spinner while making predictions
84
+ with st.spinner('Making prediction...'):
85
+ # Predict based on selected row
86
+ # prediction = model.predict([input_values])
87
+ # Load the pre-trained scaler and model
88
+ with open('scaler.pkl', 'rb') as f:
89
+ scalar = pickle.load(f)
90
+
91
+ with open('rf_clf.pkl', 'rb') as f:
92
+ rf_clf = pickle.load(f)
93
+
94
+ # Scale the new data using the pre-trained scaler
95
+ X_new_scaled = scalar.transform(single_sample.reshape(1, -1))
96
+
97
+ # Make predictions using the pre-trained model
98
+ prediction = rf_clf.predict(X_new_scaled)
99
+
100
+ # loaded_model = pickle.load(open('Random_Forest.sav', 'rb'))
101
+ # prediction = loaded_model.predict(np.array(single_sample))
102
+
103
+
104
+ # st.write(f"Prediction : {prediction[0]}")
105
+ if prediction[0] == 0:
106
+ st.success("The website is not a phishing website.")
107
+ st.markdown(f'<img src="data:image/gif;base64,{data_url_ok}" alt="cat gif">', unsafe_allow_html=True,)
108
+ else:
109
+ st.error("The website is a phishing website.")
110
+ st.markdown(f'<img src="data:image/gif;base64,{data_url_warning}" alt="cat gif">', unsafe_allow_html=True,)
111
+
112
+ # Note: Since we don't have a real target, accuracy calculation is skipped.
113
+ else:
114
+ st.error("ERROR!!! Please provide web site information for prediction !!!")
115
+
116
+ # This block clears the elements only if the prediction button is pressed
117
+ if st.session_state.clear_output:
118
+ st.session_state.clear_output = False
119
+ # st.success("") # Clear any previous success messages
120
+ # st.error("") # Clear any previous error messages
121
+ # st.markdown("") # Clear any previous markdown content
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ scikit-learn
rf_clf.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40e1f347cbde7b0e785e7a70df60e20339dabcd142aee071e254ca43532a96fb
3
+ size 109067685
scaler.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6c0343372b5eb9294885fa2843ae3cd6ad5ac4da1bcb39bf43ba47baadd76a9
3
+ size 3988