Kurkur99 committed on
Commit
d574e4c
·
1 Parent(s): cb3bc4a

Upload 10 files

Browse files
app.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ from eda import run_eda_app
4
+ from prediction import run_prediction_app
5
+ import matplotlib.pyplot as plt
6
+
7
# Read the shoppers CSV into a module-level DataFrame; main() passes it on
# to the EDA page.
data = pd.read_csv(filepath_or_buffer='online_shoppers_intention.csv')
9
+
10
+ # Main app
11
def main():
    """Draw the app title and route to the page chosen in the sidebar.

    The sidebar select box offers 'Home', 'EDA' and 'Prediction'. 'Home'
    prints a short welcome text, 'EDA' calls ``run_eda_app`` with the
    module-level ``data`` frame, and 'Prediction' calls
    ``run_prediction_app``.
    """
    st.title('Online Shoppers Intention Prediction App')

    # Sidebar for navigation
    pages = ['Home', 'EDA', 'Prediction']
    selected = st.sidebar.selectbox('Menu', pages)

    if selected == 'EDA':
        run_eda_app(data)
        return

    if selected == 'Prediction':
        run_prediction_app()
        return

    if selected == 'Home':
        st.write('Welcome to the Online Shoppers Intention Prediction App!')
        st.write('Navigate to the EDA or Prediction sections using the sidebar to explore further.')


# Only build the UI when this file is executed as the entry script.
if __name__ == '__main__':
    main()
categorical_cols_list.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ["Month", "VisitorType", "Weekend"]
eda.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import matplotlib.pyplot as plt
4
+ import seaborn as sns
5
+
6
def run_eda_app(data):
    """Render the exploratory-data-analysis page.

    Always shows the first rows of ``data``. Three checkboxes then opt in to
    a grid of per-column histograms, a correlation heatmap, and a bar chart
    of row counts per value of the ``Month`` column.

    Args:
        data: pandas DataFrame to explore. The monthly chart reads its
            ``Month`` column.
    """
    st.subheader('Exploratory Data Analysis')

    # Display data
    st.write("Here's a glimpse of the dataset:")
    st.write(data.head())

    # Show data distribution
    if st.checkbox('Show data distribution'):
        st.write('Data distribution:')
        # Let pandas create the subplot grid itself. Handing it a single
        # pre-made Axes for a multi-column frame makes it clear and rebuild
        # that figure with a UserWarning.
        axes = data.hist(bins=30, figsize=(20, 15))
        fig = axes.ravel()[0].get_figure()
        st.pyplot(fig)
        # Streamlit reruns the script on every interaction; close the figure
        # so pyplot does not keep accumulating open figures.
        plt.close(fig)

    # Correlation heatmap
    if st.checkbox('Show correlation heatmap'):
        st.write('Correlation heatmap:')
        # Correlate numeric/bool columns only: with pandas >= 2.0, corr() on
        # a frame that still contains string columns raises instead of
        # silently dropping them.
        numeric_data = data.select_dtypes(include=['number', 'bool'])
        fig, ax = plt.subplots(figsize=(10, 8))
        sns.heatmap(numeric_data.corr(), annot=True, cmap='coolwarm', ax=ax)
        st.pyplot(fig)
        plt.close(fig)

    # Monthly distribution of sessions
    if st.checkbox('Show monthly distribution of sessions'):
        st.write('Monthly distribution of sessions:')
        fig, ax = plt.subplots(figsize=(10, 6))
        data['Month'].value_counts().plot(kind='bar', ax=ax)
        ax.set_title('Number of sessions per month')
        ax.set_ylabel('Count')
        st.pyplot(fig)
        plt.close(fig)
encoder.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67d6537db61c0edfa4a5b16853d6a8fc5147eeca697548626525cb0a9177b70b
3
+ size 868
model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a8691e7dafeb9f9e8a38bdd2a46958d7741811747cb7d6db99d32cb176b000a
3
+ size 33484083
numerical_cols_list.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ["Administrative", "Administrative_Duration", "Informational", "Informational_Duration", "ProductRelated", "ProductRelated_Duration", "BounceRates", "ExitRates", "PageValues", "SpecialDay", "OperatingSystems", "Browser", "Region", "TrafficType"]
online_shoppers_intention.csv ADDED
The diff for this file is too large to render. See raw diff
 
prediction.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import numpy as np
3
+ import pickle
4
+
5
# Unpickle the trained Random Forest classifier stored next to the app.
# NOTE(review): pickle.load can execute arbitrary code from the file; keep
# this only while model.pkl is always a trusted, repository-owned artifact.
with open('model.pkl', mode='rb') as file:
    model = pickle.load(file)
8
+
9
def run_prediction_app():
    """Render the prediction form and show the model's verdict.

    Collects one value per feature through Streamlit widgets. When the
    'Predict' button is pressed, the values are passed as a single row to
    the module-level ``model`` in the order the widgets appear, and
    'Revenue' or 'No Revenue' is written depending on the truthiness of the
    returned prediction.
    """
    st.subheader('Predict Revenue Generation')

    # Feature name -> value; insertion order is the column order of the row
    # handed to the model.
    features = {}

    # Taking input from the user
    # Each entry is (feature name, widget label, default value). An int
    # default gives an integer input, a float default a float input.
    leading_numeric = (
        ('Administrative', 'Administrative', 0),
        ('Administrative_Duration', 'Administrative Duration', 0.0),
        ('Informational', 'Informational', 0),
        ('Informational_Duration', 'Informational Duration', 0.0),
        ('ProductRelated', 'ProductRelated', 0),
        ('ProductRelated_Duration', 'ProductRelated Duration', 0.0),
        ('BounceRates', 'BounceRates', 0.0),
        ('ExitRates', 'ExitRates', 0.0),
        ('PageValues', 'PageValues', 0.0),
        ('SpecialDay', 'SpecialDay', 0.0),
    )
    for name, label, default in leading_numeric:
        features[name] = st.number_input(label, value=default)

    month_options = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'June', 'July', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    features['Month'] = st.selectbox('Month', month_options)

    trailing_numeric = (
        ('OperatingSystems', 'Operating Systems', 1),
        ('Browser', 'Browser', 1),
        ('Region', 'Region', 1),
        ('TrafficType', 'Traffic Type', 1),
    )
    for name, label, default in trailing_numeric:
        features[name] = st.number_input(label, value=default)

    visitor_options = ['Returning_Visitor', 'New_Visitor', 'Other']
    features['VisitorType'] = st.selectbox('Visitor Type', visitor_options)
    features['Weekend'] = st.checkbox('Weekend?')

    # Nothing more to do until 'Predict' is clicked.
    if not st.button('Predict'):
        return

    # Make prediction
    # NOTE(review): the raw widget values, including the Month and Visitor
    # Type strings, are passed to model.predict unchanged. Confirm that the
    # pickled model accepts unencoded, unscaled input in this column order.
    row = list(features.values())
    prediction = model.predict([row])[0]
    outcome = 'Revenue' if prediction else 'No Revenue'
    st.write(f"Prediction: {outcome}")
requirement.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ seaborn
4
+ matplotlib
5
+ numpy
6
+ scikit-learn
7
+ plotly
scaler.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ceaaf7cab937c6bd3e17e87df18b70cf7126c4da35037e95d6c2ce02c5557ae
3
+ size 1103