Spaces:
Sleeping
Sleeping
Upload 10 files
Browse files- app.py +29 -0
- categorical_cols_list.txt +1 -0
- eda.py +34 -0
- encoder.pkl +3 -0
- model.pkl +3 -0
- numerical_cols_list.txt +1 -0
- online_shoppers_intention.csv +0 -0
- prediction.py +55 -0
- requirement.txt +7 -0
- scaler.pkl +3 -0
app.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import pandas as pd
|
| 3 |
+
from eda import run_eda_app
|
| 4 |
+
from prediction import run_prediction_app
|
| 5 |
+
import matplotlib.pyplot as plt
|
| 6 |
+
|
| 7 |
+
# Load the dataset
@st.cache_data
def _load_data(path='online_shoppers_intention.csv'):
    """Read the CSV file at *path* and return it as a pandas DataFrame.

    The result is cached by Streamlit so the file is parsed once instead of
    every time this script is executed.
    """
    return pd.read_csv(path)


data = _load_data()
|
| 9 |
+
|
| 10 |
+
# Main app
|
| 11 |
+
def main():
    """Draw the app title and sidebar menu, then render the selected page.

    'Home' prints two welcome lines, 'EDA' hands the module-level ``data``
    frame to ``run_eda_app`` and 'Prediction' calls ``run_prediction_app``.
    """
    st.title('Online Shoppers Intention Prediction App')

    # Sidebar for navigation
    pages = ['Home', 'EDA', 'Prediction']
    selected = st.sidebar.selectbox('Menu', pages)

    if selected == 'EDA':
        run_eda_app(data)
        return

    if selected == 'Prediction':
        run_prediction_app()
        return

    if selected == 'Home':
        st.write('Welcome to the Online Shoppers Intention Prediction App!')
        st.write('Navigate to the EDA or Prediction sections using the sidebar to explore further.')


if __name__ == '__main__':
    main()
|
categorical_cols_list.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
["Month", "VisitorType", "Weekend"]
|
eda.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import matplotlib.pyplot as plt
|
| 4 |
+
import seaborn as sns
|
| 5 |
+
|
| 6 |
+
def _show_figure(fig):
    """Render *fig* in the Streamlit page, then close it.

    Figures created through pyplot stay registered until they are closed;
    closing right after rendering keeps them from piling up.
    """
    st.pyplot(fig)
    plt.close(fig)


def run_eda_app(data):
    """Render the exploratory-data-analysis page for *data*.

    Shows the first rows of the frame and, behind one checkbox each, a grid
    of histograms, a correlation heatmap and a bar chart of sessions per
    month.

    Args:
        data: pandas DataFrame to explore. The monthly chart reads its
            ``Month`` column.
    """
    st.subheader('Exploratory Data Analysis')

    # Display data
    st.write("Here's a glimpse of the dataset:")
    st.write(data.head())

    # Show data distribution
    if st.checkbox('Show data distribution'):
        st.write('Data distribution:')
        # Let pandas build the subplot grid itself: handing a single Axes to
        # DataFrame.hist for several columns makes pandas clear that figure
        # and emit a UserWarning.
        axes = data.hist(bins=30, figsize=(20, 15))
        _show_figure(axes.flat[0].get_figure())

    # Correlation heatmap
    if st.checkbox('Show correlation heatmap'):
        st.write('Correlation heatmap:')
        fig, ax = plt.subplots(figsize=(10, 8))
        # Correlate only numeric/boolean columns: since pandas 2.0
        # DataFrame.corr raises on text columns instead of dropping them.
        numeric = data.select_dtypes(include=['number', 'bool'])
        sns.heatmap(numeric.corr(), annot=True, cmap='coolwarm', ax=ax)
        _show_figure(fig)

    # Monthly distribution of sessions
    if st.checkbox('Show monthly distribution of sessions'):
        st.write('Monthly distribution of sessions:')
        fig, ax = plt.subplots(figsize=(10, 6))
        data['Month'].value_counts().plot(kind='bar', ax=ax)
        ax.set_title('Number of sessions per month')
        ax.set_ylabel('Count')
        _show_figure(fig)
|
encoder.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:67d6537db61c0edfa4a5b16853d6a8fc5147eeca697548626525cb0a9177b70b
|
| 3 |
+
size 868
|
model.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4a8691e7dafeb9f9e8a38bdd2a46958d7741811747cb7d6db99d32cb176b000a
|
| 3 |
+
size 33484083
|
numerical_cols_list.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
["Administrative", "Administrative_Duration", "Informational", "Informational_Duration", "ProductRelated", "ProductRelated_Duration", "BounceRates", "ExitRates", "PageValues", "SpecialDay", "OperatingSystems", "Browser", "Region", "TrafficType"]
|
online_shoppers_intention.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
prediction.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import numpy as np
|
| 3 |
+
import pickle
|
| 4 |
+
|
| 5 |
+
# Load the trained Random Forest classifier and the preprocessing artifacts
# stored next to it.
# NOTE: pickle.load can execute arbitrary code; only load files that come
# from this repository.
def _load_pickle(path):
    """Return the object stored in the pickle file at *path*."""
    with open(path, 'rb') as file:
        return pickle.load(file)


def _load_column_list(path):
    """Return the JSON list of column names stored in the file at *path*."""
    import json  # local import: this module's header does not import json

    with open(path, encoding='utf-8') as file:
        return json.load(file)


model = _load_pickle('model.pkl')
scaler = _load_pickle('scaler.pkl')
encoder = _load_pickle('encoder.pkl')
numerical_cols = _load_column_list('numerical_cols_list.txt')
categorical_cols = _load_column_list('categorical_cols_list.txt')

_CALENDAR_MONTHS = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'June', 'July', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']


def _month_options():
    """Return the month labels offered in the form.

    When the encoder exposes ``categories_``, only the labels it holds for
    ``Month`` are offered (in calendar order), so the user cannot pick a
    label the encoder cannot transform. Otherwise the full calendar list is
    returned.
    """
    categories = getattr(encoder, 'categories_', None)
    if categories is None or 'Month' not in categorical_cols:
        return list(_CALENDAR_MONTHS)

    # NOTE(review): assumes the encoder was fitted on the columns of
    # categorical_cols_list.txt in that order - confirm against training code.
    known = [str(label) for label in categories[categorical_cols.index('Month')]]
    prefixes = [name[:3] for name in _CALENDAR_MONTHS]

    def calendar_rank(label):
        # Compare on the first three letters so 'Jul'/'July' and 'Jun'/'June'
        # sort to the same calendar position.
        return prefixes.index(label[:3]) if label[:3] in prefixes else len(prefixes)

    return sorted(known, key=calendar_rank)


def _prepare_features(input_data):
    """Convert the raw form values into the feature matrix for the model.

    Args:
        input_data: dict mapping each dataset column name to the value the
            user entered.

    Returns:
        A 2-D numpy array with a single row.
    """
    import pandas as pd  # local import: this module's header does not import pandas

    frame = pd.DataFrame([input_data])
    scaled = scaler.transform(frame[numerical_cols])
    encoded = encoder.transform(frame[categorical_cols])
    if hasattr(encoded, 'toarray'):
        # Some encoders return a sparse matrix; densify before concatenating.
        encoded = encoded.toarray()

    # NOTE(review): assumes the model was trained on the scaled numerical
    # columns followed by the encoded categorical columns - confirm against
    # the training code.
    return np.concatenate([np.asarray(scaled), np.asarray(encoded)], axis=1)


def run_prediction_app():
    """Render the prediction form and show the model's verdict.

    Collects one value per dataset feature. When 'Predict' is clicked the
    values are scaled/encoded with the shipped preprocessing artifacts and
    passed to the model; the page then prints 'Revenue' or 'No Revenue'.
    """
    st.subheader('Predict Revenue Generation')

    # Taking input from the user
    Administrative = st.number_input('Administrative', value=0)
    Administrative_Duration = st.number_input('Administrative Duration', value=0.0)
    Informational = st.number_input('Informational', value=0)
    Informational_Duration = st.number_input('Informational Duration', value=0.0)
    ProductRelated = st.number_input('ProductRelated', value=0)
    ProductRelated_Duration = st.number_input('ProductRelated Duration', value=0.0)
    BounceRates = st.number_input('BounceRates', value=0.0)
    ExitRates = st.number_input('ExitRates', value=0.0)
    PageValues = st.number_input('PageValues', value=0.0)
    SpecialDay = st.number_input('SpecialDay', value=0.0)
    Month = st.selectbox('Month', _month_options())
    OperatingSystems = st.number_input('Operating Systems', value=1)
    Browser = st.number_input('Browser', value=1)
    Region = st.number_input('Region', value=1)
    TrafficType = st.number_input('Traffic Type', value=1)
    VisitorType = st.selectbox('Visitor Type', ['Returning_Visitor', 'New_Visitor', 'Other'])
    Weekend = st.checkbox('Weekend?')

    # When 'Predict' is clicked, make the prediction and show it
    if st.button('Predict'):
        input_data = {
            'Administrative': Administrative,
            'Administrative_Duration': Administrative_Duration,
            'Informational': Informational,
            'Informational_Duration': Informational_Duration,
            'ProductRelated': ProductRelated,
            'ProductRelated_Duration': ProductRelated_Duration,
            'BounceRates': BounceRates,
            'ExitRates': ExitRates,
            'PageValues': PageValues,
            'SpecialDay': SpecialDay,
            'Month': Month,
            'OperatingSystems': OperatingSystems,
            'Browser': Browser,
            'Region': Region,
            'TrafficType': TrafficType,
            'VisitorType': VisitorType,
            'Weekend': Weekend,
        }

        # Make prediction on the preprocessed row; the raw values contain
        # strings and a bool that cannot be passed to the model as-is.
        features = _prepare_features(input_data)
        prediction = model.predict(features)[0]
        st.write(f"Prediction: {'Revenue' if prediction else 'No Revenue'}")
|
requirement.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
streamlit
|
| 2 |
+
pandas
|
| 3 |
+
seaborn
|
| 4 |
+
matplotlib
|
| 5 |
+
numpy
|
| 6 |
+
scikit-learn
|
| 7 |
+
plotly
|
scaler.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6ceaaf7cab937c6bd3e17e87df18b70cf7126c4da35037e95d6c2ce02c5557ae
|
| 3 |
+
size 1103
|