rishikesh committed on
Commit
47e6804
·
1 Parent(s): 1652f41

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +52 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pickle
3
+ import numpy as np
4
+
5
# Keep the loaded artifacts in Streamlit's cache so they are not reloaded
# from disk repeatedly for each input.
# NOTE(review): newer Streamlit releases deprecate st.cache in favour of
# st.cache_resource -- confirm against the Streamlit version this app runs on.
@st.cache(allow_output_mutation=True)
def load_model():
    """Load the pickled model, scaler and country mapping from disk.

    Reads ``random_forest_model.sav``, ``country_dict.pickle`` and
    ``standardScaler.pickle`` relative to the current working directory.
    Each file is opened in a ``with`` block so its handle is closed as soon
    as unpickling finishes.

    Returns:
        tuple: ``(model, scaler, country_dict)``, in that order.

    Raises:
        FileNotFoundError: if any of the three files does not exist.
    """
    # NOTE: unpickling can execute arbitrary code; these files must only
    # ever come from a trusted source.
    with open('random_forest_model.sav', 'rb') as model_file:
        model = pickle.load(model_file)
    with open('country_dict.pickle', 'rb') as country_file:
        country_dict = pickle.load(country_file)
    with open('standardScaler.pickle', 'rb') as scaler_file:
        scaler = pickle.load(scaler_file)
    return model, scaler, country_dict
12
+
13
def featurize(time, country, scaler, country_dict):
    """Build the scaled one-row feature matrix for a single student.

    Args:
        time: value placed in the second feature column.
        country: key looked up in ``country_dict``.
        scaler: object exposing ``transform``; applied to the one-row matrix.
        country_dict: mapping whose value for ``country`` becomes the first
            feature column.

    Returns:
        The result of ``scaler.transform`` on the ``(1, n_features)`` array.

    Raises:
        KeyError: if ``country`` is not a key of ``country_dict``.
    """
    country_code = country_dict[country]
    raw_features = np.array([country_code, time])
    # A single sample: one row, as many columns as there are features.
    single_row = np.reshape(raw_features, (1, -1))
    return scaler.transform(single_row)
17
+
18
def main():
    """Render the Streamlit page and report a prediction for the form input.

    Loads the model, scaler and country mapping via ``load_model``, shows the
    introductory text, then presents a form with a numeric "time spent" field
    and a country select box. After the form is submitted, the inputs are
    turned into a feature vector with ``featurize`` and the model's predicted
    class and class probability are written to the page.
    """
    model, scaler, country_dict = load_model()
    st.title("'365 data science' : free-to-paid user conversion predictor")
    list_of_countries = list(country_dict)

    # Adjacent string literals are concatenated by the parser, so the source
    # indentation never ends up inside the displayed text (which can happen
    # with backslash line continuations inside a string).
    st.write(
        "'365 data science' is a ed-tech company that creates data science courses comprising of video lectures and "
        "exercises in the form of quizzes and exams. Some of the courses offered are free and majority of the other courses "
        "need the user to buy paid subscription. Students mostly register on this platform as 'free-tier user' as the registration is free of cost. "
        "They enroll for free courses and then if they like the content of the platform, they proceed to buy paid-subscription "
        "which offers lot of perks as compared to free tier. Paid student get access to large library of courses along with certificates, "
        "quizzes and exams."
    )
    st.write(
        "This application predicts how likely the student is to buy the paid subscription based on the number of minutes "
        "he spent engaging with the free course content and the country he comes from. In the exploratory data analysis done, it was found that "
        "total time spent by user and nationality of user are two major and most significant factor for determining how likely the user is "
        "to buy the course. Typical range for total time watched for students is mostly 0.1 to 100 minutes"
    )

    with st.form("my_form"):
        # A watch time cannot be negative, so the widget rejects values below 0.
        total_time = st.number_input('Time spent on platform watching tutorials', min_value=0.0)
        student_country = st.selectbox('country', list_of_countries)
        st.write('Total time spent : ', total_time)
        st.write('Student country :', student_country)

        # Every form must have a submit button.
        submitted = st.form_submit_button("Submit")

    # NOTE(review): the original indentation was not recoverable; the result
    # is rendered below the form here -- confirm this matches the intended
    # layout (it may have been nested inside the form).
    if submitted:
        vector = featurize(total_time, student_country, scaler, country_dict)
        prediction = model.predict(vector)[0]
        predicted_proba = model.predict_proba(vector)
        # Presumably column 0 / column 1 of predict_proba correspond to the
        # "not buy" / "buy" classes -- verify against model.classes_.
        if prediction == 0:
            st.write('Student is ', str(round(predicted_proba[0][0]*100)), '% likely to NOT buy the paid subscription')
        else:
            st.write('Student is ', str(round(predicted_proba[0][1]*100)), '% likely to buy the paid subscription')
50
+
51
# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
# pickle is part of the Python standard library and must not be listed here;
# asking pip to install it makes the whole install fail.
numpy
# "sklearn" is only a deprecated alias on PyPI; scikit-learn is the real
# package and is the one that provides the `sklearn` import.
scikit-learn