Spaces:
Runtime error
Runtime error
Upload 2 files
Browse files- app.py +52 -0
- requirements.txt +4 -0
app.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import pickle
|
| 3 |
+
import numpy as np
|
| 4 |
+
|
| 5 |
+
# Stores loaded model in cache so that we don't need to reload model repeatedly for each input
|
| 6 |
+
@st.cache(allow_output_mutation=True)
def load_model():
    """Load the pickled model, scaler and country mapping from disk.

    The three artifacts are read from the current working directory:
    ``random_forest_model.sav``, ``country_dict.pickle`` and
    ``standardScaler.pickle``. The function is wrapped in ``st.cache`` so
    the files are not re-read on every Streamlit rerun.

    Returns:
        tuple: ``(model, scaler, country_dict)`` -- note the order differs
        from the order in which the files are read.

    Raises:
        FileNotFoundError: If any of the three files is missing.
    """
    # NOTE(review): `st.cache` is deprecated in recent Streamlit releases in
    # favour of `st.cache_resource`; confirm the deployed Streamlit version
    # before switching decorators.
    # NOTE(security): pickle.load can execute arbitrary code. These files are
    # presumably shipped with the app -- never point this at untrusted input.
    # Context managers make sure each file handle is closed after loading
    # (the previous bare `open(...)` calls leaked the handles).
    with open('random_forest_model.sav', 'rb') as model_file:
        model = pickle.load(model_file)
    with open('country_dict.pickle', 'rb') as country_file:
        country_dict = pickle.load(country_file)
    with open('standardScaler.pickle', 'rb') as scaler_file:
        scaler = pickle.load(scaler_file)
    return model, scaler, country_dict
|
| 12 |
+
|
| 13 |
+
def featurize(time, country, scaler, country_dict):
    """Build the scaled feature row for a single student.

    Args:
        time: Time value entered by the user (the UI describes it as
            minutes spent watching content).
        country: Key to look up in ``country_dict``.
        scaler: Object exposing ``transform(array)``; presumably the fitted
            scaler returned by ``load_model`` -- verify against caller.
        country_dict: Mapping from country to its encoded value.

    Returns:
        The result of ``scaler.transform`` applied to a ``(1, 2)`` array
        laid out as ``[[encoded_country, time]]``.

    Raises:
        KeyError: If ``country`` is not a key of ``country_dict``.
    """
    # Column order is (country code, time).
    # NOTE(review): this must match the column order the scaler was fitted
    # with -- confirm against the training code.
    arr = np.array([country_dict[country], time]).reshape(1, -1)
    return scaler.transform(arr)
|
| 17 |
+
|
| 18 |
+
def main():
    """Render the Streamlit page and show a conversion prediction.

    Loads the model artifacts via ``load_model``, prints the introductory
    text, collects the time spent and the country through a form and, once
    the form is submitted, writes the predicted class together with its
    probability (as a rounded percentage).
    """
    model, scaler, country_dict = load_model()
    st.title("'365 data science' : free-to-paid user conversion predictor")
    # Iterating a dict yields its keys, so no explicit .keys() is needed.
    list_of_countries = list(country_dict)

    # Adjacent string literals are used instead of backslash continuations
    # inside the literal, so source indentation can never leak into the text.
    st.write(
        "'365 data science' is a ed-tech company that creates data science courses comprising of video lectures and "
        "exercises in the form of quizzes and exams. Some of the courses offered are free and majority of the other courses "
        "need the user to buy paid subscription. Students mostly register on this platform as 'free-tier user' as the registration is free of cost. "
        "They enroll for free courses and then if they like the content of the platform, they proceed to buy paid-subscription "
        "which offers lot of perks as compared to free tier. Paid student get access to large library of courses along with certificates, "
        "quizzes and exams."
    )
    st.write(
        "This application predicts how likely the student is to buy the paid subscription based on the number of minutes "
        "he spent engaging with the free course content and the country he comes from. In the exploratory data analysis done, it was found that "
        "total time spent by user and nationality of user are two major and most significant factor for determining how likely the user is "
        "to buy the course. Typical range for total time watched for students is mostly 0.1 to 100 minutes"
    )

    with st.form("my_form"):
        # A negative watch time is meaningless, so the input is bounded at 0.
        total_time = st.number_input(
            'Time spent on platform watching tutorials',
            min_value=0.0,
        )
        student_country = st.selectbox('country', list_of_countries)
        st.write('Total time spent : ', total_time)
        st.write('Student country :', student_country)

        # Every form must have a submit button.
        submitted = st.form_submit_button("Submit")

    # NOTE(review): the original nesting of this branch (inside vs. after the
    # form) could not be recovered; it only affects where the result renders.
    if submitted:
        vector = featurize(total_time, student_country, scaler, country_dict)
        prediction = model.predict(vector)[0]
        predicted_proba = model.predict_proba(vector)
        # NOTE(review): indexing columns 0/1 assumes the model's classes are
        # ordered [0, 1] -- confirm against model.classes_.
        # float(...) keeps round() returning a plain int so the percentage
        # is rendered as e.g. "85" regardless of the NumPy scalar type.
        if prediction == 0:
            st.write('Student is ', str(round(float(predicted_proba[0][0]) * 100)), '% likely to NOT buy the paid subscription')
        else:
            st.write('Student is ', str(round(float(predicted_proba[0][1]) * 100)), '% likely to buy the paid subscription')
|
| 50 |
+
|
| 51 |
+
# Start the app only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
|
requirements.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# `pickle` is part of the Python standard library and cannot be installed
# from PyPI, so it must not be listed here.
# `sklearn` is a deprecated PyPI alias; `scikit-learn` is the real package.
# NOTE(review): consider pinning scikit-learn to the version used to create
# the pickled model/scaler -- unpickling across versions is not guaranteed.
numpy
scikit-learn
|