rajsecrets0 committed on
Commit
ecad1fd
·
verified ·
1 Parent(s): b5a4169

Upload 7 files

Browse files
Files changed (7) hide show
  1. Procfile +1 -0
  2. penguins-app.py +80 -0
  3. penguins_clf.pkl +3 -0
  4. penguins_example.csv +2 -0
  5. requirements.txt +4 -0
  6. runtime.txt +1 -0
  7. setup.sh +9 -0
Procfile ADDED
@@ -0,0 +1 @@
 
 
1
+ web: sh setup.sh && streamlit run penguins-app.py
penguins-app.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit app that predicts a Palmer penguin's species.

The user either uploads a CSV or sets the features with sidebar widgets.
The input row is one-hot encoded together with a reference penguins dataset
and passed to a classifier loaded from ``penguins_clf.pkl``.
"""
import pickle

import numpy as np
import pandas as pd
import streamlit as st
# Not referenced by name below; kept from the original script.
from sklearn.ensemble import RandomForestClassifier

# Reference dataset used to give the encoder every category level.
PENGUINS_DATA_URL = 'https://raw.githubusercontent.com/dataprofessor/data/master/penguins_cleaned.csv'
# Serialized classifier, resolved relative to the working directory.
MODEL_PATH = 'penguins_clf.pkl'
# Categorical columns that are one-hot encoded, in this order.
CATEGORICAL_COLUMNS = ['sex', 'island']

st.write("""
# Penguin Prediction App

This app predicts the **Palmer Penguin** species!

Data obtained from the [palmerpenguins library](https://github.com/allisonhorst/palmerpenguins) in R by Allison Horst.
""")

st.sidebar.header('User Input Features')

st.sidebar.markdown("""
[Example CSV input file](https://raw.githubusercontent.com/dataprofessor/data/master/penguins_example.csv)
""")


def user_input_features():
    """Return a one-row DataFrame built from the sidebar widgets."""
    island = st.sidebar.selectbox('Island', ('Biscoe', 'Dream', 'Torgersen'))
    sex = st.sidebar.selectbox('Sex', ('male', 'female'))
    bill_length_mm = st.sidebar.slider('Bill length (mm)', 32.1, 59.6, 43.9)
    bill_depth_mm = st.sidebar.slider('Bill depth (mm)', 13.1, 21.5, 17.2)
    flipper_length_mm = st.sidebar.slider('Flipper length (mm)', 172.0, 231.0, 201.0)
    body_mass_g = st.sidebar.slider('Body mass (g)', 2700.0, 6300.0, 4207.0)
    data = {
        'island': island,
        'bill_length_mm': bill_length_mm,
        'bill_depth_mm': bill_depth_mm,
        'flipper_length_mm': flipper_length_mm,
        'body_mass_g': body_mass_g,
        'sex': sex,
    }
    return pd.DataFrame(data, index=[0])


# Collect the user input features into a dataframe: an uploaded CSV wins,
# otherwise the sidebar widgets are shown and used.
uploaded_file = st.sidebar.file_uploader("Upload your input CSV file", type=["csv"])
if uploaded_file is not None:
    input_df = pd.read_csv(uploaded_file)
else:
    input_df = user_input_features()

# Combine the user input with the full penguins dataset so that the one-hot
# encoding below produces a column for every category level, not only the
# levels present in the user's single row.
penguins_raw = pd.read_csv(PENGUINS_DATA_URL)
penguins = penguins_raw.drop(columns=['species'])

# Align the user's columns to the reference frame before concatenating.
# Without this, an uploaded CSV whose columns are in a different order (or
# that carries extra columns) changes the column order/count of the combined
# frame, and the model would receive features in the wrong positions.
input_df = input_df.reindex(columns=penguins.columns)
df = pd.concat([input_df, penguins], axis=0)

# One-hot encode the nominal (unordered) categorical features.
# https://www.kaggle.com/pratik1120/penguin-dataset-eda-classification-and-clustering
for col in CATEGORICAL_COLUMNS:
    dummy = pd.get_dummies(df[col], prefix=col)
    df = pd.concat([df, dummy], axis=1)
    del df[col]
df = df[:1]  # Keep only the first row (the user input data)

# Display the encoded user input features.
st.subheader('User Input features')

if uploaded_file is None:
    st.write('Awaiting CSV file to be uploaded. Currently using example input parameters (shown below).')
st.write(df)

# Read in the saved classification model. The context manager closes the file
# handle, which the bare ``pickle.load(open(...))`` form never did.
# NOTE(review): unpickling can execute arbitrary code; this is only acceptable
# because the model file ships with the app rather than coming from users.
with open(MODEL_PATH, 'rb') as model_file:
    load_clf = pickle.load(model_file)

# Apply the model to make predictions.
prediction = load_clf.predict(df)
prediction_proba = load_clf.predict_proba(df)

st.subheader('Prediction')
# Maps the model's integer class output to a species name.
# NOTE(review): assumes the model was trained with this label order - confirm.
penguins_species = np.array(['Adelie', 'Chinstrap', 'Gentoo'])
st.write(penguins_species[prediction])

st.subheader('Prediction Probability')
st.write(prediction_proba)
penguins_clf.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63e47201b9288f4112a57fc808640ca7bccb5568e118246cada0ed2cca013e42
3
+ size 271320
penguins_example.csv ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
2
+ Biscoe,43.9,17.2,201.0,4207.0,male
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ streamlit==0.61.0
2
+ pandas==0.25.3
3
+ numpy==1.19
4
+ scikit-learn==0.22.1
runtime.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ python-3.7.9
setup.sh ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
# Writes the Streamlit server configuration before the app starts.
# $PORT is read from the environment and must be set by the caller.
mkdir -p ~/.streamlit/

# A here-document is used instead of `echo "...\n..."`: whether echo expands
# "\n" escapes differs between shells, so the echo form can write the literal
# characters "\n" into the file and produce an invalid config.
cat > ~/.streamlit/config.toml <<EOF
[server]
port = $PORT
enableCORS = false
headless = true

EOF