Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| from sklearn.model_selection import train_test_split | |
| import pickle | |
| import os | |
| import pickle | |
| from sklearn.metrics import accuracy_score | |
| from sklearn.ensemble import RandomForestClassifier | |
# Load the penguin dataset and preview the first rows exactly as read from
# disk, i.e. before any cleaning is applied.
penguin_df = pd.read_csv('src/penguins.csv')
st.write(penguin_df.head())

st.subheader('Penguin Species')

# Discard every row that has at least one missing value.
penguin_df = penguin_df.dropna()

# Columns used as model inputs; 'species' is the prediction target.
feature_columns = [
    'island',
    'bill_length_mm',
    'bill_depth_mm',
    'flipper_length_mm',
    'body_mass_g',
    'sex',
]
output = penguin_df['species']
# get_dummies one-hot encodes the non-numeric columns (presumably 'island'
# and 'sex' -- confirm against the CSV) and passes numeric ones through.
features = pd.get_dummies(penguin_df[feature_columns])

# Preview the target and the encoded inputs.
st.write('Here are our output variables')
st.write(output.head())
st.write('Here are our feature variables')
st.write(features.head())
st.subheader('Model Training')

# `output` (the species column) and `features` (the one-hot encoded inputs)
# are already built from `penguin_df` earlier in this script; they are reused
# here rather than recomputed with an identical copy of that code.

# Replace species labels with integer codes. `uniques` holds the label that
# corresponds to each code, so predictions can be mapped back to names.
output, uniques = pd.factorize(output)

# Seed the split with the same value as the classifier. Streamlit re-executes
# the whole script on every interaction; without a fixed seed the reported
# accuracy and the saved model would change on each rerun.
# NOTE(review): test_size=.8 holds out 80% of the rows for evaluation and
# trains on only 20% -- confirm this is intentional and not meant to be .2.
x_train, x_test, y_train, y_test = train_test_split(
    features, output, test_size=.8, random_state=15)

rfc = RandomForestClassifier(random_state=15)
# Train and predict on plain arrays (`.values`) rather than DataFrames.
rfc.fit(x_train.values, y_train)
y_pred = rfc.predict(x_test.values)

# accuracy_score's documented signature is (y_true, y_pred).
score = accuracy_score(y_test, y_pred)
st.write(f'Our accuracy score for this model is {score}')
st.subheader('Save the Model Output to Pickle')

# Make sure the target directory exists before writing into it.
output_dir = "outputs"
os.makedirs(output_dir, exist_ok=True)

# Serialise each artifact once; the same bytes are written to disk and later
# handed to the matching download button.
model_filename = os.path.join(output_dir, "random_forest_penguin.pickle")
model_bytes = pickle.dumps(rfc)
with open(model_filename, "wb") as model_file:
    model_file.write(model_bytes)

# `uniques` comes from pd.factorize earlier in the script.
uniques_filename = os.path.join(output_dir, "uniques_data.pickle")
uniques_bytes = pickle.dumps(uniques)
with open(uniques_filename, "wb") as uniques_file:
    uniques_file.write(uniques_bytes)

st.write(f"Model saved to {model_filename}")
st.write("Click below to download the model.")

# Download button for the trained classifier.
st.download_button(
    label="Download Trained Model (random_forest_penguin.pickle)",
    data=model_bytes,
    file_name="random_forest_penguin.pickle",
    mime="application/octet-stream",
)

# Download button for the pickled `uniques` data.
st.download_button(
    label="Download the data (uniques_data.pickle)",
    data=uniques_bytes,
    file_name="uniques_data.pickle",
    mime="application/octet-stream",
)