Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,222 +1,225 @@
|
|
| 1 |
-
# Importo librerie utili
|
| 2 |
-
import re
|
| 3 |
-
import json
|
| 4 |
-
import numpy as np
|
| 5 |
-
import pandas as pd
|
| 6 |
-
import streamlit as st
|
| 7 |
-
from itertools import product
|
| 8 |
-
from datetime import datetime
|
| 9 |
-
import matplotlib.pyplot as plt
|
| 10 |
-
from htbuilder.units import percent, px
|
| 11 |
-
from sklearn.model_selection import train_test_split
|
| 12 |
-
from htbuilder import HtmlElement, div, hr, a, p, img, styles
|
| 13 |
-
from sklearn.preprocessing import MinMaxScaler, StandardScaler, LabelBinarizer, OneHotEncoder
|
| 14 |
-
from sklearn.metrics import precision_score,recall_score,f1_score, accuracy_score, confusion_matrix, r2_score, precision_recall_curve, roc_auc_score, roc_curve
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
from
|
| 22 |
-
from functions import
|
| 23 |
-
|
| 24 |
-
#
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
st.
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
st.write("
|
| 40 |
-
st.write("
|
| 41 |
-
st.write("")
|
| 42 |
-
st.write("
|
| 43 |
-
|
| 44 |
-
st.write("
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
st.write("
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
#
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
if
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
st.
|
| 73 |
-
|
| 74 |
-
st.
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
st.
|
| 82 |
-
st.
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
st.write("
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
st.
|
| 143 |
-
st.
|
| 144 |
-
st.
|
| 145 |
-
st.write(
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
st.session_state["
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
st.write("
|
| 173 |
-
st.write("
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
if Task1 == "
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
st.
|
| 201 |
-
|
| 202 |
-
st.write("
|
| 203 |
-
st.write("
|
| 204 |
-
st.write("")
|
| 205 |
-
st.write("--
|
| 206 |
-
st.write("")
|
| 207 |
-
st.write("")
|
| 208 |
-
|
| 209 |
-
st.
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Importo librerie utili
|
| 2 |
+
import re
|
| 3 |
+
import json
|
| 4 |
+
import numpy as np
|
| 5 |
+
import pandas as pd
|
| 6 |
+
import streamlit as st
|
| 7 |
+
from itertools import product
|
| 8 |
+
from datetime import datetime
|
| 9 |
+
import matplotlib.pyplot as plt
|
| 10 |
+
from htbuilder.units import percent, px
|
| 11 |
+
from sklearn.model_selection import train_test_split
|
| 12 |
+
from htbuilder import HtmlElement, div, hr, a, p, img, styles
|
| 13 |
+
from sklearn.preprocessing import MinMaxScaler, StandardScaler, LabelBinarizer, OneHotEncoder
|
| 14 |
+
from sklearn.metrics import precision_score,recall_score,f1_score, accuracy_score, confusion_matrix, r2_score, precision_recall_curve, roc_auc_score, roc_curve
|
| 15 |
+
import os
|
| 16 |
+
# Create the folder only when it is missing: os.mkdir raises FileExistsError if the directory already exists (e.g. when this script is executed again).
os.makedirs("functions", exist_ok=True)
|
| 17 |
+
# Create the folder only when it is missing: os.mkdir raises FileExistsError if the directory already exists (e.g. when this script is executed again).
os.makedirs("models", exist_ok=True)
|
| 18 |
+
# Create the folder only when it is missing: os.mkdir raises FileExistsError if the directory already exists (e.g. when this script is executed again).
os.makedirs("test files", exist_ok=True)
|
| 19 |
+
# Importo i moduli custom presenti nella cartella "Functions" e il modello di rete neurale nella cartella "Model"
|
| 20 |
+
# In alternativa, lasciarli nella stessa cartella di questo file .py e chiamarli con from name_module import *
|
| 21 |
+
from model import NeuralNetwork # Modello Neural Network
|
| 22 |
+
from functions import remove_missing, feature_selection, imputation_process, missing_target, categoric_to_numeric # Pulizia e trasformazione dataset
|
| 23 |
+
from functions import custom_split, standardize_x_train, target_transformation, nn_builder, metrics_plot # Training, Finalizzazione, e Pipeline
|
| 24 |
+
from functions import model_finalization, test_pipeline # Finalizzazione e Pipeline
|
| 25 |
+
from functions import footer # Footer
|
| 26 |
+
|
| 27 |
+
##################################################################################################################################################################################################
|
| 28 |
+
|
| 29 |
+
# Imposto la pagina
|
| 30 |
+
st.set_page_config(
|
| 31 |
+
page_title = "Prediction with Neural Networks",
|
| 32 |
+
page_icon = "🧊",
|
| 33 |
+
layout = "wide",
|
| 34 |
+
menu_items = { 'About': "This is simple app to guide users and build a Neural Network model to make predictions." }
|
| 35 |
+
)
|
| 36 |
+
st.markdown('''<style> section.main > div {max-width:75rem} </style>''', unsafe_allow_html = True)
|
| 37 |
+
|
| 38 |
+
# Titolo
|
| 39 |
+
st.write("# Predictions with Neural Networks")
|
| 40 |
+
st.write("")
|
| 41 |
+
st.write("This is a simple app to guide you in the process of applying a custom Neural Network model to a dataset. Currently")
|
| 42 |
+
st.write("- The app supports binary classification, multivariate classification, and regression analyses.")
|
| 43 |
+
st.write("- The app cannot process dates in your dataset.")
|
| 44 |
+
st.write("")
|
| 45 |
+
st.write("### Upload Data")
|
| 46 |
+
uploaded_file = st.file_uploader("Choose a CSV file for the analysis") # Pulsante per upload dati (file CSV)
|
| 47 |
+
st.write("Important: the delimiter in the csv file must be a semicolon!")
|
| 48 |
+
|
| 49 |
+
if uploaded_file is not None: # Check se il file è stato caricato o meno - Tutto lo script si basa sul caricamento o meno di un file
|
| 50 |
+
st.write("File successfully uploaded!") # Messaggio di caricamento
|
| 51 |
+
dataframe = pd.read_csv(uploaded_file, delimiter = ';')
|
| 52 |
+
if st.checkbox('Show dataframe', key = 50):
|
| 53 |
+
st.write(dataframe)
|
| 54 |
+
|
| 55 |
+
# Primo check sui valori mancanti
|
| 56 |
+
st.write("Number of missing values")
|
| 57 |
+
st.write( pd.DataFrame( {'# Missing values': np.array(dataframe.isna().sum()), '% Missing values': np.array(100*dataframe.isna().sum()/dataframe.shape[0])}, index = dataframe.columns))
|
| 58 |
+
# The backslash is doubled so the emitted text is unchanged ("70\%") without using the invalid Python escape sequence "\%".
st.write("**Note**: Columns with more than 70\\% of missing data will be removed from the dataset")
|
| 59 |
+
# The backslash is doubled so the emitted text is unchanged ("70\%") without using the invalid Python escape sequence "\%".
st.write("**Note**: Rows with more than 70\\% of missing data will be removed from the dataset")
|
| 60 |
+
|
| 61 |
+
# Rimozione righe e colonne con più del 70% di valori mancanti
|
| 62 |
+
dataframe, Selected_columns_start = remove_missing.remove_missing(dataframe)
|
| 63 |
+
|
| 64 |
+
# Type of analysis
|
| 65 |
+
Task1 = st.selectbox( 'What is the task of this analysis?', ['','Classification','Regression'] )
|
| 66 |
+
st.write("If the task is 'Classification', the final model will predict classes; if 'Regression', it will predict numbers.")
|
| 67 |
+
|
| 68 |
+
# L'app si avvia solo se è stata scelta la tipologia di analisi
|
| 69 |
+
if Task1 :
|
| 70 |
+
#-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
| 71 |
+
# Selezione delle varie features (numeriche e categoriche) e della colonna target e correzione colonne numeriche del dataframe
|
| 72 |
+
st.text("")
|
| 73 |
+
st.text("")
|
| 74 |
+
st.text("")
|
| 75 |
+
st.write("### Select Target, Categorical, and Numerical features")
|
| 76 |
+
dataframe, Tar, Categ, Numer = feature_selection.feature_selection(dataframe, Task1)
|
| 77 |
+
st.write("Unselected columns will be excluded from the dataset.")
|
| 78 |
+
|
| 79 |
+
#-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
| 80 |
+
# Handle missing values in the categorical and numerical columns, and finally the missing values in the Target column (which are dropped)
|
| 81 |
+
st.text("")
|
| 82 |
+
st.text("")
|
| 83 |
+
st.text("")
|
| 84 |
+
st.write("### Deal with missing data")
|
| 85 |
+
st.write("If the target features has missing values, they will be dropped from the dataset")
|
| 86 |
+
dataframe, categ_impute, numer_impute, Sub_categ_list, Sub_num_list, step_further, a, b = imputation_process.imputation(dataframe, Categ, Numer)
|
| 87 |
+
dataframe, step_further = missing_target.missing_target(dataframe, Categ, Numer, Tar, a, b, step_further)
|
| 88 |
+
st.write("Number of missing values: %d" % dataframe.isna().sum().sum())
|
| 89 |
+
if st.checkbox('Show dataframe', key = 53):
|
| 90 |
+
st.write(dataframe)
|
| 91 |
+
|
| 92 |
+
#-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
| 93 |
+
# Conversione dei valori categorici in valori numerici
|
| 94 |
+
# Check dello step, della presenza di colonne categoriche e dell'assenza di valori mancanti (altrimenti, lo script si interrompe)
|
| 95 |
+
if (step_further == 3) and (len(Categ) != 0 and Categ != ["None"]) and (dataframe.isna().sum().sum() == 0) :
|
| 96 |
+
dataframe, Tra_categ_list, step_further = categoric_to_numeric.categoric_to_numeric(dataframe, Categ, step_further)
|
| 97 |
+
elif (dataframe.isna().sum().sum() != 0) :
|
| 98 |
+
st.write("Something's wrong with the data. Missing values are still there...")
|
| 99 |
+
else :
|
| 100 |
+
step_further, Tra_categ_list = 4, [[], None, []]
|
| 101 |
+
|
| 102 |
+
#-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
| 103 |
+
# Creazione dei set di Training e Validation per la valutazione del modello
|
| 104 |
+
# Check iniziale per mostrare la sezione
|
| 105 |
+
if step_further == 4 and (Categ or Numer) and dataframe.isna().sum().sum() == 0:
|
| 106 |
+
X, y, X_train, X_test, y_train, y_test, step_further, final_columns = custom_split.train_test_customsplit(dataframe, Tar, step_further)
|
| 107 |
+
st.session_state["Final_columns"] = final_columns
|
| 108 |
+
|
| 109 |
+
#-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
| 110 |
+
# Standardizzazione del dataframe X per il Training
|
| 111 |
+
# Check avanzamento e valori nulli
|
| 112 |
+
if step_further == 5 and dataframe.isna().sum().sum() == 0:
|
| 113 |
+
X_train, X_test, Tra_num, step_further, flag_stand = standardize_x_train.standardize_x_train(dataframe, X_train, X_test, step_further)
|
| 114 |
+
|
| 115 |
+
#-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
| 116 |
+
# Trasformazione della colonna Target
|
| 117 |
+
if step_further == 6 :
|
| 118 |
+
y_train, y_test, Norm_tar_list, step_further = target_transformation.target_transformation(dataframe, Tar, y_train, y_test, step_further, Task1)
|
| 119 |
+
if st.checkbox('Show Target (training set)', key = 61):
|
| 120 |
+
st.write(y_train)
|
| 121 |
+
|
| 122 |
+
#-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
| 123 |
+
# Costruzione della Rete Neurale
|
| 124 |
+
# Dopo il primo training, è possibile finalizzare il modello ed applicare un nuovo file di test
|
| 125 |
+
if step_further == 7 and Task1 :
|
| 126 |
+
# Scelta dei diversi parametri da parte dell'utente e costruzione dell'oggetto "Model"
|
| 127 |
+
Hidden_layers, Algo, Alpha, Regularization, Momentum, Early_stopping, Verbose, Max_iter, Function_, Batch, Decay, Lambda, Random_state, Patient, Final_metric = nn_builder.nn_builder(dataframe, Task1)
|
| 128 |
+
Model = NeuralNetwork.NeuralNet(task = Task1, function = Function_, Hidden_layers = Hidden_layers, algo = Algo, batch = Batch, alpha = float(Alpha), decay = float(Decay),
|
| 129 |
+
regularization = Regularization, Lambda = float(Lambda), Max_iter = int(Max_iter), momentum = float(Momentum), random_state = int(Random_state), verbose = Verbose,
|
| 130 |
+
early_stopping = Early_stopping, patient = int(Patient), flag_plot = False, metric = Final_metric)
|
| 131 |
+
st.text("")
|
| 132 |
+
st.text("")
|
| 133 |
+
st.text("")
|
| 134 |
+
if st.button('Start the training!'): # Pulsante per avviare il training
|
| 135 |
+
Model.Training(X_train, y_train, X_test, y_test) # Training NN
|
| 136 |
+
st.write('Training complete!')
|
| 137 |
+
metrics_plot.metrics_plot(Model, X_train, X_test, y_train, y_test, Task1, Norm_tar_list, Final_metric) # Calcolo metriche finali (per Regressione) e plot
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
#----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
| 141 |
+
# Model Finalization
|
| 142 |
+
st.text("")
|
| 143 |
+
st.text("")
|
| 144 |
+
st.text("")
|
| 145 |
+
st.write("Now, if you want, you can finalize the model!")
|
| 146 |
+
st.write("The Training and the Test set will be used together to create the final model.")
|
| 147 |
+
st.write("The dataset will be standardized and the Target normalized using the methods defined above.")
|
| 148 |
+
st.write(r"After the model finalization, you can download the JSON file with the model parameters and weights.")
|
| 149 |
+
|
| 150 |
+
flag_finalization = 0 # Flag finalizzazione
|
| 151 |
+
if "ButBut" not in st.session_state : # Pulsante finalizzazione
|
| 152 |
+
st.session_state["ButBut"] = False
|
| 153 |
+
else :
|
| 154 |
+
st.session_state["ButBut"] = st.button("Finalization of the model")
|
| 155 |
+
|
| 156 |
+
# Inizio processo di finalizzazione del modello: eseguo il training finale e salvo i parametri in un file JSON
|
| 157 |
+
if st.session_state["ButBut"] :
|
| 158 |
+
Final_model, Tra_num_list_final, flag_finalization, Norm_tar_list_final = model_finalization.finalization(X, y, Model, Task1, Final_metric, Tra_num, Norm_tar_list, flag_stand)
|
| 159 |
+
|
| 160 |
+
# Salvo tutto nella sessione (per far in modo che, una volta caricato il file di test, tutti parametri rimangano salvati)
|
| 161 |
+
st.session_state["Final_model"] = Final_model
|
| 162 |
+
st.session_state["Tra_num_list_final"] = Tra_num_list_final
|
| 163 |
+
st.session_state["Norm_tar_list_final"] = Norm_tar_list_final
|
| 164 |
+
st.session_state["flag_finalization"] = flag_finalization
|
| 165 |
+
|
| 166 |
+
# Salvataggio del modello finale nel file "Best_model_parameters.json"
|
| 167 |
+
file_name = "Best_model_parameters.json"
|
| 168 |
+
Final_model.Save_model(file_name)
|
| 169 |
+
|
| 170 |
+
#----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
| 171 |
+
# Model application on a new file
|
| 172 |
+
st.write("")
|
| 173 |
+
st.write("")
|
| 174 |
+
st.write("### Upload Data for testing the model")
|
| 175 |
+
st.write("If you finalized your model, you can upload a new file to apply the best model")
|
| 176 |
+
st.write("The file must have the same number of columns of the original file with the same names.")
|
| 177 |
+
uploaded_file_test = st.file_uploader("Choose a CSV file to apply the best model") # Upload a new file
|
| 178 |
+
if uploaded_file_test is not None and st.session_state["flag_finalization"] == 1: # Check se è stato caricato un file tramite pulsante
|
| 179 |
+
# Trasformazione del dataset con pipeline
|
| 180 |
+
X_test_final, y_test_final, dataframe_test = test_pipeline.pipeline_nn(uploaded_file_test, Selected_columns_start, Numer, Categ, Tar, Sub_num_list, Sub_categ_list,
|
| 181 |
+
Tra_categ_list, st.session_state["Final_columns"], st.session_state["Tra_num_list_final"], st.session_state["Norm_tar_list_final"], Task1)
|
| 182 |
+
|
| 183 |
+
Predictions_test = st.session_state["Final_model"].Predict( X_test_final ) # Applicazione del modello finale
|
| 184 |
+
if Task1 == "Classification" : # Calcolo predizioni e probabilità (solo per classificazione)
|
| 185 |
+
Predictions_prob_test = st.session_state["Final_model"].Predict_proba( X_test_final )
|
| 186 |
+
dataframe_test["Probability"] = Predictions_prob_test
|
| 187 |
+
if Task1 == "Regression" :
|
| 188 |
+
if st.session_state["Norm_tar_list_final"][0] == 3:
|
| 189 |
+
Predictions_test = st.session_state["Norm_tar_list_final"][1].inverse_transform(Predictions_test)
|
| 190 |
+
elif st.session_state["Norm_tar_list_final"][0] == 1:
|
| 191 |
+
# NOTE(review): presumably undoes a log10-based target transform (code 1 in Norm_tar_list_final); the inverse of log10(y + 1) would be 10**p - 1, so confirm the "+ 1" against target_transformation.
Predictions_test = 10**Predictions_test + 1
|
| 192 |
+
|
| 193 |
+
dataframe_test["Predictions"] = Predictions_test
|
| 194 |
+
st.write("")
|
| 195 |
+
st.write("Uploaded table with predictions:")
|
| 196 |
+
st.write( dataframe_test )
|
| 197 |
+
|
| 198 |
+
# Converto il dataframe e lo salvo in un file csv (se l'utente clicca un pulsante)
|
| 199 |
+
dataframe_test = dataframe_test.to_csv(index = False).encode('utf-8')
|
| 200 |
+
st.download_button( "Download the dataframe", dataframe_test, "Predictions.csv", "text/csv", key = 'download-csv' )
|
| 201 |
+
|
| 202 |
+
st.write("")
|
| 203 |
+
st.write("If you want to make changes to the original model:")
|
| 204 |
+
st.write(" 1. Modify the model parameters accordingly")
|
| 205 |
+
st.write(" 2. Re-train and re-finalize the model")
|
| 206 |
+
st.write("After that, you will see the new predictions in the uploaded dataframe.")
|
| 207 |
+
st.write("")
|
| 208 |
+
st.write("-----------------------------")
|
| 209 |
+
st.write("")
|
| 210 |
+
st.write("")
|
| 211 |
+
html_str = f"""<style>p.a {{font: bold 23.5px Sans;}}</style><p class="a">Everything is done!</p>"""
|
| 212 |
+
st.markdown(html_str, unsafe_allow_html = True)
|
| 213 |
+
|
| 214 |
+
# If a task has not been chosen
|
| 215 |
+
else :
|
| 216 |
+
st.write("")
|
| 217 |
+
st.write("")
|
| 218 |
+
st.write("Choose a task for this analysis!")
|
| 219 |
+
|
| 220 |
+
# --------------------------------------------------------------------------------------------------------------------------------------------------------
|
| 221 |
+
# Footer (le funzioni utilizzate sono in functions.py)
|
| 222 |
+
if __name__ == "__main__":
|
| 223 |
+
myargs = [ "Made in ", footer.image_render('https://avatars3.githubusercontent.com/u/45109972?s=400&v=4', width = px(25), height = px(25)),
|
| 224 |
+
" by ", footer.link_render("https://www.linkedin.com/in/samuele-campitiello-ph-d-913b90104/", "Samuele Campitiello") ]
|
| 225 |
+
footer.footer(*myargs)
|