# Streamlit app that walks the user through building a custom Neural Network:
# upload a CSV, clean it, pick the features, train, finalize the model and
# apply it to a new file. Every step below is driven by the helpers in the
# project-local "functions" package and the "model" package.

# Import useful libraries
import re
import json
import numpy as np
import pandas as pd
import streamlit as st
from itertools import product
from datetime import datetime
import matplotlib.pyplot as plt
from htbuilder.units import percent, px
from sklearn.model_selection import train_test_split
from htbuilder import HtmlElement, div, hr, a, p, img, styles
from sklearn.preprocessing import MinMaxScaler, StandardScaler, LabelBinarizer, OneHotEncoder
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, confusion_matrix, r2_score, precision_recall_curve, roc_auc_score, roc_curve

# Import the custom modules located in the "Functions" folder and the neural
# network model located in the "Model" folder.
# Alternatively, leave them in the same folder as this .py file and import
# them with "from name_module import *".
from model import NeuralNetwork  # Neural Network model
from functions import remove_missing, feature_selection, imputation_process, missing_target, categoric_to_numeric  # Dataset cleaning and transformation
from functions import custom_split, standardize_x_train, target_transformation, nn_builder, metrics_plot  # Training, finalization and pipeline
from functions import model_finalization, test_pipeline  # Finalization and pipeline
from functions import footer  # Footer

##################################################################################################################################################################################################

# Page setup
st.set_page_config(
    page_title="Prediction with Neural Networks",
    page_icon="🧊",
    layout="wide",
    menu_items={
        'About': "This is simple app to guide users and build a Neural Network model to make predictions."
    },
)
# NOTE(review): the markdown payload is empty here; it looks like custom
# CSS/HTML may have been lost at some point — confirm against the repository.
st.markdown('''''', unsafe_allow_html=True)

# Title
st.write("# Predictions with Neural Networks")
st.write("")
st.write("This is a simple app to guide you in the process of applying a custom Neural Network model to a dataset. Currently")
st.write("- The app supports binary classification, multivariate classification, and regression analyses.")
st.write("- The app cannot process dates in your dataset.")
st.write("")
st.write("### Upload Data")
uploaded_file = st.file_uploader("Choose a CSV file for the analysis")  # Upload button (CSV file)
st.write("Important: the delimiter in the csv file must be a semicolon!")

# The whole script hinges on whether a file has been uploaded or not
if uploaded_file is not None:
    st.write("File successfully uploaded!")
    dataframe = pd.read_csv(uploaded_file, delimiter=';')
    if st.checkbox('Show dataframe', key=50):
        st.write(dataframe)

    # First check on missing values (count and percentage per column)
    st.write("Number of missing values")
    missing_per_column = dataframe.isna().sum()
    st.write(
        pd.DataFrame(
            {
                '# Missing values': np.array(missing_per_column),
                '% Missing values': np.array(100 * missing_per_column / dataframe.shape[0]),
            },
            index=dataframe.columns,
        )
    )
    # "\\%" keeps the backslash that the original "\%" produced at runtime,
    # without relying on an invalid escape sequence.
    st.write("**Note**: Columns with more than 70\\% of missing data will be removed from the dataset")
    st.write("**Note**: Rows with more than 70\\% of missing data will be removed from the dataset")

    # Remove rows and columns with more than 70% of missing values
    dataframe, Selected_columns_start = remove_missing.remove_missing(dataframe)

    # Type of analysis
    Task1 = st.selectbox(
        'What is the task of this analysis?',
        ['', 'Classification', 'Regression'],
    )
    st.write("If the task is 'Classification', the final model will predict classes; if 'Regression', it will predict numbers.")

    # The app only proceeds once the type of analysis has been chosen
    if Task1:
        # ---------------------------------------------------------------------
        # Selection of the features (numerical and categorical) and of the
        # target column, and correction of the numerical columns
        st.text("")
        st.text("")
        st.text("")
        st.write("### Select Target, Categorical, and Numerical features")
        dataframe, Tar, Categ, Numer = feature_selection.feature_selection(dataframe, Task1)
        st.write("Unselected columns will be excluded from the dataset.")

        # ---------------------------------------------------------------------
        # Handling of missing values in the categorical and numerical columns,
        # and finally in the target column (where they are dropped)
        st.text("")
        st.text("")
        st.text("")
        st.write("### Deal with missing data")
        st.write("If the target features has missing values, they will be dropped from the dataset")
        # The last two returned values are only forwarded to missing_target;
        # they are deliberately not named "a"/"b" so that the "a" element
        # imported from htbuilder is not shadowed.
        (
            dataframe,
            categ_impute,
            numer_impute,
            Sub_categ_list,
            Sub_num_list,
            step_further,
            imputation_aux_a,
            imputation_aux_b,
        ) = imputation_process.imputation(dataframe, Categ, Numer)
        dataframe, step_further = missing_target.missing_target(
            dataframe, Categ, Numer, Tar, imputation_aux_a, imputation_aux_b, step_further
        )
        # The dataframe is not modified between here and the categorical
        # conversion check below, so the count is computed once.
        remaining_missing = dataframe.isna().sum().sum()
        st.write("Number of missing values: %d" % remaining_missing)
        if st.checkbox('Show dataframe', key=53):
            st.write(dataframe)

        # ---------------------------------------------------------------------
        # Conversion of categorical values into numerical values.
        # Requires the right step, at least one categorical column and no
        # missing values left (otherwise the flow stops here).
        has_categorical = bool(Categ) and Categ != ["None"]
        if step_further == 3 and has_categorical and remaining_missing == 0:
            dataframe, Tra_categ_list, step_further = categoric_to_numeric.categoric_to_numeric(dataframe, Categ, step_further)
        elif remaining_missing != 0:
            st.write("Something's wrong with the data. Missing values are still there...")
        else:
            step_further, Tra_categ_list = 4, [[], None, []]

        # ---------------------------------------------------------------------
        # Creation of the Training and Validation sets used to evaluate the model
        if step_further == 4 and (Categ or Numer) and dataframe.isna().sum().sum() == 0:
            X, y, X_train, X_test, y_train, y_test, step_further, final_columns = custom_split.train_test_customsplit(dataframe, Tar, step_further)
            st.session_state["Final_columns"] = final_columns

        # ---------------------------------------------------------------------
        # Standardization of the X dataframe for the training
        if step_further == 5 and dataframe.isna().sum().sum() == 0:
            X_train, X_test, Tra_num, step_further, flag_stand = standardize_x_train.standardize_x_train(dataframe, X_train, X_test, step_further)

        # ---------------------------------------------------------------------
        # Transformation of the target column
        if step_further == 6:
            y_train, y_test, Norm_tar_list, step_further = target_transformation.target_transformation(dataframe, Tar, y_train, y_test, step_further, Task1)
            if st.checkbox('Show Target (training set)', key=61):
                st.write(y_train)

        # ---------------------------------------------------------------------
        # Construction of the Neural Network.
        # After the first training, the model can be finalized and applied to
        # a new test file.
        if step_further == 7 and Task1:
            # Parameters chosen by the user and construction of the "Model" object
            (
                Hidden_layers,
                Algo,
                Alpha,
                Regularization,
                Momentum,
                Early_stopping,
                Verbose,
                Max_iter,
                Function_,
                Batch,
                Decay,
                Lambda,
                Random_state,
                Patient,
                Final_metric,
            ) = nn_builder.nn_builder(dataframe, Task1)
            Model = NeuralNetwork.NeuralNet(
                task=Task1,
                function=Function_,
                Hidden_layers=Hidden_layers,
                algo=Algo,
                batch=Batch,
                alpha=float(Alpha),
                decay=float(Decay),
                regularization=Regularization,
                Lambda=float(Lambda),
                Max_iter=int(Max_iter),
                momentum=float(Momentum),
                random_state=int(Random_state),
                verbose=Verbose,
                early_stopping=Early_stopping,
                patient=int(Patient),
                flag_plot=False,
                metric=Final_metric,
            )

            st.text("")
            st.text("")
            st.text("")
            if st.button('Start the training!'):  # Button that starts the training
                Model.Training(X_train, y_train, X_test, y_test)  # NN training
                st.write('Training complete!')
                # Final metrics and plots
                metrics_plot.metrics_plot(Model, X_train, X_test, y_train, y_test, Task1, Norm_tar_list, Final_metric)

            # -----------------------------------------------------------------
            # Model finalization
            st.text("")
            st.text("")
            st.text("")
            st.write("Now, if you want, you can finalize the model!")
            st.write("The Training and the Test set will be used together to create the final model.")
            st.write("The dataset will be standardized and the Target normalized using the methods defined above.")
            st.write(r"After the model finalization, you can download the JSON file with the model parameters and weights.")

            # Finalization button: on the very first run only the session key
            # is created; the button itself is rendered from the next run on.
            if "ButBut" not in st.session_state:
                st.session_state["ButBut"] = False
            else:
                st.session_state["ButBut"] = st.button("Finalization of the model")

            # Finalization: run the final training and save the parameters to a JSON file
            if st.session_state["ButBut"]:
                Final_model, Tra_num_list_final, flag_finalization, Norm_tar_list_final = model_finalization.finalization(
                    X, y, Model, Task1, Final_metric, Tra_num, Norm_tar_list, flag_stand
                )

                # Keep everything in the session so that the parameters
                # survive the rerun triggered by uploading the test file
                st.session_state["Final_model"] = Final_model
                st.session_state["Tra_num_list_final"] = Tra_num_list_final
                st.session_state["Norm_tar_list_final"] = Norm_tar_list_final
                st.session_state["flag_finalization"] = flag_finalization

                # Save the final model to "Best_model_parameters.json"
                file_name = "Best_model_parameters.json"
                Final_model.Save_model(file_name)

            # -----------------------------------------------------------------
            # Model application on a new file
            st.write("")
            st.write("")
            st.write("### Upload Data for testing the model")
            st.write("If you finalized your model, you can upload a new file to apply the best model")
            st.write("The file must have the same number of columns of the original file with the same names.")
            uploaded_file_test = st.file_uploader("Choose a CSV file to apply the best model")  # Upload a new file

            # "flag_finalization" only exists in the session after a
            # finalization; .get() avoids a KeyError when a test file is
            # uploaded before the model has been finalized.
            model_is_finalized = st.session_state.get("flag_finalization", 0) == 1
            if uploaded_file_test is not None and model_is_finalized:
                # Transform the uploaded dataset with the pipeline
                X_test_final, y_test_final, dataframe_test = test_pipeline.pipeline_nn(
                    uploaded_file_test,
                    Selected_columns_start,
                    Numer,
                    Categ,
                    Tar,
                    Sub_num_list,
                    Sub_categ_list,
                    Tra_categ_list,
                    st.session_state["Final_columns"],
                    st.session_state["Tra_num_list_final"],
                    st.session_state["Norm_tar_list_final"],
                    Task1,
                )
                final_model = st.session_state["Final_model"]
                Predictions_test = final_model.Predict(X_test_final)  # Apply the final model

                if Task1 == "Classification":
                    # Probabilities are only computed for classification
                    Predictions_prob_test = final_model.Predict_proba(X_test_final)
                    dataframe_test["Probability"] = Predictions_prob_test

                if Task1 == "Regression":
                    # Undo the target normalization chosen during training
                    target_norm = st.session_state["Norm_tar_list_final"]
                    if target_norm[0] == 3:
                        Predictions_test = target_norm[1].inverse_transform(Predictions_test)
                    elif target_norm[0] == 1:
                        # NOTE(review): presumably the inverse of a log10
                        # transform — confirm the "+ 1" against
                        # target_transformation (log10(y + 1) would need "- 1").
                        Predictions_test = 10**Predictions_test + 1

                dataframe_test["Predictions"] = Predictions_test
                st.write("")
                st.write("Uploaded table with predictions:")
                st.write(dataframe_test)

                # Convert the dataframe to CSV bytes, downloadable through a button
                predictions_csv = dataframe_test.to_csv(index=False).encode('utf-8')
                st.download_button(
                    "Download the dataframe",
                    predictions_csv,
                    "Predictions.csv",
                    "text/csv",
                    key='download-csv',
                )

                st.write("")
                st.write("If you want to make changes to the original model:")
                st.write(" 1. Modify the model parameters accordingly")
                st.write(" 2. Re-train and re-finalize the model")
                st.write("After that, you will see the new predictions in the uploaded dataframe.")
                st.write("")
                st.write("-----------------------------")
                st.write("")
                st.write("")
                # NOTE(review): only plain text is present here although it is
                # rendered with unsafe_allow_html — the surrounding HTML markup
                # may have been lost; confirm against the repository.
                html_str = """

Everything is done!

"""
                st.markdown(html_str, unsafe_allow_html=True)

    # If a task has not been chosen
    else:
        st.write("")
        st.write("")
        st.write("Choose a task for this analysis!")

# --------------------------------------------------------------------------------------------------------------------------------------------------------
# Footer (the functions used are in functions.py)
if __name__ == "__main__":
    myargs = [
        "Made in ",
        footer.image_render('https://avatars3.githubusercontent.com/u/45109972?s=400&v=4', width=px(25), height=px(25)),
        " by ",
        footer.link_render("https://www.linkedin.com/in/samuele-campitiello-ph-d-913b90104/", "Samuele Campitiello"),
    ]
    footer.footer(*myargs)