Spaces:
Sleeping
Sleeping
| # Import the libraries used by the app | |
| import re | |
| import json | |
| import numpy as np | |
| import pandas as pd | |
| import streamlit as st | |
| from itertools import product | |
| from datetime import datetime | |
| import matplotlib.pyplot as plt | |
| from htbuilder.units import percent, px | |
| from sklearn.model_selection import train_test_split | |
| from htbuilder import HtmlElement, div, hr, a, p, img, styles | |
| from sklearn.preprocessing import MinMaxScaler, StandardScaler, LabelBinarizer, OneHotEncoder | |
| from sklearn.metrics import precision_score,recall_score,f1_score, accuracy_score, confusion_matrix, r2_score, precision_recall_curve, roc_auc_score, roc_curve | |
| # Import the custom modules from the "Functions" folder and the neural-network model from the "Model" folder | |
| # Alternatively, keep them in the same folder as this .py file and load them with: from name_module import * | |
| from model import NeuralNetwork # Modello Neural Network | |
| from functions import remove_missing, feature_selection, imputation_process, missing_target, categoric_to_numeric # Pulizia e trasformazione dataset | |
| from functions import custom_split, standardize_x_train, target_transformation, nn_builder, metrics_plot # Training, Finalizzazione, e Pipeline | |
| from functions import model_finalization, test_pipeline # Finalizzazione e Pipeline | |
| from functions import footer # Footer | |
| ################################################################################################################################################################################################## | |
# Page configuration: browser-tab title/icon, wide layout and the "About" menu entry.
_page_settings = {
    "page_title": "Prediction with Neural Networks",
    "page_icon": "🧊",
    "layout": "wide",
    "menu_items": {'About': "This is simple app to guide users and build a Neural Network model to make predictions."},
}
st.set_page_config(**_page_settings)

# Cap the width of the main container through injected CSS.
st.markdown('''<style> section.main > div {max-width:75rem} </style>''', unsafe_allow_html=True)

# Title, short introduction and the heading of the upload section,
# written in order with one st.write call per entry.
_intro_lines = (
    "# Predictions with Neural Networks",
    "",
    "This is a simple app to guide you in the process of applying a custom Neural Network model to a dataset. Currently",
    "- The app supports binary classification, multivariate classification, and regression analyses.",
    "- The app cannot process dates in your dataset.",
    "",
    "### Upload Data",
)
for _intro_line in _intro_lines:
    st.write(_intro_line)

# Upload widget for the training data (CSV file); the rest of the script
# branches on whether `uploaded_file` is set.
uploaded_file = st.file_uploader("Choose a CSV file for the analysis")
st.write("Important: the delimiter in the csv file must be a semicolon!")
def _missing_total(frame):
    """Return the total number of missing (NaN) cells in *frame*."""
    return frame.isna().sum().sum()


def _vertical_gap(lines=3):
    """Render *lines* empty text elements to add vertical spacing on the page."""
    for _ in range(lines):
        st.text("")


# Main workflow. Nothing below runs until a CSV file has been uploaded.
# The pipeline is gated by `step_further`, a counter returned by each helper:
# a stage is rendered only when the previous one reported completion.
# NOTE(review): the source this block was recovered from had lost its
# indentation; the nesting below was reconstructed from the if/elif/else
# pairing and from where each name is first bound -- confirm against the
# deployed script, in particular the placement of the closing messages
# inside the test-upload branch.
if uploaded_file is not None:
    st.write("File successfully uploaded!")
    dataframe = pd.read_csv(uploaded_file, delimiter=';')
    if st.checkbox('Show dataframe', key=50):
        st.write(dataframe)

    # First look at missing values: absolute count and percentage per column.
    st.write("Number of missing values")
    missing_per_column = dataframe.isna().sum()
    st.write(pd.DataFrame(
        {
            '# Missing values': np.array(missing_per_column),
            '% Missing values': np.array(100 * missing_per_column / dataframe.shape[0]),
        },
        index=dataframe.columns,
    ))
    # Raw strings: "\%" is not a valid Python escape sequence, so a normal
    # literal triggers a warning. The emitted text is unchanged.
    st.write(r"**Note**: Columns with more than 70\% of missing data will be removed from the dataset")
    st.write(r"**Note**: Rows with more than 70\% of missing data will be removed from the dataset")

    # Drop rows/columns with too many missing values (the threshold is the one
    # advertised in the notes above; the rule itself lives in remove_missing).
    dataframe, Selected_columns_start = remove_missing.remove_missing(dataframe)

    # Type of analysis. The empty first option means "not chosen yet".
    Task1 = st.selectbox('What is the task of this analysis?', ['', 'Classification', 'Regression'])
    st.write("If the task is 'Classification', the final model will predict classes; if 'Regression', it will predict numbers.")

    # The rest of the app is shown only once a task has been selected.
    if Task1:
        # ---------------------------------------------------------------------
        # Selection of the target, categorical and numerical columns.
        _vertical_gap()
        st.write("### Select Target, Categorical, and Numerical features")
        dataframe, Tar, Categ, Numer = feature_selection.feature_selection(dataframe, Task1)
        st.write("Unselected columns will be excluded from the dataset.")

        # ---------------------------------------------------------------------
        # Missing-data handling for the feature columns, then for the target.
        _vertical_gap()
        st.write("### Deal with missing data")
        st.write("If the target features has missing values, they will be dropped from the dataset")
        # The last two return values are only forwarded to missing_target().
        # They are not named `a`/`b` so that the `a` element imported from
        # htbuilder at the top of the file is not rebound.
        (dataframe, categ_impute, numer_impute, Sub_categ_list, Sub_num_list,
         step_further, imput_a, imput_b) = imputation_process.imputation(dataframe, Categ, Numer)
        dataframe, step_further = missing_target.missing_target(
            dataframe, Categ, Numer, Tar, imput_a, imput_b, step_further)
        missing_total = _missing_total(dataframe)
        st.write("Number of missing values: %d" % missing_total)
        if st.checkbox('Show dataframe', key=53):
            st.write(dataframe)

        # ---------------------------------------------------------------------
        # Categorical -> numerical conversion.
        # Runs only at step 3, with at least one categorical column selected
        # and no missing values left; otherwise either report the leftover
        # missing values or skip straight to step 4 with an empty
        # transformation record.
        if (step_further == 3) and (len(Categ) != 0 and Categ != ["None"]) and (missing_total == 0):
            dataframe, Tra_categ_list, step_further = categoric_to_numeric.categoric_to_numeric(
                dataframe, Categ, step_further)
        elif missing_total != 0:
            st.write("Something's wrong with the data. Missing values are still there...")
        else:
            step_further, Tra_categ_list = 4, [[], None, []]

        # ---------------------------------------------------------------------
        # Training / validation split used to evaluate the model.
        if step_further == 4 and (Categ or Numer) and _missing_total(dataframe) == 0:
            X, y, X_train, X_test, y_train, y_test, step_further, final_columns = \
                custom_split.train_test_customsplit(dataframe, Tar, step_further)
            # Kept in the session so it is still available when a test file is uploaded.
            st.session_state["Final_columns"] = final_columns

        # ---------------------------------------------------------------------
        # Standardization of the feature matrix for training.
        if step_further == 5 and _missing_total(dataframe) == 0:
            X_train, X_test, Tra_num, step_further, flag_stand = \
                standardize_x_train.standardize_x_train(dataframe, X_train, X_test, step_further)

        # ---------------------------------------------------------------------
        # Transformation of the target column.
        if step_further == 6:
            y_train, y_test, Norm_tar_list, step_further = target_transformation.target_transformation(
                dataframe, Tar, y_train, y_test, step_further, Task1)
            if st.checkbox('Show Target (training set)', key=61):
                st.write(y_train)

        # ---------------------------------------------------------------------
        # Neural-network construction, training, finalization and application.
        if step_further == 7 and Task1:
            # User-chosen hyper-parameters, then the model object built from them.
            (Hidden_layers, Algo, Alpha, Regularization, Momentum, Early_stopping, Verbose,
             Max_iter, Function_, Batch, Decay, Lambda, Random_state, Patient,
             Final_metric) = nn_builder.nn_builder(dataframe, Task1)
            Model = NeuralNetwork.NeuralNet(
                task=Task1, function=Function_, Hidden_layers=Hidden_layers, algo=Algo,
                batch=Batch, alpha=float(Alpha), decay=float(Decay),
                regularization=Regularization, Lambda=float(Lambda), Max_iter=int(Max_iter),
                momentum=float(Momentum), random_state=int(Random_state), verbose=Verbose,
                early_stopping=Early_stopping, patient=int(Patient), flag_plot=False,
                metric=Final_metric)
            _vertical_gap()

            if st.button('Start the training!'):
                Model.Training(X_train, y_train, X_test, y_test)
                st.write('Training complete!')
                # Final metrics and plots for the trained model.
                metrics_plot.metrics_plot(Model, X_train, X_test, y_train, y_test,
                                          Task1, Norm_tar_list, Final_metric)

            # -----------------------------------------------------------------
            # Model finalization.
            _vertical_gap()
            st.write("Now, if you want, you can finalize the model!")
            st.write("The Training and the Test set will be used together to create the final model.")
            st.write("The dataset will be standardized and the Target normalized using the methods defined above.")
            st.write("After the model finalization, you can download the JSON file with the model parameters and weights.")
            flag_finalization = 0
            # The first time this point is reached the key is only initialised
            # and the button is NOT drawn; from the next rerun on, the button
            # is drawn and its pressed state is stored under the same key.
            if "ButBut" not in st.session_state:
                st.session_state["ButBut"] = False
            else:
                st.session_state["ButBut"] = st.button("Finalization of the model")

            # Final training on the full data; results are stored in the
            # session so they survive the rerun triggered by a test upload.
            if st.session_state["ButBut"]:
                Final_model, Tra_num_list_final, flag_finalization, Norm_tar_list_final = \
                    model_finalization.finalization(X, y, Model, Task1, Final_metric,
                                                    Tra_num, Norm_tar_list, flag_stand)
                st.session_state["Final_model"] = Final_model
                st.session_state["Tra_num_list_final"] = Tra_num_list_final
                st.session_state["Norm_tar_list_final"] = Norm_tar_list_final
                st.session_state["flag_finalization"] = flag_finalization
                # Persist the final model parameters to a JSON file.
                file_name = "Best_model_parameters.json"
                Final_model.Save_model(file_name)

            # -----------------------------------------------------------------
            # Application of the finalized model to a new file.
            st.write("")
            st.write("")
            st.write("### Upload Data for testing the model")
            st.write("If you finalized your model, you can upload a new file to apply the best model")
            st.write("The file must have the same number of columns of the original file with the same names.")
            uploaded_file_test = st.file_uploader("Choose a CSV file to apply the best model")
            # .get() instead of indexing: the key does not exist until a model
            # has been finalized, and a plain lookup raised KeyError when a
            # test file was uploaded before that.
            model_is_finalized = st.session_state.get("flag_finalization", 0) == 1
            if uploaded_file_test is not None and model_is_finalized:
                # Re-apply to the new file the transformations recorded during training.
                X_test_final, y_test_final, dataframe_test = test_pipeline.pipeline_nn(
                    uploaded_file_test, Selected_columns_start, Numer, Categ, Tar,
                    Sub_num_list, Sub_categ_list, Tra_categ_list,
                    st.session_state["Final_columns"], st.session_state["Tra_num_list_final"],
                    st.session_state["Norm_tar_list_final"], Task1)
                final_model = st.session_state["Final_model"]
                Predictions_test = final_model.Predict(X_test_final)

                # Probabilities are added for classification only.
                if Task1 == "Classification":
                    Predictions_prob_test = final_model.Predict_proba(X_test_final)
                    dataframe_test["Probability"] = Predictions_prob_test

                # For regression, map predictions back to the original target scale.
                if Task1 == "Regression":
                    target_norm = st.session_state["Norm_tar_list_final"]
                    if target_norm[0] == 3:
                        # Code 3: the object stored at index 1 provides inverse_transform.
                        Predictions_test = target_norm[1].inverse_transform(Predictions_test)
                    elif target_norm[0] == 1:
                        # NOTE(review): presumably undoes a log10 transform of the
                        # target -- verify against target_transformation whether the
                        # offset should be "+ 1" or "- 1".
                        Predictions_test = 10**Predictions_test + 1

                dataframe_test["Predictions"] = Predictions_test
                st.write("")
                st.write("Uploaded table with predictions:")
                st.write(dataframe_test)

                # CSV export offered through a download button.
                csv_bytes = dataframe_test.to_csv(index=False).encode('utf-8')
                st.download_button("Download the dataframe", csv_bytes, "Predictions.csv",
                                   "text/csv", key='download-csv')

                st.write("")
                st.write("If you want to make changes to the original model:")
                st.write(" 1. Modify the model parameters accordingly")
                st.write(" 2. Re-train and re-finalize the model")
                st.write("After that, you will see the new predictions in the uploaded dataframe.")
                st.write("")
                st.write("-----------------------------")
                st.write("")
                st.write("")
                # Plain literal (no placeholders were interpolated); same HTML as before.
                html_str = """<style>p.a {font: bold 23.5px Sans;}</style><p class="a">Everything is done!</p>"""
                st.markdown(html_str, unsafe_allow_html=True)

    # No task has been chosen yet.
    else:
        st.write("")
        st.write("")
        st.write("Choose a task for this analysis!")
| # -------------------------------------------------------------------------------------------------------------------------------------------------------- | |
# Page footer: "Made in <avatar> by <author link>".
# The rendering helpers come from the imported `footer` module.
if __name__ == "__main__":
    avatar_img = footer.image_render(
        'https://avatars3.githubusercontent.com/u/45109972?s=400&v=4',
        width=px(25),
        height=px(25),
    )
    author_link = footer.link_render(
        "https://www.linkedin.com/in/samuele-campitiello-ph-d-913b90104/",
        "Samuele Campitiello",
    )
    footer.footer("Made in ", avatar_img, " by ", author_link)