File size: 16,366 Bytes
3372e4d
 
 
 
 
 
 
 
 
 
 
 
 
 
075049b
3372e4d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
# Importo librerie utili
import re
import json
import numpy as np
import pandas as pd
import streamlit as st
from itertools import product
from datetime import datetime
import matplotlib.pyplot as plt
from htbuilder.units import percent, px
from sklearn.model_selection import train_test_split
from htbuilder import HtmlElement, div, hr, a, p, img, styles
from sklearn.preprocessing import MinMaxScaler, StandardScaler, LabelBinarizer, OneHotEncoder
from sklearn.metrics import precision_score,recall_score,f1_score, accuracy_score, confusion_matrix, r2_score, precision_recall_curve, roc_auc_score, roc_curve

# Importo i moduli custom presenti nella cartella "Functions" e il modello di rete neurale nella cartella "Model"
# In alternativa, lasciarli nella stessa cartella di questo file .py e chiamarli con from name_module import * 
from model import NeuralNetwork                                                                                        # Modello Neural Network
from functions import remove_missing, feature_selection, imputation_process, missing_target, categoric_to_numeric      # Pulizia e trasformazione dataset
from functions import custom_split, standardize_x_train, target_transformation, nn_builder, metrics_plot               # Training, Finalizzazione, e Pipeline
from functions import model_finalization, test_pipeline                                                                # Finalizzazione e Pipeline
from functions import footer                                                                                           # Footer

##################################################################################################################################################################################################

# Page setup: browser-tab title and icon, wide layout, and the text shown in the "About" menu entry
_page_options = {
    "page_title": "Prediction with Neural Networks",
    "page_icon": "🧊",
    "layout": "wide",
    "menu_items": {'About': "This is simple app to guide users and build a Neural Network model to make predictions."},
}
st.set_page_config(**_page_options)

# Injected CSS that caps the width of the main section's container at 75rem
_width_css = '''<style> section.main > div {max-width:75rem} </style>'''
st.markdown(_width_css, unsafe_allow_html = True)

# Title and short description of what the app currently supports, written in order
for _intro_line in (
    "# Predictions with Neural Networks",
    "",
    "This is a simple app to guide you in the process of applying a custom Neural Network model to a dataset. Currently",
    "- The app supports binary classification, multivariate classification, and regression analyses.",
    "- The app cannot process dates in your dataset.",
    "",
    "### Upload Data",
):
    st.write(_intro_line)

# Upload widget for the input data (CSV file); the reminder about the delimiter is shown right below it
uploaded_file = st.file_uploader("Choose a CSV file for the analysis")
st.write("Important: the delimiter in the csv file must be a semicolon!")

# Main flow of the app. Everything below runs only when a CSV file has been uploaded.
# The flow is a sequence of gated sections: each helper returns an updated `step_further`
# value and the next section is shown only when `step_further` has reached the expected step.
if uploaded_file is not None:
    st.write("File successfully uploaded!")                                   # Upload confirmation message
    dataframe = pd.read_csv(uploaded_file, delimiter = ';')                   # The CSV is parsed with a semicolon delimiter
    if st.checkbox('Show dataframe', key = 50):
        st.write(dataframe)

    # First check on missing values: absolute count and percentage per column
    st.write("Number of missing values")
    st.write(  pd.DataFrame( {'# Missing values': np.array(dataframe.isna().sum()), '% Missing values': np.array(100*dataframe.isna().sum()/dataframe.shape[0])}, index = dataframe.columns))
    # Raw strings: the text sent to Streamlit still contains the literal "\%", but without relying on
    # the invalid escape sequence "\%" (DeprecationWarning / SyntaxWarning on recent Python versions)
    st.write(r"**Note**: Columns with more than 70\% of missing data will be removed from the dataset")
    st.write(r"**Note**: Rows with more than 70\% of missing data will be removed from the dataset")

    # Removal of rows and columns with more than 70% of missing values
    dataframe, Selected_columns_start = remove_missing.remove_missing(dataframe)

    # Type of analysis
    Task1 = st.selectbox( 'What is the task of this analysis?', ['','Classification','Regression'] )
    st.write("If the task is 'Classification', the final model will predict classes; if 'Regression', it will predict numbers.")

    # The rest of the app starts only once a task has been chosen (the default selection is the empty string)
    if Task1 :
        #-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
        # Selection of the features (numerical and categorical) and of the target column, and correction of the numerical columns of the dataframe
        st.text("")
        st.text("")
        st.text("")
        st.write("### Select Target, Categorical, and Numerical features")
        dataframe, Tar, Categ, Numer = feature_selection.feature_selection(dataframe, Task1)
        st.write("Unselected columns will be excluded from the dataset.")

        #-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
        # Handling of missing values in the categorical and numerical columns, and finally of missing values in the Target column (which are dropped)
        st.text("")
        st.text("")
        st.text("")
        st.write("### Deal with missing data")
        st.write("If the target features has missing values, they will be dropped from the dataset")
        # NOTE(review): `a` rebinds the name `a` imported from htbuilder at the top of the file; nothing below
        # in this script uses the htbuilder `a`, but a more descriptive name would avoid the shadowing.
        dataframe, categ_impute, numer_impute, Sub_categ_list, Sub_num_list, step_further, a, b = imputation_process.imputation(dataframe, Categ, Numer)
        dataframe, step_further = missing_target.missing_target(dataframe, Categ, Numer, Tar, a, b, step_further)
        st.write("Number of missing values: %d" % dataframe.isna().sum().sum())
        if st.checkbox('Show dataframe', key = 53):
            st.write(dataframe)

        #-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
        # Conversion of categorical values into numerical values
        # Requires step 3, at least one categorical column, and no missing values left (otherwise the flow does not advance)
        if (step_further == 3) and (len(Categ) != 0 and Categ != ["None"]) and (dataframe.isna().sum().sum() == 0) :
            dataframe, Tra_categ_list, step_further = categoric_to_numeric.categoric_to_numeric(dataframe, Categ, step_further)
        elif (dataframe.isna().sum().sum() != 0) :
            st.write("Something's wrong with the data. Missing values are still there...")
        else :
            # No categorical conversion needed: jump to step 4 with a placeholder transformation list
            step_further, Tra_categ_list = 4, [[], None, []]

        #-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
        # Creation of the Training and Validation sets used to evaluate the model
        # Initial check to decide whether this section is shown
        if step_further == 4 and (Categ or Numer) and dataframe.isna().sum().sum() == 0:
            X, y, X_train, X_test, y_train, y_test, step_further, final_columns = custom_split.train_test_customsplit(dataframe, Tar, step_further)
            st.session_state["Final_columns"] = final_columns                 # Kept in the session because it is needed later by the test pipeline

        #-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
        # Standardization of the X dataframe for the Training
        # Check on the current step and on missing values
        if step_further == 5 and dataframe.isna().sum().sum() == 0:
            X_train, X_test, Tra_num, step_further, flag_stand = standardize_x_train.standardize_x_train(dataframe, X_train, X_test, step_further)

        #-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
        # Transformation of the Target column
        if step_further == 6 :
            y_train, y_test, Norm_tar_list, step_further = target_transformation.target_transformation(dataframe, Tar, y_train, y_test, step_further, Task1)
            if st.checkbox('Show Target (training set)', key = 61):
                st.write(y_train)

        #-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
        # Construction of the Neural Network
        # After the first training, it is possible to finalize the model and apply it to a new test file
        if step_further == 7 and Task1 :
            # The user chooses the different parameters, then the "Model" object is built from them
            Hidden_layers, Algo, Alpha, Regularization, Momentum, Early_stopping, Verbose, Max_iter, Function_, Batch, Decay, Lambda, Random_state, Patient, Final_metric = nn_builder.nn_builder(dataframe, Task1)
            Model = NeuralNetwork.NeuralNet(task = Task1, function = Function_, Hidden_layers = Hidden_layers, algo = Algo, batch = Batch, alpha = float(Alpha), decay = float(Decay),
                            regularization = Regularization, Lambda = float(Lambda), Max_iter = int(Max_iter), momentum = float(Momentum), random_state = int(Random_state), verbose = Verbose,
                            early_stopping = Early_stopping, patient = int(Patient), flag_plot = False, metric = Final_metric)
            st.text("")
            st.text("")
            st.text("")
            if st.button('Start the training!'):                                                                              # Button that starts the training
                Model.Training(X_train, y_train, X_test, y_test)                                                              # NN training
                st.write('Training complete!')
                metrics_plot.metrics_plot(Model, X_train, X_test, y_train, y_test, Task1, Norm_tar_list, Final_metric)        # Final metrics (for Regression) and plots

            #----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
            # Model Finalization
            st.text("")
            st.text("")
            st.text("")
            st.write("Now, if you want, you can finalize the model!")
            st.write("The Training and the Test set will be used together to create the final model.")
            st.write("The dataset will be standardized and the Target normalized using the methods defined above.")
            st.write(r"After the model finalization, you can download the JSON file with the model parameters and weights.")

            flag_finalization = 0                            # Finalization flag
            # Finalization button. On the first run that reaches this point the session key is only
            # initialised to False and the button is NOT drawn; on every later rerun the button is drawn
            # and its clicked state is stored in the session.
            if "ButBut" not in st.session_state :
                st.session_state["ButBut"] = False
            else :
                st.session_state["ButBut"] = st.button("Finalization of the model")

            # Start of the finalization process: run the final training and save the parameters to a JSON file
            if st.session_state["ButBut"] :
                Final_model, Tra_num_list_final, flag_finalization, Norm_tar_list_final = model_finalization.finalization(X, y, Model, Task1, Final_metric, Tra_num, Norm_tar_list, flag_stand)

                # Store everything in the session so that the values survive the rerun triggered by uploading the test file
                st.session_state["Final_model"] = Final_model
                st.session_state["Tra_num_list_final"] = Tra_num_list_final
                st.session_state["Norm_tar_list_final"] = Norm_tar_list_final
                st.session_state["flag_finalization"] = flag_finalization

                # Save the final model to the file "Best_model_parameters.json"
                file_name = "Best_model_parameters.json"
                Final_model.Save_model(file_name)

            #----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
            # Model application on a new file
            st.write("")
            st.write("")
            st.write("### Upload Data for testing the model")
            st.write("If you finalized your model, you can upload a new file to apply the best model")
            st.write("The file must have the same number of columns of the original file with the same names.")
            uploaded_file_test = st.file_uploader("Choose a CSV file to apply the best model")                    # Upload a new file
            # "flag_finalization" is written to the session only by the finalization branch above, so it is
            # read with a default: uploading a test file before finalizing must not raise a KeyError.
            if uploaded_file_test is not None and st.session_state.get("flag_finalization", 0) == 1:
                # Transformation of the uploaded dataset through the pipeline
                X_test_final, y_test_final, dataframe_test = test_pipeline.pipeline_nn(uploaded_file_test, Selected_columns_start, Numer, Categ, Tar, Sub_num_list, Sub_categ_list,
                                                Tra_categ_list, st.session_state["Final_columns"], st.session_state["Tra_num_list_final"], st.session_state["Norm_tar_list_final"], Task1)

                Predictions_test = st.session_state["Final_model"].Predict( X_test_final )                        # Application of the final model
                if Task1 == "Classification" :                                                                    # Probabilities are computed only for classification
                    Predictions_prob_test = st.session_state["Final_model"].Predict_proba( X_test_final )
                    dataframe_test["Probability"] = Predictions_prob_test
                if Task1 == "Regression" :
                    # Undo the target transformation, selected by the code stored at index 0 of the list
                    if st.session_state["Norm_tar_list_final"][0] == 3:
                        # Code 3: the object stored at index 1 provides the inverse transform
                        Predictions_test = st.session_state["Norm_tar_list_final"][1].inverse_transform(Predictions_test)
                    elif st.session_state["Norm_tar_list_final"][0] == 1:
                        # NOTE(review): presumably the inverse of a log10-based transform; confirm that "+ 1" matches the
                        # forward transform in target_transformation (a log10(y + 1) forward transform would need "- 1" here)
                        Predictions_test = 10**Predictions_test + 1

                dataframe_test["Predictions"] = Predictions_test
                st.write("")
                st.write("Uploaded table with predictions:")
                st.write( dataframe_test )

                # Convert the dataframe to CSV bytes and offer it through a download button
                dataframe_test = dataframe_test.to_csv(index = False).encode('utf-8')
                st.download_button( "Download the dataframe", dataframe_test, "Predictions.csv", "text/csv", key = 'download-csv' )

                st.write("")
                st.write("If you want to make changes to the original model:")
                st.write(" 1. Modify the model parameters accordingly")
                st.write(" 2. Re-train and re-finalize the model")
                st.write("After that, you will see the new predictions in the uploaded dataframe.")
                st.write("")
                st.write("-----------------------------")
                st.write("")
                st.write("")
                html_str = f"""<style>p.a {{font: bold 23.5px Sans;}}</style><p class="a">Everything is done!</p>"""
                st.markdown(html_str, unsafe_allow_html = True)

    # If a task has not been chosen
    else :
        st.write("")
        st.write("")
        st.write("Choose a task for this analysis!")

# --------------------------------------------------------------------------------------------------------------------------------------------------------
# Footer (rendered through the helpers of the `footer` module imported from `functions`)
if __name__ == "__main__":
    # Build the two rendered pieces first (avatar image, then profile link), then assemble the footer
    author_avatar = footer.image_render('https://avatars3.githubusercontent.com/u/45109972?s=400&v=4', width = px(25), height = px(25))
    author_link = footer.link_render("https://www.linkedin.com/in/samuele-campitiello-ph-d-913b90104/", "Samuele Campitiello")
    footer.footer("Made in ", author_avatar, " by ", author_link)