Spaces:
Runtime error
Runtime error
| import numpy as np | |
| import pandas as pd | |
| import tensorflow as tf | |
| from numpy import mean | |
| from numpy import std | |
| from sklearn.metrics import mean_squared_error | |
| from sklearn.preprocessing import MinMaxScaler | |
| from statsmodels.tsa.statespace.sarimax import SARIMAX | |
| from prophet import Prophet | |
| import gradio as gr | |
| import matplotlib.pyplot as plt | |
| import datetime as dt | |
| from io import BytesIO | |
| from PIL import Image | |
| # Load the dataset once at import time; predict_and_plot and the Gradio dropdown choices below read this module-level frame | |
| dataset = pd.read_csv('temp_data.csv') | |
def df_to_sup(dataset):
    """Build a supervised-learning feature frame from a ``Valor`` time series.

    Simple moving averages (windows 2, 3, 6, 12) and lagged copies
    (lags 1, 2, 3, 4, 6, 12) of ``Valor`` are appended as new columns, every
    row containing a NaN is dropped, and ``Data_Completa`` is converted to
    proleptic Gregorian ordinals.

    Args:
        dataset: DataFrame with at least the columns ``Valor`` and
            ``Data_Completa``. The input frame is not modified.

    Returns:
        A new, independent DataFrame made of the last 11 columns of the
        augmented frame plus a ``Valor`` column. For an input without
        pre-existing SMA/lag columns, those 11 columns are the 10 engineered
        features and whichever input column comes last.
    """
    df = dataset.copy()  # work on a copy so the caller's frame is untouched

    for window_size in (2, 3, 6, 12):
        df[f'SMA({window_size})'] = df['Valor'].rolling(window=window_size).mean()

    for lag in (1, 2, 3, 4, 6, 12):
        df[f'lag({lag})'] = df['Valor'].shift(lag)

    # Rolling windows and shifts leave NaNs in the leading rows; drop them.
    df = df.dropna()
    df['Data_Completa'] = pd.to_datetime(df['Data_Completa']).map(dt.datetime.toordinal)

    # Take an explicit copy: assigning into a bare ``iloc`` slice is a chained
    # assignment and raises SettingWithCopyWarning.
    df_1 = df.iloc[:, -11:].copy()
    df_1['Valor'] = df['Valor']
    return df_1
def _build_lstm_model(n_timesteps, n_features):
    """Create and compile the stacked two-layer LSTM regressor (MSE loss, Adam)."""
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.LSTM(units=1000, return_sequences=True,
                                   input_shape=[n_timesteps, n_features]))
    model.add(tf.keras.layers.Dropout(0.05))
    model.add(tf.keras.layers.LSTM(units=1000))
    model.add(tf.keras.layers.Dropout(0.05))
    model.add(tf.keras.layers.Dense(units=1))
    model.compile(loss='mse', optimizer='adam')
    return model


def _plot_to_image(to_plot):
    """Draw one line per column of ``to_plot`` and return the chart as a PIL image."""
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        for column in to_plot.columns:
            ax.plot(to_plot.index, to_plot[column], label=column)
        ax.set_xlabel('Índice')
        ax.set_ylabel('Valores')
        ax.legend()
        ax.set_title('Comparação de Valores')
        ax.grid(True)
        img_buffer = BytesIO()
        fig.savefig(img_buffer, format="png")
    finally:
        # pyplot keeps every figure alive until it is closed; without this a
        # long-running app leaks one figure per train size per request.
        plt.close(fig)
    img_buffer.seek(0)
    return Image.open(img_buffer)


# Function to perform predictions and generate the final plot
def predict_and_plot(instituicao, conta, train_sizes=(0.65, 0.7, 0.75, 0.8, 0.85)):
    """Fit SARIMAX, Prophet and an LSTM and report the best one on a hold-out.

    The module-level ``dataset`` is filtered by institution and account. For
    each train proportion the three models are fitted on the leading part of
    the series and scored by mean squared error, in original units, on the
    remaining part. The model with the lowest error is plotted against the
    actual values.

    Args:
        instituicao: Value matched against the ``Instituição`` column.
        conta: Value matched against the ``Conta`` column.
        train_sizes: Iterable of train proportions. Every proportion is
            evaluated; the returned values belong to the last one.

    Returns:
        A tuple ``(best_model, plot_image, table)``: the name of the winning
        model (``'SARIMAX'``, ``'Prophet'`` or ``'LSTM'``), a PIL image of
        predictions versus actual values, and a DataFrame with the winning
        predictions.

    Raises:
        ValueError: If ``train_sizes`` is empty or no row matches the
            requested institution/account pair.
    """
    train_sizes = list(train_sizes)
    if not train_sizes:
        raise ValueError("train_sizes must contain at least one proportion")

    selection = (dataset['Instituição'] == instituicao) & (dataset['Conta'] == conta)
    dataset_filter = dataset[selection]
    if dataset_filter.empty:
        raise ValueError(
            f"No rows found for Instituição={instituicao!r} and Conta={conta!r}"
        )

    # -------------------
    # Univariate frame in the ds/y layout Prophet expects. A non-inplace rename
    # returns a new frame instead of mutating a slice of the global dataset.
    data = dataset_filter[['Data_Completa', 'Valor']].rename(
        columns={'Valor': 'y', 'Data_Completa': 'ds'}
    )

    # -------------------
    # Supervised frame (lags and moving averages) for the LSTM
    df_1 = df_to_sup(dataset_filter)
    X = df_1.drop('Valor', axis=1)
    y = df_1.loc[:, ['Valor']]

    results = {}  # per-train-size outcome, keyed by proportion
    for train_size_proportion in train_sizes:
        # ------------------- univariate split
        train_size = int(train_size_proportion * len(data))
        train_data, test_data = data[:train_size], data[train_size:]
        train_target = train_data['y']
        test_target = test_data['y'].reset_index(drop=True)

        # ------------------- supervised split
        # NOTE(review): df_to_sup drops the rows made NaN by lags/rolling, so
        # this hold-out is not the same set of rows as the univariate one;
        # the LSTM is scored and plotted against its own hold-out below.
        train_size_S = int(train_size_proportion * len(df_1))
        X_train_raw, X_test_raw = X[:train_size_S], X[train_size_S:]
        y_train_raw, y_test_raw = y[:train_size_S], y[train_size_S:]

        # Scalers are fitted on the training part only.
        scaler_x = MinMaxScaler(feature_range=(0, 1)).fit(X_train_raw)
        scaler_y = MinMaxScaler(feature_range=(0, 1)).fit(y_train_raw)
        train_x_norm = scaler_x.transform(X_train_raw)
        test_x_norm = scaler_x.transform(X_test_raw)
        train_y_norm = scaler_y.transform(y_train_raw)

        # Keras LSTM input is (samples, timesteps, features); one timestep here.
        X_train = train_x_norm.reshape(train_x_norm.shape[0], 1, train_x_norm.shape[1])
        X_test = test_x_norm.reshape(test_x_norm.shape[0], 1, test_x_norm.shape[1])
        y_train = train_y_norm.reshape(train_y_norm.shape[0], 1)

        # ------------------- fit
        sarimax_results = SARIMAX(
            train_target, order=(1, 1, 1), seasonal_order=(1, 1, 1, 12)
        ).fit()

        prophet_model = Prophet(seasonality_mode='multiplicative')
        prophet_model.fit(train_data)

        lstm_model = _build_lstm_model(X_train.shape[1], X_train.shape[2])
        lstm_model.fit(X_train, y_train, epochs=150, validation_split=0.2,
                       batch_size=4, shuffle=False)

        # ------------------- forecast
        sarimax_predictions = sarimax_results.predict(
            start=len(train_target),
            end=len(train_target) + len(test_target) - 1,
            dynamic=False,
        ).reset_index(drop=True)
        prophet_predictions = prophet_model.predict(test_data)['yhat'].reset_index(drop=True)

        # Wrap the LSTM output in a Series so it can be concatenated and
        # tabulated like the other forecasts.
        lstm_actual = pd.Series(y_test_raw.to_numpy().ravel(), name='y')
        lstm_predictions = pd.Series(
            scaler_y.inverse_transform(lstm_model.predict(X_test)).ravel(),
            name='LSTM',
        )

        # ------------------- score, every error in original units
        # The LSTM MSE is computed on inverse-transformed predictions;
        # evaluating the network directly would mix normalised predictions
        # with unscaled targets.
        errors = {
            'SARIMAX': mean_squared_error(test_target, sarimax_predictions),
            'Prophet': mean_squared_error(test_target, prophet_predictions),
            'LSTM': mean_squared_error(lstm_actual, lstm_predictions),
        }
        best_model = min(errors, key=errors.get)

        forecasts = {
            'SARIMAX': (sarimax_predictions, test_target),
            'Prophet': (prophet_predictions, test_target),
            'LSTM': (lstm_predictions, lstm_actual),
        }
        best_predictions, actual_values = forecasts[best_model]

        to_plot = pd.concat([best_predictions, actual_values], axis=1)
        results[train_size_proportion] = {
            'best_model': best_model,
            'plot_image': _plot_to_image(to_plot),
            'table': pd.DataFrame(best_predictions),
        }

    # The interface shows a single result: the one for the last train size.
    final = results[train_sizes[-1]]
    return final['best_model'], final['plot_image'], final['table']
# Create a Gradio interface.
# Components come from the top-level ``gr`` namespace: the legacy
# ``gr.inputs`` / ``gr.outputs`` modules are deprecated in Gradio 3 and removed
# in Gradio 4, where referencing them fails at start-up.
iface = gr.Interface(
    fn=predict_and_plot,
    inputs=[
        # .tolist() yields native Python values rather than NumPy scalars
        gr.Dropdown(label="Instituição", choices=dataset['Instituição'].unique().tolist()),
        gr.Dropdown(label="Conta", choices=dataset['Conta'].unique().tolist()),
    ],
    outputs=[
        gr.Textbox(label="Melhor Modelo"),
        gr.Image(type="pil", label="Gráfico"),
        gr.Dataframe(label="Previsões", type='pandas'),
    ],
    live=False,
    title="Timeseries Pipeline",
    description="Seleciona 'Instituição' e 'Conta' para obter o modelo com as melhores previsões, gráfico, e tabela com as previsões.",
)

# Launch the Gradio interface
iface.launch()