"""Time-series forecasting pipeline with a Gradio front-end.

For a user-selected ('Instituição', 'Conta') pair, fits three forecasters
(SARIMAX, Prophet, LSTM) over several train/test splits, picks the one with
the lowest test MSE, and returns its name, a comparison plot, and a table
of predictions.
"""
import datetime as dt
from io import BytesIO

import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from numpy import mean
from numpy import std
from PIL import Image
from prophet import Prophet
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from statsmodels.tsa.statespace.sarimax import SARIMAX

# Load the dataset once at import time; read by the Gradio callback below.
# NOTE(review): assumes columns 'Instituição', 'Conta', 'Data_Completa',
# 'Valor' exist — verify against the CSV schema.
dataset = pd.read_csv('temp_data.csv')


def df_to_sup(dataset):
    """Build a supervised-learning frame from the raw series.

    Adds simple-moving-average columns SMA(2/3/6/12) and lag columns
    lag(1/2/3/4/6/12) derived from 'Valor', drops the NaN head rows those
    windows create, converts 'Data_Completa' to an ordinal integer, and
    returns the last 11 columns plus the 'Valor' target.

    Parameters
    ----------
    dataset : pd.DataFrame
        Raw rows for one ('Instituição', 'Conta') pair.

    Returns
    -------
    pd.DataFrame
        Feature columns plus the 'Valor' target column.
    """
    df = dataset.copy()  # never mutate the caller's frame
    for window_size in (2, 3, 6, 12):
        df[f'SMA({window_size})'] = df['Valor'].rolling(window=window_size).mean()
    for lag in (1, 2, 3, 4, 6, 12):
        df[f'lag({lag})'] = df['Valor'].shift(lag)
    # rolling()/shift() leave NaNs in the first rows — drop them
    df.dropna(inplace=True)
    df['Data_Completa'] = pd.to_datetime(df['Data_Completa']).map(dt.datetime.toordinal)
    # .copy() so the 'Valor' assignment below does not hit a view
    # (original code triggered SettingWithCopyWarning here)
    df_1 = df.iloc[:, -11:].copy()  # keep the last 11 columns, as before
    df_1['Valor'] = df['Valor']
    return df_1


def _fit_lstm(X_train, y_train):
    """Build and train the two-layer LSTM used on the supervised frame.

    Expects X_train shaped (samples, 1, features) and y_train shaped
    (samples, 1), both MinMax-normalized.
    """
    model = tf.keras.models.Sequential([
        tf.keras.layers.LSTM(units=1000, return_sequences=True,
                             input_shape=[X_train.shape[1], X_train.shape[2]]),
        tf.keras.layers.Dropout(0.05),
        tf.keras.layers.LSTM(units=1000),
        tf.keras.layers.Dropout(0.05),
        tf.keras.layers.Dense(units=1),
    ])
    model.compile(loss='mse', optimizer='adam')
    # shuffle=False: sample order is the time order and must be preserved
    model.fit(X_train, y_train, epochs=150, validation_split=0.2,
              batch_size=4, shuffle=False)
    return model


def _plot_to_image(to_plot):
    """Render the prediction-vs-actual frame to a PIL image."""
    fig = plt.figure(figsize=(10, 6))
    for column in to_plot.columns:
        plt.plot(to_plot.index, to_plot[column], label=column)
    plt.xlabel('Índice')
    plt.ylabel('Valores')
    plt.legend()
    plt.title('Comparação de Valores')
    plt.grid(True)
    img_buffer = BytesIO()
    plt.savefig(img_buffer, format="png")
    # Close the figure so repeated Gradio calls don't leak matplotlib state
    # (original left every figure open).
    plt.close(fig)
    img_buffer.seek(0)
    return Image.open(img_buffer)


def predict_and_plot(instituicao, conta, train_sizes=None):
    """Fit SARIMAX, Prophet and an LSTM; return the best model's results.

    Parameters
    ----------
    instituicao, conta : str
        Filter values applied to the global ``dataset``.
    train_sizes : list[float], optional
        Train-split proportions to iterate over. Defaults to
        [0.65, 0.7, 0.75, 0.8, 0.85]. (None-sentinel replaces the original
        mutable default argument.)

    Returns
    -------
    tuple
        (best_model_name, plot_image, predictions_table) — from the LAST
        train size only, matching the original behavior (earlier
        iterations are stored in a local dict but not returned).
    """
    if train_sizes is None:
        train_sizes = [0.65, 0.7, 0.75, 0.8, 0.85]

    dataset_filter = dataset[(dataset['Instituição'] == instituicao)
                             & (dataset['Conta'] == conta)]

    # Univariate frame for SARIMAX/Prophet. Prophet requires the columns to
    # be named 'ds' (dates) and 'y' (target). Non-inplace rename avoids the
    # original's SettingWithCopyWarning on a filtered slice.
    data = dataset_filter[['Data_Completa', 'Valor']].rename(
        columns={'Valor': 'y', 'Data_Completa': 'ds'})

    # Supervised frame for the LSTM.
    df_1 = df_to_sup(dataset_filter)
    X = df_1.drop('Valor', axis=1)
    y = df_1.loc[:, ['Valor']]

    results = {}  # per-train-size results (kept for parity with original)
    for train_size_proportion in train_sizes:
        # ---- univariate chronological split -------------------------------
        train_size = int(train_size_proportion * len(data))
        train_data, test_data = data[:train_size], data[train_size:]
        train_target = train_data['y']
        test_target = test_data['y']

        # ---- supervised split + per-split MinMax scaling ------------------
        # Scalers are fit on the training portion only to avoid test leakage.
        train_size_s = int(train_size_proportion * len(df_1))
        scaler_x = MinMaxScaler(feature_range=(0, 1)).fit(X[:train_size_s])
        scaler_y = MinMaxScaler(feature_range=(0, 1)).fit(y[:train_size_s])

        n_features = X.shape[1]
        # LSTM expects (samples, timesteps=1, features)
        X_train = scaler_x.transform(X[:train_size_s]).reshape(-1, 1, n_features)
        X_test = scaler_x.transform(X[train_size_s:]).reshape(-1, 1, n_features)
        y_train = scaler_y.transform(y[:train_size_s])
        y_test = scaler_y.transform(y[train_size_s:])

        # ---- fit the three models -----------------------------------------
        sarimax_results = SARIMAX(train_target, order=(1, 1, 1),
                                  seasonal_order=(1, 1, 1, 12)).fit()
        prophet_model = Prophet(seasonality_mode='multiplicative')
        # train_data already has 'ds'/'y' columns; the original's extra
        # rename calls here were no-ops and have been removed.
        prophet_model.fit(train_data)
        lstm_model = _fit_lstm(X_train, y_train)

        # ---- predict on the held-out period -------------------------------
        sarimax_predictions = sarimax_results.predict(
            start=len(train_target),
            end=len(train_target) + len(test_target) - 1,
            dynamic=False)
        prophet_predictions = prophet_model.predict(test_data)
        # Bring LSTM output and targets back to the original scale.
        lstm_predictions = scaler_y.inverse_transform(
            lstm_model.predict(X_test)).ravel()
        y_test_orig = scaler_y.inverse_transform(y_test).ravel()

        # ---- compare models on the SAME (original) scale ------------------
        # BUG FIX: the original scored the LSTM with model.evaluate() on
        # normalized data while SARIMAX/Prophet were scored on raw values,
        # making the min() comparison meaningless.
        errors = {
            'SARIMAX': mean_squared_error(test_target, sarimax_predictions),
            'Prophet': mean_squared_error(test_target,
                                          prophet_predictions['yhat']),
            'LSTM': mean_squared_error(y_test_orig, lstm_predictions),
        }
        best_model = min(errors, key=errors.get)
        if best_model == 'SARIMAX':
            best_predictions = sarimax_predictions.reset_index(drop=True)
        elif best_model == 'Prophet':
            best_predictions = prophet_predictions['yhat'].reset_index(drop=True)
        else:
            # BUG FIX: the LSTM output is a NumPy array; the original then
            # called .reset_index() on it, which raises AttributeError.
            best_predictions = pd.Series(lstm_predictions, name='LSTM')

        test_target = test_target.reset_index(drop=True)
        to_plot = pd.concat([best_predictions, test_target], axis=1)

        plot_image = _plot_to_image(to_plot)
        table = pd.DataFrame(best_predictions)

        results[train_size_proportion] = {
            'best_model': best_model,
            'plot_image': plot_image,
            'table': pd.DataFrame(best_predictions),
        }

    # As in the original, only the last train size's results are returned.
    return best_model, plot_image, table


# Gradio UI. gr.inputs.*/gr.outputs.* were removed in Gradio 4; the modern
# top-level components below take the same label/choices/type kwargs.
iface = gr.Interface(
    fn=predict_and_plot,
    inputs=[
        gr.Dropdown(label="Instituição",
                    choices=list(dataset['Instituição'].unique())),
        gr.Dropdown(label="Conta",
                    choices=list(dataset['Conta'].unique())),
    ],
    outputs=[
        gr.Textbox(label="Melhor Modelo"),
        gr.Image(type="pil", label="Gráfico"),
        gr.Dataframe(label="Previsões", type='pandas'),
    ],
    live=False,
    title="Timeseries Pipeline",
    description="Seleciona 'Instituição' e 'Conta' para obter o modelo com as melhores previsões, gráfico, e tabela com as previsões.",
)

# Launch the Gradio interface (module-level, as in the original script).
iface.launch()