| """Reimplement TimeGAN-pytorch Codebase. | |
| Reference: Jinsung Yoon, Daniel Jarrett, Mihaela van der Schaar, | |
| "Time-series Generative Adversarial Networks," | |
| Neural Information Processing Systems (NeurIPS), 2019. | |
| Paper link: https://papers.nips.cc/paper/8789-time-series-generative-adversarial-networks | |
| Last updated Date: October 18th 2021 | |
| Code author: Zhiwei Zhang (bitzzw@gmail.com) | |
| ----------------------------- | |
| predictive_metrics.py | |
| Note: Use Post-hoc RNN to predict one-step ahead (last feature) | |
| """ | |

# Necessary Packages
import numpy as np
import tensorflow as tf
import tensorflow.compat.v1 as tf1
from sklearn.metrics import mean_absolute_error
from tqdm.auto import tqdm

from utils.metric_utils import extract_time

# Graph-mode (TF1) APIs such as placeholders and dynamic_rnn are used below,
# so eager execution must be disabled up front.
tf1.disable_eager_execution()

def predictive_score_metrics(ori_data, generated_data):
    """Report the performance of post-hoc RNN one-step-ahead prediction.

    Args:
      - ori_data: original data
      - generated_data: generated synthetic data

    Returns:
      - predictive_score: MAE of the predictions on the original data
    """
    # Initialization on the Graph
    tf1.reset_default_graph()

    # Basic Parameters
    no, seq_len, dim = ori_data.shape

    # Set maximum sequence length and each sequence length
    ori_time, ori_max_seq_len = extract_time(ori_data)
    generated_time, generated_max_seq_len = extract_time(generated_data)
    max_seq_len = max([ori_max_seq_len, generated_max_seq_len])

    ## Build a post-hoc RNN predictive network
    # Network parameters
    hidden_dim = int(dim / 2)
    iterations = 5000
    batch_size = 128

    # Input placeholders
    X = tf1.placeholder(tf.float32, [None, max_seq_len - 1, dim - 1], name="myinput_x")
    T = tf1.placeholder(tf.int32, [None], name="myinput_t")
    Y = tf1.placeholder(tf.float32, [None, max_seq_len - 1, 1], name="myinput_y")
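    # Shapes: X carries the first (dim - 1) features over the first
    # (max_seq_len - 1) steps; Y carries the last feature shifted one step
    # ahead, so each input step is paired with the next value of the final
    # feature.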

    # Predictor function
    def predictor(x, t):
        """Simple predictor function.

        Args:
          - x: time-series data
          - t: time information

        Returns:
          - y_hat: prediction
          - p_vars: predictor variables
        """
        with tf1.variable_scope("predictor", reuse=tf1.AUTO_REUSE) as vs:
            p_cell = tf1.nn.rnn_cell.GRUCell(
                num_units=hidden_dim, activation=tf.nn.tanh, name="p_cell"
            )
            # sequence_length=t masks the padded steps of shorter sequences
            p_outputs, p_last_states = tf1.nn.dynamic_rnn(
                p_cell, x, dtype=tf.float32, sequence_length=t
            )
            # One linear output unit per step (tf1.layers.dense replaces the
            # removed tf.contrib.layers.fully_connected)
            y_hat_logit = tf1.layers.dense(p_outputs, 1, activation=None)
            # Sigmoid keeps predictions in [0, 1], matching min-max scaled data
            y_hat = tf.nn.sigmoid(y_hat_logit)
            p_vars = [v for v in tf1.global_variables() if v.name.startswith(vs.name)]
        return y_hat, p_vars

    y_pred, p_vars = predictor(X, T)

    # Loss for the predictor: mean absolute (L1) difference
    p_loss = tf1.losses.absolute_difference(Y, y_pred)
    # Optimizer: Adam (default hyperparameters), updating predictor variables only
    p_solver = tf1.train.AdamOptimizer().minimize(p_loss, var_list=p_vars)

    ## Training
    # Session start
    sess = tf1.Session()
    sess.run(tf1.global_variables_initializer())

    # Training using the synthetic dataset
    for itt in tqdm(range(iterations), desc="training"):
        # Set mini-batch: sample batch_size sequences from the generated data
        idx = np.random.permutation(len(generated_data))
        train_idx = idx[:batch_size]

        # Build one-step-ahead (input, target) pairs for each sampled sequence
        X_mb = list(generated_data[i][:-1, : (dim - 1)] for i in train_idx)
        T_mb = list(generated_time[i] - 1 for i in train_idx)
        Y_mb = list(
            np.reshape(
                generated_data[i][1:, (dim - 1)],
                [len(generated_data[i][1:, (dim - 1)]), 1],
            )
            for i in train_idx
        )

        # Train the predictor on the mini-batch
        _, step_p_loss = sess.run(
            [p_solver, p_loss], feed_dict={X: X_mb, T: T_mb, Y: Y_mb}
        )

    ## Test the trained model on the original data
    idx = np.random.permutation(len(ori_data))
    test_idx = idx[:no]  # all original sequences, in random order

    X_mb = list(ori_data[i][:-1, : (dim - 1)] for i in test_idx)
    T_mb = list(ori_time[i] - 1 for i in test_idx)
    Y_mb = list(
        np.reshape(ori_data[i][1:, (dim - 1)], [len(ori_data[i][1:, (dim - 1)]), 1])
        for i in test_idx
    )

    # Prediction
    pred_Y_curr = sess.run(y_pred, feed_dict={X: X_mb, T: T_mb})

    # Compute the performance in terms of MAE, averaged over sequences
    MAE_temp = 0
    for i in range(no):
        MAE_temp = MAE_temp + mean_absolute_error(Y_mb[i], pred_Y_curr[i, :, :])

    predictive_score = MAE_temp / no

    return predictive_score
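

# A minimal smoke-test sketch, not part of the original module: the random
# inputs below are purely illustrative stand-ins for a real dataset and
# TimeGAN samples, assumed to be arrays of shape (no, seq_len, dim) scaled
# to [0, 1]. Note it runs the full 5000 training iterations, so it is slow.
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    ori = rng.uniform(size=(64, 24, 5)).astype(np.float32)  # stand-in "original" data
    gen = rng.uniform(size=(64, 24, 5)).astype(np.float32)  # stand-in "generated" data
    score = predictive_score_metrics(ori, gen)
    print(f"predictive score (MAE, lower is better): {score:.4f}")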