---
# EasyTPP runner configuration.
# Layout:
#   data:        per-dataset locations and tokenizer specs (pad_token_id == num_event_types)
#   <Model>_train / _gen / _eval:  one run config per model and stage, each with
#       base_config (stage/backend/dataset/model), trainer_config (optimizer etc.)
#       and model_config (architecture + thinning-sampler parameters).
pipeline_config_id: runner_config

data:
  taxi:
    data_format: json
    train_dir: easytpp/taxi  # ./data/taxi/train.json
    valid_dir: easytpp/taxi  # ./data/taxi/dev.json
    test_dir: easytpp/taxi  # ./data/taxi/test.json
    data_specs:
      num_event_types: 10
      pad_token_id: 10
      padding_side: right
      # padding_strategy: max_length
      # truncation_strategy: longest_first  # or Truncate to a maximum length specified with the argument `max_length`
      # max_len: 20
  conttime:
    data_format: pkl
    train_dir: ../data/conttime/train.pkl
    valid_dir: ../data/conttime/dev.pkl
    test_dir: ../data/conttime/test.pkl
    data_specs:
      num_event_types: 5
      pad_token_id: 5
      padding_side: right
      truncation_side: right
      # padding_strategy: max_length  # for ode tpp we have to set this to max_length
      # max_len: 20
  hawkes_1d:
    data_format: pkl
    train_dir: ../data/hawkes/train.pkl
    valid_dir: ../data/hawkes/dev.pkl
    test_dir: ../data/hawkes/test.pkl
    data_specs:
      num_event_types: 1
      pad_token_id: 1
      padding_side: right
      truncation_side: right
  retweet:
    data_format: pkl
    train_dir: ../data/retweet/train.pkl
    valid_dir: ../data/retweet/dev.pkl
    test_dir: ../data/retweet/test.pkl
    data_specs:
      num_event_types: 3
      pad_token_id: 3
      padding_side: right
      truncation_side: right

RMTPP_train:
  base_config:
    stage: train
    backend: torch
    dataset_id: taxi
    runner_id: std_tpp
    model_id: RMTPP  # model name
    base_dir: './checkpoints/'
  trainer_config:
    batch_size: 256
    max_epoch: 20
    shuffle: false
    optimizer: adam
    learning_rate: 1.e-3
    valid_freq: 1
    use_tfb: false
    metrics: ['acc', 'rmse']
    seed: 2019
    gpu: -1
  model_config:
    hidden_size: 32
    time_emb_size: 16
    num_layers: 2
    num_heads: 2
    mc_num_sample_per_step: 20
    sharing_param_layer: false
    loss_integral_num_sample_per_step: 20
    dropout: 0.0
    use_ln: false
    thinning:
      num_seq: 10
      num_sample: 1
      num_exp: 500  # number of i.i.d. Exp(intensity_bound) draws at one time in thinning algorithm
      look_ahead_time: 10
      patience_counter: 5  # the maximum iteration used in adaptive thinning
      over_sample_rate: 5
      num_samples_boundary: 5
      dtime_max: 5

RMTPP_gen:
  base_config:
    stage: gen
    backend: torch
    dataset_id: retweet
    runner_id: std_tpp
    base_dir: './checkpoints/'
    model_id: RMTPP
  model_config:
    hidden_size: 32
    time_emb_size: 16
    mc_num_sample_per_step: 20
    sharing_param_layer: false
    loss_integral_num_sample_per_step: 20
    dropout: 0.0
    use_ln: false
    seed: 2019
    gpu: 0
    pretrained_model_dir: ./checkpoints/2555_4348724608_230603-155841/models/saved_model
    thinning:
      num_seq: 10
      num_sample: 1
      num_exp: 500  # number of i.i.d. Exp(intensity_bound) draws at one time in thinning algorithm
      look_ahead_time: 10
      patience_counter: 5  # the maximum iteration used in adaptive thinning
      over_sample_rate: 5
      num_samples_boundary: 5
      dtime_max: 5
      num_step_gen: 10

NHP_eval:
  base_config:
    stage: eval
    backend: torch
    dataset_id: taxi
    runner_id: std_tpp
    base_dir: './checkpoints/'
    model_id: NHP
  trainer_config:
    batch_size: 256
    max_epoch: 1
  model_config:
    hidden_size: 64
    use_ln: false
    seed: 2019
    gpu: 0
    pretrained_model_dir: ./checkpoints/26507_4380788096_231111-101848/models/saved_model
    thinning:
      num_seq: 10
      num_sample: 1
      num_exp: 500  # number of i.i.d. Exp(intensity_bound) draws at one time in thinning algorithm
      look_ahead_time: 10
      patience_counter: 5  # the maximum iteration used in adaptive thinning
      over_sample_rate: 5
      num_samples_boundary: 5
      dtime_max: 5

NHP_gen:
  base_config:
    stage: eval
    backend: torch
    dataset_id: taxi
    runner_id: std_tpp
    model_id: NHP  # model name
    base_dir: './checkpoints/'
  trainer_config:
    batch_size: 256
    max_epoch: 20
    shuffle: false
    optimizer: adam
    learning_rate: 1.e-3
    valid_freq: 1
    use_tfb: false
    metrics: ['acc', 'rmse']
    seed: 2019
    gpu: -1
  model_config:
    hidden_size: 64
    loss_integral_num_sample_per_step: 20
    pretrained_model_dir: ./checkpoints/75518_4377527680_230530-132355/models/saved_model
    thinning:
      num_seq: 10
      num_sample: 1
      num_exp: 500  # number of i.i.d. Exp(intensity_bound) draws at one time in thinning algorithm
      look_ahead_time: 10
      patience_counter: 5  # the maximum iteration used in adaptive thinning
      over_sample_rate: 5
      num_samples_boundary: 5
      dtime_max: 5
      num_step_gen: 1

FullyNN_train:
  base_config:
    stage: train
    backend: torch
    dataset_id: taxi
    runner_id: std_tpp
    model_id: FullyNN  # model name
    base_dir: './checkpoints/'
  trainer_config:
    batch_size: 256
    max_epoch: 200
    shuffle: false
    optimizer: adam
    learning_rate: 1.e-3
    valid_freq: 1
    use_tfb: false
    metrics: ['acc', 'rmse']
    seed: 2019
    gpu: 0
  model_config:
    rnn_type: LSTM
    hidden_size: 32
    time_emb_size: 4
    num_layers: 2
    num_heads: 2
    mc_num_sample_per_step: 20
    sharing_param_layer: false
    loss_integral_num_sample_per_step: 20
    dropout: 0.0
    use_ln: false
    model_specs:
      num_mlp_layers: 3
    # thinning:
    #   num_seq: 10
    #   num_sample: 1
    #   num_exp: 500  # number of i.i.d. Exp(intensity_bound) draws at one time in thinning algorithm
    #   look_ahead_time: 10
    #   patience_counter: 5  # the maximum iteration used in adaptive thinning
    #   over_sample_rate: 5
    #   num_samples_boundary: 5
    #   dtime_max: 5
    #   num_step_gen: 1

IntensityFree_train:
  base_config:
    stage: train
    backend: torch
    dataset_id: taxi
    runner_id: std_tpp
    model_id: IntensityFree  # model name
    base_dir: './checkpoints/'
  trainer_config:
    batch_size: 256
    max_epoch: 200
    shuffle: false
    optimizer: adam
    learning_rate: 1.e-3
    valid_freq: 1
    use_tfb: false
    metrics: ['acc', 'rmse']
    seed: 2019
    gpu: 0
  model_config:
    hidden_size: 32
    time_emb_size: 16
    num_layers: 2
    num_heads: 2
    mc_num_sample_per_step: 20
    sharing_param_layer: false
    loss_integral_num_sample_per_step: 20
    dropout: 0.0
    use_ln: false
    model_specs:
      num_mix_components: 3
    thinning:
      num_seq: 10
      num_sample: 1
      num_exp: 500  # number of i.i.d. Exp(intensity_bound) draws at one time in thinning algorithm
      look_ahead_time: 10
      patience_counter: 5  # the maximum iteration used in adaptive thinning
      over_sample_rate: 5
      num_samples_boundary: 5
      dtime_max: 5
      num_step_gen: 1

ODETPP_train:
  base_config:
    stage: train
    backend: torch
    dataset_id: taxi
    runner_id: std_tpp
    model_id: ODETPP  # model name
    base_dir: './checkpoints/'
  trainer_config:
    batch_size: 32
    max_epoch: 200
    shuffle: false
    optimizer: adam
    learning_rate: 1.e-1
    valid_freq: 1
    use_tfb: false
    metrics: ['acc', 'rmse']
    seed: 2019
    gpu: -1
  model_config:
    hidden_size: 4
    time_emb_size: 4
    num_layers: 1
    sharing_param_layer: false
    loss_integral_num_sample_per_step: 20
    dropout: 0.0
    use_ln: false
    model_specs:
      ode_num_sample_per_step: 2
      time_factor: 100
    thinning:
      num_seq: 10
      num_sample: 1
      num_exp: 50  # number of i.i.d. Exp(intensity_bound) draws at one time in thinning algorithm
      look_ahead_time: 10
      patience_counter: 5  # the maximum iteration used in adaptive thinning
      over_sample_rate: 5
      num_samples_boundary: 5
      dtime_max: 5
      num_step_gen: 1

ODETPP_gen:
  base_config:
    stage: gen
    backend: torch
    dataset_id: retweet
    runner_id: std_tpp
    base_dir: './checkpoints/'
    model_id: ODETPP
  trainer_config:
    batch_size: 256
    max_epoch: 1
  model_config:
    hidden_size: 32
    time_emb_size: 16
    num_layers: 1
    sharing_param_layer: false
    loss_integral_num_sample_per_step: 20
    dropout: 0.0
    use_ln: false
    seed: 2019
    gpu: 0
    pretrained_model_dir: ./checkpoints/3538_4310828416_230603-165911/models/saved_model
    model_specs:
      ode_num_sample_per_step: 2
      time_factor: 100
    thinning:
      num_seq: 10
      num_sample: 1
      num_exp: 500  # number of i.i.d. Exp(intensity_bound) draws at one time in thinning algorithm
      look_ahead_time: 10
      patience_counter: 5  # the maximum iteration used in adaptive thinning
      over_sample_rate: 5
      num_samples_boundary: 5
      dtime_max: 5
      num_step_gen: 10

NHP_train:
  base_config:
    stage: train
    backend: torch
    dataset_id: taxi
    runner_id: std_tpp
    model_id: NHP  # model name
    base_dir: './checkpoints/'
  trainer_config:
    batch_size: 256
    max_epoch: 2
    shuffle: false
    optimizer: adam
    learning_rate: 1.e-3
    valid_freq: 1
    use_tfb: false
    metrics: ['acc', 'rmse']
    seed: 2019
    gpu: -1
  model_config:
    hidden_size: 64
    loss_integral_num_sample_per_step: 20
    # pretrained_model_dir: ./checkpoints/75518_4377527680_230530-132355/models/saved_model
    thinning:
      num_seq: 10
      num_sample: 1
      num_exp: 500  # number of i.i.d. Exp(intensity_bound) draws at one time in thinning algorithm
      look_ahead_time: 10
      patience_counter: 5  # the maximum iteration used in adaptive thinning
      over_sample_rate: 5
      num_samples_boundary: 5
      dtime_max: 5
      num_step_gen: 1

SAHP_train:
  base_config:
    stage: train
    backend: torch
    dataset_id: taxi
    runner_id: std_tpp
    model_id: SAHP  # model name
    base_dir: './checkpoints/'
  trainer_config:
    batch_size: 256
    max_epoch: 20
    shuffle: false
    optimizer: adam
    learning_rate: 1.e-3
    valid_freq: 1
    use_tfb: false
    metrics: ['acc', 'rmse']
    seed: 2019
    gpu: 0
  model_config:
    hidden_size: 32
    time_emb_size: 16
    num_layers: 2
    num_heads: 2
    loss_integral_num_sample_per_step: 20
    use_ln: false
    thinning:
      num_seq: 10
      num_sample: 1
      num_exp: 500  # number of i.i.d. Exp(intensity_bound) draws at one time in thinning algorithm
      look_ahead_time: 10
      patience_counter: 5  # the maximum iteration used in adaptive thinning
      over_sample_rate: 5
      num_samples_boundary: 5
      dtime_max: 5
      num_step_gen: 1

SAHP_gen:
  base_config:
    stage: gen
    backend: torch
    dataset_id: retweet
    runner_id: std_tpp
    model_id: SAHP  # model name
    base_dir: './checkpoints/'
  trainer_config:
    batch_size: 256
    max_epoch: 1
  model_config:
    hidden_size: 16
    time_emb_size: 4
    num_layers: 2
    num_heads: 2
    loss_integral_num_sample_per_step: 20
    use_ln: false
    thinning:
      num_seq: 10
      num_sample: 1
      num_exp: 500  # number of i.i.d. Exp(intensity_bound) draws at one time in thinning algorithm
      look_ahead_time: 10
      patience_counter: 5  # the maximum iteration used in adaptive thinning
      over_sample_rate: 5
      num_samples_boundary: 5
      dtime_max: 5
      num_step_gen: 10

THP_train:
  base_config:
    stage: train
    backend: torch
    dataset_id: taxi
    runner_id: std_tpp
    model_id: THP  # model name
    base_dir: './checkpoints/'
  trainer_config:
    batch_size: 256
    max_epoch: 30
    shuffle: false
    optimizer: adam
    learning_rate: 1.e-3
    valid_freq: 1
    use_tfb: false
    metrics: ['acc', 'rmse']
    seed: 2019
    gpu: -1
  model_config:
    hidden_size: 32
    time_emb_size: 16
    num_layers: 2
    num_heads: 2
    mc_num_sample_per_step: 20
    loss_integral_num_sample_per_step: 20
    use_ln: false
    thinning:
      num_seq: 10
      num_sample: 1
      num_exp: 500  # number of i.i.d. Exp(intensity_bound) draws at one time in thinning algorithm
      look_ahead_time: 10
      patience_counter: 5  # the maximum iteration used in adaptive thinning
      over_sample_rate: 5
      num_samples_boundary: 5
      dtime_max: 5
      num_step_gen: 1

THP_gen:
  base_config:
    stage: gen
    backend: torch
    dataset_id: retweet
    runner_id: std_tpp
    model_id: THP  # model name
    base_dir: './checkpoints/'
  trainer_config:
    batch_size: 256
    max_epoch: 1
  model_config:
    hidden_size: 32
    time_emb_size: 16
    num_layers: 2
    num_heads: 2
    mc_num_sample_per_step: 20
    loss_integral_num_sample_per_step: 20
    use_ln: false
    # pretrained_model_dir: ./checkpoints/2694_4384867712_230603-160544/models/saved_model
    thinning:
      num_seq: 10
      num_sample: 1
      num_exp: 500  # number of i.i.d. Exp(intensity_bound) draws at one time in thinning algorithm
      look_ahead_time: 10
      patience_counter: 5  # the maximum iteration used in adaptive thinning
      over_sample_rate: 5
      num_samples_boundary: 5
      dtime_max: 5
      num_step_gen: 10

AttNHP_train:
  base_config:
    stage: train
    backend: torch
    dataset_id: taxi
    runner_id: std_tpp
    model_id: AttNHP  # model name
    base_dir: './checkpoints/'
  trainer_config:
    batch_size: 256
    max_epoch: 200
    shuffle: false
    optimizer: adam
    learning_rate: 1.e-3
    valid_freq: 1
    use_tfb: false
    metrics: ['acc', 'rmse']
    seed: 2019
    gpu: -1
  model_config:
    hidden_size: 16
    time_emb_size: 4
    num_layers: 2
    num_heads: 2
    loss_integral_num_sample_per_step: 10
    use_ln: false
    thinning:
      num_seq: 2
      num_sample: 1
      num_exp: 50  # number of i.i.d. Exp(intensity_bound) draws at one time in thinning algorithm
      look_ahead_time: 10
      patience_counter: 5  # the maximum iteration used in adaptive thinning
      over_sample_rate: 5
      num_samples_boundary: 5
      dtime_max: 5
      num_step_gen: 1

AttNHP_gen:
  base_config:
    stage: gen
    backend: torch
    dataset_id: retweet
    runner_id: std_tpp
    model_id: AttNHP  # model name
    base_dir: './checkpoints/'
  trainer_config:
    batch_size: 256
    max_epoch: 1
  model_config:
    hidden_size: 16
    time_emb_size: 4
    num_layers: 2
    num_heads: 2
    mc_num_sample_per_step: 20
    loss_integral_num_sample_per_step: 20
    use_ln: false
    # pretrained_model_dir: ./checkpoints/6934_4375315840_230603-222826/models/saved_model
    thinning:
      num_seq: 10
      num_sample: 1
      num_exp: 50  # number of i.i.d. Exp(intensity_bound) draws at one time in thinning algorithm
      look_ahead_time: 10
      patience_counter: 5  # the maximum iteration used in adaptive thinning
      over_sample_rate: 5
      num_samples_boundary: 5
      dtime_max: 5
      num_step_gen: 10

# Example configuration for training State-Space Point Process (S2P2) model.
S2P2_train:
  base_config:
    stage: train
    backend: torch
    dataset_id: taxi
    runner_id: std_tpp
    model_id: S2P2
    base_dir: './checkpoints/'
  trainer_config:
    batch_size: 256
    max_epoch: 300
    shuffle: true
    optimizer: adam
    learning_rate: 1.e-2
    valid_freq: 1
    use_tfb: false
    metrics: ['acc', 'rmse']
    seed: 2019
    gpu: -1  # ID of GPU to use. Set to -1 to use CPU instead. `mps` backend could lead to incorrect results, please use CPU or CUDA.
  model_config:
    hidden_size: 128  # Number of dimensions for u_t and y_t, labeled as H in the paper.
    loss_integral_num_sample_per_step: 10  # How many time points to use to estimate the integrated intensity between each pair of subsequent events for the log-likelihood.
    use_mc_samples: true  # Use Monte-Carlo sampling for the integral estimation. If False, uses a quadrature with a grid of evenly spaced points.
    num_layers: 4  # Number of LLH layers.
    model_specs:
      P: 16  # Number of dimensions for the hidden state x_t, labeled as P in the paper.
      dropout_rate: 0.1  # Dropout rate, used immediately after the activation function between layers but before the normalization. Formally, we set u^{(l+1)}_t = LayerNorm(dropout(\sigma(y^{(l)}_t)) + u^{(l)}_t).
      act_func: gelu  # gelu | half_glu | full_glu # Activation function to use between layers.
      for_loop: true  # If enabled, uses for-loop for computing the recurrence in the LLH layers. If disabled, uses a parallel scan.
      pre_norm: false  # Should be set to False. If True, uses a LayerNorm on the inputs to a LLH layer.
      post_norm: true  # Should be set to True. If True, uses a LayerNorm on the outputs of a LLH layer (after transforming and adding the residual).
      int_forward_variant: false  # Should be set to False. If True, uses u_{t_i} as the ZOH constant for u_t with t \in (t_i, t_{i+1}].
      int_backward_variant: true  # Should be set to True. If True, uses u_{t_{i+1}-} as the ZOH constant for u_t with t \in (t_i, t_{i+1}].
      relative_time: true  # If True, predicts the scaling factor to be applied to the dynamics between each pair of subsequent events. See Sec. 3.3 of the paper.