---
# EasyTPP runner configuration.
# Layout:
#   data:        per-dataset locations and tokenizer specs (pad_token_id == num_event_types)
#   <Model>_train / _gen / _eval:  one run config per model and stage, each with
#       base_config (stage/backend/dataset/model), trainer_config (optimizer etc.)
#       and model_config (architecture + thinning-sampler parameters).
pipeline_config_id: runner_config

data:
  taxi:
    data_format: json
    train_dir: easytpp/taxi  # ./data/taxi/train.json
    valid_dir: easytpp/taxi  # ./data/taxi/dev.json
    test_dir: easytpp/taxi  # ./data/taxi/test.json
    data_specs:
      num_event_types: 10
      pad_token_id: 10
      padding_side: right
      # padding_strategy: max_length
      # truncation_strategy: longest_first  # or Truncate to a maximum length specified with the argument `max_length`
      # max_len: 20
  conttime:
    data_format: pkl
    train_dir: ../data/conttime/train.pkl
    valid_dir: ../data/conttime/dev.pkl
    test_dir: ../data/conttime/test.pkl
    data_specs:
      num_event_types: 5
      pad_token_id: 5
      padding_side: right
      truncation_side: right
      # padding_strategy: max_length  # for ode tpp we have to set this to max_length
      # max_len: 20
  hawkes_1d:
    data_format: pkl
    train_dir: ../data/hawkes/train.pkl
    valid_dir: ../data/hawkes/dev.pkl
    test_dir: ../data/hawkes/test.pkl
    data_specs:
      num_event_types: 1
      pad_token_id: 1
      padding_side: right
      truncation_side: right
  retweet:
    data_format: pkl
    train_dir: ../data/retweet/train.pkl
    valid_dir: ../data/retweet/dev.pkl
    test_dir: ../data/retweet/test.pkl
    data_specs:
      num_event_types: 3
      pad_token_id: 3
      padding_side: right
      truncation_side: right

RMTPP_train:
  base_config:
    stage: train
    backend: torch
    dataset_id: taxi
    runner_id: std_tpp
    model_id: RMTPP  # model name
    base_dir: './checkpoints/'
  trainer_config:
    batch_size: 256
    max_epoch: 20
    shuffle: false
    optimizer: adam
    learning_rate: 1.e-3
    valid_freq: 1
    use_tfb: false
    metrics: ['acc', 'rmse']
    seed: 2019
    gpu: -1
  model_config:
    hidden_size: 32
    time_emb_size: 16
    num_layers: 2
    num_heads: 2
    mc_num_sample_per_step: 20
    sharing_param_layer: false
    loss_integral_num_sample_per_step: 20
    dropout: 0.0
    use_ln: false
    thinning:
      num_seq: 10
      num_sample: 1
      num_exp: 500  # number of i.i.d. Exp(intensity_bound) draws at one time in thinning algorithm
      look_ahead_time: 10
      patience_counter: 5  # the maximum iteration used in adaptive thinning
      over_sample_rate: 5
      num_samples_boundary: 5
      dtime_max: 5

RMTPP_gen:
  base_config:
    stage: gen
    backend: torch
    dataset_id: retweet
    runner_id: std_tpp
    base_dir: './checkpoints/'
    model_id: RMTPP
  model_config:
    hidden_size: 32
    time_emb_size: 16
    mc_num_sample_per_step: 20
    sharing_param_layer: false
    loss_integral_num_sample_per_step: 20
    dropout: 0.0
    use_ln: false
    seed: 2019
    gpu: 0
    pretrained_model_dir: ./checkpoints/2555_4348724608_230603-155841/models/saved_model
    thinning:
      num_seq: 10
      num_sample: 1
      num_exp: 500  # number of i.i.d. Exp(intensity_bound) draws at one time in thinning algorithm
      look_ahead_time: 10
      patience_counter: 5  # the maximum iteration used in adaptive thinning
      over_sample_rate: 5
      num_samples_boundary: 5
      dtime_max: 5
      num_step_gen: 10

NHP_eval:
  base_config:
    stage: eval
    backend: torch
    dataset_id: taxi
    runner_id: std_tpp
    base_dir: './checkpoints/'
    model_id: NHP
  trainer_config:
    batch_size: 256
    max_epoch: 1
  model_config:
    hidden_size: 64
    use_ln: false
    seed: 2019
    gpu: 0
    pretrained_model_dir: ./checkpoints/26507_4380788096_231111-101848/models/saved_model
    thinning:
      num_seq: 10
      num_sample: 1
      num_exp: 500  # number of i.i.d. Exp(intensity_bound) draws at one time in thinning algorithm
      look_ahead_time: 10
      patience_counter: 5  # the maximum iteration used in adaptive thinning
      over_sample_rate: 5
      num_samples_boundary: 5
      dtime_max: 5

NHP_gen:
  base_config:
    stage: eval
    backend: torch
    dataset_id: taxi
    runner_id: std_tpp
    model_id: NHP  # model name
    base_dir: './checkpoints/'
  trainer_config:
    batch_size: 256
    max_epoch: 20
    shuffle: false
    optimizer: adam
    learning_rate: 1.e-3
    valid_freq: 1
    use_tfb: false
    metrics: ['acc', 'rmse']
    seed: 2019
    gpu: -1
  model_config:
    hidden_size: 64
    loss_integral_num_sample_per_step: 20
    pretrained_model_dir: ./checkpoints/75518_4377527680_230530-132355/models/saved_model
    thinning:
      num_seq: 10
      num_sample: 1
      num_exp: 500  # number of i.i.d. Exp(intensity_bound) draws at one time in thinning algorithm
      look_ahead_time: 10
      patience_counter: 5  # the maximum iteration used in adaptive thinning
      over_sample_rate: 5
      num_samples_boundary: 5
      dtime_max: 5
      num_step_gen: 1

FullyNN_train:
  base_config:
    stage: train
    backend: torch
    dataset_id: taxi
    runner_id: std_tpp
    model_id: FullyNN  # model name
    base_dir: './checkpoints/'
  trainer_config:
    batch_size: 256
    max_epoch: 200
    shuffle: false
    optimizer: adam
    learning_rate: 1.e-3
    valid_freq: 1
    use_tfb: false
    metrics: ['acc', 'rmse']
    seed: 2019
    gpu: 0
  model_config:
    rnn_type: LSTM
    hidden_size: 32
    time_emb_size: 4
    num_layers: 2
    num_heads: 2
    mc_num_sample_per_step: 20
    sharing_param_layer: false
    loss_integral_num_sample_per_step: 20
    dropout: 0.0
    use_ln: false
    model_specs:
      num_mlp_layers: 3
    # thinning:
    #   num_seq: 10
    #   num_sample: 1
    #   num_exp: 500  # number of i.i.d. Exp(intensity_bound) draws at one time in thinning algorithm
    #   look_ahead_time: 10
    #   patience_counter: 5  # the maximum iteration used in adaptive thinning
    #   over_sample_rate: 5
    #   num_samples_boundary: 5
    #   dtime_max: 5
    #   num_step_gen: 1

IntensityFree_train:
  base_config:
    stage: train
    backend: torch
    dataset_id: taxi
    runner_id: std_tpp
    model_id: IntensityFree  # model name
    base_dir: './checkpoints/'
  trainer_config:
    batch_size: 256
    max_epoch: 200
    shuffle: false
    optimizer: adam
    learning_rate: 1.e-3
    valid_freq: 1
    use_tfb: false
    metrics: ['acc', 'rmse']
    seed: 2019
    gpu: 0
  model_config:
    hidden_size: 32
    time_emb_size: 16
    num_layers: 2
    num_heads: 2
    mc_num_sample_per_step: 20
    sharing_param_layer: false
    loss_integral_num_sample_per_step: 20
    dropout: 0.0
    use_ln: false
    model_specs:
      num_mix_components: 3
    thinning:
      num_seq: 10
      num_sample: 1
      num_exp: 500  # number of i.i.d. Exp(intensity_bound) draws at one time in thinning algorithm
      look_ahead_time: 10
      patience_counter: 5  # the maximum iteration used in adaptive thinning
      over_sample_rate: 5
      num_samples_boundary: 5
      dtime_max: 5
      num_step_gen: 1

ODETPP_train:
  base_config:
    stage: train
    backend: torch
    dataset_id: taxi
    runner_id: std_tpp
    model_id: ODETPP  # model name
    base_dir: './checkpoints/'
  trainer_config:
    batch_size: 32
    max_epoch: 200
    shuffle: false
    optimizer: adam
    learning_rate: 1.e-1
    valid_freq: 1
    use_tfb: false
    metrics: ['acc', 'rmse']
    seed: 2019
    gpu: -1
  model_config:
    hidden_size: 4
    time_emb_size: 4
    num_layers: 1
    sharing_param_layer: false
    loss_integral_num_sample_per_step: 20
    dropout: 0.0
    use_ln: false
    model_specs:
      ode_num_sample_per_step: 2
      time_factor: 100
    thinning:
      num_seq: 10
      num_sample: 1
      num_exp: 50  # number of i.i.d. Exp(intensity_bound) draws at one time in thinning algorithm
      look_ahead_time: 10
      patience_counter: 5  # the maximum iteration used in adaptive thinning
      over_sample_rate: 5
      num_samples_boundary: 5
      dtime_max: 5
      num_step_gen: 1

ODETPP_gen:
  base_config:
    stage: gen
    backend: torch
    dataset_id: retweet
    runner_id: std_tpp
    base_dir: './checkpoints/'
    model_id: ODETPP
  trainer_config:
    batch_size: 256
    max_epoch: 1
  model_config:
    hidden_size: 32
    time_emb_size: 16
    num_layers: 1
    sharing_param_layer: false
    loss_integral_num_sample_per_step: 20
    dropout: 0.0
    use_ln: false
    seed: 2019
    gpu: 0
    pretrained_model_dir: ./checkpoints/3538_4310828416_230603-165911/models/saved_model
    model_specs:
      ode_num_sample_per_step: 2
      time_factor: 100
    thinning:
      num_seq: 10
      num_sample: 1
      num_exp: 500  # number of i.i.d. Exp(intensity_bound) draws at one time in thinning algorithm
      look_ahead_time: 10
      patience_counter: 5  # the maximum iteration used in adaptive thinning
      over_sample_rate: 5
      num_samples_boundary: 5
      dtime_max: 5
      num_step_gen: 10

NHP_train:
  base_config:
    stage: train
    backend: torch
    dataset_id: taxi
    runner_id: std_tpp
    model_id: NHP  # model name
    base_dir: './checkpoints/'
  trainer_config:
    batch_size: 256
    max_epoch: 2
    shuffle: false
    optimizer: adam
    learning_rate: 1.e-3
    valid_freq: 1
    use_tfb: false
    metrics: ['acc', 'rmse']
    seed: 2019
    gpu: -1
  model_config:
    hidden_size: 64
    loss_integral_num_sample_per_step: 20
    # pretrained_model_dir: ./checkpoints/75518_4377527680_230530-132355/models/saved_model
    thinning:
      num_seq: 10
      num_sample: 1
      num_exp: 500  # number of i.i.d. Exp(intensity_bound) draws at one time in thinning algorithm
      look_ahead_time: 10
      patience_counter: 5  # the maximum iteration used in adaptive thinning
      over_sample_rate: 5
      num_samples_boundary: 5
      dtime_max: 5
      num_step_gen: 1

SAHP_train:
  base_config:
    stage: train
    backend: torch
    dataset_id: taxi
    runner_id: std_tpp
    model_id: SAHP  # model name
    base_dir: './checkpoints/'
  trainer_config:
    batch_size: 256
    max_epoch: 20
    shuffle: false
    optimizer: adam
    learning_rate: 1.e-3
    valid_freq: 1
    use_tfb: false
    metrics: ['acc', 'rmse']
    seed: 2019
    gpu: 0
  model_config:
    hidden_size: 32
    time_emb_size: 16
    num_layers: 2
    num_heads: 2
    loss_integral_num_sample_per_step: 20
    use_ln: false
    thinning:
      num_seq: 10
      num_sample: 1
      num_exp: 500  # number of i.i.d. Exp(intensity_bound) draws at one time in thinning algorithm
      look_ahead_time: 10
      patience_counter: 5  # the maximum iteration used in adaptive thinning
      over_sample_rate: 5
      num_samples_boundary: 5
      dtime_max: 5
      num_step_gen: 1

SAHP_gen:
  base_config:
    stage: gen
    backend: torch
    dataset_id: retweet
    runner_id: std_tpp
    model_id: SAHP  # model name
    base_dir: './checkpoints/'
  trainer_config:
    batch_size: 256
    max_epoch: 1
  model_config:
    hidden_size: 16
    time_emb_size: 4
    num_layers: 2
    num_heads: 2
    loss_integral_num_sample_per_step: 20
    use_ln: false
    thinning:
      num_seq: 10
      num_sample: 1
      num_exp: 500  # number of i.i.d. Exp(intensity_bound) draws at one time in thinning algorithm
      look_ahead_time: 10
      patience_counter: 5  # the maximum iteration used in adaptive thinning
      over_sample_rate: 5
      num_samples_boundary: 5
      dtime_max: 5
      num_step_gen: 10

THP_train:
  base_config:
    stage: train
    backend: torch
    dataset_id: taxi
    runner_id: std_tpp
    model_id: THP  # model name
    base_dir: './checkpoints/'
  trainer_config:
    batch_size: 256
    max_epoch: 30
    shuffle: false
    optimizer: adam
    learning_rate: 1.e-3
    valid_freq: 1
    use_tfb: false
    metrics: ['acc', 'rmse']
    seed: 2019
    gpu: -1
  model_config:
    hidden_size: 32
    time_emb_size: 16
    num_layers: 2
    num_heads: 2
    mc_num_sample_per_step: 20
    loss_integral_num_sample_per_step: 20
    use_ln: false
    thinning:
      num_seq: 10
      num_sample: 1
      num_exp: 500  # number of i.i.d. Exp(intensity_bound) draws at one time in thinning algorithm
      look_ahead_time: 10
      patience_counter: 5  # the maximum iteration used in adaptive thinning
      over_sample_rate: 5
      num_samples_boundary: 5
      dtime_max: 5
      num_step_gen: 1

THP_gen:
  base_config:
    stage: gen
    backend: torch
    dataset_id: retweet
    runner_id: std_tpp
    model_id: THP  # model name
    base_dir: './checkpoints/'
  trainer_config:
    batch_size: 256
    max_epoch: 1
  model_config:
    hidden_size: 32
    time_emb_size: 16
    num_layers: 2
    num_heads: 2
    mc_num_sample_per_step: 20
    loss_integral_num_sample_per_step: 20
    use_ln: false
    # pretrained_model_dir: ./checkpoints/2694_4384867712_230603-160544/models/saved_model
    thinning:
      num_seq: 10
      num_sample: 1
      num_exp: 500  # number of i.i.d. Exp(intensity_bound) draws at one time in thinning algorithm
      look_ahead_time: 10
      patience_counter: 5  # the maximum iteration used in adaptive thinning
      over_sample_rate: 5
      num_samples_boundary: 5
      dtime_max: 5
      num_step_gen: 10

AttNHP_train:
  base_config:
    stage: train
    backend: torch
    dataset_id: taxi
    runner_id: std_tpp
    model_id: AttNHP  # model name
    base_dir: './checkpoints/'
  trainer_config:
    batch_size: 256
    max_epoch: 200
    shuffle: false
    optimizer: adam
    learning_rate: 1.e-3
    valid_freq: 1
    use_tfb: false
    metrics: ['acc', 'rmse']
    seed: 2019
    gpu: -1
  model_config:
    hidden_size: 16
    time_emb_size: 4
    num_layers: 2
    num_heads: 2
    loss_integral_num_sample_per_step: 10
    use_ln: false
    thinning:
      num_seq: 2
      num_sample: 1
      num_exp: 50  # number of i.i.d. Exp(intensity_bound) draws at one time in thinning algorithm
      look_ahead_time: 10
      patience_counter: 5  # the maximum iteration used in adaptive thinning
      over_sample_rate: 5
      num_samples_boundary: 5
      dtime_max: 5
      num_step_gen: 1

AttNHP_gen:
  base_config:
    stage: gen
    backend: torch
    dataset_id: retweet
    runner_id: std_tpp
    model_id: AttNHP  # model name
    base_dir: './checkpoints/'
  trainer_config:
    batch_size: 256
    max_epoch: 1
  model_config:
    hidden_size: 16
    time_emb_size: 4
    num_layers: 2
    num_heads: 2
    mc_num_sample_per_step: 20
    loss_integral_num_sample_per_step: 20
    use_ln: false
    # pretrained_model_dir: ./checkpoints/6934_4375315840_230603-222826/models/saved_model
    thinning:
      num_seq: 10
      num_sample: 1
      num_exp: 50  # number of i.i.d. Exp(intensity_bound) draws at one time in thinning algorithm
      look_ahead_time: 10
      patience_counter: 5  # the maximum iteration used in adaptive thinning
      over_sample_rate: 5
      num_samples_boundary: 5
      dtime_max: 5
      num_step_gen: 10

# Example configuration for training State-Space Point Process (S2P2) model.
S2P2_train:
  base_config:
    stage: train
    backend: torch
    dataset_id: taxi
    runner_id: std_tpp
    model_id: S2P2
    base_dir: './checkpoints/'
  trainer_config:
    batch_size: 256
    max_epoch: 300
    shuffle: true
    optimizer: adam
    learning_rate: 1.e-2
    valid_freq: 1
    use_tfb: false
    metrics: ['acc', 'rmse']
    seed: 2019
    gpu: -1  # ID of GPU to use. Set to -1 to use CPU instead. `mps` backend could lead to incorrect results, please use CPU or CUDA.
  model_config:
    hidden_size: 128  # Number of dimensions for u_t and y_t, labeled as H in the paper.
    loss_integral_num_sample_per_step: 10  # How many time points to use to estimate the integrated intensity between each pair of subsequent events for the log-likelihood.
    use_mc_samples: true  # Use Monte-Carlo sampling for the integral estimation. If False, uses a quadrature with a grid of evenly spaced points.
    num_layers: 4  # Number of LLH layers.
    model_specs:
      P: 16  # Number of dimensions for the hidden state x_t, labeled as P in the paper.
      dropout_rate: 0.1  # Dropout rate, used immediately after the activation function between layers but before the normalization. Formally, we set u^{(l+1)}_t = LayerNorm(dropout(\sigma(y^{(l)}_t)) + u^{(l)}_t).
      act_func: gelu  # gelu | half_glu | full_glu # Activation function to use between layers.
      for_loop: true  # If enabled, uses for-loop for computing the recurrence in the LLH layers. If disabled, uses a parallel scan.
      pre_norm: false  # Should be set to False. If True, uses a LayerNorm on the inputs to a LLH layer.
      post_norm: true  # Should be set to True. If True, uses a LayerNorm on the outputs of a LLH layer (after transforming and adding the residual).
      int_forward_variant: false  # Should be set to False. If True, uses u_{t_i} as the ZOH constant for u_t with t \in (t_i, t_{i+1}].
      int_backward_variant: true  # Should be set to True. If True, uses u_{t_{i+1}-} as the ZOH constant for u_t with t \in (t_i, t_{i+1}].
      relative_time: true  # If True, predicts the scaling factor to be applied to the dynamics between each pair of subsequent events. See Sec. 3.3 of the paper.