File size: 7,503 Bytes
9b43be1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
# Data section: files read per dataset directory, loader settings, and the
# train/validation directory lists.
dataset:
  dataset_kwargs:
    # NOTE(review): presumably the loaded field whose shape decides how samples
    # are grouped by dimension; confirm against the loader.
    field_name_for_dimension_grouping: base_intensity_functions
    # Field name -> file name looked up for each field.
    files_to_load:
      base_intensity_functions: base_intensity_functions.pt
      event_times: event_times.pt
      event_types: event_types.pt
      kernel_functions: kernel_functions.pt
      time_offsets: time_offsets.pt
    shuffle: true
  loader_kwargs:
    batch_size: 6
    full_len_ratio: 0.1
    max_number_of_minibatch_sizes: 8
    # Path-count and sequence-length bounds (min/max pairs).
    max_path_count: 2000
    max_sequence_len: 100
    min_path_count: 400
    min_sequence_len: 15
    num_inference_paths: 1
    num_inference_times: 2000
    num_workers: 16
    test_batch_size: 2
    variable_num_of_paths: true
    # Variable sequence lengths are switched on for training only.
    variable_sequence_lens: {train: true, validation: false}
  name: HawkesDataLoader
  path:
    # 36 training directories: 1D/5D/10D/15D/22D processes with exp and
    # rayleigh kernels, plus poisson, no_interactions and sparse variants.
    train: !!python/tuple
      - data/synthetic_data/hawkes/1k_1D_2k_paths_const_base_exp_kernel/train
      - data/synthetic_data/hawkes/1k_1D_2k_paths_const_base_exp_kernel_no_interactions/train
      - data/synthetic_data/hawkes/1k_1D_2k_paths_Gamma_base_exp_kernel/train
      - data/synthetic_data/hawkes/1k_5D_2k_paths_const_base_exp_kernel/train
      - data/synthetic_data/hawkes/1k_5D_2k_paths_const_base_exp_kernel_no_interactions/train
      - data/synthetic_data/hawkes/1k_5D_2k_paths_Gamma_base_exp_kernel/train
      - data/synthetic_data/hawkes/1k_10D_2k_paths_const_base_exp_kernel/train
      - data/synthetic_data/hawkes/1k_10D_2k_paths_const_base_exp_kernel_no_interactions/train
      - data/synthetic_data/hawkes/1k_10D_2k_paths_poisson/train
      - data/synthetic_data/hawkes/1k_10D_2k_paths_Gamma_base_exp_kernel/train
      - data/synthetic_data/hawkes/1k_10D_2k_paths_sin_base_exp_kernel/train
      - data/synthetic_data/hawkes/1k_15D_2k_paths_const_base_exp_kernel/train
      - data/synthetic_data/hawkes/1k_15D_2k_paths_const_base_exp_kernel_no_interactions/train
      - data/synthetic_data/hawkes/1k_15D_2k_paths_poisson/train
      - data/synthetic_data/hawkes/1k_15D_2k_paths_Gamma_base_exp_kernel/train
      - data/synthetic_data/hawkes/1k_15D_2k_paths_sin_base_exp_kernel/train
      - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_exp_kernel/train
      - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_exp_kernel_no_interactions/train
      - data/synthetic_data/hawkes/5k_22D_2k_paths_poisson/train
      - data/synthetic_data/hawkes/5k_22D_2k_paths_Gamma_base_exp_kernel/train
      - data/synthetic_data/hawkes/5k_22D_2k_paths_sin_base_exp_kernel/train
      - data/synthetic_data/hawkes/1k_10D_2k_paths_const_base_exp_kernel_sparse/train
      - data/synthetic_data/hawkes/1k_10D_2k_paths_Gamma_base_exp_kernel_sparse/train
      - data/synthetic_data/hawkes/1k_10D_2k_paths_sin_base_exp_kernel_sparse/train
      - data/synthetic_data/hawkes/1k_15D_2k_paths_const_base_exp_kernel_sparse/train
      - data/synthetic_data/hawkes/1k_15D_2k_paths_Gamma_base_exp_kernel_sparse/train
      - data/synthetic_data/hawkes/1k_15D_2k_paths_sin_base_exp_kernel_sparse/train
      - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_exp_kernel_sparse/train
      - data/synthetic_data/hawkes/1k_1D_2k_paths_const_base_rayleigh_kernel/train
      - data/synthetic_data/hawkes/1k_5D_2k_paths_const_base_rayleigh_kernel/train
      - data/synthetic_data/hawkes/1k_15D_2k_paths_const_base_rayleigh_kernel/train
      - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_rayleigh_kernel/train
      - data/synthetic_data/hawkes/1k_1D_2k_paths_const_base_rayleigh_kernel_sparse/train
      - data/synthetic_data/hawkes/1k_5D_2k_paths_const_base_rayleigh_kernel_sparse/train
      - data/synthetic_data/hawkes/1k_10D_2k_paths_const_base_rayleigh_kernel_sparse/train
      - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_rayleigh_kernel_sparse/train
    # 7 validation directories, all from the 5k_22D datasets.
    validation: !!python/tuple
      - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_exp_kernel/val
      - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_exp_kernel_no_interactions/val
      - data/synthetic_data/hawkes/5k_22D_2k_paths_poisson/val
      - data/synthetic_data/hawkes/5k_22D_2k_paths_Gamma_base_exp_kernel/val
      - data/synthetic_data/hawkes/5k_22D_2k_paths_sin_base_exp_kernel/val
      - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_rayleigh_kernel/val
      - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_rayleigh_kernel_sparse/val
# Distributed-training settings; `enabled` is false in this configuration.
distributed:
  # NOTE(review): key spelling ("chekpoint") is kept verbatim; presumably the
  # consuming code looks it up under this exact name. Confirm before renaming.
  activation_chekpoint: false
  checkpoint_type: full_state
  enabled: false
  # Written as a plain integer on purpose. The exponent form `1e5` (no decimal
  # point, no exponent sign) is resolved to the *string* "1e5" by YAML 1.1
  # loaders such as PyYAML, but to a float by YAML 1.2 loaders.
  min_num_params: 100000
  sharding_strategy: NO_SHARD
  # NOTE(review): value spelling ("BAZED") is kept verbatim; presumably compared
  # as an exact string by the consumer. Confirm before correcting.
  wrap_policy: SIZE_BAZED
# Run identity: device placement, run name and random seed.
experiment:
  device_map: 'auto'
  # The run name embeds the seed (10) and the dimension-grouping field name.
  name: 'FIM_Hawkes_10-22st_nll_mc_only_2000_paths_mixed_100_events_mixed-experiment-seed-10-dataset-dataset_kwargs-field_name_for_dimension_grouping-base_intensity_functions'
  name_add_date: true
  seed: 10
# Model section for `model_type: fimhawkes`. Every width given below
# (hidden_dim, each out_features, each MLP hidden layer) is 256, and every
# transformer layer uses 4 heads with dropout 0.0.
model:
  # alpha/beta/mu decoders share one spec: a two-layer GELU MLP.
  alpha_decoder:
    hidden_act: {name: torch.nn.GELU}
    hidden_layers: !!python/tuple [256, 256]
    name: fim.models.blocks.base.MLP
  beta_decoder:
    hidden_act: {name: torch.nn.GELU}
    hidden_layers: !!python/tuple [256, 256]
    name: fim.models.blocks.base.MLP
  # Two-layer transformer encoder.
  context_summary_encoder:
    encoder_layer:
      batch_first: true
      dropout: 0.0
      name: torch.nn.TransformerEncoderLayer
      nhead: 4
    name: torch.nn.TransformerEncoder
    num_layers: 2
  context_summary_pooling:
    attention: {nhead: 4}
    name: fim.models.blocks.neural_operators.AttentionOperator
    num_res_layers: 1
    paths_block_attention: false
  # Four-layer transformer encoder.
  context_ts_encoder:
    encoder_layer:
      batch_first: true
      dropout: 0.0
      name: torch.nn.TransformerEncoderLayer
      nhead: 4
    name: torch.nn.TransformerEncoder
    num_layers: 4
  # Four-layer transformer decoder.
  decoder_ts:
    decoder_layer:
      batch_first: true
      dropout: 0.0
      name: torch.nn.TransformerDecoderLayer
      nhead: 4
    name: torch.nn.TransformerDecoder
    num_layers: 4
  delta_time_encoder:
    name: fim.models.blocks.positional_encodings.SineTimeEncoding
    out_features: 256
  # No out_features is given here, unlike mark_encoder further down.
  evaluation_mark_encoder: {name: torch.nn.Linear}
  hidden_act: {name: torch.nn.GELU}
  hidden_dim: 256
  # Only the nll term carries a non-zero weight.
  loss_weights:
    alpha: 0.0
    mu: 0.0
    nll: 1.0
    relative_spike: 0.0
    smape: 0.0
  mark_encoder:
    name: torch.nn.Linear
    out_features: 256
  mark_fusion_attention: null
  # Equals the largest dimension (22D) among the dataset directories.
  max_num_marks: 22
  model_type: fimhawkes
  mu_decoder:
    hidden_act: {name: torch.nn.GELU}
    hidden_layers: !!python/tuple [256, 256]
    name: fim.models.blocks.base.MLP
  nll:
    method: monte_carlo
    num_integration_points: 200
  normalize_by_max_time: false
  normalize_times: true
  thinning: null
  time_encoder:
    name: fim.models.blocks.positional_encodings.SineTimeEncoding
    out_features: 256
# Optimizer list: a single AdamW entry keyed `optimizer_d`.
optimizers: !!python/tuple
  - optimizer_d:
      lr: 5.0e-05
      name: torch.optim.AdamW
      # Same value as 0.0001, written in the notation used for lr.
      weight_decay: 1.0e-04
# Training-loop settings: run length, evaluation hook, logging, precision and
# parameter schedulers.
trainer:
  best_metric: loss
  debug_iterations: null
  detect_anomaly: false
  epochs: 100000
  # Evaluation hook bound to the `validation` iterator; plotting is disabled.
  evaluation_epoch:
    enable_plotting: false
    inference_path_idx: 0
    iterator_name: validation
    path: fim.trainers.evaluation_epochs.HawkesEvaluationPlots
    plot_frequency: 10
  experiment_dir: ./results/
  gradient_accumulation_steps: 6
  # %-style logging format string; quoted so the ' - ' separators and '%'
  # characters are unmistakably part of one scalar.
  logging_format: 'RANK_%(rank)s - %(asctime)s - %(name)s - %(levelname)s - %(message)s'
  name: Trainer
  precision: bf16_mixed
  save_every: 1
  # Three ConstantScheduler entries, all with beta 1.0, differing only by label.
  schedulers: !!python/tuple
    - beta: 1.0
      label: gauss_nll
      name: fim.utils.param_scheduler.ConstantScheduler
    - beta: 1.0
      label: init_cross_entropy
      name: fim.utils.param_scheduler.ConstantScheduler
    - beta: 1.0
      label: missing_link
      name: fim.utils.param_scheduler.ConstantScheduler