Safetensors
patchtst
abao committed on
Commit
cc584ef
·
verified ·
1 Parent(s): 6f80852

Upload 3 files

Browse files
Files changed (3) hide show
  1. config.json +61 -0
  2. model.safetensors +3 -0
  3. training_info.json +515 -0
config.json ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation_function": "gelu",
3
+ "architectures": [
4
+ "PatchTSTForPretraining"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "bias": true,
8
+ "channel_attention": true,
9
+ "channel_consistent_masking": false,
10
+ "channel_rope": false,
11
+ "context_length": 1024,
12
+ "d_model": 768,
13
+ "distribution_output": null,
14
+ "do_mask_input": true,
15
+ "dropout": 0.0,
16
+ "ff_dropout": 0.0,
17
+ "ffn_dim": 512,
18
+ "head_dropout": 0.0,
19
+ "huber_delta": 1.0,
20
+ "init_std": 0.02,
21
+ "loss": "mse",
22
+ "mask_type": "random",
23
+ "mask_value": 0,
24
+ "max_wavelength": 500,
25
+ "mode": "pretrain",
26
+ "model_type": "patchtst",
27
+ "norm_eps": 1e-05,
28
+ "norm_type": "rmsnorm",
29
+ "num_attention_heads": 12,
30
+ "num_forecast_mask_patches": 3,
31
+ "num_hidden_layers": 12,
32
+ "num_input_channels": 1,
33
+ "num_parallel_samples": 100,
34
+ "num_poly_feats": 120,
35
+ "num_rff": 256,
36
+ "num_targets": 1,
37
+ "output_range": null,
38
+ "patch_length": 16,
39
+ "patch_stride": 16,
40
+ "path_dropout": 0.0,
41
+ "poly_degrees": 2,
42
+ "pooling_type": "max",
43
+ "positional_dropout": 0.0,
44
+ "positional_encoding_type": "sincos",
45
+ "pre_norm": true,
46
+ "prediction_length": 128,
47
+ "pretrained_encoder_path": null,
48
+ "pretrained_pft_path": null,
49
+ "random_mask_ratio": 0.5,
50
+ "rff_scale": 1.0,
51
+ "rff_trainable": false,
52
+ "rope_percent": 0.75,
53
+ "scaling": "std",
54
+ "share_embedding": true,
55
+ "share_projection": true,
56
+ "torch_dtype": "float32",
57
+ "transformers_version": "4.40.1",
58
+ "unmasked_channel_indices": null,
59
+ "use_cls_token": false,
60
+ "use_dynamics_embedding": false
61
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e18a3adea588ba6bfa50c4b4f85f4ddb32fe9e8b1cbf646430fa62dd13b48aad
3
+ size 264843096
training_info.json ADDED
@@ -0,0 +1,515 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_config": {
3
+ "mode": "pretrain",
4
+ "context_length": 1024,
5
+ "prediction_length": 128,
6
+ "distribution_output": null,
7
+ "loss": "mse",
8
+ "huber_delta": 1.0,
9
+ "patch_length": 16,
10
+ "patch_stride": 16,
11
+ "num_hidden_layers": 12,
12
+ "d_model": 768,
13
+ "num_attention_heads": 12,
14
+ "channel_attention": true,
15
+ "ffn_dim": 512,
16
+ "norm_type": "rmsnorm",
17
+ "norm_eps": 1e-05,
18
+ "attention_dropout": 0.0,
19
+ "positional_dropout": 0.0,
20
+ "path_dropout": 0.0,
21
+ "ff_dropout": 0.0,
22
+ "bias": true,
23
+ "activation_function": "gelu",
24
+ "pre_norm": true,
25
+ "use_cls_token": false,
26
+ "init_std": 0.02,
27
+ "scaling": "std",
28
+ "do_mask_input": null,
29
+ "mask_type": "random",
30
+ "random_mask_ratio": 0.5,
31
+ "num_forecast_mask_patches": 3,
32
+ "channel_consistent_masking": false,
33
+ "unmasked_channel_indices": null,
34
+ "mask_value": 0,
35
+ "pooling_type": "max",
36
+ "head_dropout": 0.0,
37
+ "num_parallel_samples": 100,
38
+ "channel_rope": false,
39
+ "max_wavelength": 500,
40
+ "rope_percent": 0.75,
41
+ "pretrained_encoder_path": null,
42
+ "pretrained_pft_path": null,
43
+ "use_dynamics_embedding": false,
44
+ "num_poly_feats": 120,
45
+ "poly_degrees": 2,
46
+ "rff_trainable": false,
47
+ "rff_scale": 1.0,
48
+ "num_rff": 256
49
+ },
50
+ "train_config": {
51
+ "seed": 99,
52
+ "max_steps": 800000,
53
+ "save_steps": 50000,
54
+ "log_steps": 1000,
55
+ "resume_from_checkpoint": null,
56
+ "per_device_train_batch_size": 192,
57
+ "gradient_accumulation_steps": 1,
58
+ "max_grad_norm": 1.0,
59
+ "dataloader_num_workers": 16,
60
+ "dataloader_prefetch_factor": 2,
61
+ "tf32": false,
62
+ "torch_compile": true,
63
+ "optim": "adamw_torch_fused",
64
+ "learning_rate": 0.001,
65
+ "lr_scheduler_type": "cosine",
66
+ "warmup_ratio": 0.05,
67
+ "weight_decay": 0.0,
68
+ "output_dir": "/stor/work/AMDG_Gilpin_Summer2024/checkpoints/",
69
+ "ddp_backend": "nccl",
70
+ "ddp_find_unused_parameters": false,
71
+ "remove_unused_columns": false
72
+ },
73
+ "all_config": {
74
+ "run_name": "panda_mlm_nh12_dmodel768_mixedp",
75
+ "wandb": {
76
+ "log": true,
77
+ "project_name": "panda",
78
+ "entity": "gilpinlab",
79
+ "group_name": null,
80
+ "resume": false,
81
+ "resume_run_id": null,
82
+ "tags": null
83
+ },
84
+ "patchtst": {
85
+ "mode": "pretrain",
86
+ "context_length": 1024,
87
+ "prediction_length": 128,
88
+ "distribution_output": null,
89
+ "loss": "mse",
90
+ "huber_delta": 1.0,
91
+ "patch_length": 16,
92
+ "patch_stride": 16,
93
+ "num_hidden_layers": 12,
94
+ "d_model": 768,
95
+ "num_attention_heads": 12,
96
+ "channel_attention": true,
97
+ "ffn_dim": 512,
98
+ "norm_type": "rmsnorm",
99
+ "norm_eps": 1e-05,
100
+ "attention_dropout": 0.0,
101
+ "positional_dropout": 0.0,
102
+ "path_dropout": 0.0,
103
+ "ff_dropout": 0.0,
104
+ "bias": true,
105
+ "activation_function": "gelu",
106
+ "pre_norm": true,
107
+ "use_cls_token": false,
108
+ "init_std": 0.02,
109
+ "scaling": "std",
110
+ "do_mask_input": null,
111
+ "mask_type": "random",
112
+ "random_mask_ratio": 0.5,
113
+ "num_forecast_mask_patches": 3,
114
+ "channel_consistent_masking": false,
115
+ "unmasked_channel_indices": null,
116
+ "mask_value": 0,
117
+ "pooling_type": "max",
118
+ "head_dropout": 0.0,
119
+ "num_parallel_samples": 100,
120
+ "channel_rope": false,
121
+ "max_wavelength": 500,
122
+ "rope_percent": 0.75,
123
+ "pretrained_encoder_path": null,
124
+ "pretrained_pft_path": null,
125
+ "use_dynamics_embedding": false,
126
+ "num_poly_feats": 120,
127
+ "poly_degrees": 2,
128
+ "rff_trainable": false,
129
+ "rff_scale": 1.0,
130
+ "num_rff": 256
131
+ },
132
+ "chronos": {
133
+ "model_id": "amazon/chronos-t5-mini",
134
+ "model_type": "seq2seq",
135
+ "random_init": false,
136
+ "tie_embeddings": true,
137
+ "context_length": 512,
138
+ "prediction_length": 128,
139
+ "num_samples": 20,
140
+ "n_tokens": 4096,
141
+ "n_special_tokens": 2,
142
+ "pad_token_id": 0,
143
+ "eos_token_id": 1,
144
+ "use_eos_token": true,
145
+ "tokenizer_class": "MeanScaleUniformBins",
146
+ "tokenizer_kwargs": {
147
+ "low_limit": -15.0,
148
+ "high_limit": 15.0
149
+ },
150
+ "temperature": 1.0,
151
+ "top_k": 50,
152
+ "top_p": 1.0
153
+ },
154
+ "train": {
155
+ "seed": 99,
156
+ "max_steps": 800000,
157
+ "save_steps": 50000,
158
+ "log_steps": 1000,
159
+ "resume_from_checkpoint": null,
160
+ "per_device_train_batch_size": 192,
161
+ "gradient_accumulation_steps": 1,
162
+ "max_grad_norm": 1.0,
163
+ "dataloader_num_workers": 16,
164
+ "dataloader_prefetch_factor": 2,
165
+ "tf32": false,
166
+ "torch_compile": true,
167
+ "optim": "adamw_torch_fused",
168
+ "learning_rate": 0.001,
169
+ "lr_scheduler_type": "cosine",
170
+ "warmup_ratio": 0.05,
171
+ "weight_decay": 0.0,
172
+ "output_dir": "/stor/work/AMDG_Gilpin_Summer2024/checkpoints/",
173
+ "ddp_backend": "nccl",
174
+ "ddp_find_unused_parameters": false,
175
+ "remove_unused_columns": false
176
+ },
177
+ "scheduler": {
178
+ "enabled": false,
179
+ "schedule_value_name": "noise_scale",
180
+ "schedule_name": "cosine",
181
+ "epoch_stop": 0.5,
182
+ "init_value": 1.0,
183
+ "final_value": 0.0,
184
+ "eps": 0.008,
185
+ "num_steps": 4,
186
+ "decay_rate": 8.0
187
+ },
188
+ "eval": {
189
+ "mode": "predict",
190
+ "data_paths_lst": null,
191
+ "checkpoint_path": null,
192
+ "device": "cuda:7",
193
+ "torch_dtype": "float32",
194
+ "batch_size": 32,
195
+ "num_subdirs": null,
196
+ "num_samples_per_subdir": null,
197
+ "sliding_context": true,
198
+ "save_contexts": false,
199
+ "save_labels": false,
200
+ "save_predictions": false,
201
+ "save_completions": false,
202
+ "save_masks": false,
203
+ "num_processes": 10,
204
+ "metric_names": [
205
+ "mse",
206
+ "mae",
207
+ "smape",
208
+ "spearman"
209
+ ],
210
+ "forecast_save_dir": null,
211
+ "labels_save_dir": null,
212
+ "completions_save_dir": null,
213
+ "patch_input_save_dir": null,
214
+ "timestep_masks_save_dir": null,
215
+ "metrics_save_dir": null,
216
+ "metrics_fname": "metrics",
217
+ "overwrite": false,
218
+ "seed": 1,
219
+ "num_samples": 1,
220
+ "parallel_sample_reduction": "mean",
221
+ "limit_prediction_length": true,
222
+ "context_length": 512,
223
+ "prediction_length": 64,
224
+ "num_test_instances": 1,
225
+ "window_style": "sampled",
226
+ "window_stride": 1,
227
+ "split_coords": false,
228
+ "verbose": false,
229
+ "baselines": {
230
+ "baseline_model": "fourier_arima",
231
+ "order": [
232
+ 4,
233
+ 1,
234
+ 4
235
+ ],
236
+ "num_fourier_terms": 5
237
+ },
238
+ "chronos": {
239
+ "zero_shot": false,
240
+ "deterministic": true
241
+ }
242
+ },
243
+ "run_metrics": {
244
+ "wandb_run_id": null,
245
+ "plot_dir": "figures",
246
+ "save_dir": null,
247
+ "save_fname": "metrics.json"
248
+ },
249
+ "train_data_dirs": [
250
+ "/stor/work/AMDG_Gilpin_Summer2024/data/improved/base_mixedp_ic16/train",
251
+ "/stor/work/AMDG_Gilpin_Summer2024/data/improved/skew_mixedp_ic16/train",
252
+ "/stor/work/AMDG_Gilpin_Summer2024/data/improved/final_skew40/train",
253
+ "/stor/work/AMDG_Gilpin_Summer2024/data/improved/final_skew40/train_z5_z10",
254
+ "/stor/work/AMDG_Gilpin_Summer2024/data/improved/final_base40/train",
255
+ "/stor/work/AMDG_Gilpin_Summer2024/data/improved/final_base40/train_z5_z10"
256
+ ],
257
+ "probability": null,
258
+ "shuffle_buffer_length": 100000,
259
+ "min_past": 60,
260
+ "max_missing_prop": 0.9,
261
+ "fixed_dim": 3,
262
+ "augmentations": {
263
+ "augmentation_rate": 0.2,
264
+ "probabilities": [
265
+ 0.3333333333333333,
266
+ 0.3333333333333333,
267
+ 0.3333333333333333,
268
+ 0.0,
269
+ 0.0
270
+ ],
271
+ "dim_range": [
272
+ 3,
273
+ 8
274
+ ],
275
+ "lag_range": [
276
+ 1,
277
+ 10
278
+ ],
279
+ "phase_surrogate_cutoff": 1.0,
280
+ "mode_range": [
281
+ 5,
282
+ 15
283
+ ],
284
+ "max_wavenumber": 10.0,
285
+ "max_amp": 10.0
286
+ },
287
+ "multiprocess_kwargs": {
288
+ "processes": null,
289
+ "maxtasksperchild": null
290
+ },
291
+ "restart_sampling": {
292
+ "split_name": null,
293
+ "params_json_path": null,
294
+ "systems_batch_size": 128,
295
+ "batch_idx_low": null,
296
+ "batch_idx_high": null,
297
+ "starting_sample_idx": 0,
298
+ "save_first_sample": true
299
+ },
300
+ "sampling": {
301
+ "data_dir": null,
302
+ "sys_class": "continuous_no_delay",
303
+ "test_split": 0.3,
304
+ "split_prefix": null,
305
+ "rseed": 999,
306
+ "ic_rseed": 888,
307
+ "num_points": 4096,
308
+ "num_periods": 40,
309
+ "num_periods_min": 40,
310
+ "num_periods_max": 40,
311
+ "num_ics": 1,
312
+ "num_param_perturbations": 4,
313
+ "param_scale": 0.5,
314
+ "split_coords": false,
315
+ "standardize": false,
316
+ "verbose": false,
317
+ "multiprocessing": true,
318
+ "debug_system": null,
319
+ "silence_integration_errors": false,
320
+ "save_params": true,
321
+ "save_traj_stats": false,
322
+ "ignore_probability": 0.0,
323
+ "sign_match_probability": 0.5,
324
+ "atol": 1e-10,
325
+ "rtol": 1e-09,
326
+ "reference_traj": {
327
+ "length": 4096,
328
+ "transient": 0.5,
329
+ "n_periods": 40,
330
+ "atol": 1e-07,
331
+ "rtol": 1e-06
332
+ }
333
+ },
334
+ "validator": {
335
+ "enable": true,
336
+ "verbose": false,
337
+ "transient_time_frac": 0.05,
338
+ "plot_save_dir": null,
339
+ "save_failed_trajs": false,
340
+ "attractor_tests": [
341
+ "check_not_linear",
342
+ "check_boundedness",
343
+ "check_not_fixed_point",
344
+ "check_zero_one_test",
345
+ "check_power_spectrum",
346
+ "check_stationarity"
347
+ ]
348
+ },
349
+ "events": {
350
+ "max_duration": 300,
351
+ "instability_threshold": 10000.0,
352
+ "min_step": 1e-10,
353
+ "verbose": true
354
+ },
355
+ "skew": {
356
+ "num_pairs": 5000,
357
+ "pairs_rseed": 123,
358
+ "sys_idx_low": 0,
359
+ "sys_idx_high": null,
360
+ "normalization_strategy": "flow_rms",
361
+ "randomize_driver_indices": true,
362
+ "transform_scales": true,
363
+ "train_nonskew_path": null,
364
+ "test_nonskew_path": null,
365
+ "coupling_map_type": "additive",
366
+ "coupling_map": {
367
+ "transform_scales": true,
368
+ "randomize_driver_indices": true,
369
+ "normalization_strategy": "flow_rms",
370
+ "random_seed": 0
371
+ }
372
+ },
373
+ "analysis": {
374
+ "data_dir": null,
375
+ "split": null,
376
+ "num_samples": null,
377
+ "one_dim_target": false,
378
+ "save_dir": "outputs",
379
+ "plots_dir": "figures",
380
+ "compute_quantile_limits": false,
381
+ "compute_max_lyapunov_exponents": false,
382
+ "filter_ensemble": true,
383
+ "filter_json_fname": "failed_samples",
384
+ "verbose": true,
385
+ "attractor_tests": [
386
+ "check_zero_one_test"
387
+ ],
388
+ "check_not_transient": {
389
+ "max_transient_prop": 0.2,
390
+ "atol": 0.001
391
+ },
392
+ "check_stationarity": {
393
+ "p_value": 0.05
394
+ },
395
+ "check_boundedness": {
396
+ "threshold": 10000.0,
397
+ "max_zscore": 5,
398
+ "eps": 1e-10
399
+ },
400
+ "check_zero_one_test": {
401
+ "threshold": 0.2,
402
+ "strategy": "score"
403
+ }
404
+ },
405
+ "base_style": "ggplot",
406
+ "matplotlib_style": {
407
+ "font": {
408
+ "serif": "Computer Modern Roman",
409
+ "size": 10
410
+ },
411
+ "axes": {
412
+ "titlesize": 12,
413
+ "labelsize": 10,
414
+ "linewidth": 0.75,
415
+ "facecolor": "white",
416
+ "grid": false
417
+ },
418
+ "grid": {
419
+ "color": "gray",
420
+ "linewidth": 0.5,
421
+ "alpha": 0.5
422
+ },
423
+ "lines": {
424
+ "linewidth": 1.5,
425
+ "markersize": 5
426
+ },
427
+ "xtick": {
428
+ "labelsize": 8,
429
+ "major": {
430
+ "size": 4
431
+ },
432
+ "minor": {
433
+ "size": 2
434
+ },
435
+ "direction": "in"
436
+ },
437
+ "ytick": {
438
+ "labelsize": 8,
439
+ "major": {
440
+ "size": 4
441
+ },
442
+ "minor": {
443
+ "size": 2
444
+ },
445
+ "direction": "in"
446
+ },
447
+ "figure": {
448
+ "figsize": [
449
+ 3.25,
450
+ 2.5
451
+ ],
452
+ "dpi": 300,
453
+ "autolayout": true,
454
+ "facecolor": "white"
455
+ },
456
+ "legend": {
457
+ "fontsize": 8,
458
+ "title_fontsize": 9,
459
+ "loc": "upper right",
460
+ "frameon": false
461
+ },
462
+ "savefig": {
463
+ "dpi": 300,
464
+ "format": "pdf",
465
+ "transparent": false
466
+ }
467
+ }
468
+ },
469
+ "job_info": {
470
+ "cuda_available": true,
471
+ "device_count": 6,
472
+ "device_names": {
473
+ "0": "AMD Instinct MI100",
474
+ "1": "AMD Instinct MI100",
475
+ "2": "AMD Instinct MI100",
476
+ "3": "AMD Instinct MI100",
477
+ "4": "AMD Instinct MI100",
478
+ "5": "AMD Instinct MI100"
479
+ },
480
+ "mem_info": {
481
+ "0": [
482
+ 6170607616,
483
+ 34342961152
484
+ ],
485
+ "1": [
486
+ 6174015488,
487
+ 34342961152
488
+ ],
489
+ "2": [
490
+ 6161432576,
491
+ 34342961152
492
+ ],
493
+ "3": [
494
+ 6161432576,
495
+ 34342961152
496
+ ],
497
+ "4": [
498
+ 6161432576,
499
+ 34342961152
500
+ ],
501
+ "5": [
502
+ 6174015488,
503
+ 34342961152
504
+ ]
505
+ },
506
+ "torchelastic_launched": true,
507
+ "world_size": 6,
508
+ "python_version": "3.10.14 (main, May 6 2024, 19:42:50) [GCC 11.2.0]",
509
+ "torch_version": "2.2.2+rocm5.7",
510
+ "numpy_version": "1.26.4",
511
+ "gluonts_version": "0.15.1",
512
+ "transformers_version": "4.40.1",
513
+ "accelerate_version": "1.7.0"
514
+ }
515
+ }