Safetensors
patchtst
abao committed on
Commit
25d348d
·
verified ·
1 Parent(s): a7c93e1

Upload 4 files

Browse files
Files changed (4) hide show
  1. config.json +61 -0
  2. generation_config.json +4 -0
  3. model.safetensors +3 -0
  4. training_info.json +513 -0
config.json ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation_function": "gelu",
3
+ "architectures": [
4
+ "PatchTSTForPrediction"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "bias": true,
8
+ "channel_attention": true,
9
+ "channel_consistent_masking": false,
10
+ "channel_rope": false,
11
+ "context_length": 512,
12
+ "d_model": 768,
13
+ "distribution_output": null,
14
+ "do_mask_input": false,
15
+ "dropout": 0.0,
16
+ "ff_dropout": 0.0,
17
+ "ffn_dim": 768,
18
+ "head_dropout": 0.0,
19
+ "huber_delta": 1.0,
20
+ "init_std": 0.02,
21
+ "loss": "mse",
22
+ "mask_type": "random",
23
+ "mask_value": 0,
24
+ "max_wavelength": 500,
25
+ "mode": "predict",
26
+ "model_type": "patchtst",
27
+ "norm_eps": 1e-05,
28
+ "norm_type": "rmsnorm",
29
+ "num_attention_heads": 12,
30
+ "num_forecast_mask_patches": 3,
31
+ "num_hidden_layers": 12,
32
+ "num_input_channels": 1,
33
+ "num_parallel_samples": 100,
34
+ "num_poly_feats": 188,
35
+ "num_rff": 376,
36
+ "num_targets": 1,
37
+ "output_range": null,
38
+ "patch_length": 16,
39
+ "patch_stride": 16,
40
+ "path_dropout": 0.0,
41
+ "poly_degrees": 2,
42
+ "pooling_type": "mean",
43
+ "positional_dropout": 0.0,
44
+ "positional_encoding_type": "sincos",
45
+ "pre_norm": true,
46
+ "prediction_length": 128,
47
+ "pretrained_encoder_path": null,
48
+ "pretrained_pft_path": null,
49
+ "random_mask_ratio": 0.5,
50
+ "rff_scale": 1.0,
51
+ "rff_trainable": false,
52
+ "rope_percent": 0.75,
53
+ "scaling": "std",
54
+ "share_embedding": true,
55
+ "share_projection": true,
56
+ "torch_dtype": "float32",
57
+ "transformers_version": "4.40.1",
58
+ "unmasked_channel_indices": null,
59
+ "use_cls_token": false,
60
+ "use_dynamics_embedding": true
61
+ }
generation_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "transformers_version": "4.40.1"
4
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ba12daf0ce4769d64e6d60cfe2962f845125a93651b40927f553402cc1a8ec8
3
+ size 286393664
training_info.json ADDED
@@ -0,0 +1,513 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_config": {
3
+ "mode": "predict",
4
+ "context_length": 512,
5
+ "prediction_length": 128,
6
+ "distribution_output": null,
7
+ "loss": "mse",
8
+ "huber_delta": 1.0,
9
+ "patch_length": 16,
10
+ "patch_stride": 16,
11
+ "num_hidden_layers": 12,
12
+ "d_model": 768,
13
+ "num_attention_heads": 12,
14
+ "channel_attention": true,
15
+ "ffn_dim": 768,
16
+ "norm_type": "rmsnorm",
17
+ "norm_eps": 1e-05,
18
+ "attention_dropout": 0.0,
19
+ "positional_dropout": 0.0,
20
+ "path_dropout": 0.0,
21
+ "ff_dropout": 0.0,
22
+ "bias": true,
23
+ "activation_function": "gelu",
24
+ "pre_norm": true,
25
+ "use_cls_token": false,
26
+ "init_std": 0.02,
27
+ "scaling": "std",
28
+ "do_mask_input": null,
29
+ "mask_type": "random",
30
+ "random_mask_ratio": 0.5,
31
+ "num_forecast_mask_patches": 3,
32
+ "channel_consistent_masking": false,
33
+ "unmasked_channel_indices": null,
34
+ "mask_value": 0,
35
+ "pooling_type": "mean",
36
+ "head_dropout": 0.0,
37
+ "num_parallel_samples": 100,
38
+ "channel_rope": false,
39
+ "max_wavelength": 500,
40
+ "rope_percent": 0.75,
41
+ "pretrained_encoder_path": null,
42
+ "pretrained_pft_path": null,
43
+ "use_dynamics_embedding": true,
44
+ "num_poly_feats": 188,
45
+ "poly_degrees": 2,
46
+ "rff_trainable": false,
47
+ "rff_scale": 1.0,
48
+ "num_rff": 376
49
+ },
50
+ "train_config": {
51
+ "seed": 99,
52
+ "max_steps": 800000,
53
+ "save_steps": 50000,
54
+ "log_steps": 1000,
55
+ "resume_from_checkpoint": null,
56
+ "per_device_train_batch_size": 384,
57
+ "gradient_accumulation_steps": 1,
58
+ "max_grad_norm": 1.0,
59
+ "dataloader_num_workers": 16,
60
+ "dataloader_prefetch_factor": 2,
61
+ "tf32": false,
62
+ "torch_compile": true,
63
+ "optim": "adamw_torch_fused",
64
+ "learning_rate": 0.001,
65
+ "lr_scheduler_type": "cosine",
66
+ "warmup_ratio": 0.05,
67
+ "weight_decay": 0.0,
68
+ "output_dir": "/stor/work/AMDG_Gilpin_Summer2024/checkpoints/",
69
+ "ddp_backend": "nccl",
70
+ "ddp_find_unused_parameters": false,
71
+ "remove_unused_columns": false
72
+ },
73
+ "all_config": {
74
+ "run_name": "panda_nh12_dmodel768_mixedp",
75
+ "wandb": {
76
+ "log": true,
77
+ "project_name": "panda",
78
+ "entity": "gilpinlab",
79
+ "group_name": null,
80
+ "resume": false,
81
+ "resume_run_id": null,
82
+ "tags": null
83
+ },
84
+ "patchtst": {
85
+ "mode": "predict",
86
+ "context_length": 512,
87
+ "prediction_length": 128,
88
+ "distribution_output": null,
89
+ "loss": "mse",
90
+ "huber_delta": 1.0,
91
+ "patch_length": 16,
92
+ "patch_stride": 16,
93
+ "num_hidden_layers": 12,
94
+ "d_model": 768,
95
+ "num_attention_heads": 12,
96
+ "channel_attention": true,
97
+ "ffn_dim": 768,
98
+ "norm_type": "rmsnorm",
99
+ "norm_eps": 1e-05,
100
+ "attention_dropout": 0.0,
101
+ "positional_dropout": 0.0,
102
+ "path_dropout": 0.0,
103
+ "ff_dropout": 0.0,
104
+ "bias": true,
105
+ "activation_function": "gelu",
106
+ "pre_norm": true,
107
+ "use_cls_token": false,
108
+ "init_std": 0.02,
109
+ "scaling": "std",
110
+ "do_mask_input": null,
111
+ "mask_type": "random",
112
+ "random_mask_ratio": 0.5,
113
+ "num_forecast_mask_patches": 3,
114
+ "channel_consistent_masking": false,
115
+ "unmasked_channel_indices": null,
116
+ "mask_value": 0,
117
+ "pooling_type": "mean",
118
+ "head_dropout": 0.0,
119
+ "num_parallel_samples": 100,
120
+ "channel_rope": false,
121
+ "max_wavelength": 500,
122
+ "rope_percent": 0.75,
123
+ "pretrained_encoder_path": null,
124
+ "pretrained_pft_path": null,
125
+ "use_dynamics_embedding": true,
126
+ "num_poly_feats": 188,
127
+ "poly_degrees": 2,
128
+ "rff_trainable": false,
129
+ "rff_scale": 1.0,
130
+ "num_rff": 376
131
+ },
132
+ "chronos": {
133
+ "model_id": "amazon/chronos-t5-mini",
134
+ "model_type": "seq2seq",
135
+ "random_init": false,
136
+ "tie_embeddings": true,
137
+ "context_length": 512,
138
+ "prediction_length": 128,
139
+ "num_samples": 20,
140
+ "n_tokens": 4096,
141
+ "n_special_tokens": 2,
142
+ "pad_token_id": 0,
143
+ "eos_token_id": 1,
144
+ "use_eos_token": true,
145
+ "tokenizer_class": "MeanScaleUniformBins",
146
+ "tokenizer_kwargs": {
147
+ "low_limit": -15.0,
148
+ "high_limit": 15.0
149
+ },
150
+ "temperature": 1.0,
151
+ "top_k": 50,
152
+ "top_p": 1.0
153
+ },
154
+ "train": {
155
+ "seed": 99,
156
+ "max_steps": 800000,
157
+ "save_steps": 50000,
158
+ "log_steps": 1000,
159
+ "resume_from_checkpoint": null,
160
+ "per_device_train_batch_size": 384,
161
+ "gradient_accumulation_steps": 1,
162
+ "max_grad_norm": 1.0,
163
+ "dataloader_num_workers": 16,
164
+ "dataloader_prefetch_factor": 2,
165
+ "tf32": false,
166
+ "torch_compile": true,
167
+ "optim": "adamw_torch_fused",
168
+ "learning_rate": 0.001,
169
+ "lr_scheduler_type": "cosine",
170
+ "warmup_ratio": 0.05,
171
+ "weight_decay": 0.0,
172
+ "output_dir": "/stor/work/AMDG_Gilpin_Summer2024/checkpoints/",
173
+ "ddp_backend": "nccl",
174
+ "ddp_find_unused_parameters": false,
175
+ "remove_unused_columns": false
176
+ },
177
+ "scheduler": {
178
+ "enabled": false,
179
+ "schedule_value_name": "noise_scale",
180
+ "schedule_name": "cosine",
181
+ "epoch_stop": 0.5,
182
+ "init_value": 1.0,
183
+ "final_value": 0.0,
184
+ "eps": 0.008,
185
+ "num_steps": 4,
186
+ "decay_rate": 8.0
187
+ },
188
+ "eval": {
189
+ "mode": "predict",
190
+ "data_paths_lst": null,
191
+ "checkpoint_path": null,
192
+ "device": "cuda:7",
193
+ "torch_dtype": "float32",
194
+ "batch_size": 32,
195
+ "num_subdirs": null,
196
+ "num_samples_per_subdir": null,
197
+ "sliding_context": true,
198
+ "save_contexts": false,
199
+ "save_labels": false,
200
+ "save_predictions": false,
201
+ "save_completions": false,
202
+ "save_masks": false,
203
+ "num_processes": 10,
204
+ "metric_names": [
205
+ "mse",
206
+ "mae",
207
+ "smape",
208
+ "spearman"
209
+ ],
210
+ "forecast_save_dir": null,
211
+ "labels_save_dir": null,
212
+ "completions_save_dir": null,
213
+ "patch_input_save_dir": null,
214
+ "timestep_masks_save_dir": null,
215
+ "metrics_save_dir": null,
216
+ "metrics_fname": "metrics",
217
+ "overwrite": false,
218
+ "seed": 1,
219
+ "num_samples": 1,
220
+ "parallel_sample_reduction": "mean",
221
+ "limit_prediction_length": true,
222
+ "context_length": 512,
223
+ "prediction_length": 64,
224
+ "num_test_instances": 1,
225
+ "window_style": "sampled",
226
+ "window_stride": 1,
227
+ "split_coords": false,
228
+ "verbose": false,
229
+ "baselines": {
230
+ "baseline_model": "fourier_arima",
231
+ "order": [
232
+ 4,
233
+ 1,
234
+ 4
235
+ ],
236
+ "num_fourier_terms": 5
237
+ },
238
+ "chronos": {
239
+ "zero_shot": false,
240
+ "deterministic": true
241
+ }
242
+ },
243
+ "run_metrics": {
244
+ "wandb_run_id": null,
245
+ "plot_dir": "figures",
246
+ "save_dir": null,
247
+ "save_fname": "metrics.json"
248
+ },
249
+ "train_data_dirs": [
250
+ "/stor/work/AMDG_Gilpin_Summer2024/data/improved/base_mixedp_ic16/train",
251
+ "/stor/work/AMDG_Gilpin_Summer2024/data/improved/skew_mixedp_ic16/train",
252
+ "/stor/work/AMDG_Gilpin_Summer2024/data/improved/final_base40/train",
253
+ "/stor/work/AMDG_Gilpin_Summer2024/data/improved/final_base40/train_z5_z10"
254
+ ],
255
+ "probability": null,
256
+ "shuffle_buffer_length": 100000,
257
+ "min_past": 60,
258
+ "max_missing_prop": 0.9,
259
+ "fixed_dim": 3,
260
+ "augmentations": {
261
+ "augmentation_rate": 0.2,
262
+ "probabilities": [
263
+ 0.3333333333333333,
264
+ 0.3333333333333333,
265
+ 0.3333333333333333,
266
+ 0.0,
267
+ 0.0
268
+ ],
269
+ "dim_range": [
270
+ 3,
271
+ 8
272
+ ],
273
+ "lag_range": [
274
+ 1,
275
+ 10
276
+ ],
277
+ "phase_surrogate_cutoff": 1.0,
278
+ "mode_range": [
279
+ 5,
280
+ 15
281
+ ],
282
+ "max_wavenumber": 10.0,
283
+ "max_amp": 10.0
284
+ },
285
+ "multiprocess_kwargs": {
286
+ "processes": null,
287
+ "maxtasksperchild": null
288
+ },
289
+ "restart_sampling": {
290
+ "split_name": null,
291
+ "params_json_path": null,
292
+ "systems_batch_size": 128,
293
+ "batch_idx_low": null,
294
+ "batch_idx_high": null,
295
+ "starting_sample_idx": 0,
296
+ "save_first_sample": true
297
+ },
298
+ "sampling": {
299
+ "data_dir": null,
300
+ "sys_class": "continuous_no_delay",
301
+ "test_split": 0.3,
302
+ "split_prefix": null,
303
+ "rseed": 999,
304
+ "ic_rseed": 888,
305
+ "num_points": 4096,
306
+ "num_periods": 40,
307
+ "num_periods_min": 40,
308
+ "num_periods_max": 40,
309
+ "num_ics": 1,
310
+ "num_param_perturbations": 4,
311
+ "param_scale": 0.5,
312
+ "split_coords": false,
313
+ "standardize": false,
314
+ "verbose": false,
315
+ "multiprocessing": true,
316
+ "debug_system": null,
317
+ "silence_integration_errors": false,
318
+ "save_params": true,
319
+ "save_traj_stats": false,
320
+ "ignore_probability": 0.0,
321
+ "sign_match_probability": 0.5,
322
+ "atol": 1e-10,
323
+ "rtol": 1e-09,
324
+ "reference_traj": {
325
+ "length": 4096,
326
+ "transient": 0.5,
327
+ "n_periods": 40,
328
+ "atol": 1e-07,
329
+ "rtol": 1e-06
330
+ }
331
+ },
332
+ "validator": {
333
+ "enable": true,
334
+ "verbose": false,
335
+ "transient_time_frac": 0.05,
336
+ "plot_save_dir": null,
337
+ "save_failed_trajs": false,
338
+ "attractor_tests": [
339
+ "check_not_linear",
340
+ "check_boundedness",
341
+ "check_not_fixed_point",
342
+ "check_zero_one_test",
343
+ "check_power_spectrum",
344
+ "check_stationarity"
345
+ ]
346
+ },
347
+ "events": {
348
+ "max_duration": 300,
349
+ "instability_threshold": 10000.0,
350
+ "min_step": 1e-10,
351
+ "verbose": true
352
+ },
353
+ "skew": {
354
+ "num_pairs": 5000,
355
+ "pairs_rseed": 123,
356
+ "sys_idx_low": 0,
357
+ "sys_idx_high": null,
358
+ "normalization_strategy": "flow_rms",
359
+ "randomize_driver_indices": true,
360
+ "transform_scales": true,
361
+ "train_nonskew_path": null,
362
+ "test_nonskew_path": null,
363
+ "coupling_map_type": "additive",
364
+ "coupling_map": {
365
+ "transform_scales": true,
366
+ "randomize_driver_indices": true,
367
+ "normalization_strategy": "flow_rms",
368
+ "random_seed": 0
369
+ }
370
+ },
371
+ "analysis": {
372
+ "data_dir": null,
373
+ "split": null,
374
+ "num_samples": null,
375
+ "one_dim_target": false,
376
+ "save_dir": "outputs",
377
+ "plots_dir": "figures",
378
+ "compute_quantile_limits": false,
379
+ "compute_max_lyapunov_exponents": false,
380
+ "filter_ensemble": true,
381
+ "filter_json_fname": "failed_samples",
382
+ "verbose": true,
383
+ "attractor_tests": [
384
+ "check_zero_one_test"
385
+ ],
386
+ "check_not_transient": {
387
+ "max_transient_prop": 0.2,
388
+ "atol": 0.001
389
+ },
390
+ "check_stationarity": {
391
+ "p_value": 0.05
392
+ },
393
+ "check_boundedness": {
394
+ "threshold": 10000.0,
395
+ "max_zscore": 5,
396
+ "eps": 1e-10
397
+ },
398
+ "check_zero_one_test": {
399
+ "threshold": 0.2,
400
+ "strategy": "score"
401
+ }
402
+ },
403
+ "base_style": "ggplot",
404
+ "matplotlib_style": {
405
+ "font": {
406
+ "serif": "Computer Modern Roman",
407
+ "size": 10
408
+ },
409
+ "axes": {
410
+ "titlesize": 12,
411
+ "labelsize": 10,
412
+ "linewidth": 0.75,
413
+ "facecolor": "white",
414
+ "grid": false
415
+ },
416
+ "grid": {
417
+ "color": "gray",
418
+ "linewidth": 0.5,
419
+ "alpha": 0.5
420
+ },
421
+ "lines": {
422
+ "linewidth": 1.5,
423
+ "markersize": 5
424
+ },
425
+ "xtick": {
426
+ "labelsize": 8,
427
+ "major": {
428
+ "size": 4
429
+ },
430
+ "minor": {
431
+ "size": 2
432
+ },
433
+ "direction": "in"
434
+ },
435
+ "ytick": {
436
+ "labelsize": 8,
437
+ "major": {
438
+ "size": 4
439
+ },
440
+ "minor": {
441
+ "size": 2
442
+ },
443
+ "direction": "in"
444
+ },
445
+ "figure": {
446
+ "figsize": [
447
+ 3.25,
448
+ 2.5
449
+ ],
450
+ "dpi": 300,
451
+ "autolayout": true,
452
+ "facecolor": "white"
453
+ },
454
+ "legend": {
455
+ "fontsize": 8,
456
+ "title_fontsize": 9,
457
+ "loc": "upper right",
458
+ "frameon": false
459
+ },
460
+ "savefig": {
461
+ "dpi": 300,
462
+ "format": "pdf",
463
+ "transparent": false
464
+ }
465
+ }
466
+ },
467
+ "job_info": {
468
+ "cuda_available": true,
469
+ "device_count": 6,
470
+ "device_names": {
471
+ "0": "AMD Instinct MI100",
472
+ "1": "AMD Instinct MI100",
473
+ "2": "AMD Instinct MI100",
474
+ "3": "AMD Instinct MI100",
475
+ "4": "AMD Instinct MI100",
476
+ "5": "AMD Instinct MI100"
477
+ },
478
+ "mem_info": {
479
+ "0": [
480
+ 8209039360,
481
+ 34342961152
482
+ ],
483
+ "1": [
484
+ 8212447232,
485
+ 34342961152
486
+ ],
487
+ "2": [
488
+ 8199864320,
489
+ 34342961152
490
+ ],
491
+ "3": [
492
+ 8199864320,
493
+ 34342961152
494
+ ],
495
+ "4": [
496
+ 8199864320,
497
+ 34342961152
498
+ ],
499
+ "5": [
500
+ 8212447232,
501
+ 34342961152
502
+ ]
503
+ },
504
+ "torchelastic_launched": true,
505
+ "world_size": 6,
506
+ "python_version": "3.10.14 (main, May 6 2024, 19:42:50) [GCC 11.2.0]",
507
+ "torch_version": "2.2.2+rocm5.7",
508
+ "numpy_version": "1.26.4",
509
+ "gluonts_version": "0.15.1",
510
+ "transformers_version": "4.40.1",
511
+ "accelerate_version": "1.7.0"
512
+ }
513
+ }