added CMS model from 2024 April 5 PF meeting
Browse files- cms/2024_04_05/pyg-cms_20240324_235743_208080/checkpoint-32-17.877384.pth +3 -0
- cms/2024_04_05/pyg-cms_20240324_235743_208080/hyperparameters.json +1 -0
- cms/2024_04_05/pyg-cms_20240324_235743_208080/mlpf_losses.pkl +3 -0
- cms/2024_04_05/pyg-cms_20240324_235743_208080/model_kwargs.pkl +3 -0
- cms/2024_04_05/pyg-cms_20240324_235743_208080/runs/train/events.out.tfevents.1711317469.gpu1.local.2556485.0 +3 -0
- cms/2024_04_05/pyg-cms_20240324_235743_208080/runs/train/events.out.tfevents.1712129489.joosep-desktop-work.646614.0 +3 -0
- cms/2024_04_05/pyg-cms_20240324_235743_208080/runs/train/events.out.tfevents.1712129556.joosep-desktop-work.647125.0 +3 -0
- cms/2024_04_05/pyg-cms_20240324_235743_208080/runs/train/events.out.tfevents.1712129684.joosep-desktop-work.647708.0 +3 -0
- cms/2024_04_05/pyg-cms_20240324_235743_208080/runs/valid/events.out.tfevents.1711317469.gpu1.local.2556485.1 +3 -0
- cms/2024_04_05/pyg-cms_20240324_235743_208080/runs/valid/events.out.tfevents.1712129489.joosep-desktop-work.646614.1 +3 -0
- cms/2024_04_05/pyg-cms_20240324_235743_208080/runs/valid/events.out.tfevents.1712129556.joosep-desktop-work.647125.1 +3 -0
- cms/2024_04_05/pyg-cms_20240324_235743_208080/runs/valid/events.out.tfevents.1712129684.joosep-desktop-work.647708.1 +3 -0
- cms/2024_04_05/pyg-cms_20240324_235743_208080/test-config.yaml +123 -0
- cms/2024_04_05/pyg-cms_20240324_235743_208080/train-config.yaml +129 -0
- cms/2024_04_05/pyg-cms_20240324_235743_208080/train.log +821 -0
cms/2024_04_05/pyg-cms_20240324_235743_208080/checkpoint-32-17.877384.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:131b35416593e8a69c529d85446cdbdd55e1d11f8870ac54c72448a75c72b57d
|
| 3 |
+
size 255893234
|
cms/2024_04_05/pyg-cms_20240324_235743_208080/hyperparameters.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"Num of mlpf parameters": 21304339, "config": "parameters/pytorch/pyg-cms.yaml", "prefix": null, "data_dir": "/scratch/persistent/joosep/tensorflow_datasets", "gpus": 1, "gpu_batch_multiplier": 20, "dataset": "cms", "num_workers": 4, "prefetch_factor": 50, "resume_training": null, "load": null, "train": true, "test": null, "num_epochs": 100, "patience": null, "lr": null, "conv_type": "attention", "num_convs": null, "make_plots": null, "export_onnx": null, "ntrain": null, "ntest": null, "nvalid": null, "val_freq": null, "checkpoint_freq": 1, "hpo": null, "ray_train": false, "local": null, "ray_cpus": null, "ray_gpus": null, "comet": true, "comet_offline": false, "comet_step_freq": null, "experiments_dir": null, "pipeline": null, "dtype": null, "attention_type": null, "test_datasets": {"cms_pf_ttbar": {"version": "1.7.1"}, "cms_pf_qcd_high_pt": {"version": "1.7.0"}, "cms_pf_qcd": {"version": "1.7.0"}, "cms_pf_ztt": {"version": "1.7.0"}, "cms_pf_sms_t1tttt": {"version": "1.7.0"}}}
|
cms/2024_04_05/pyg-cms_20240324_235743_208080/mlpf_losses.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:234fc7ea5a30d4927e05dbd27bbb5725d5c1f80589fd202ab593289a10ea4205
|
| 3 |
+
size 1836
|
cms/2024_04_05/pyg-cms_20240324_235743_208080/model_kwargs.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dfd634217a0268a8cc4e2d4f7ec0c5aa90697a9159c5b3092585a23f6ee20200
|
| 3 |
+
size 554
|
cms/2024_04_05/pyg-cms_20240324_235743_208080/runs/train/events.out.tfevents.1711317469.gpu1.local.2556485.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c52c5673bf17a6d231904f796ab11c3a8d0811b632231cab6f35169339f77caa
|
| 3 |
+
size 110493839
|
cms/2024_04_05/pyg-cms_20240324_235743_208080/runs/train/events.out.tfevents.1712129489.joosep-desktop-work.646614.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f6fecae60774c0ccdd10889871e01aaa6dc7dbacd4f29a4a3cfb66bf04584301
|
| 3 |
+
size 8739
|
cms/2024_04_05/pyg-cms_20240324_235743_208080/runs/train/events.out.tfevents.1712129556.joosep-desktop-work.647125.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:312f1568be932b846927c63a3f0d90ce6a88ac0ffff2e4539bd0256a0fd26cea
|
| 3 |
+
size 16350
|
cms/2024_04_05/pyg-cms_20240324_235743_208080/runs/train/events.out.tfevents.1712129684.joosep-desktop-work.647708.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cc54dbdf81a50f9e9b9f91024169683f25fb0aba1f368e966f4bdc1037e388f4
|
| 3 |
+
size 23961
|
cms/2024_04_05/pyg-cms_20240324_235743_208080/runs/valid/events.out.tfevents.1711317469.gpu1.local.2556485.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0811b521707025790c075d064a05992d40ce0da97127912e8e34b252c882223f
|
| 3 |
+
size 39256
|
cms/2024_04_05/pyg-cms_20240324_235743_208080/runs/valid/events.out.tfevents.1712129489.joosep-desktop-work.646614.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f13d9ae9963f649a22ce1378b4d792e99f5950b4ad72add0b791a13a93c8fbd9
|
| 3 |
+
size 88
|
cms/2024_04_05/pyg-cms_20240324_235743_208080/runs/valid/events.out.tfevents.1712129556.joosep-desktop-work.647125.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cbbc8fb83b45fae2fee80aa9f687ead4d28db6d8bf11dacda1048a2e29836cea
|
| 3 |
+
size 1312
|
cms/2024_04_05/pyg-cms_20240324_235743_208080/runs/valid/events.out.tfevents.1712129684.joosep-desktop-work.647708.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8a85e631dde89ac409373f2e1445ed69d1dedacfc29262171e13eb44e267a9d7
|
| 3 |
+
size 2536
|
cms/2024_04_05/pyg-cms_20240324_235743_208080/test-config.yaml
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
attention_type: efficient
|
| 2 |
+
backend: pytorch
|
| 3 |
+
checkpoint_freq: 1
|
| 4 |
+
comet: false
|
| 5 |
+
comet_name: particleflow-pt
|
| 6 |
+
comet_offline: false
|
| 7 |
+
comet_step_freq: 10
|
| 8 |
+
config: parameters/pytorch/pyg-cms.yaml
|
| 9 |
+
conv_type: attention
|
| 10 |
+
data_dir: null
|
| 11 |
+
dataset: cms
|
| 12 |
+
dtype: float16
|
| 13 |
+
experiments_dir: experiments/
|
| 14 |
+
gpu_batch_multiplier: 10
|
| 15 |
+
gpus: 1
|
| 16 |
+
load: experiments/pyg-cms_20240324_235743_208080/checkpoints/checkpoint-32-17.877384.pth
|
| 17 |
+
lr: 0.0001
|
| 18 |
+
lr_schedule: cosinedecay
|
| 19 |
+
lr_schedule_config:
|
| 20 |
+
onecycle:
|
| 21 |
+
pct_start: 0.3
|
| 22 |
+
make_plots: true
|
| 23 |
+
model:
|
| 24 |
+
attention:
|
| 25 |
+
activation: relu
|
| 26 |
+
attention_type: efficient
|
| 27 |
+
conv_type: attention
|
| 28 |
+
dropout_conv_id_ff: 0.0
|
| 29 |
+
dropout_conv_id_mha: 0.0
|
| 30 |
+
dropout_conv_reg_ff: 0.0
|
| 31 |
+
dropout_conv_reg_mha: 0.0
|
| 32 |
+
dropout_ff: 0.0
|
| 33 |
+
head_dim: 16
|
| 34 |
+
num_convs: 6
|
| 35 |
+
num_heads: 32
|
| 36 |
+
cos_phi_mode: linear
|
| 37 |
+
energy_mode: linear
|
| 38 |
+
eta_mode: linear
|
| 39 |
+
gnn_lsh:
|
| 40 |
+
activation: elu
|
| 41 |
+
bin_size: 640
|
| 42 |
+
conv_type: gnn_lsh
|
| 43 |
+
distance_dim: 128
|
| 44 |
+
dropout_ff: 0.0
|
| 45 |
+
embedding_dim: 512
|
| 46 |
+
ffn_dist_hidden_dim: 128
|
| 47 |
+
ffn_dist_num_layers: 2
|
| 48 |
+
layernorm: true
|
| 49 |
+
max_num_bins: 200
|
| 50 |
+
num_convs: 3
|
| 51 |
+
num_node_messages: 2
|
| 52 |
+
width: 512
|
| 53 |
+
gravnet:
|
| 54 |
+
activation: elu
|
| 55 |
+
conv_type: gravnet
|
| 56 |
+
dropout_ff: 0.1
|
| 57 |
+
embedding_dim: 512
|
| 58 |
+
k: 16
|
| 59 |
+
num_convs: 3
|
| 60 |
+
propagate_dimensions: 32
|
| 61 |
+
space_dimensions: 4
|
| 62 |
+
width: 512
|
| 63 |
+
input_encoding: joint
|
| 64 |
+
learned_representation_mode: last
|
| 65 |
+
mamba:
|
| 66 |
+
activation: elu
|
| 67 |
+
conv_type: mamba
|
| 68 |
+
d_conv: 4
|
| 69 |
+
d_state: 32
|
| 70 |
+
dropout_ff: 0.0
|
| 71 |
+
embedding_dim: 1024
|
| 72 |
+
expand: 2
|
| 73 |
+
num_convs: 4
|
| 74 |
+
width: 1024
|
| 75 |
+
pt_mode: linear
|
| 76 |
+
sin_phi_mode: linear
|
| 77 |
+
trainable: all
|
| 78 |
+
ntest: null
|
| 79 |
+
ntrain: null
|
| 80 |
+
num_epochs: 100
|
| 81 |
+
num_workers: 4
|
| 82 |
+
nvalid: null
|
| 83 |
+
patience: 20
|
| 84 |
+
prefetch_factor: 50
|
| 85 |
+
ray_train: false
|
| 86 |
+
raytune:
|
| 87 |
+
asha:
|
| 88 |
+
brackets: 1
|
| 89 |
+
grace_period: 10
|
| 90 |
+
max_t: 200
|
| 91 |
+
reduction_factor: 4
|
| 92 |
+
default_metric: val_loss
|
| 93 |
+
default_mode: min
|
| 94 |
+
hyperband:
|
| 95 |
+
max_t: 200
|
| 96 |
+
reduction_factor: 4
|
| 97 |
+
hyperopt:
|
| 98 |
+
n_random_steps: 10
|
| 99 |
+
local_dir: null
|
| 100 |
+
nevergrad:
|
| 101 |
+
n_random_steps: 10
|
| 102 |
+
sched: asha
|
| 103 |
+
search_alg: hyperopt
|
| 104 |
+
sort_data: true
|
| 105 |
+
test_dataset:
|
| 106 |
+
cms_pf_qcd:
|
| 107 |
+
version: 1.7.1
|
| 108 |
+
test_datasets: []
|
| 109 |
+
train_dataset:
|
| 110 |
+
cms:
|
| 111 |
+
physical:
|
| 112 |
+
batch_size: 1
|
| 113 |
+
samples:
|
| 114 |
+
cms_pf_qcd:
|
| 115 |
+
version: 1.7.1
|
| 116 |
+
val_freq: null
|
| 117 |
+
valid_dataset:
|
| 118 |
+
cms:
|
| 119 |
+
physical:
|
| 120 |
+
batch_size: 1
|
| 121 |
+
samples:
|
| 122 |
+
cms_pf_qcd:
|
| 123 |
+
version: 1.7.1
|
cms/2024_04_05/pyg-cms_20240324_235743_208080/train-config.yaml
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
backend: pytorch
|
| 2 |
+
checkpoint_freq: 1
|
| 3 |
+
comet: true
|
| 4 |
+
comet_name: particleflow-pt
|
| 5 |
+
comet_offline: false
|
| 6 |
+
comet_step_freq: 10
|
| 7 |
+
config: parameters/pytorch/pyg-cms.yaml
|
| 8 |
+
conv_type: attention
|
| 9 |
+
data_dir: /scratch/persistent/joosep/tensorflow_datasets
|
| 10 |
+
dataset: cms
|
| 11 |
+
dtype: bfloat16
|
| 12 |
+
gpu_batch_multiplier: 20
|
| 13 |
+
gpus: 1
|
| 14 |
+
load: null
|
| 15 |
+
lr: 0.0001
|
| 16 |
+
lr_schedule: cosinedecay
|
| 17 |
+
lr_schedule_config:
|
| 18 |
+
onecycle:
|
| 19 |
+
pct_start: 0.3
|
| 20 |
+
model:
|
| 21 |
+
attention:
|
| 22 |
+
activation: relu
|
| 23 |
+
attention_type: flash
|
| 24 |
+
conv_type: attention
|
| 25 |
+
dropout_conv_id_ff: 0.0
|
| 26 |
+
dropout_conv_id_mha: 0.0
|
| 27 |
+
dropout_conv_reg_ff: 0.0
|
| 28 |
+
dropout_conv_reg_mha: 0.0
|
| 29 |
+
dropout_ff: 0.0
|
| 30 |
+
head_dim: 16
|
| 31 |
+
num_convs: 6
|
| 32 |
+
num_heads: 32
|
| 33 |
+
cos_phi_mode: linear
|
| 34 |
+
energy_mode: linear
|
| 35 |
+
eta_mode: linear
|
| 36 |
+
gnn_lsh:
|
| 37 |
+
activation: elu
|
| 38 |
+
bin_size: 640
|
| 39 |
+
conv_type: gnn_lsh
|
| 40 |
+
distance_dim: 128
|
| 41 |
+
dropout_ff: 0.0
|
| 42 |
+
embedding_dim: 512
|
| 43 |
+
ffn_dist_hidden_dim: 128
|
| 44 |
+
ffn_dist_num_layers: 2
|
| 45 |
+
layernorm: true
|
| 46 |
+
max_num_bins: 200
|
| 47 |
+
num_convs: 3
|
| 48 |
+
num_node_messages: 2
|
| 49 |
+
width: 512
|
| 50 |
+
gravnet:
|
| 51 |
+
activation: elu
|
| 52 |
+
conv_type: gravnet
|
| 53 |
+
dropout_ff: 0.1
|
| 54 |
+
embedding_dim: 512
|
| 55 |
+
k: 16
|
| 56 |
+
num_convs: 3
|
| 57 |
+
propagate_dimensions: 32
|
| 58 |
+
space_dimensions: 4
|
| 59 |
+
width: 512
|
| 60 |
+
input_encoding: joint
|
| 61 |
+
learned_representation_mode: last
|
| 62 |
+
mamba:
|
| 63 |
+
activation: elu
|
| 64 |
+
conv_type: mamba
|
| 65 |
+
d_conv: 4
|
| 66 |
+
d_state: 32
|
| 67 |
+
dropout_ff: 0.0
|
| 68 |
+
embedding_dim: 1024
|
| 69 |
+
expand: 2
|
| 70 |
+
num_convs: 4
|
| 71 |
+
width: 1024
|
| 72 |
+
pt_mode: linear
|
| 73 |
+
sin_phi_mode: linear
|
| 74 |
+
trainable: all
|
| 75 |
+
ntest: null
|
| 76 |
+
ntrain: null
|
| 77 |
+
num_epochs: 100
|
| 78 |
+
num_workers: 4
|
| 79 |
+
nvalid: null
|
| 80 |
+
patience: 20
|
| 81 |
+
prefetch_factor: 50
|
| 82 |
+
ray_train: false
|
| 83 |
+
raytune:
|
| 84 |
+
asha:
|
| 85 |
+
brackets: 1
|
| 86 |
+
grace_period: 10
|
| 87 |
+
max_t: 200
|
| 88 |
+
reduction_factor: 4
|
| 89 |
+
default_metric: val_loss
|
| 90 |
+
default_mode: min
|
| 91 |
+
hyperband:
|
| 92 |
+
max_t: 200
|
| 93 |
+
reduction_factor: 4
|
| 94 |
+
hyperopt:
|
| 95 |
+
n_random_steps: 10
|
| 96 |
+
local_dir: null
|
| 97 |
+
nevergrad:
|
| 98 |
+
n_random_steps: 10
|
| 99 |
+
sched: asha
|
| 100 |
+
search_alg: hyperopt
|
| 101 |
+
sort_data: true
|
| 102 |
+
test_dataset:
|
| 103 |
+
cms_pf_qcd:
|
| 104 |
+
version: 1.7.0
|
| 105 |
+
cms_pf_qcd_high_pt:
|
| 106 |
+
version: 1.7.0
|
| 107 |
+
cms_pf_sms_t1tttt:
|
| 108 |
+
version: 1.7.0
|
| 109 |
+
cms_pf_ttbar:
|
| 110 |
+
version: 1.7.1
|
| 111 |
+
cms_pf_ztt:
|
| 112 |
+
version: 1.7.0
|
| 113 |
+
test_datasets: []
|
| 114 |
+
train: true
|
| 115 |
+
train_dataset:
|
| 116 |
+
cms:
|
| 117 |
+
physical:
|
| 118 |
+
batch_size: 1
|
| 119 |
+
samples:
|
| 120 |
+
cms_pf_ttbar:
|
| 121 |
+
version: 1.7.1
|
| 122 |
+
val_freq: null
|
| 123 |
+
valid_dataset:
|
| 124 |
+
cms:
|
| 125 |
+
physical:
|
| 126 |
+
batch_size: 1
|
| 127 |
+
samples:
|
| 128 |
+
cms_pf_ttbar:
|
| 129 |
+
version: 1.7.1
|
cms/2024_04_05/pyg-cms_20240324_235743_208080/train.log
ADDED
|
@@ -0,0 +1,821 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[2024-03-24 23:57:43,238] INFO: [0;35mWill use single-gpu: NVIDIA A100 80GB PCIe[0m
|
| 2 |
+
[2024-03-24 23:57:43,242] INFO: using dtype=torch.bfloat16
|
| 3 |
+
[2024-03-24 23:57:43,242] INFO: using dtype=torch.bfloat16
|
| 4 |
+
[2024-03-24 23:57:43,277] INFO: using attention_type=flash
|
| 5 |
+
[2024-03-24 23:57:43,277] INFO: using attention_type=flash
|
| 6 |
+
[2024-03-24 23:57:43,290] INFO: using attention_type=flash
|
| 7 |
+
[2024-03-24 23:57:43,290] INFO: using attention_type=flash
|
| 8 |
+
[2024-03-24 23:57:43,302] INFO: using attention_type=flash
|
| 9 |
+
[2024-03-24 23:57:43,302] INFO: using attention_type=flash
|
| 10 |
+
[2024-03-24 23:57:43,314] INFO: using attention_type=flash
|
| 11 |
+
[2024-03-24 23:57:43,314] INFO: using attention_type=flash
|
| 12 |
+
[2024-03-24 23:57:43,330] INFO: using attention_type=flash
|
| 13 |
+
[2024-03-24 23:57:43,330] INFO: using attention_type=flash
|
| 14 |
+
[2024-03-24 23:57:43,342] INFO: using attention_type=flash
|
| 15 |
+
[2024-03-24 23:57:43,342] INFO: using attention_type=flash
|
| 16 |
+
[2024-03-24 23:57:43,354] INFO: using attention_type=flash
|
| 17 |
+
[2024-03-24 23:57:43,354] INFO: using attention_type=flash
|
| 18 |
+
[2024-03-24 23:57:43,367] INFO: using attention_type=flash
|
| 19 |
+
[2024-03-24 23:57:43,367] INFO: using attention_type=flash
|
| 20 |
+
[2024-03-24 23:57:43,380] INFO: using attention_type=flash
|
| 21 |
+
[2024-03-24 23:57:43,380] INFO: using attention_type=flash
|
| 22 |
+
[2024-03-24 23:57:43,392] INFO: using attention_type=flash
|
| 23 |
+
[2024-03-24 23:57:43,392] INFO: using attention_type=flash
|
| 24 |
+
[2024-03-24 23:57:43,404] INFO: using attention_type=flash
|
| 25 |
+
[2024-03-24 23:57:43,404] INFO: using attention_type=flash
|
| 26 |
+
[2024-03-24 23:57:43,415] INFO: using attention_type=flash
|
| 27 |
+
[2024-03-24 23:57:43,415] INFO: using attention_type=flash
|
| 28 |
+
[2024-03-24 23:57:43,660] INFO: MLPF(
|
| 29 |
+
(nn0_id): Sequential(
|
| 30 |
+
(0): Linear(in_features=55, out_features=512, bias=True)
|
| 31 |
+
(1): ReLU()
|
| 32 |
+
(2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 33 |
+
(3): Dropout(p=0.0, inplace=False)
|
| 34 |
+
(4): Linear(in_features=512, out_features=512, bias=True)
|
| 35 |
+
)
|
| 36 |
+
(nn0_reg): Sequential(
|
| 37 |
+
(0): Linear(in_features=55, out_features=512, bias=True)
|
| 38 |
+
(1): ReLU()
|
| 39 |
+
(2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 40 |
+
(3): Dropout(p=0.0, inplace=False)
|
| 41 |
+
(4): Linear(in_features=512, out_features=512, bias=True)
|
| 42 |
+
)
|
| 43 |
+
(conv_id): ModuleList(
|
| 44 |
+
(0-5): 6 x SelfAttentionLayer(
|
| 45 |
+
(mha): MultiheadAttention(
|
| 46 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 47 |
+
)
|
| 48 |
+
(norm0): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 49 |
+
(norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 50 |
+
(seq): Sequential(
|
| 51 |
+
(0): Linear(in_features=512, out_features=512, bias=True)
|
| 52 |
+
(1): ReLU()
|
| 53 |
+
(2): Linear(in_features=512, out_features=512, bias=True)
|
| 54 |
+
(3): ReLU()
|
| 55 |
+
)
|
| 56 |
+
(dropout): Dropout(p=0.0, inplace=False)
|
| 57 |
+
)
|
| 58 |
+
)
|
| 59 |
+
(conv_reg): ModuleList(
|
| 60 |
+
(0-5): 6 x SelfAttentionLayer(
|
| 61 |
+
(mha): MultiheadAttention(
|
| 62 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 63 |
+
)
|
| 64 |
+
(norm0): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 65 |
+
(norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 66 |
+
(seq): Sequential(
|
| 67 |
+
(0): Linear(in_features=512, out_features=512, bias=True)
|
| 68 |
+
(1): ReLU()
|
| 69 |
+
(2): Linear(in_features=512, out_features=512, bias=True)
|
| 70 |
+
(3): ReLU()
|
| 71 |
+
)
|
| 72 |
+
(dropout): Dropout(p=0.0, inplace=False)
|
| 73 |
+
)
|
| 74 |
+
)
|
| 75 |
+
(nn_id): Sequential(
|
| 76 |
+
(0): Linear(in_features=567, out_features=512, bias=True)
|
| 77 |
+
(1): ReLU()
|
| 78 |
+
(2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 79 |
+
(3): Dropout(p=0.0, inplace=False)
|
| 80 |
+
(4): Linear(in_features=512, out_features=9, bias=True)
|
| 81 |
+
)
|
| 82 |
+
(nn_pt): RegressionOutput(
|
| 83 |
+
(nn): Sequential(
|
| 84 |
+
(0): Linear(in_features=576, out_features=512, bias=True)
|
| 85 |
+
(1): ReLU()
|
| 86 |
+
(2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 87 |
+
(3): Dropout(p=0.0, inplace=False)
|
| 88 |
+
(4): Linear(in_features=512, out_features=2, bias=True)
|
| 89 |
+
)
|
| 90 |
+
)
|
| 91 |
+
(nn_eta): RegressionOutput(
|
| 92 |
+
(nn): Sequential(
|
| 93 |
+
(0): Linear(in_features=576, out_features=512, bias=True)
|
| 94 |
+
(1): ReLU()
|
| 95 |
+
(2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 96 |
+
(3): Dropout(p=0.0, inplace=False)
|
| 97 |
+
(4): Linear(in_features=512, out_features=2, bias=True)
|
| 98 |
+
)
|
| 99 |
+
)
|
| 100 |
+
(nn_sin_phi): RegressionOutput(
|
| 101 |
+
(nn): Sequential(
|
| 102 |
+
(0): Linear(in_features=576, out_features=512, bias=True)
|
| 103 |
+
(1): ReLU()
|
| 104 |
+
(2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 105 |
+
(3): Dropout(p=0.0, inplace=False)
|
| 106 |
+
(4): Linear(in_features=512, out_features=2, bias=True)
|
| 107 |
+
)
|
| 108 |
+
)
|
| 109 |
+
(nn_cos_phi): RegressionOutput(
|
| 110 |
+
(nn): Sequential(
|
| 111 |
+
(0): Linear(in_features=576, out_features=512, bias=True)
|
| 112 |
+
(1): ReLU()
|
| 113 |
+
(2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 114 |
+
(3): Dropout(p=0.0, inplace=False)
|
| 115 |
+
(4): Linear(in_features=512, out_features=2, bias=True)
|
| 116 |
+
)
|
| 117 |
+
)
|
| 118 |
+
(nn_energy): RegressionOutput(
|
| 119 |
+
(nn): Sequential(
|
| 120 |
+
(0): Linear(in_features=576, out_features=512, bias=True)
|
| 121 |
+
(1): ReLU()
|
| 122 |
+
(2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 123 |
+
(3): Dropout(p=0.0, inplace=False)
|
| 124 |
+
(4): Linear(in_features=512, out_features=2, bias=True)
|
| 125 |
+
)
|
| 126 |
+
)
|
| 127 |
+
)
|
| 128 |
+
[2024-03-24 23:57:43,660] INFO: MLPF(
|
| 129 |
+
(nn0_id): Sequential(
|
| 130 |
+
(0): Linear(in_features=55, out_features=512, bias=True)
|
| 131 |
+
(1): ReLU()
|
| 132 |
+
(2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 133 |
+
(3): Dropout(p=0.0, inplace=False)
|
| 134 |
+
(4): Linear(in_features=512, out_features=512, bias=True)
|
| 135 |
+
)
|
| 136 |
+
(nn0_reg): Sequential(
|
| 137 |
+
(0): Linear(in_features=55, out_features=512, bias=True)
|
| 138 |
+
(1): ReLU()
|
| 139 |
+
(2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 140 |
+
(3): Dropout(p=0.0, inplace=False)
|
| 141 |
+
(4): Linear(in_features=512, out_features=512, bias=True)
|
| 142 |
+
)
|
| 143 |
+
(conv_id): ModuleList(
|
| 144 |
+
(0-5): 6 x SelfAttentionLayer(
|
| 145 |
+
(mha): MultiheadAttention(
|
| 146 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 147 |
+
)
|
| 148 |
+
(norm0): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 149 |
+
(norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 150 |
+
(seq): Sequential(
|
| 151 |
+
(0): Linear(in_features=512, out_features=512, bias=True)
|
| 152 |
+
(1): ReLU()
|
| 153 |
+
(2): Linear(in_features=512, out_features=512, bias=True)
|
| 154 |
+
(3): ReLU()
|
| 155 |
+
)
|
| 156 |
+
(dropout): Dropout(p=0.0, inplace=False)
|
| 157 |
+
)
|
| 158 |
+
)
|
| 159 |
+
(conv_reg): ModuleList(
|
| 160 |
+
(0-5): 6 x SelfAttentionLayer(
|
| 161 |
+
(mha): MultiheadAttention(
|
| 162 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 163 |
+
)
|
| 164 |
+
(norm0): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 165 |
+
(norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 166 |
+
(seq): Sequential(
|
| 167 |
+
(0): Linear(in_features=512, out_features=512, bias=True)
|
| 168 |
+
(1): ReLU()
|
| 169 |
+
(2): Linear(in_features=512, out_features=512, bias=True)
|
| 170 |
+
(3): ReLU()
|
| 171 |
+
)
|
| 172 |
+
(dropout): Dropout(p=0.0, inplace=False)
|
| 173 |
+
)
|
| 174 |
+
)
|
| 175 |
+
(nn_id): Sequential(
|
| 176 |
+
(0): Linear(in_features=567, out_features=512, bias=True)
|
| 177 |
+
(1): ReLU()
|
| 178 |
+
(2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 179 |
+
(3): Dropout(p=0.0, inplace=False)
|
| 180 |
+
(4): Linear(in_features=512, out_features=9, bias=True)
|
| 181 |
+
)
|
| 182 |
+
(nn_pt): RegressionOutput(
|
| 183 |
+
(nn): Sequential(
|
| 184 |
+
(0): Linear(in_features=576, out_features=512, bias=True)
|
| 185 |
+
(1): ReLU()
|
| 186 |
+
(2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 187 |
+
(3): Dropout(p=0.0, inplace=False)
|
| 188 |
+
(4): Linear(in_features=512, out_features=2, bias=True)
|
| 189 |
+
)
|
| 190 |
+
)
|
| 191 |
+
(nn_eta): RegressionOutput(
|
| 192 |
+
(nn): Sequential(
|
| 193 |
+
(0): Linear(in_features=576, out_features=512, bias=True)
|
| 194 |
+
(1): ReLU()
|
| 195 |
+
(2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 196 |
+
(3): Dropout(p=0.0, inplace=False)
|
| 197 |
+
(4): Linear(in_features=512, out_features=2, bias=True)
|
| 198 |
+
)
|
| 199 |
+
)
|
| 200 |
+
(nn_sin_phi): RegressionOutput(
|
| 201 |
+
(nn): Sequential(
|
| 202 |
+
(0): Linear(in_features=576, out_features=512, bias=True)
|
| 203 |
+
(1): ReLU()
|
| 204 |
+
(2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 205 |
+
(3): Dropout(p=0.0, inplace=False)
|
| 206 |
+
(4): Linear(in_features=512, out_features=2, bias=True)
|
| 207 |
+
)
|
| 208 |
+
)
|
| 209 |
+
(nn_cos_phi): RegressionOutput(
|
| 210 |
+
(nn): Sequential(
|
| 211 |
+
(0): Linear(in_features=576, out_features=512, bias=True)
|
| 212 |
+
(1): ReLU()
|
| 213 |
+
(2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 214 |
+
(3): Dropout(p=0.0, inplace=False)
|
| 215 |
+
(4): Linear(in_features=512, out_features=2, bias=True)
|
| 216 |
+
)
|
| 217 |
+
)
|
| 218 |
+
(nn_energy): RegressionOutput(
|
| 219 |
+
(nn): Sequential(
|
| 220 |
+
(0): Linear(in_features=576, out_features=512, bias=True)
|
| 221 |
+
(1): ReLU()
|
| 222 |
+
(2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 223 |
+
(3): Dropout(p=0.0, inplace=False)
|
| 224 |
+
(4): Linear(in_features=512, out_features=2, bias=True)
|
| 225 |
+
)
|
| 226 |
+
)
|
| 227 |
+
)
|
| 228 |
+
[2024-03-24 23:57:43,666] INFO: Trainable parameters: 21304339
|
| 229 |
+
[2024-03-24 23:57:43,666] INFO: Trainable parameters: 21304339
|
| 230 |
+
[2024-03-24 23:57:43,668] INFO: Non-trainable parameters: 0
|
| 231 |
+
[2024-03-24 23:57:43,668] INFO: Non-trainable parameters: 0
|
| 232 |
+
[2024-03-24 23:57:43,671] INFO: Total parameters: 21304339
|
| 233 |
+
[2024-03-24 23:57:43,671] INFO: Total parameters: 21304339
|
| 234 |
+
[2024-03-24 23:57:43,682] INFO: Modules Trainable parameters Non-tranable parameters
|
| 235 |
+
nn0_id.0.weight 28160 0
|
| 236 |
+
nn0_id.0.bias 512 0
|
| 237 |
+
nn0_id.2.weight 512 0
|
| 238 |
+
nn0_id.2.bias 512 0
|
| 239 |
+
nn0_id.4.weight 262144 0
|
| 240 |
+
nn0_id.4.bias 512 0
|
| 241 |
+
nn0_reg.0.weight 28160 0
|
| 242 |
+
nn0_reg.0.bias 512 0
|
| 243 |
+
nn0_reg.2.weight 512 0
|
| 244 |
+
nn0_reg.2.bias 512 0
|
| 245 |
+
nn0_reg.4.weight 262144 0
|
| 246 |
+
nn0_reg.4.bias 512 0
|
| 247 |
+
conv_id.0.mha.in_proj_weight 786432 0
|
| 248 |
+
conv_id.0.mha.in_proj_bias 1536 0
|
| 249 |
+
conv_id.0.mha.out_proj.weight 262144 0
|
| 250 |
+
conv_id.0.mha.out_proj.bias 512 0
|
| 251 |
+
conv_id.0.norm0.weight 512 0
|
| 252 |
+
conv_id.0.norm0.bias 512 0
|
| 253 |
+
conv_id.0.norm1.weight 512 0
|
| 254 |
+
conv_id.0.norm1.bias 512 0
|
| 255 |
+
conv_id.0.seq.0.weight 262144 0
|
| 256 |
+
conv_id.0.seq.0.bias 512 0
|
| 257 |
+
conv_id.0.seq.2.weight 262144 0
|
| 258 |
+
conv_id.0.seq.2.bias 512 0
|
| 259 |
+
conv_id.1.mha.in_proj_weight 786432 0
|
| 260 |
+
conv_id.1.mha.in_proj_bias 1536 0
|
| 261 |
+
conv_id.1.mha.out_proj.weight 262144 0
|
| 262 |
+
conv_id.1.mha.out_proj.bias 512 0
|
| 263 |
+
conv_id.1.norm0.weight 512 0
|
| 264 |
+
conv_id.1.norm0.bias 512 0
|
| 265 |
+
conv_id.1.norm1.weight 512 0
|
| 266 |
+
conv_id.1.norm1.bias 512 0
|
| 267 |
+
conv_id.1.seq.0.weight 262144 0
|
| 268 |
+
conv_id.1.seq.0.bias 512 0
|
| 269 |
+
conv_id.1.seq.2.weight 262144 0
|
| 270 |
+
conv_id.1.seq.2.bias 512 0
|
| 271 |
+
conv_id.2.mha.in_proj_weight 786432 0
|
| 272 |
+
conv_id.2.mha.in_proj_bias 1536 0
|
| 273 |
+
conv_id.2.mha.out_proj.weight 262144 0
|
| 274 |
+
conv_id.2.mha.out_proj.bias 512 0
|
| 275 |
+
conv_id.2.norm0.weight 512 0
|
| 276 |
+
conv_id.2.norm0.bias 512 0
|
| 277 |
+
conv_id.2.norm1.weight 512 0
|
| 278 |
+
conv_id.2.norm1.bias 512 0
|
| 279 |
+
conv_id.2.seq.0.weight 262144 0
|
| 280 |
+
conv_id.2.seq.0.bias 512 0
|
| 281 |
+
conv_id.2.seq.2.weight 262144 0
|
| 282 |
+
conv_id.2.seq.2.bias 512 0
|
| 283 |
+
conv_id.3.mha.in_proj_weight 786432 0
|
| 284 |
+
conv_id.3.mha.in_proj_bias 1536 0
|
| 285 |
+
conv_id.3.mha.out_proj.weight 262144 0
|
| 286 |
+
conv_id.3.mha.out_proj.bias 512 0
|
| 287 |
+
conv_id.3.norm0.weight 512 0
|
| 288 |
+
conv_id.3.norm0.bias 512 0
|
| 289 |
+
conv_id.3.norm1.weight 512 0
|
| 290 |
+
conv_id.3.norm1.bias 512 0
|
| 291 |
+
conv_id.3.seq.0.weight 262144 0
|
| 292 |
+
conv_id.3.seq.0.bias 512 0
|
| 293 |
+
conv_id.3.seq.2.weight 262144 0
|
| 294 |
+
conv_id.3.seq.2.bias 512 0
|
| 295 |
+
conv_id.4.mha.in_proj_weight 786432 0
|
| 296 |
+
conv_id.4.mha.in_proj_bias 1536 0
|
| 297 |
+
conv_id.4.mha.out_proj.weight 262144 0
|
| 298 |
+
conv_id.4.mha.out_proj.bias 512 0
|
| 299 |
+
conv_id.4.norm0.weight 512 0
|
| 300 |
+
conv_id.4.norm0.bias 512 0
|
| 301 |
+
conv_id.4.norm1.weight 512 0
|
| 302 |
+
conv_id.4.norm1.bias 512 0
|
| 303 |
+
conv_id.4.seq.0.weight 262144 0
|
| 304 |
+
conv_id.4.seq.0.bias 512 0
|
| 305 |
+
conv_id.4.seq.2.weight 262144 0
|
| 306 |
+
conv_id.4.seq.2.bias 512 0
|
| 307 |
+
conv_id.5.mha.in_proj_weight 786432 0
|
| 308 |
+
conv_id.5.mha.in_proj_bias 1536 0
|
| 309 |
+
conv_id.5.mha.out_proj.weight 262144 0
|
| 310 |
+
conv_id.5.mha.out_proj.bias 512 0
|
| 311 |
+
conv_id.5.norm0.weight 512 0
|
| 312 |
+
conv_id.5.norm0.bias 512 0
|
| 313 |
+
conv_id.5.norm1.weight 512 0
|
| 314 |
+
conv_id.5.norm1.bias 512 0
|
| 315 |
+
conv_id.5.seq.0.weight 262144 0
|
| 316 |
+
conv_id.5.seq.0.bias 512 0
|
| 317 |
+
conv_id.5.seq.2.weight 262144 0
|
| 318 |
+
conv_id.5.seq.2.bias 512 0
|
| 319 |
+
conv_reg.0.mha.in_proj_weight 786432 0
|
| 320 |
+
conv_reg.0.mha.in_proj_bias 1536 0
|
| 321 |
+
conv_reg.0.mha.out_proj.weight 262144 0
|
| 322 |
+
conv_reg.0.mha.out_proj.bias 512 0
|
| 323 |
+
conv_reg.0.norm0.weight 512 0
|
| 324 |
+
conv_reg.0.norm0.bias 512 0
|
| 325 |
+
conv_reg.0.norm1.weight 512 0
|
| 326 |
+
conv_reg.0.norm1.bias 512 0
|
| 327 |
+
conv_reg.0.seq.0.weight 262144 0
|
| 328 |
+
conv_reg.0.seq.0.bias 512 0
|
| 329 |
+
conv_reg.0.seq.2.weight 262144 0
|
| 330 |
+
conv_reg.0.seq.2.bias 512 0
|
| 331 |
+
conv_reg.1.mha.in_proj_weight 786432 0
|
| 332 |
+
conv_reg.1.mha.in_proj_bias 1536 0
|
| 333 |
+
conv_reg.1.mha.out_proj.weight 262144 0
|
| 334 |
+
conv_reg.1.mha.out_proj.bias 512 0
|
| 335 |
+
conv_reg.1.norm0.weight 512 0
|
| 336 |
+
conv_reg.1.norm0.bias 512 0
|
| 337 |
+
conv_reg.1.norm1.weight 512 0
|
| 338 |
+
conv_reg.1.norm1.bias 512 0
|
| 339 |
+
conv_reg.1.seq.0.weight 262144 0
|
| 340 |
+
conv_reg.1.seq.0.bias 512 0
|
| 341 |
+
conv_reg.1.seq.2.weight 262144 0
|
| 342 |
+
conv_reg.1.seq.2.bias 512 0
|
| 343 |
+
conv_reg.2.mha.in_proj_weight 786432 0
|
| 344 |
+
conv_reg.2.mha.in_proj_bias 1536 0
|
| 345 |
+
conv_reg.2.mha.out_proj.weight 262144 0
|
| 346 |
+
conv_reg.2.mha.out_proj.bias 512 0
|
| 347 |
+
conv_reg.2.norm0.weight 512 0
|
| 348 |
+
conv_reg.2.norm0.bias 512 0
|
| 349 |
+
conv_reg.2.norm1.weight 512 0
|
| 350 |
+
conv_reg.2.norm1.bias 512 0
|
| 351 |
+
conv_reg.2.seq.0.weight 262144 0
|
| 352 |
+
conv_reg.2.seq.0.bias 512 0
|
| 353 |
+
conv_reg.2.seq.2.weight 262144 0
|
| 354 |
+
conv_reg.2.seq.2.bias 512 0
|
| 355 |
+
conv_reg.3.mha.in_proj_weight 786432 0
|
| 356 |
+
conv_reg.3.mha.in_proj_bias 1536 0
|
| 357 |
+
conv_reg.3.mha.out_proj.weight 262144 0
|
| 358 |
+
conv_reg.3.mha.out_proj.bias 512 0
|
| 359 |
+
conv_reg.3.norm0.weight 512 0
|
| 360 |
+
conv_reg.3.norm0.bias 512 0
|
| 361 |
+
conv_reg.3.norm1.weight 512 0
|
| 362 |
+
conv_reg.3.norm1.bias 512 0
|
| 363 |
+
conv_reg.3.seq.0.weight 262144 0
|
| 364 |
+
conv_reg.3.seq.0.bias 512 0
|
| 365 |
+
conv_reg.3.seq.2.weight 262144 0
|
| 366 |
+
conv_reg.3.seq.2.bias 512 0
|
| 367 |
+
conv_reg.4.mha.in_proj_weight 786432 0
|
| 368 |
+
conv_reg.4.mha.in_proj_bias 1536 0
|
| 369 |
+
conv_reg.4.mha.out_proj.weight 262144 0
|
| 370 |
+
conv_reg.4.mha.out_proj.bias 512 0
|
| 371 |
+
conv_reg.4.norm0.weight 512 0
|
| 372 |
+
conv_reg.4.norm0.bias 512 0
|
| 373 |
+
conv_reg.4.norm1.weight 512 0
|
| 374 |
+
conv_reg.4.norm1.bias 512 0
|
| 375 |
+
conv_reg.4.seq.0.weight 262144 0
|
| 376 |
+
conv_reg.4.seq.0.bias 512 0
|
| 377 |
+
conv_reg.4.seq.2.weight 262144 0
|
| 378 |
+
conv_reg.4.seq.2.bias 512 0
|
| 379 |
+
conv_reg.5.mha.in_proj_weight 786432 0
|
| 380 |
+
conv_reg.5.mha.in_proj_bias 1536 0
|
| 381 |
+
conv_reg.5.mha.out_proj.weight 262144 0
|
| 382 |
+
conv_reg.5.mha.out_proj.bias 512 0
|
| 383 |
+
conv_reg.5.norm0.weight 512 0
|
| 384 |
+
conv_reg.5.norm0.bias 512 0
|
| 385 |
+
conv_reg.5.norm1.weight 512 0
|
| 386 |
+
conv_reg.5.norm1.bias 512 0
|
| 387 |
+
conv_reg.5.seq.0.weight 262144 0
|
| 388 |
+
conv_reg.5.seq.0.bias 512 0
|
| 389 |
+
conv_reg.5.seq.2.weight 262144 0
|
| 390 |
+
conv_reg.5.seq.2.bias 512 0
|
| 391 |
+
nn_id.0.weight 290304 0
|
| 392 |
+
nn_id.0.bias 512 0
|
| 393 |
+
nn_id.2.weight 512 0
|
| 394 |
+
nn_id.2.bias 512 0
|
| 395 |
+
nn_id.4.weight 4608 0
|
| 396 |
+
nn_id.4.bias 9 0
|
| 397 |
+
nn_pt.nn.0.weight 294912 0
|
| 398 |
+
nn_pt.nn.0.bias 512 0
|
| 399 |
+
nn_pt.nn.2.weight 512 0
|
| 400 |
+
nn_pt.nn.2.bias 512 0
|
| 401 |
+
nn_pt.nn.4.weight 1024 0
|
| 402 |
+
nn_pt.nn.4.bias 2 0
|
| 403 |
+
nn_eta.nn.0.weight 294912 0
|
| 404 |
+
nn_eta.nn.0.bias 512 0
|
| 405 |
+
nn_eta.nn.2.weight 512 0
|
| 406 |
+
nn_eta.nn.2.bias 512 0
|
| 407 |
+
nn_eta.nn.4.weight 1024 0
|
| 408 |
+
nn_eta.nn.4.bias 2 0
|
| 409 |
+
nn_sin_phi.nn.0.weight 294912 0
|
| 410 |
+
nn_sin_phi.nn.0.bias 512 0
|
| 411 |
+
nn_sin_phi.nn.2.weight 512 0
|
| 412 |
+
nn_sin_phi.nn.2.bias 512 0
|
| 413 |
+
nn_sin_phi.nn.4.weight 1024 0
|
| 414 |
+
nn_sin_phi.nn.4.bias 2 0
|
| 415 |
+
nn_cos_phi.nn.0.weight 294912 0
|
| 416 |
+
nn_cos_phi.nn.0.bias 512 0
|
| 417 |
+
nn_cos_phi.nn.2.weight 512 0
|
| 418 |
+
nn_cos_phi.nn.2.bias 512 0
|
| 419 |
+
nn_cos_phi.nn.4.weight 1024 0
|
| 420 |
+
nn_cos_phi.nn.4.bias 2 0
|
| 421 |
+
nn_energy.nn.0.weight 294912 0
|
| 422 |
+
nn_energy.nn.0.bias 512 0
|
| 423 |
+
nn_energy.nn.2.weight 512 0
|
| 424 |
+
nn_energy.nn.2.bias 512 0
|
| 425 |
+
nn_energy.nn.4.weight 1024 0
|
| 426 |
+
nn_energy.nn.4.bias 2 0
|
| 427 |
+
[2024-03-24 23:57:43,682] INFO: Modules Trainable parameters Non-tranable parameters
|
| 428 |
+
nn0_id.0.weight 28160 0
|
| 429 |
+
nn0_id.0.bias 512 0
|
| 430 |
+
nn0_id.2.weight 512 0
|
| 431 |
+
nn0_id.2.bias 512 0
|
| 432 |
+
nn0_id.4.weight 262144 0
|
| 433 |
+
nn0_id.4.bias 512 0
|
| 434 |
+
nn0_reg.0.weight 28160 0
|
| 435 |
+
nn0_reg.0.bias 512 0
|
| 436 |
+
nn0_reg.2.weight 512 0
|
| 437 |
+
nn0_reg.2.bias 512 0
|
| 438 |
+
nn0_reg.4.weight 262144 0
|
| 439 |
+
nn0_reg.4.bias 512 0
|
| 440 |
+
conv_id.0.mha.in_proj_weight 786432 0
|
| 441 |
+
conv_id.0.mha.in_proj_bias 1536 0
|
| 442 |
+
conv_id.0.mha.out_proj.weight 262144 0
|
| 443 |
+
conv_id.0.mha.out_proj.bias 512 0
|
| 444 |
+
conv_id.0.norm0.weight 512 0
|
| 445 |
+
conv_id.0.norm0.bias 512 0
|
| 446 |
+
conv_id.0.norm1.weight 512 0
|
| 447 |
+
conv_id.0.norm1.bias 512 0
|
| 448 |
+
conv_id.0.seq.0.weight 262144 0
|
| 449 |
+
conv_id.0.seq.0.bias 512 0
|
| 450 |
+
conv_id.0.seq.2.weight 262144 0
|
| 451 |
+
conv_id.0.seq.2.bias 512 0
|
| 452 |
+
conv_id.1.mha.in_proj_weight 786432 0
|
| 453 |
+
conv_id.1.mha.in_proj_bias 1536 0
|
| 454 |
+
conv_id.1.mha.out_proj.weight 262144 0
|
| 455 |
+
conv_id.1.mha.out_proj.bias 512 0
|
| 456 |
+
conv_id.1.norm0.weight 512 0
|
| 457 |
+
conv_id.1.norm0.bias 512 0
|
| 458 |
+
conv_id.1.norm1.weight 512 0
|
| 459 |
+
conv_id.1.norm1.bias 512 0
|
| 460 |
+
conv_id.1.seq.0.weight 262144 0
|
| 461 |
+
conv_id.1.seq.0.bias 512 0
|
| 462 |
+
conv_id.1.seq.2.weight 262144 0
|
| 463 |
+
conv_id.1.seq.2.bias 512 0
|
| 464 |
+
conv_id.2.mha.in_proj_weight 786432 0
|
| 465 |
+
conv_id.2.mha.in_proj_bias 1536 0
|
| 466 |
+
conv_id.2.mha.out_proj.weight 262144 0
|
| 467 |
+
conv_id.2.mha.out_proj.bias 512 0
|
| 468 |
+
conv_id.2.norm0.weight 512 0
|
| 469 |
+
conv_id.2.norm0.bias 512 0
|
| 470 |
+
conv_id.2.norm1.weight 512 0
|
| 471 |
+
conv_id.2.norm1.bias 512 0
|
| 472 |
+
conv_id.2.seq.0.weight 262144 0
|
| 473 |
+
conv_id.2.seq.0.bias 512 0
|
| 474 |
+
conv_id.2.seq.2.weight 262144 0
|
| 475 |
+
conv_id.2.seq.2.bias 512 0
|
| 476 |
+
conv_id.3.mha.in_proj_weight 786432 0
|
| 477 |
+
conv_id.3.mha.in_proj_bias 1536 0
|
| 478 |
+
conv_id.3.mha.out_proj.weight 262144 0
|
| 479 |
+
conv_id.3.mha.out_proj.bias 512 0
|
| 480 |
+
conv_id.3.norm0.weight 512 0
|
| 481 |
+
conv_id.3.norm0.bias 512 0
|
| 482 |
+
conv_id.3.norm1.weight 512 0
|
| 483 |
+
conv_id.3.norm1.bias 512 0
|
| 484 |
+
conv_id.3.seq.0.weight 262144 0
|
| 485 |
+
conv_id.3.seq.0.bias 512 0
|
| 486 |
+
conv_id.3.seq.2.weight 262144 0
|
| 487 |
+
conv_id.3.seq.2.bias 512 0
|
| 488 |
+
conv_id.4.mha.in_proj_weight 786432 0
|
| 489 |
+
conv_id.4.mha.in_proj_bias 1536 0
|
| 490 |
+
conv_id.4.mha.out_proj.weight 262144 0
|
| 491 |
+
conv_id.4.mha.out_proj.bias 512 0
|
| 492 |
+
conv_id.4.norm0.weight 512 0
|
| 493 |
+
conv_id.4.norm0.bias 512 0
|
| 494 |
+
conv_id.4.norm1.weight 512 0
|
| 495 |
+
conv_id.4.norm1.bias 512 0
|
| 496 |
+
conv_id.4.seq.0.weight 262144 0
|
| 497 |
+
conv_id.4.seq.0.bias 512 0
|
| 498 |
+
conv_id.4.seq.2.weight 262144 0
|
| 499 |
+
conv_id.4.seq.2.bias 512 0
|
| 500 |
+
conv_id.5.mha.in_proj_weight 786432 0
|
| 501 |
+
conv_id.5.mha.in_proj_bias 1536 0
|
| 502 |
+
conv_id.5.mha.out_proj.weight 262144 0
|
| 503 |
+
conv_id.5.mha.out_proj.bias 512 0
|
| 504 |
+
conv_id.5.norm0.weight 512 0
|
| 505 |
+
conv_id.5.norm0.bias 512 0
|
| 506 |
+
conv_id.5.norm1.weight 512 0
|
| 507 |
+
conv_id.5.norm1.bias 512 0
|
| 508 |
+
conv_id.5.seq.0.weight 262144 0
|
| 509 |
+
conv_id.5.seq.0.bias 512 0
|
| 510 |
+
conv_id.5.seq.2.weight 262144 0
|
| 511 |
+
conv_id.5.seq.2.bias 512 0
|
| 512 |
+
conv_reg.0.mha.in_proj_weight 786432 0
|
| 513 |
+
conv_reg.0.mha.in_proj_bias 1536 0
|
| 514 |
+
conv_reg.0.mha.out_proj.weight 262144 0
|
| 515 |
+
conv_reg.0.mha.out_proj.bias 512 0
|
| 516 |
+
conv_reg.0.norm0.weight 512 0
|
| 517 |
+
conv_reg.0.norm0.bias 512 0
|
| 518 |
+
conv_reg.0.norm1.weight 512 0
|
| 519 |
+
conv_reg.0.norm1.bias 512 0
|
| 520 |
+
conv_reg.0.seq.0.weight 262144 0
|
| 521 |
+
conv_reg.0.seq.0.bias 512 0
|
| 522 |
+
conv_reg.0.seq.2.weight 262144 0
|
| 523 |
+
conv_reg.0.seq.2.bias 512 0
|
| 524 |
+
conv_reg.1.mha.in_proj_weight 786432 0
|
| 525 |
+
conv_reg.1.mha.in_proj_bias 1536 0
|
| 526 |
+
conv_reg.1.mha.out_proj.weight 262144 0
|
| 527 |
+
conv_reg.1.mha.out_proj.bias 512 0
|
| 528 |
+
conv_reg.1.norm0.weight 512 0
|
| 529 |
+
conv_reg.1.norm0.bias 512 0
|
| 530 |
+
conv_reg.1.norm1.weight 512 0
|
| 531 |
+
conv_reg.1.norm1.bias 512 0
|
| 532 |
+
conv_reg.1.seq.0.weight 262144 0
|
| 533 |
+
conv_reg.1.seq.0.bias 512 0
|
| 534 |
+
conv_reg.1.seq.2.weight 262144 0
|
| 535 |
+
conv_reg.1.seq.2.bias 512 0
|
| 536 |
+
conv_reg.2.mha.in_proj_weight 786432 0
|
| 537 |
+
conv_reg.2.mha.in_proj_bias 1536 0
|
| 538 |
+
conv_reg.2.mha.out_proj.weight 262144 0
|
| 539 |
+
conv_reg.2.mha.out_proj.bias 512 0
|
| 540 |
+
conv_reg.2.norm0.weight 512 0
|
| 541 |
+
conv_reg.2.norm0.bias 512 0
|
| 542 |
+
conv_reg.2.norm1.weight 512 0
|
| 543 |
+
conv_reg.2.norm1.bias 512 0
|
| 544 |
+
conv_reg.2.seq.0.weight 262144 0
|
| 545 |
+
conv_reg.2.seq.0.bias 512 0
|
| 546 |
+
conv_reg.2.seq.2.weight 262144 0
|
| 547 |
+
conv_reg.2.seq.2.bias 512 0
|
| 548 |
+
conv_reg.3.mha.in_proj_weight 786432 0
|
| 549 |
+
conv_reg.3.mha.in_proj_bias 1536 0
|
| 550 |
+
conv_reg.3.mha.out_proj.weight 262144 0
|
| 551 |
+
conv_reg.3.mha.out_proj.bias 512 0
|
| 552 |
+
conv_reg.3.norm0.weight 512 0
|
| 553 |
+
conv_reg.3.norm0.bias 512 0
|
| 554 |
+
conv_reg.3.norm1.weight 512 0
|
| 555 |
+
conv_reg.3.norm1.bias 512 0
|
| 556 |
+
conv_reg.3.seq.0.weight 262144 0
|
| 557 |
+
conv_reg.3.seq.0.bias 512 0
|
| 558 |
+
conv_reg.3.seq.2.weight 262144 0
|
| 559 |
+
conv_reg.3.seq.2.bias 512 0
|
| 560 |
+
conv_reg.4.mha.in_proj_weight 786432 0
|
| 561 |
+
conv_reg.4.mha.in_proj_bias 1536 0
|
| 562 |
+
conv_reg.4.mha.out_proj.weight 262144 0
|
| 563 |
+
conv_reg.4.mha.out_proj.bias 512 0
|
| 564 |
+
conv_reg.4.norm0.weight 512 0
|
| 565 |
+
conv_reg.4.norm0.bias 512 0
|
| 566 |
+
conv_reg.4.norm1.weight 512 0
|
| 567 |
+
conv_reg.4.norm1.bias 512 0
|
| 568 |
+
conv_reg.4.seq.0.weight 262144 0
|
| 569 |
+
conv_reg.4.seq.0.bias 512 0
|
| 570 |
+
conv_reg.4.seq.2.weight 262144 0
|
| 571 |
+
conv_reg.4.seq.2.bias 512 0
|
| 572 |
+
conv_reg.5.mha.in_proj_weight 786432 0
|
| 573 |
+
conv_reg.5.mha.in_proj_bias 1536 0
|
| 574 |
+
conv_reg.5.mha.out_proj.weight 262144 0
|
| 575 |
+
conv_reg.5.mha.out_proj.bias 512 0
|
| 576 |
+
conv_reg.5.norm0.weight 512 0
|
| 577 |
+
conv_reg.5.norm0.bias 512 0
|
| 578 |
+
conv_reg.5.norm1.weight 512 0
|
| 579 |
+
conv_reg.5.norm1.bias 512 0
|
| 580 |
+
conv_reg.5.seq.0.weight 262144 0
|
| 581 |
+
conv_reg.5.seq.0.bias 512 0
|
| 582 |
+
conv_reg.5.seq.2.weight 262144 0
|
| 583 |
+
conv_reg.5.seq.2.bias 512 0
|
| 584 |
+
nn_id.0.weight 290304 0
|
| 585 |
+
nn_id.0.bias 512 0
|
| 586 |
+
nn_id.2.weight 512 0
|
| 587 |
+
nn_id.2.bias 512 0
|
| 588 |
+
nn_id.4.weight 4608 0
|
| 589 |
+
nn_id.4.bias 9 0
|
| 590 |
+
nn_pt.nn.0.weight 294912 0
|
| 591 |
+
nn_pt.nn.0.bias 512 0
|
| 592 |
+
nn_pt.nn.2.weight 512 0
|
| 593 |
+
nn_pt.nn.2.bias 512 0
|
| 594 |
+
nn_pt.nn.4.weight 1024 0
|
| 595 |
+
nn_pt.nn.4.bias 2 0
|
| 596 |
+
nn_eta.nn.0.weight 294912 0
|
| 597 |
+
nn_eta.nn.0.bias 512 0
|
| 598 |
+
nn_eta.nn.2.weight 512 0
|
| 599 |
+
nn_eta.nn.2.bias 512 0
|
| 600 |
+
nn_eta.nn.4.weight 1024 0
|
| 601 |
+
nn_eta.nn.4.bias 2 0
|
| 602 |
+
nn_sin_phi.nn.0.weight 294912 0
|
| 603 |
+
nn_sin_phi.nn.0.bias 512 0
|
| 604 |
+
nn_sin_phi.nn.2.weight 512 0
|
| 605 |
+
nn_sin_phi.nn.2.bias 512 0
|
| 606 |
+
nn_sin_phi.nn.4.weight 1024 0
|
| 607 |
+
nn_sin_phi.nn.4.bias 2 0
|
| 608 |
+
nn_cos_phi.nn.0.weight 294912 0
|
| 609 |
+
nn_cos_phi.nn.0.bias 512 0
|
| 610 |
+
nn_cos_phi.nn.2.weight 512 0
|
| 611 |
+
nn_cos_phi.nn.2.bias 512 0
|
| 612 |
+
nn_cos_phi.nn.4.weight 1024 0
|
| 613 |
+
nn_cos_phi.nn.4.bias 2 0
|
| 614 |
+
nn_energy.nn.0.weight 294912 0
|
| 615 |
+
nn_energy.nn.0.bias 512 0
|
| 616 |
+
nn_energy.nn.2.weight 512 0
|
| 617 |
+
nn_energy.nn.2.bias 512 0
|
| 618 |
+
nn_energy.nn.4.weight 1024 0
|
| 619 |
+
nn_energy.nn.4.bias 2 0
|
| 620 |
+
[2024-03-24 23:57:43,685] INFO: Creating experiment dir experiments/pyg-cms_20240324_235743_208080
|
| 621 |
+
[2024-03-24 23:57:43,685] INFO: Creating experiment dir experiments/pyg-cms_20240324_235743_208080
|
| 622 |
+
[2024-03-24 23:57:43,690] INFO: [1mModel directory experiments/pyg-cms_20240324_235743_208080[0m
|
| 623 |
+
[2024-03-24 23:57:43,690] INFO: [1mModel directory experiments/pyg-cms_20240324_235743_208080[0m
|
| 624 |
+
[2024-03-24 23:57:49,345] INFO: [0;34mtrain_dataset: cms_pf_ttbar, 320100[0m
|
| 625 |
+
[2024-03-24 23:57:49,345] INFO: [0;34mtrain_dataset: cms_pf_ttbar, 320100[0m
|
| 626 |
+
[2024-03-24 23:57:49,622] INFO: [0;34mvalid_dataset: cms_pf_ttbar, 80040[0m
|
| 627 |
+
[2024-03-24 23:57:49,622] INFO: [0;34mvalid_dataset: cms_pf_ttbar, 80040[0m
|
| 628 |
+
[2024-03-24 23:57:49,736] INFO: [0;31mInitiating epoch #1 train run on device rank=0[0m
|
| 629 |
+
[2024-03-24 23:57:49,736] INFO: [0;31mInitiating epoch #1 train run on device rank=0[0m
|
| 630 |
+
[2024-03-25 04:22:50,148] INFO: [0;31mInitiating epoch #1 valid run on device rank=0[0m
|
| 631 |
+
[2024-03-25 04:22:50,148] INFO: [0;31mInitiating epoch #1 valid run on device rank=0[0m
|
| 632 |
+
[2024-03-25 04:42:43,853] INFO: [1mRank 0: epoch=1 / 100 train_loss=20.8840 valid_loss=19.4969 stale=0 time=284.9m eta=28205.3m[0m
|
| 633 |
+
[2024-03-25 04:42:43,853] INFO: [1mRank 0: epoch=1 / 100 train_loss=20.8840 valid_loss=19.4969 stale=0 time=284.9m eta=28205.3m[0m
|
| 634 |
+
[2024-03-25 04:42:43,865] INFO: [0;31mInitiating epoch #2 train run on device rank=0[0m
|
| 635 |
+
[2024-03-25 04:42:43,865] INFO: [0;31mInitiating epoch #2 train run on device rank=0[0m
|
| 636 |
+
[2024-03-25 09:08:30,154] INFO: [0;31mInitiating epoch #2 valid run on device rank=0[0m
|
| 637 |
+
[2024-03-25 09:08:30,154] INFO: [0;31mInitiating epoch #2 valid run on device rank=0[0m
|
| 638 |
+
[2024-03-25 09:28:30,625] INFO: [1mRank 0: epoch=2 / 100 train_loss=19.0650 valid_loss=18.8620 stale=0 time=285.78m eta=27963.4m[0m
|
| 639 |
+
[2024-03-25 09:28:30,625] INFO: [1mRank 0: epoch=2 / 100 train_loss=19.0650 valid_loss=18.8620 stale=0 time=285.78m eta=27963.4m[0m
|
| 640 |
+
[2024-03-25 09:28:30,647] INFO: [0;31mInitiating epoch #3 train run on device rank=0[0m
|
| 641 |
+
[2024-03-25 09:28:30,647] INFO: [0;31mInitiating epoch #3 train run on device rank=0[0m
|
| 642 |
+
[2024-03-25 13:55:12,130] INFO: [0;31mInitiating epoch #3 valid run on device rank=0[0m
|
| 643 |
+
[2024-03-25 13:55:12,130] INFO: [0;31mInitiating epoch #3 valid run on device rank=0[0m
|
| 644 |
+
[2024-03-25 14:15:15,806] INFO: [1mRank 0: epoch=3 / 100 train_loss=18.7688 valid_loss=18.6758 stale=0 time=286.75m eta=27723.7m[0m
|
| 645 |
+
[2024-03-25 14:15:15,806] INFO: [1mRank 0: epoch=3 / 100 train_loss=18.7688 valid_loss=18.6758 stale=0 time=286.75m eta=27723.7m[0m
|
| 646 |
+
[2024-03-25 14:15:15,821] INFO: [0;31mInitiating epoch #4 train run on device rank=0[0m
|
| 647 |
+
[2024-03-25 14:15:15,821] INFO: [0;31mInitiating epoch #4 train run on device rank=0[0m
|
| 648 |
+
[2024-03-25 18:42:35,229] INFO: [0;31mInitiating epoch #4 valid run on device rank=0[0m
|
| 649 |
+
[2024-03-25 18:42:35,229] INFO: [0;31mInitiating epoch #4 valid run on device rank=0[0m
|
| 650 |
+
[2024-03-25 19:02:40,697] INFO: [1mRank 0: epoch=4 / 100 train_loss=18.6170 valid_loss=18.5653 stale=0 time=287.41m eta=27476.4m[0m
|
| 651 |
+
[2024-03-25 19:02:40,697] INFO: [1mRank 0: epoch=4 / 100 train_loss=18.6170 valid_loss=18.5653 stale=0 time=287.41m eta=27476.4m[0m
|
| 652 |
+
[2024-03-25 19:02:40,717] INFO: [0;31mInitiating epoch #5 train run on device rank=0[0m
|
| 653 |
+
[2024-03-25 19:02:40,717] INFO: [0;31mInitiating epoch #5 train run on device rank=0[0m
|
| 654 |
+
[2024-03-25 23:29:55,640] INFO: [0;31mInitiating epoch #5 valid run on device rank=0[0m
|
| 655 |
+
[2024-03-25 23:29:55,640] INFO: [0;31mInitiating epoch #5 valid run on device rank=0[0m
|
| 656 |
+
[2024-03-25 23:50:00,453] INFO: [1mRank 0: epoch=5 / 100 train_loss=18.5102 valid_loss=18.4685 stale=0 time=287.33m eta=27211.4m[0m
|
| 657 |
+
[2024-03-25 23:50:00,453] INFO: [1mRank 0: epoch=5 / 100 train_loss=18.5102 valid_loss=18.4685 stale=0 time=287.33m eta=27211.4m[0m
|
| 658 |
+
[2024-03-25 23:50:00,467] INFO: [0;31mInitiating epoch #6 train run on device rank=0[0m
|
| 659 |
+
[2024-03-25 23:50:00,467] INFO: [0;31mInitiating epoch #6 train run on device rank=0[0m
|
| 660 |
+
[2024-03-26 04:16:46,611] INFO: [0;31mInitiating epoch #6 valid run on device rank=0[0m
|
| 661 |
+
[2024-03-26 04:16:46,611] INFO: [0;31mInitiating epoch #6 valid run on device rank=0[0m
|
| 662 |
+
[2024-03-26 04:36:44,551] INFO: [1mRank 0: epoch=6 / 100 train_loss=18.4325 valid_loss=18.4090 stale=0 time=286.73m eta=26929.6m[0m
|
| 663 |
+
[2024-03-26 04:36:44,551] INFO: [1mRank 0: epoch=6 / 100 train_loss=18.4325 valid_loss=18.4090 stale=0 time=286.73m eta=26929.6m[0m
|
| 664 |
+
[2024-03-26 04:36:44,567] INFO: [0;31mInitiating epoch #7 train run on device rank=0[0m
|
| 665 |
+
[2024-03-26 04:36:44,567] INFO: [0;31mInitiating epoch #7 train run on device rank=0[0m
|
| 666 |
+
[2024-03-26 09:01:18,997] INFO: [0;31mInitiating epoch #7 valid run on device rank=0[0m
|
| 667 |
+
[2024-03-26 09:01:18,997] INFO: [0;31mInitiating epoch #7 valid run on device rank=0[0m
|
| 668 |
+
[2024-03-26 09:21:17,339] INFO: [1mRank 0: epoch=7 / 100 train_loss=18.3752 valid_loss=18.3620 stale=0 time=284.55m eta=26617.4m[0m
|
| 669 |
+
[2024-03-26 09:21:17,339] INFO: [1mRank 0: epoch=7 / 100 train_loss=18.3752 valid_loss=18.3620 stale=0 time=284.55m eta=26617.4m[0m
|
| 670 |
+
[2024-03-26 09:21:17,356] INFO: [0;31mInitiating epoch #8 train run on device rank=0[0m
|
| 671 |
+
[2024-03-26 09:21:17,356] INFO: [0;31mInitiating epoch #8 train run on device rank=0[0m
|
| 672 |
+
[2024-03-26 13:46:38,478] INFO: [0;31mInitiating epoch #8 valid run on device rank=0[0m
|
| 673 |
+
[2024-03-26 13:46:38,478] INFO: [0;31mInitiating epoch #8 valid run on device rank=0[0m
|
| 674 |
+
[2024-03-26 14:06:32,269] INFO: [1mRank 0: epoch=8 / 100 train_loss=18.3286 valid_loss=18.3267 stale=0 time=285.25m eta=26320.2m[0m
|
| 675 |
+
[2024-03-26 14:06:32,269] INFO: [1mRank 0: epoch=8 / 100 train_loss=18.3286 valid_loss=18.3267 stale=0 time=285.25m eta=26320.2m[0m
|
| 676 |
+
[2024-03-26 14:06:32,282] INFO: [0;31mInitiating epoch #9 train run on device rank=0[0m
|
| 677 |
+
[2024-03-26 14:06:32,282] INFO: [0;31mInitiating epoch #9 train run on device rank=0[0m
|
| 678 |
+
[2024-03-26 18:32:18,831] INFO: [0;31mInitiating epoch #9 valid run on device rank=0[0m
|
| 679 |
+
[2024-03-26 18:32:18,831] INFO: [0;31mInitiating epoch #9 valid run on device rank=0[0m
|
| 680 |
+
[2024-03-26 18:52:20,399] INFO: [1mRank 0: epoch=9 / 100 train_loss=18.2890 valid_loss=18.2883 stale=0 time=285.8m eta=26031.2m[0m
|
| 681 |
+
[2024-03-26 18:52:20,399] INFO: [1mRank 0: epoch=9 / 100 train_loss=18.2890 valid_loss=18.2883 stale=0 time=285.8m eta=26031.2m[0m
|
| 682 |
+
[2024-03-26 18:52:20,416] INFO: [0;31mInitiating epoch #10 train run on device rank=0[0m
|
| 683 |
+
[2024-03-26 18:52:20,416] INFO: [0;31mInitiating epoch #10 train run on device rank=0[0m
|
| 684 |
+
[2024-03-26 23:18:19,010] INFO: [0;31mInitiating epoch #10 valid run on device rank=0[0m
|
| 685 |
+
[2024-03-26 23:18:19,010] INFO: [0;31mInitiating epoch #10 valid run on device rank=0[0m
|
| 686 |
+
[2024-03-26 23:38:18,380] INFO: [1mRank 0: epoch=10 / 100 train_loss=18.2532 valid_loss=18.2458 stale=0 time=285.97m eta=25744.3m[0m
|
| 687 |
+
[2024-03-26 23:38:18,380] INFO: [1mRank 0: epoch=10 / 100 train_loss=18.2532 valid_loss=18.2458 stale=0 time=285.97m eta=25744.3m[0m
|
| 688 |
+
[2024-03-26 23:38:18,395] INFO: [0;31mInitiating epoch #11 train run on device rank=0[0m
|
| 689 |
+
[2024-03-26 23:38:18,395] INFO: [0;31mInitiating epoch #11 train run on device rank=0[0m
|
| 690 |
+
[2024-03-27 04:04:16,483] INFO: [0;31mInitiating epoch #11 valid run on device rank=0[0m
|
| 691 |
+
[2024-03-27 04:04:16,483] INFO: [0;31mInitiating epoch #11 valid run on device rank=0[0m
|
| 692 |
+
[2024-03-27 04:24:14,645] INFO: [1mRank 0: epoch=11 / 100 train_loss=18.2191 valid_loss=18.2212 stale=0 time=285.94m eta=25457.4m[0m
|
| 693 |
+
[2024-03-27 04:24:14,645] INFO: [1mRank 0: epoch=11 / 100 train_loss=18.2191 valid_loss=18.2212 stale=0 time=285.94m eta=25457.4m[0m
|
| 694 |
+
[2024-03-27 04:24:14,659] INFO: [0;31mInitiating epoch #12 train run on device rank=0[0m
|
| 695 |
+
[2024-03-27 04:24:14,659] INFO: [0;31mInitiating epoch #12 train run on device rank=0[0m
|
| 696 |
+
[2024-03-27 08:50:12,730] INFO: [0;31mInitiating epoch #12 valid run on device rank=0[0m
|
| 697 |
+
[2024-03-27 08:50:12,730] INFO: [0;31mInitiating epoch #12 valid run on device rank=0[0m
|
| 698 |
+
[2024-03-27 09:10:07,240] INFO: [1mRank 0: epoch=12 / 100 train_loss=18.1875 valid_loss=18.2005 stale=0 time=285.88m eta=25170.1m[0m
|
| 699 |
+
[2024-03-27 09:10:07,240] INFO: [1mRank 0: epoch=12 / 100 train_loss=18.1875 valid_loss=18.2005 stale=0 time=285.88m eta=25170.1m[0m
|
| 700 |
+
[2024-03-27 09:10:07,255] INFO: [0;31mInitiating epoch #13 train run on device rank=0[0m
|
| 701 |
+
[2024-03-27 09:10:07,255] INFO: [0;31mInitiating epoch #13 train run on device rank=0[0m
|
| 702 |
+
[2024-03-27 13:36:41,534] INFO: [0;31mInitiating epoch #13 valid run on device rank=0[0m
|
| 703 |
+
[2024-03-27 13:36:41,534] INFO: [0;31mInitiating epoch #13 valid run on device rank=0[0m
|
| 704 |
+
[2024-03-27 13:56:51,429] INFO: [1mRank 0: epoch=13 / 100 train_loss=18.1578 valid_loss=18.1808 stale=0 time=286.74m eta=24888.9m[0m
|
| 705 |
+
[2024-03-27 13:56:51,429] INFO: [1mRank 0: epoch=13 / 100 train_loss=18.1578 valid_loss=18.1808 stale=0 time=286.74m eta=24888.9m[0m
|
| 706 |
+
[2024-03-27 13:56:51,446] INFO: [0;31mInitiating epoch #14 train run on device rank=0[0m
|
| 707 |
+
[2024-03-27 13:56:51,446] INFO: [0;31mInitiating epoch #14 train run on device rank=0[0m
|
| 708 |
+
[2024-03-27 18:24:24,174] INFO: [0;31mInitiating epoch #14 valid run on device rank=0[0m
|
| 709 |
+
[2024-03-27 18:24:24,174] INFO: [0;31mInitiating epoch #14 valid run on device rank=0[0m
|
| 710 |
+
[2024-03-27 18:44:20,853] INFO: [1mRank 0: epoch=14 / 100 train_loss=18.1282 valid_loss=18.1575 stale=0 time=287.49m eta=24611.5m[0m
|
| 711 |
+
[2024-03-27 18:44:20,853] INFO: [1mRank 0: epoch=14 / 100 train_loss=18.1282 valid_loss=18.1575 stale=0 time=287.49m eta=24611.5m[0m
|
| 712 |
+
[2024-03-27 18:44:20,870] INFO: [0;31mInitiating epoch #15 train run on device rank=0[0m
|
| 713 |
+
[2024-03-27 18:44:20,870] INFO: [0;31mInitiating epoch #15 train run on device rank=0[0m
|
| 714 |
+
[2024-03-27 23:12:11,710] INFO: [0;31mInitiating epoch #15 valid run on device rank=0[0m
|
| 715 |
+
[2024-03-27 23:12:11,710] INFO: [0;31mInitiating epoch #15 valid run on device rank=0[0m
|
| 716 |
+
[2024-03-27 23:32:20,988] INFO: [1mRank 0: epoch=15 / 100 train_loss=18.0996 valid_loss=18.1267 stale=0 time=288.0m eta=24335.6m[0m
|
| 717 |
+
[2024-03-27 23:32:20,988] INFO: [1mRank 0: epoch=15 / 100 train_loss=18.0996 valid_loss=18.1267 stale=0 time=288.0m eta=24335.6m[0m
|
| 718 |
+
[2024-03-27 23:32:21,002] INFO: [0;31mInitiating epoch #16 train run on device rank=0[0m
|
| 719 |
+
[2024-03-27 23:32:21,002] INFO: [0;31mInitiating epoch #16 train run on device rank=0[0m
|
| 720 |
+
[2024-03-28 03:58:27,660] INFO: [0;31mInitiating epoch #16 valid run on device rank=0[0m
|
| 721 |
+
[2024-03-28 03:58:27,660] INFO: [0;31mInitiating epoch #16 valid run on device rank=0[0m
|
| 722 |
+
[2024-03-28 04:18:21,127] INFO: [1mRank 0: epoch=16 / 100 train_loss=18.0729 valid_loss=18.1030 stale=0 time=286.0m eta=24047.7m[0m
|
| 723 |
+
[2024-03-28 04:18:21,127] INFO: [1mRank 0: epoch=16 / 100 train_loss=18.0729 valid_loss=18.1030 stale=0 time=286.0m eta=24047.7m[0m
|
| 724 |
+
[2024-03-28 04:18:21,141] INFO: [0;31mInitiating epoch #17 train run on device rank=0[0m
|
| 725 |
+
[2024-03-28 04:18:21,141] INFO: [0;31mInitiating epoch #17 train run on device rank=0[0m
|
| 726 |
+
[2024-03-28 08:44:11,691] INFO: [0;31mInitiating epoch #17 valid run on device rank=0[0m
|
| 727 |
+
[2024-03-28 08:44:11,691] INFO: [0;31mInitiating epoch #17 valid run on device rank=0[0m
|
| 728 |
+
[2024-03-28 09:04:09,550] INFO: [1mRank 0: epoch=17 / 100 train_loss=18.0478 valid_loss=18.0912 stale=0 time=285.81m eta=23759.1m[0m
|
| 729 |
+
[2024-03-28 09:04:09,550] INFO: [1mRank 0: epoch=17 / 100 train_loss=18.0478 valid_loss=18.0912 stale=0 time=285.81m eta=23759.1m[0m
|
| 730 |
+
[2024-03-28 09:04:09,566] INFO: [0;31mInitiating epoch #18 train run on device rank=0[0m
|
| 731 |
+
[2024-03-28 09:04:09,566] INFO: [0;31mInitiating epoch #18 train run on device rank=0[0m
|
| 732 |
+
[2024-03-28 13:29:43,054] INFO: [0;31mInitiating epoch #18 valid run on device rank=0[0m
|
| 733 |
+
[2024-03-28 13:29:43,054] INFO: [0;31mInitiating epoch #18 valid run on device rank=0[0m
|
| 734 |
+
[2024-03-28 13:49:42,746] INFO: [1mRank 0: epoch=18 / 100 train_loss=18.0235 valid_loss=18.0697 stale=0 time=285.55m eta=23469.7m[0m
|
| 735 |
+
[2024-03-28 13:49:42,746] INFO: [1mRank 0: epoch=18 / 100 train_loss=18.0235 valid_loss=18.0697 stale=0 time=285.55m eta=23469.7m[0m
|
| 736 |
+
[2024-03-28 13:49:42,760] INFO: [0;31mInitiating epoch #19 train run on device rank=0[0m
|
| 737 |
+
[2024-03-28 13:49:42,760] INFO: [0;31mInitiating epoch #19 train run on device rank=0[0m
|
| 738 |
+
[2024-03-28 18:15:26,865] INFO: [0;31mInitiating epoch #19 valid run on device rank=0[0m
|
| 739 |
+
[2024-03-28 18:15:26,865] INFO: [0;31mInitiating epoch #19 valid run on device rank=0[0m
|
| 740 |
+
[2024-03-28 18:35:24,119] INFO: [1mRank 0: epoch=19 / 100 train_loss=18.0008 valid_loss=18.0532 stale=0 time=285.69m eta=23181.2m[0m
|
| 741 |
+
[2024-03-28 18:35:24,119] INFO: [1mRank 0: epoch=19 / 100 train_loss=18.0008 valid_loss=18.0532 stale=0 time=285.69m eta=23181.2m[0m
|
| 742 |
+
[2024-03-28 18:35:24,137] INFO: [0;31mInitiating epoch #20 train run on device rank=0[0m
|
| 743 |
+
[2024-03-28 18:35:24,137] INFO: [0;31mInitiating epoch #20 train run on device rank=0[0m
|
| 744 |
+
[2024-03-28 23:00:36,705] INFO: [0;31mInitiating epoch #20 valid run on device rank=0[0m
|
| 745 |
+
[2024-03-28 23:00:36,705] INFO: [0;31mInitiating epoch #20 valid run on device rank=0[0m
|
| 746 |
+
[2024-03-28 23:20:35,806] INFO: [1mRank 0: epoch=20 / 100 train_loss=17.9783 valid_loss=18.0347 stale=0 time=285.19m eta=22891.1m[0m
|
| 747 |
+
[2024-03-28 23:20:35,806] INFO: [1mRank 0: epoch=20 / 100 train_loss=17.9783 valid_loss=18.0347 stale=0 time=285.19m eta=22891.1m[0m
|
| 748 |
+
[2024-03-28 23:20:35,825] INFO: [0;31mInitiating epoch #21 train run on device rank=0[0m
|
| 749 |
+
[2024-03-28 23:20:35,825] INFO: [0;31mInitiating epoch #21 train run on device rank=0[0m
|
| 750 |
+
[2024-03-29 03:46:25,188] INFO: [0;31mInitiating epoch #21 valid run on device rank=0[0m
|
| 751 |
+
[2024-03-29 03:46:25,188] INFO: [0;31mInitiating epoch #21 valid run on device rank=0[0m
|
| 752 |
+
[2024-03-29 04:06:24,286] INFO: [1mRank 0: epoch=21 / 100 train_loss=17.9565 valid_loss=18.0197 stale=0 time=285.81m eta=22603.7m[0m
|
| 753 |
+
[2024-03-29 04:06:24,286] INFO: [1mRank 0: epoch=21 / 100 train_loss=17.9565 valid_loss=18.0197 stale=0 time=285.81m eta=22603.7m[0m
|
| 754 |
+
[2024-03-29 04:06:24,301] INFO: [0;31mInitiating epoch #22 train run on device rank=0[0m
|
| 755 |
+
[2024-03-29 04:06:24,301] INFO: [0;31mInitiating epoch #22 train run on device rank=0[0m
|
| 756 |
+
[2024-03-29 08:31:11,736] INFO: [0;31mInitiating epoch #22 valid run on device rank=0[0m
|
| 757 |
+
[2024-03-29 08:31:11,736] INFO: [0;31mInitiating epoch #22 valid run on device rank=0[0m
|
| 758 |
+
[2024-03-29 08:51:08,443] INFO: [1mRank 0: epoch=22 / 100 train_loss=17.9356 valid_loss=18.0009 stale=0 time=284.74m eta=22312.7m[0m
|
| 759 |
+
[2024-03-29 08:51:08,443] INFO: [1mRank 0: epoch=22 / 100 train_loss=17.9356 valid_loss=18.0009 stale=0 time=284.74m eta=22312.7m[0m
|
| 760 |
+
[2024-03-29 08:51:08,458] INFO: [0;31mInitiating epoch #23 train run on device rank=0[0m
|
| 761 |
+
[2024-03-29 08:51:08,458] INFO: [0;31mInitiating epoch #23 train run on device rank=0[0m
|
| 762 |
+
[2024-03-29 13:16:30,304] INFO: [0;31mInitiating epoch #23 valid run on device rank=0[0m
|
| 763 |
+
[2024-03-29 13:16:30,304] INFO: [0;31mInitiating epoch #23 valid run on device rank=0[0m
|
| 764 |
+
[2024-03-29 13:36:29,623] INFO: [1mRank 0: epoch=23 / 100 train_loss=17.9150 valid_loss=17.9919 stale=0 time=285.35m eta=22024.2m[0m
|
| 765 |
+
[2024-03-29 13:36:29,623] INFO: [1mRank 0: epoch=23 / 100 train_loss=17.9150 valid_loss=17.9919 stale=0 time=285.35m eta=22024.2m[0m
|
| 766 |
+
[2024-03-29 13:36:29,637] INFO: [0;31mInitiating epoch #24 train run on device rank=0[0m
|
| 767 |
+
[2024-03-29 13:36:29,637] INFO: [0;31mInitiating epoch #24 train run on device rank=0[0m
|
| 768 |
+
[2024-03-29 18:01:59,324] INFO: [0;31mInitiating epoch #24 valid run on device rank=0[0m
|
| 769 |
+
[2024-03-29 18:01:59,324] INFO: [0;31mInitiating epoch #24 valid run on device rank=0[0m
|
| 770 |
+
[2024-03-29 18:21:58,557] INFO: [1mRank 0: epoch=24 / 100 train_loss=17.8948 valid_loss=17.9806 stale=0 time=285.48m eta=21736.5m[0m
|
| 771 |
+
[2024-03-29 18:21:58,557] INFO: [1mRank 0: epoch=24 / 100 train_loss=17.8948 valid_loss=17.9806 stale=0 time=285.48m eta=21736.5m[0m
|
| 772 |
+
[2024-03-29 18:21:58,573] INFO: [0;31mInitiating epoch #25 train run on device rank=0[0m
|
| 773 |
+
[2024-03-29 18:21:58,573] INFO: [0;31mInitiating epoch #25 train run on device rank=0[0m
|
| 774 |
+
[2024-03-29 22:47:04,103] INFO: [0;31mInitiating epoch #25 valid run on device rank=0[0m
|
| 775 |
+
[2024-03-29 22:47:04,103] INFO: [0;31mInitiating epoch #25 valid run on device rank=0[0m
|
| 776 |
+
[2024-03-29 23:06:58,509] INFO: [1mRank 0: epoch=25 / 100 train_loss=17.8745 valid_loss=17.9677 stale=0 time=285.0m eta=21447.4m[0m
|
| 777 |
+
[2024-03-29 23:06:58,509] INFO: [1mRank 0: epoch=25 / 100 train_loss=17.8745 valid_loss=17.9677 stale=0 time=285.0m eta=21447.4m[0m
|
| 778 |
+
[2024-03-29 23:06:58,528] INFO: [0;31mInitiating epoch #26 train run on device rank=0[0m
|
| 779 |
+
[2024-03-29 23:06:58,528] INFO: [0;31mInitiating epoch #26 train run on device rank=0[0m
|
| 780 |
+
[2024-03-30 03:32:08,173] INFO: [0;31mInitiating epoch #26 valid run on device rank=0[0m
|
| 781 |
+
[2024-03-30 03:32:08,173] INFO: [0;31mInitiating epoch #26 valid run on device rank=0[0m
|
| 782 |
+
[2024-03-30 03:52:08,592] INFO: [1mRank 0: epoch=26 / 100 train_loss=17.8549 valid_loss=17.9569 stale=0 time=285.17m eta=21159.2m[0m
|
| 783 |
+
[2024-03-30 03:52:08,592] INFO: [1mRank 0: epoch=26 / 100 train_loss=17.8549 valid_loss=17.9569 stale=0 time=285.17m eta=21159.2m[0m
|
| 784 |
+
[2024-03-30 03:52:08,608] INFO: [0;31mInitiating epoch #27 train run on device rank=0[0m
|
| 785 |
+
[2024-03-30 03:52:08,608] INFO: [0;31mInitiating epoch #27 train run on device rank=0[0m
|
| 786 |
+
[2024-03-30 08:17:59,526] INFO: [0;31mInitiating epoch #27 valid run on device rank=0[0m
|
| 787 |
+
[2024-03-30 08:17:59,526] INFO: [0;31mInitiating epoch #27 valid run on device rank=0[0m
|
| 788 |
+
[2024-03-30 08:38:00,700] INFO: [1mRank 0: epoch=27 / 100 train_loss=17.8347 valid_loss=17.9366 stale=0 time=285.87m eta=20873.1m[0m
|
| 789 |
+
[2024-03-30 08:38:00,700] INFO: [1mRank 0: epoch=27 / 100 train_loss=17.8347 valid_loss=17.9366 stale=0 time=285.87m eta=20873.1m[0m
|
| 790 |
+
[2024-03-30 08:38:00,714] INFO: [0;31mInitiating epoch #28 train run on device rank=0[0m
|
| 791 |
+
[2024-03-30 08:38:00,714] INFO: [0;31mInitiating epoch #28 train run on device rank=0[0m
|
| 792 |
+
[2024-03-30 13:03:33,057] INFO: [0;31mInitiating epoch #28 valid run on device rank=0[0m
|
| 793 |
+
[2024-03-30 13:03:33,057] INFO: [0;31mInitiating epoch #28 valid run on device rank=0[0m
|
| 794 |
+
[2024-03-30 13:23:29,411] INFO: [1mRank 0: epoch=28 / 100 train_loss=17.8149 valid_loss=17.9264 stale=0 time=285.48m eta=20586.0m[0m
|
| 795 |
+
[2024-03-30 13:23:29,411] INFO: [1mRank 0: epoch=28 / 100 train_loss=17.8149 valid_loss=17.9264 stale=0 time=285.48m eta=20586.0m[0m
|
| 796 |
+
[2024-03-30 13:23:29,424] INFO: [0;31mInitiating epoch #29 train run on device rank=0[0m
|
| 797 |
+
[2024-03-30 13:23:29,424] INFO: [0;31mInitiating epoch #29 train run on device rank=0[0m
|
| 798 |
+
[2024-03-30 17:49:27,995] INFO: [0;31mInitiating epoch #29 valid run on device rank=0[0m
|
| 799 |
+
[2024-03-30 17:49:27,995] INFO: [0;31mInitiating epoch #29 valid run on device rank=0[0m
|
| 800 |
+
[2024-03-30 18:09:26,968] INFO: [1mRank 0: epoch=29 / 100 train_loss=17.7950 valid_loss=17.9112 stale=0 time=285.96m eta=20300.2m[0m
|
| 801 |
+
[2024-03-30 18:09:26,968] INFO: [1mRank 0: epoch=29 / 100 train_loss=17.7950 valid_loss=17.9112 stale=0 time=285.96m eta=20300.2m[0m
|
| 802 |
+
[2024-03-30 18:09:26,982] INFO: [0;31mInitiating epoch #30 train run on device rank=0[0m
|
| 803 |
+
[2024-03-30 18:09:26,982] INFO: [0;31mInitiating epoch #30 train run on device rank=0[0m
|
| 804 |
+
[2024-03-30 22:34:51,876] INFO: [0;31mInitiating epoch #30 valid run on device rank=0[0m
|
| 805 |
+
[2024-03-30 22:34:51,876] INFO: [0;31mInitiating epoch #30 valid run on device rank=0[0m
|
| 806 |
+
[2024-03-30 22:54:53,145] INFO: [1mRank 0: epoch=30 / 100 train_loss=17.7757 valid_loss=17.9006 stale=0 time=285.44m eta=20013.1m[0m
|
| 807 |
+
[2024-03-30 22:54:53,145] INFO: [1mRank 0: epoch=30 / 100 train_loss=17.7757 valid_loss=17.9006 stale=0 time=285.44m eta=20013.1m[0m
|
| 808 |
+
[2024-03-30 22:54:53,159] INFO: [0;31mInitiating epoch #31 train run on device rank=0[0m
|
| 809 |
+
[2024-03-30 22:54:53,159] INFO: [0;31mInitiating epoch #31 train run on device rank=0[0m
|
| 810 |
+
[2024-03-31 04:20:44,340] INFO: [0;31mInitiating epoch #31 valid run on device rank=0[0m
|
| 811 |
+
[2024-03-31 04:20:44,340] INFO: [0;31mInitiating epoch #31 valid run on device rank=0[0m
|
| 812 |
+
[2024-03-31 04:40:42,326] INFO: [1mRank 0: epoch=31 / 100 train_loss=17.7565 valid_loss=17.8932 stale=0 time=285.82m eta=19727.0m[0m
|
| 813 |
+
[2024-03-31 04:40:42,326] INFO: [1mRank 0: epoch=31 / 100 train_loss=17.7565 valid_loss=17.8932 stale=0 time=285.82m eta=19727.0m[0m
|
| 814 |
+
[2024-03-31 04:40:42,350] INFO: [0;31mInitiating epoch #32 train run on device rank=0[0m
|
| 815 |
+
[2024-03-31 04:40:42,350] INFO: [0;31mInitiating epoch #32 train run on device rank=0[0m
|
| 816 |
+
[2024-03-31 09:06:05,665] INFO: [0;31mInitiating epoch #32 valid run on device rank=0[0m
|
| 817 |
+
[2024-03-31 09:06:05,665] INFO: [0;31mInitiating epoch #32 valid run on device rank=0[0m
|
| 818 |
+
[2024-03-31 09:26:03,353] INFO: [1mRank 0: epoch=32 / 100 train_loss=17.7375 valid_loss=17.8774 stale=0 time=285.35m eta=19440.0m[0m
|
| 819 |
+
[2024-03-31 09:26:03,353] INFO: [1mRank 0: epoch=32 / 100 train_loss=17.7375 valid_loss=17.8774 stale=0 time=285.35m eta=19440.0m[0m
|
| 820 |
+
[2024-03-31 09:26:03,368] INFO: [0;31mInitiating epoch #33 train run on device rank=0[0m
|
| 821 |
+
[2024-03-31 09:26:03,368] INFO: [0;31mInitiating epoch #33 train run on device rank=0[0m
|