zhoupeng commited on
Commit
15671e7
·
1 Parent(s): 38a3f50

Add 100000_titok_gen.bin

Browse files
Files changed (30) hide show
  1. stage1/rar_baseline/checkpoint-20000/ema_model/pytorch_model.bin +3 -0
  2. stage1/rar_baseline/checkpoint-20000/metadata.json +1 -0
  3. stage1/rar_baseline/checkpoint-20000/optimizer.bin +3 -0
  4. stage1/rar_baseline/checkpoint-20000/pytorch_model.bin +3 -0
  5. stage1/rar_baseline/checkpoint-20000/random_states_0.pkl +3 -0
  6. stage1/rar_baseline/checkpoint-20000/random_states_1.pkl +3 -0
  7. stage1/rar_baseline/checkpoint-20000/random_states_2.pkl +3 -0
  8. stage1/rar_baseline/checkpoint-20000/random_states_3.pkl +3 -0
  9. stage1/rar_baseline/checkpoint-20000/scheduler.bin +3 -0
  10. stage1/rar_baseline/checkpoint-20000/unwrapped_model/pytorch_model.bin +3 -0
  11. stage1/rar_baseline/config.yaml +80 -0
  12. stage1/rar_baseline/log0.txt +389 -0
  13. stage1/rar_baseline/log1.txt +0 -0
  14. stage1/rar_baseline/log2.txt +0 -0
  15. stage1/rar_baseline/log3.txt +0 -0
  16. stage1/rar_ordertok/checkpoint-40000/ema_model/pytorch_model.bin +3 -0
  17. stage1/rar_ordertok/checkpoint-40000/metadata.json +1 -0
  18. stage1/rar_ordertok/checkpoint-40000/optimizer.bin +3 -0
  19. stage1/rar_ordertok/checkpoint-40000/pytorch_model.bin +3 -0
  20. stage1/rar_ordertok/checkpoint-40000/random_states_0.pkl +3 -0
  21. stage1/rar_ordertok/checkpoint-40000/random_states_1.pkl +3 -0
  22. stage1/rar_ordertok/checkpoint-40000/random_states_2.pkl +3 -0
  23. stage1/rar_ordertok/checkpoint-40000/random_states_3.pkl +3 -0
  24. stage1/rar_ordertok/checkpoint-40000/scheduler.bin +3 -0
  25. stage1/rar_ordertok/checkpoint-40000/unwrapped_model/pytorch_model.bin +3 -0
  26. stage1/rar_ordertok/config.yaml +80 -0
  27. stage1/rar_ordertok/log0.txt +682 -0
  28. stage1/rar_ordertok/log1.txt +0 -0
  29. stage1/rar_ordertok/log2.txt +0 -0
  30. stage1/rar_ordertok/log3.txt +0 -0
stage1/rar_baseline/checkpoint-20000/ema_model/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:efcdfb031d5615d27de63cfe298ab6a9ce6c52e7e8ad3e3a2c8da10376114371
3
+ size 1869255086
stage1/rar_baseline/checkpoint-20000/metadata.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"global_step": 20000}
stage1/rar_baseline/checkpoint-20000/optimizer.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4faac2b4fc55fe3cd2f0a2f809ceedc47b3d413ea6c9495dfebf45270ac89f00
3
+ size 3738566397
stage1/rar_baseline/checkpoint-20000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:383bf318db84206160363dfc3aae0b07b584fdaa67019c05b088e689c287cb56
3
+ size 1869261230
stage1/rar_baseline/checkpoint-20000/random_states_0.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ac0f4dd2bf5bfca612ce2cac18e01892c868545b7e4bcda7de1257843042b81
3
+ size 15060
stage1/rar_baseline/checkpoint-20000/random_states_1.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb4811e1783783146044fb8bd9032daf7f7799da6381ac819bdf599957290619
3
+ size 15124
stage1/rar_baseline/checkpoint-20000/random_states_2.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ba2163f421466f90ac03c27fc8c171ebc3b98795de43da9a6e4d693ce8b04f9
3
+ size 15124
stage1/rar_baseline/checkpoint-20000/random_states_3.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99e1b77d4f03e9da7262c7941ae8b545f5a3740a1390cd7379621dd25845ebaa
3
+ size 15124
stage1/rar_baseline/checkpoint-20000/scheduler.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2c0f384522475d365d71add24d50110e47dd5ff060fbeea59bc6767b3755835
3
+ size 1064
stage1/rar_baseline/checkpoint-20000/unwrapped_model/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:383bf318db84206160363dfc3aae0b07b584fdaa67019c05b088e689c287cb56
3
+ size 1869261230
stage1/rar_baseline/config.yaml ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment:
2
+ project: titok_ca_rar
3
+ name: titok_ca_rar
4
+ max_train_examples: 1281167
5
+ save_every: 10000
6
+ eval_every: 5000000
7
+ generate_every: 10000000
8
+ log_every: 100
9
+ log_grad_norm_every: 1000
10
+ resume: true
11
+ tokenizer_checkpoint: /mnt/yscfs/huangmengqi/projects/visual_tokenization/order-tok/train_stage1/baseline_simvq/checkpoint-100000/ema_model/pytorch_model.bin
12
+ output_dir: stage1/rar_baseline
13
+ logging_dir: stage1/rar_baseline/logs
14
+ model:
15
+ vq_model:
16
+ codebook_size: 4096
17
+ token_size: 256
18
+ use_l2_norm: true
19
+ commitment_cost: 0.25
20
+ vit_enc_model_size: large
21
+ vit_dec_model_size: large
22
+ vit_enc_patch_size: 16
23
+ vit_dec_patch_size: 16
24
+ num_latent_tokens: 32
25
+ layers_x: 18
26
+ layers_token: 2
27
+ embedding_width: 1024
28
+ width: 256
29
+ finetune_decoder: false
30
+ pretrained_tokenizer_weight: maskgit-vqgan-imagenet-f16-256.bin
31
+ generator:
32
+ hidden_size: 1024
33
+ num_hidden_layers: 24
34
+ num_attention_heads: 16
35
+ intermediate_size: 4096
36
+ dropout: 0.1
37
+ attn_drop: 0.1
38
+ class_label_dropout: 0.1
39
+ image_seq_len: 32
40
+ condition_num_classes: 1000
41
+ randomize_temperature: 1.02
42
+ guidance_scale: 15.5
43
+ guidance_scale_pow: 2.5
44
+ use_checkpoint: false
45
+ randomness_anneal_start: 0
46
+ randomness_anneal_end: 0
47
+ dataset:
48
+ params:
49
+ train_shards_path_or_url: imagenet/imagenet1k-train-{0000..1023}.tar
50
+ eval_shards_path_or_url: imagenet/imagenet1k-validation-{00..63}.tar
51
+ num_workers_per_gpu: 2
52
+ preprocessing:
53
+ resize_shorter_edge: 256
54
+ crop_size: 256
55
+ random_crop: false
56
+ random_flip: true
57
+ optimizer:
58
+ name: adamw
59
+ params:
60
+ learning_rate: 0.00035
61
+ beta1: 0.9
62
+ beta2: 0.96
63
+ weight_decay: 0.03
64
+ lr_scheduler:
65
+ scheduler: cosine
66
+ params:
67
+ learning_rate: ${optimizer.params.learning_rate}
68
+ warmup_steps: 0
69
+ end_lr: 1.0e-05
70
+ training:
71
+ gradient_accumulation_steps: 1
72
+ per_gpu_batch_size: 256
73
+ mixed_precision: bf16
74
+ enable_tf32: true
75
+ enable_wandb: true
76
+ use_ema: true
77
+ seed: 42
78
+ max_train_steps: 500000
79
+ max_grad_norm: 1.0
80
+ config: configs/training/generator/rar.yaml
stage1/rar_baseline/log0.txt ADDED
@@ -0,0 +1,389 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [04/09 14:31:10 RAR]: Saving config to stage1/rar_baseline/config.yaml
2
+ [04/09 14:31:10 RAR]: Config:
3
+ experiment:
4
+ project: titok_ca_rar
5
+ name: titok_ca_rar
6
+ max_train_examples: 1281167
7
+ save_every: 10000
8
+ eval_every: 5000000
9
+ generate_every: 10000000
10
+ log_every: 100
11
+ log_grad_norm_every: 1000
12
+ resume: true
13
+ tokenizer_checkpoint: /mnt/yscfs/huangmengqi/projects/visual_tokenization/order-tok/train_stage1/baseline_simvq/checkpoint-100000/ema_model/pytorch_model.bin
14
+ output_dir: stage1/rar_baseline
15
+ logging_dir: stage1/rar_baseline/logs
16
+ model:
17
+ vq_model:
18
+ codebook_size: 4096
19
+ token_size: 12
20
+ use_l2_norm: true
21
+ commitment_cost: 0.25
22
+ vit_enc_model_size: large
23
+ vit_dec_model_size: large
24
+ vit_enc_patch_size: 16
25
+ vit_dec_patch_size: 16
26
+ num_latent_tokens: 32
27
+ layers_x: 18
28
+ layers_token: 2
29
+ embedding_width: 1024
30
+ width: 256
31
+ finetune_decoder: false
32
+ pretrained_tokenizer_weight: maskgit-vqgan-imagenet-f16-256.bin
33
+ generator:
34
+ hidden_size: 1024
35
+ num_hidden_layers: 24
36
+ num_attention_heads: 16
37
+ intermediate_size: 4096
38
+ dropout: 0.1
39
+ attn_drop: 0.1
40
+ class_label_dropout: 0.1
41
+ image_seq_len: 32
42
+ condition_num_classes: 1000
43
+ randomize_temperature: 1.02
44
+ guidance_scale: 15.5
45
+ guidance_scale_pow: 2.5
46
+ use_checkpoint: false
47
+ randomness_anneal_start: 0
48
+ randomness_anneal_end: 0
49
+ dataset:
50
+ params:
51
+ train_shards_path_or_url: imagenet/imagenet1k-train-{0000..1023}.tar
52
+ eval_shards_path_or_url: imagenet/imagenet1k-validation-{00..63}.tar
53
+ num_workers_per_gpu: 2
54
+ preprocessing:
55
+ resize_shorter_edge: 256
56
+ crop_size: 256
57
+ random_crop: false
58
+ random_flip: true
59
+ optimizer:
60
+ name: adamw
61
+ params:
62
+ learning_rate: 0.00035
63
+ beta1: 0.9
64
+ beta2: 0.96
65
+ weight_decay: 0.03
66
+ lr_scheduler:
67
+ scheduler: cosine
68
+ params:
69
+ learning_rate: ${optimizer.params.learning_rate}
70
+ warmup_steps: 0
71
+ end_lr: 1.0e-05
72
+ training:
73
+ gradient_accumulation_steps: 1
74
+ per_gpu_batch_size: 256
75
+ mixed_precision: bf16
76
+ enable_tf32: true
77
+ enable_wandb: true
78
+ use_ema: true
79
+ seed: 42
80
+ max_train_steps: 500000
81
+ max_grad_norm: 1.0
82
+ config: configs/training/generator/rar.yaml
83
+
84
+ [04/09 14:32:51 RAR]: Saving config to stage1/rar_baseline/config.yaml
85
+ [04/09 14:32:51 RAR]: Config:
86
+ experiment:
87
+ project: titok_ca_rar
88
+ name: titok_ca_rar
89
+ max_train_examples: 1281167
90
+ save_every: 10000
91
+ eval_every: 5000000
92
+ generate_every: 10000000
93
+ log_every: 100
94
+ log_grad_norm_every: 1000
95
+ resume: true
96
+ tokenizer_checkpoint: /mnt/yscfs/huangmengqi/projects/visual_tokenization/order-tok/train_stage1/baseline_simvq/checkpoint-100000/ema_model/pytorch_model.bin
97
+ output_dir: stage1/rar_baseline
98
+ logging_dir: stage1/rar_baseline/logs
99
+ model:
100
+ vq_model:
101
+ codebook_size: 4096
102
+ token_size: 256
103
+ use_l2_norm: true
104
+ commitment_cost: 0.25
105
+ vit_enc_model_size: large
106
+ vit_dec_model_size: large
107
+ vit_enc_patch_size: 16
108
+ vit_dec_patch_size: 16
109
+ num_latent_tokens: 32
110
+ layers_x: 18
111
+ layers_token: 2
112
+ embedding_width: 1024
113
+ width: 256
114
+ finetune_decoder: false
115
+ pretrained_tokenizer_weight: maskgit-vqgan-imagenet-f16-256.bin
116
+ generator:
117
+ hidden_size: 1024
118
+ num_hidden_layers: 24
119
+ num_attention_heads: 16
120
+ intermediate_size: 4096
121
+ dropout: 0.1
122
+ attn_drop: 0.1
123
+ class_label_dropout: 0.1
124
+ image_seq_len: 32
125
+ condition_num_classes: 1000
126
+ randomize_temperature: 1.02
127
+ guidance_scale: 15.5
128
+ guidance_scale_pow: 2.5
129
+ use_checkpoint: false
130
+ randomness_anneal_start: 0
131
+ randomness_anneal_end: 0
132
+ dataset:
133
+ params:
134
+ train_shards_path_or_url: imagenet/imagenet1k-train-{0000..1023}.tar
135
+ eval_shards_path_or_url: imagenet/imagenet1k-validation-{00..63}.tar
136
+ num_workers_per_gpu: 2
137
+ preprocessing:
138
+ resize_shorter_edge: 256
139
+ crop_size: 256
140
+ random_crop: false
141
+ random_flip: true
142
+ optimizer:
143
+ name: adamw
144
+ params:
145
+ learning_rate: 0.00035
146
+ beta1: 0.9
147
+ beta2: 0.96
148
+ weight_decay: 0.03
149
+ lr_scheduler:
150
+ scheduler: cosine
151
+ params:
152
+ learning_rate: ${optimizer.params.learning_rate}
153
+ warmup_steps: 0
154
+ end_lr: 1.0e-05
155
+ training:
156
+ gradient_accumulation_steps: 1
157
+ per_gpu_batch_size: 256
158
+ mixed_precision: bf16
159
+ enable_tf32: true
160
+ enable_wandb: true
161
+ use_ema: true
162
+ seed: 42
163
+ max_train_steps: 500000
164
+ max_grad_norm: 1.0
165
+ config: configs/training/generator/rar.yaml
166
+
167
+ [04/09 14:33:11 RAR]: Creating model and loss module.
168
+ [04/09 14:33:22 RAR]: Creating optimizers.
169
+ [04/09 14:33:22 RAR]: Creating lr_schedulers.
170
+ [04/09 14:33:22 RAR]: Creating dataloaders.
171
+ [04/09 14:33:22 RAR]: Preparing model, optimizer and dataloaders
172
+ [04/09 14:33:23 RAR]: ***** Running training *****
173
+ [04/09 14:33:23 RAR]:  Num training steps = 500000
174
+ [04/09 14:33:23 RAR]:  Gradient Accumulation steps = 1
175
+ [04/09 14:33:23 RAR]:  Instantaneous batch size per gpu = 256
176
+ [04/09 14:33:23 RAR]:  Total train batch size (w. parallel, distributed & accumulation) = 1024
177
+ [04/09 14:33:23 RAR]: All globbed checkpoints are: []
178
+ [04/09 14:33:23 RAR]: Training from scratch.
179
+ [04/09 14:36:02 RAR]: Data (t): 0.0213, 468.71/s/gpu Batch (t): 0.5462 LR: 0.000350 Step: 100 Loss: 8.2635 Accuracy: 0.0004
180
+ [04/09 14:38:38 RAR]: Data (t): 0.0337, 455.24/s/gpu Batch (t): 0.5623 LR: 0.000350 Step: 200 Loss: 8.2532 Accuracy: 0.0007
181
+ [04/09 14:41:10 RAR]: Data (t): 0.0298, 95.03/s/gpu Batch (t): 2.6939 LR: 0.000350 Step: 300 Loss: 8.2315 Accuracy: 0.0007
182
+ [04/09 14:43:39 RAR]: Data (t): 0.0208, 286.32/s/gpu Batch (t): 0.8941 LR: 0.000350 Step: 400 Loss: 8.2100 Accuracy: 0.0006
183
+ [04/09 14:46:14 RAR]: Data (t): 0.0290, 469.80/s/gpu Batch (t): 0.5449 LR: 0.000350 Step: 500 Loss: 8.1891 Accuracy: 0.0009
184
+ [04/09 14:48:44 RAR]: Data (t): 0.0243, 462.52/s/gpu Batch (t): 0.5535 LR: 0.000350 Step: 600 Loss: 8.1766 Accuracy: 0.0004
185
+ [04/09 14:51:15 RAR]: Data (t): 0.0221, 469.11/s/gpu Batch (t): 0.5457 LR: 0.000350 Step: 700 Loss: 8.1651 Accuracy: 0.0008
186
+ [04/09 14:53:45 RAR]: Data (t): 0.0261, 345.31/s/gpu Batch (t): 0.7414 LR: 0.000350 Step: 800 Loss: 8.1420 Accuracy: 0.0015
187
+ [04/09 14:56:15 RAR]: Data (t): 0.0321, 200.74/s/gpu Batch (t): 1.2753 LR: 0.000350 Step: 900 Loss: 8.1252 Accuracy: 0.0010
188
+ [04/09 14:58:45 RAR]: Data (t): 0.0287, 363.06/s/gpu Batch (t): 0.7051 LR: 0.000350 Step: 1000 Loss: 8.1242 Accuracy: 0.0011
189
+ [04/09 15:01:18 RAR]: Data (t): 1.9396, 103.94/s/gpu Batch (t): 2.4629 LR: 0.000350 Step: 1100 Loss: 8.1143 Accuracy: 0.0013
190
+ [04/09 15:03:48 RAR]: Data (t): 0.1397, 104.28/s/gpu Batch (t): 2.4548 LR: 0.000350 Step: 1200 Loss: 8.0997 Accuracy: 0.0010
191
+ [04/09 15:06:22 RAR]: Data (t): 0.0290, 100.72/s/gpu Batch (t): 2.5418 LR: 0.000350 Step: 1300 Loss: 8.0941 Accuracy: 0.0017
192
+ [04/09 15:08:55 RAR]: Data (t): 0.0362, 120.63/s/gpu Batch (t): 2.1222 LR: 0.000350 Step: 1400 Loss: 8.0953 Accuracy: 0.0013
193
+ [04/09 15:11:31 RAR]: Data (t): 0.0380, 425.68/s/gpu Batch (t): 0.6014 LR: 0.000350 Step: 1500 Loss: 8.0826 Accuracy: 0.0011
194
+ [04/09 15:14:07 RAR]: Data (t): 0.0320, 272.74/s/gpu Batch (t): 0.9386 LR: 0.000350 Step: 1600 Loss: 8.0747 Accuracy: 0.0013
195
+ [04/09 15:16:46 RAR]: Data (t): 0.0312, 142.38/s/gpu Batch (t): 1.7980 LR: 0.000350 Step: 1700 Loss: 8.0645 Accuracy: 0.0013
196
+ [04/09 15:19:20 RAR]: Data (t): 0.0897, 242.02/s/gpu Batch (t): 1.0577 LR: 0.000350 Step: 1800 Loss: 8.0636 Accuracy: 0.0016
197
+ [04/09 15:21:58 RAR]: Data (t): 0.0293, 470.92/s/gpu Batch (t): 0.5436 LR: 0.000350 Step: 1900 Loss: 8.0468 Accuracy: 0.0015
198
+ [04/09 15:24:34 RAR]: Data (t): 0.0311, 308.03/s/gpu Batch (t): 0.8311 LR: 0.000350 Step: 2000 Loss: 8.0544 Accuracy: 0.0012
199
+ [04/09 15:27:14 RAR]: Data (t): 0.0445, 461.58/s/gpu Batch (t): 0.5546 LR: 0.000350 Step: 2100 Loss: 8.0320 Accuracy: 0.0017
200
+ [04/09 15:29:51 RAR]: Data (t): 0.0287, 130.43/s/gpu Batch (t): 1.9628 LR: 0.000350 Step: 2200 Loss: 8.0260 Accuracy: 0.0014
201
+ [04/09 15:32:20 RAR]: Data (t): 0.9300, 124.24/s/gpu Batch (t): 2.0606 LR: 0.000350 Step: 2300 Loss: 8.0257 Accuracy: 0.0015
202
+ [04/09 15:34:53 RAR]: Data (t): 0.0290, 386.40/s/gpu Batch (t): 0.6625 LR: 0.000350 Step: 2400 Loss: 8.0209 Accuracy: 0.0019
203
+ [04/09 15:37:26 RAR]: Data (t): 0.0309, 97.08/s/gpu Batch (t): 2.6369 LR: 0.000350 Step: 2500 Loss: 8.0184 Accuracy: 0.0019
204
+ [04/09 15:40:00 RAR]: Data (t): 0.0203, 96.40/s/gpu Batch (t): 2.6557 LR: 0.000350 Step: 2600 Loss: 8.0078 Accuracy: 0.0017
205
+ [04/09 15:42:27 RAR]: Data (t): 0.0305, 444.62/s/gpu Batch (t): 0.5758 LR: 0.000350 Step: 2700 Loss: 8.0096 Accuracy: 0.0016
206
+ [04/09 15:45:00 RAR]: Data (t): 0.0281, 248.11/s/gpu Batch (t): 1.0318 LR: 0.000350 Step: 2800 Loss: 7.9780 Accuracy: 0.0019
207
+ [04/09 15:47:31 RAR]: Data (t): 0.0192, 111.52/s/gpu Batch (t): 2.2955 LR: 0.000350 Step: 2900 Loss: 7.9892 Accuracy: 0.0019
208
+ [04/09 15:49:58 RAR]: Data (t): 0.4393, 87.25/s/gpu Batch (t): 2.9341 LR: 0.000350 Step: 3000 Loss: 7.9847 Accuracy: 0.0018
209
+ [04/09 15:52:25 RAR]: Data (t): 1.1977, 149.33/s/gpu Batch (t): 1.7144 LR: 0.000350 Step: 3100 Loss: 7.9832 Accuracy: 0.0016
210
+ [04/09 15:54:56 RAR]: Data (t): 1.3806, 133.51/s/gpu Batch (t): 1.9175 LR: 0.000350 Step: 3200 Loss: 7.9699 Accuracy: 0.0021
211
+ [04/09 15:57:25 RAR]: Data (t): 1.3350, 128.34/s/gpu Batch (t): 1.9947 LR: 0.000350 Step: 3300 Loss: 7.9745 Accuracy: 0.0018
212
+ [04/09 15:59:54 RAR]: Data (t): 1.3171, 139.27/s/gpu Batch (t): 1.8382 LR: 0.000350 Step: 3400 Loss: 7.9459 Accuracy: 0.0020
213
+ [04/09 16:02:19 RAR]: Data (t): 0.1564, 375.66/s/gpu Batch (t): 0.6815 LR: 0.000350 Step: 3500 Loss: 7.9592 Accuracy: 0.0019
214
+ [04/09 16:04:48 RAR]: Data (t): 0.0272, 459.47/s/gpu Batch (t): 0.5572 LR: 0.000350 Step: 3600 Loss: 7.9532 Accuracy: 0.0019
215
+ [04/09 16:07:19 RAR]: Data (t): 0.0233, 112.66/s/gpu Batch (t): 2.2723 LR: 0.000350 Step: 3700 Loss: 7.9582 Accuracy: 0.0022
216
+ [04/09 16:09:52 RAR]: Data (t): 0.0291, 456.30/s/gpu Batch (t): 0.5610 LR: 0.000350 Step: 3800 Loss: 7.9390 Accuracy: 0.0027
217
+ [04/09 16:12:25 RAR]: Data (t): 0.0267, 333.19/s/gpu Batch (t): 0.7683 LR: 0.000350 Step: 3900 Loss: 7.9175 Accuracy: 0.0025
218
+ [04/09 16:14:59 RAR]: Data (t): 0.0206, 405.87/s/gpu Batch (t): 0.6307 LR: 0.000350 Step: 4000 Loss: 7.9305 Accuracy: 0.0024
219
+ [04/09 16:17:31 RAR]: Data (t): 0.0171, 475.17/s/gpu Batch (t): 0.5388 LR: 0.000350 Step: 4100 Loss: 7.9275 Accuracy: 0.0024
220
+ [04/09 16:20:00 RAR]: Data (t): 1.4238, 130.32/s/gpu Batch (t): 1.9644 LR: 0.000350 Step: 4200 Loss: 7.9302 Accuracy: 0.0023
221
+ [04/09 16:22:27 RAR]: Data (t): 1.2357, 144.41/s/gpu Batch (t): 1.7727 LR: 0.000350 Step: 4300 Loss: 7.9217 Accuracy: 0.0023
222
+ [04/09 16:25:00 RAR]: Data (t): 1.8754, 106.84/s/gpu Batch (t): 2.3962 LR: 0.000350 Step: 4400 Loss: 7.9117 Accuracy: 0.0024
223
+ [04/09 16:27:29 RAR]: Data (t): 1.0540, 116.97/s/gpu Batch (t): 2.1886 LR: 0.000350 Step: 4500 Loss: 7.9029 Accuracy: 0.0026
224
+ [04/09 16:29:55 RAR]: Data (t): 0.0194, 464.33/s/gpu Batch (t): 0.5513 LR: 0.000350 Step: 4600 Loss: 7.8965 Accuracy: 0.0024
225
+ [04/09 16:32:25 RAR]: Data (t): 0.7719, 196.73/s/gpu Batch (t): 1.3012 LR: 0.000350 Step: 4700 Loss: 7.8757 Accuracy: 0.0027
226
+ [04/09 16:34:55 RAR]: Data (t): 0.0229, 454.04/s/gpu Batch (t): 0.5638 LR: 0.000350 Step: 4800 Loss: 7.9061 Accuracy: 0.0026
227
+ [04/09 16:37:22 RAR]: Data (t): 0.0208, 468.98/s/gpu Batch (t): 0.5459 LR: 0.000350 Step: 4900 Loss: 7.8847 Accuracy: 0.0029
228
+ [04/09 16:39:53 RAR]: Data (t): 0.0202, 89.31/s/gpu Batch (t): 2.8663 LR: 0.000350 Step: 5000 Loss: 7.8835 Accuracy: 0.0028
229
+ [04/09 16:42:16 RAR]: Data (t): 0.0154, 242.01/s/gpu Batch (t): 1.0578 LR: 0.000350 Step: 5100 Loss: 7.8674 Accuracy: 0.0028
230
+ [04/09 16:44:26 RAR]: Data (t): 0.0358, 96.28/s/gpu Batch (t): 2.6590 LR: 0.000350 Step: 5200 Loss: 7.8907 Accuracy: 0.0025
231
+ [04/09 16:47:07 RAR]: Data (t): 0.0329, 279.45/s/gpu Batch (t): 0.9161 LR: 0.000350 Step: 5300 Loss: 7.8895 Accuracy: 0.0027
232
+ [04/09 16:49:52 RAR]: Data (t): 0.0343, 320.08/s/gpu Batch (t): 0.7998 LR: 0.000350 Step: 5400 Loss: 7.8690 Accuracy: 0.0027
233
+ [04/09 16:52:36 RAR]: Data (t): 0.0262, 453.18/s/gpu Batch (t): 0.5649 LR: 0.000350 Step: 5500 Loss: 7.8589 Accuracy: 0.0031
234
+ [04/09 16:55:22 RAR]: Data (t): 0.0335, 118.92/s/gpu Batch (t): 2.1527 LR: 0.000350 Step: 5600 Loss: 7.8661 Accuracy: 0.0031
235
+ [04/09 16:58:05 RAR]: Data (t): 0.0348, 458.25/s/gpu Batch (t): 0.5587 LR: 0.000350 Step: 5700 Loss: 7.8510 Accuracy: 0.0029
236
+ [04/09 17:00:48 RAR]: Data (t): 0.0305, 454.06/s/gpu Batch (t): 0.5638 LR: 0.000350 Step: 5800 Loss: 7.8386 Accuracy: 0.0035
237
+ [04/09 17:03:34 RAR]: Data (t): 0.0281, 297.65/s/gpu Batch (t): 0.8601 LR: 0.000350 Step: 5900 Loss: 7.8500 Accuracy: 0.0034
238
+ [04/09 17:06:16 RAR]: Data (t): 0.0280, 374.85/s/gpu Batch (t): 0.6829 LR: 0.000350 Step: 6000 Loss: 7.8506 Accuracy: 0.0028
239
+ [04/09 17:08:56 RAR]: Data (t): 0.0336, 453.39/s/gpu Batch (t): 0.5646 LR: 0.000350 Step: 6100 Loss: 7.8508 Accuracy: 0.0031
240
+ [04/09 17:11:38 RAR]: Data (t): 0.0217, 442.85/s/gpu Batch (t): 0.5781 LR: 0.000350 Step: 6200 Loss: 7.8361 Accuracy: 0.0036
241
+ [04/09 17:14:24 RAR]: Data (t): 0.0343, 416.47/s/gpu Batch (t): 0.6147 LR: 0.000350 Step: 6300 Loss: 7.8417 Accuracy: 0.0033
242
+ [04/09 17:17:09 RAR]: Data (t): 0.0225, 96.65/s/gpu Batch (t): 2.6488 LR: 0.000350 Step: 6400 Loss: 7.8501 Accuracy: 0.0033
243
+ [04/09 17:19:53 RAR]: Data (t): 0.0304, 95.96/s/gpu Batch (t): 2.6679 LR: 0.000350 Step: 6500 Loss: 7.8394 Accuracy: 0.0043
244
+ [04/09 17:22:33 RAR]: Data (t): 2.1302, 82.85/s/gpu Batch (t): 3.0901 LR: 0.000350 Step: 6600 Loss: 7.8010 Accuracy: 0.0043
245
+ [04/09 17:25:15 RAR]: Data (t): 0.0330, 453.84/s/gpu Batch (t): 0.5641 LR: 0.000350 Step: 6700 Loss: 7.8082 Accuracy: 0.0040
246
+ [04/09 17:27:58 RAR]: Data (t): 0.0338, 440.68/s/gpu Batch (t): 0.5809 LR: 0.000350 Step: 6800 Loss: 7.7663 Accuracy: 0.0051
247
+ [04/09 17:30:38 RAR]: Data (t): 0.0268, 146.57/s/gpu Batch (t): 1.7466 LR: 0.000350 Step: 6900 Loss: 7.8118 Accuracy: 0.0045
248
+ [04/09 17:33:21 RAR]: Data (t): 0.0291, 355.63/s/gpu Batch (t): 0.7198 LR: 0.000350 Step: 7000 Loss: 7.8211 Accuracy: 0.0037
249
+ [04/09 17:36:05 RAR]: Data (t): 0.0307, 442.56/s/gpu Batch (t): 0.5785 LR: 0.000350 Step: 7100 Loss: 7.8315 Accuracy: 0.0038
250
+ [04/09 17:38:37 RAR]: Data (t): 0.0150, 253.02/s/gpu Batch (t): 1.0118 LR: 0.000350 Step: 7200 Loss: 7.7748 Accuracy: 0.0046
251
+ [04/09 17:40:09 RAR]: Data (t): 0.0150, 190.24/s/gpu Batch (t): 1.3457 LR: 0.000350 Step: 7300 Loss: 7.7952 Accuracy: 0.0045
252
+ [04/09 17:42:08 RAR]: Data (t): 0.0232, 329.59/s/gpu Batch (t): 0.7767 LR: 0.000350 Step: 7400 Loss: 7.8065 Accuracy: 0.0043
253
+ [04/09 17:44:28 RAR]: Data (t): 0.0235, 479.76/s/gpu Batch (t): 0.5336 LR: 0.000350 Step: 7500 Loss: 7.8319 Accuracy: 0.0036
254
+ [04/09 17:46:49 RAR]: Data (t): 0.0293, 124.55/s/gpu Batch (t): 2.0554 LR: 0.000350 Step: 7600 Loss: 7.7934 Accuracy: 0.0050
255
+ [04/09 17:49:08 RAR]: Data (t): 0.0307, 353.23/s/gpu Batch (t): 0.7247 LR: 0.000350 Step: 7700 Loss: 7.7900 Accuracy: 0.0041
256
+ [04/09 17:51:25 RAR]: Data (t): 0.0369, 186.27/s/gpu Batch (t): 1.3743 LR: 0.000350 Step: 7800 Loss: 7.7738 Accuracy: 0.0047
257
+ [04/09 17:53:38 RAR]: Data (t): 0.0219, 452.16/s/gpu Batch (t): 0.5662 LR: 0.000350 Step: 7900 Loss: 7.7705 Accuracy: 0.0053
258
+ [04/09 17:55:55 RAR]: Data (t): 0.0312, 410.15/s/gpu Batch (t): 0.6242 LR: 0.000350 Step: 8000 Loss: 7.7487 Accuracy: 0.0049
259
+ [04/09 17:58:10 RAR]: Data (t): 0.0213, 173.62/s/gpu Batch (t): 1.4745 LR: 0.000350 Step: 8100 Loss: 7.7421 Accuracy: 0.0055
260
+ [04/09 18:00:25 RAR]: Data (t): 0.0275, 458.48/s/gpu Batch (t): 0.5584 LR: 0.000350 Step: 8200 Loss: 7.7826 Accuracy: 0.0043
261
+ [04/09 18:02:40 RAR]: Data (t): 0.0278, 468.36/s/gpu Batch (t): 0.5466 LR: 0.000350 Step: 8300 Loss: 7.7622 Accuracy: 0.0042
262
+ [04/09 18:04:56 RAR]: Data (t): 0.0174, 115.80/s/gpu Batch (t): 2.2108 LR: 0.000350 Step: 8400 Loss: 7.7423 Accuracy: 0.0051
263
+ [04/09 18:07:06 RAR]: Data (t): 0.0176, 253.29/s/gpu Batch (t): 1.0107 LR: 0.000350 Step: 8500 Loss: 7.7696 Accuracy: 0.0045
264
+ [04/09 18:09:21 RAR]: Data (t): 0.0163, 477.05/s/gpu Batch (t): 0.5366 LR: 0.000350 Step: 8600 Loss: 7.7322 Accuracy: 0.0063
265
+ [04/09 18:11:35 RAR]: Data (t): 0.0262, 132.94/s/gpu Batch (t): 1.9257 LR: 0.000350 Step: 8700 Loss: 7.7139 Accuracy: 0.0065
266
+ [04/09 18:13:51 RAR]: Data (t): 0.0273, 466.46/s/gpu Batch (t): 0.5488 LR: 0.000350 Step: 8800 Loss: 7.7708 Accuracy: 0.0046
267
+ [04/09 18:16:05 RAR]: Data (t): 0.0274, 468.00/s/gpu Batch (t): 0.5470 LR: 0.000350 Step: 8900 Loss: 7.7516 Accuracy: 0.0053
268
+ [04/09 18:18:18 RAR]: Data (t): 0.0266, 365.50/s/gpu Batch (t): 0.7004 LR: 0.000350 Step: 9000 Loss: 7.7470 Accuracy: 0.0057
269
+ [04/09 18:20:36 RAR]: Data (t): 1.4101, 132.29/s/gpu Batch (t): 1.9351 LR: 0.000350 Step: 9100 Loss: 7.7165 Accuracy: 0.0056
270
+ [04/09 18:22:50 RAR]: Data (t): 1.2962, 141.31/s/gpu Batch (t): 1.8116 LR: 0.000350 Step: 9200 Loss: 7.7291 Accuracy: 0.0052
271
+ [04/09 18:25:04 RAR]: Data (t): 1.3310, 137.62/s/gpu Batch (t): 1.8602 LR: 0.000350 Step: 9300 Loss: 7.7293 Accuracy: 0.0059
272
+ [04/09 18:27:14 RAR]: Data (t): 0.0289, 473.20/s/gpu Batch (t): 0.5410 LR: 0.000350 Step: 9400 Loss: 7.7444 Accuracy: 0.0050
273
+ [04/09 18:29:28 RAR]: Data (t): 0.0201, 225.95/s/gpu Batch (t): 1.1330 LR: 0.000350 Step: 9500 Loss: 7.7485 Accuracy: 0.0057
274
+ [04/09 18:31:42 RAR]: Data (t): 0.0317, 130.88/s/gpu Batch (t): 1.9559 LR: 0.000350 Step: 9600 Loss: 7.7604 Accuracy: 0.0049
275
+ [04/09 18:33:57 RAR]: Data (t): 0.0258, 136.98/s/gpu Batch (t): 1.8689 LR: 0.000350 Step: 9700 Loss: 7.6677 Accuracy: 0.0082
276
+ [04/09 18:36:09 RAR]: Data (t): 0.0294, 136.09/s/gpu Batch (t): 1.8811 LR: 0.000350 Step: 9800 Loss: 7.6954 Accuracy: 0.0060
277
+ [04/09 18:38:22 RAR]: Data (t): 0.0173, 110.43/s/gpu Batch (t): 2.3183 LR: 0.000350 Step: 9900 Loss: 7.6930 Accuracy: 0.0072
278
+ [04/09 18:40:34 RAR]: Data (t): 0.0157, 158.25/s/gpu Batch (t): 1.6177 LR: 0.000350 Step: 10000 Loss: 7.7283 Accuracy: 0.0063
279
+ [04/09 18:40:44 RAR]: Saved state to stage1/rar_baseline/checkpoint-10000
280
+ [04/09 18:43:52 RAR]: Data (t): 0.0324, 480.00/s/gpu Batch (t): 0.5333 LR: 0.000350 Step: 10100 Loss: 7.7112 Accuracy: 0.0064
281
+ [04/09 18:46:04 RAR]: Data (t): 0.0198, 457.55/s/gpu Batch (t): 0.5595 LR: 0.000350 Step: 10200 Loss: 7.7176 Accuracy: 0.0065
282
+ [04/09 18:48:20 RAR]: Data (t): 0.0209, 453.42/s/gpu Batch (t): 0.5646 LR: 0.000350 Step: 10300 Loss: 7.6900 Accuracy: 0.0071
283
+ [04/09 18:50:35 RAR]: Data (t): 0.0324, 460.26/s/gpu Batch (t): 0.5562 LR: 0.000350 Step: 10400 Loss: 7.6538 Accuracy: 0.0081
284
+ [04/09 18:52:51 RAR]: Data (t): 0.0223, 459.99/s/gpu Batch (t): 0.5565 LR: 0.000350 Step: 10500 Loss: 7.7063 Accuracy: 0.0066
285
+ [04/09 18:55:03 RAR]: Data (t): 0.0182, 336.27/s/gpu Batch (t): 0.7613 LR: 0.000350 Step: 10600 Loss: 7.6800 Accuracy: 0.0064
286
+ [04/09 18:57:16 RAR]: Data (t): 0.0193, 237.72/s/gpu Batch (t): 1.0769 LR: 0.000350 Step: 10700 Loss: 7.7280 Accuracy: 0.0062
287
+ [04/09 18:59:29 RAR]: Data (t): 0.0265, 129.92/s/gpu Batch (t): 1.9704 LR: 0.000350 Step: 10800 Loss: 7.6473 Accuracy: 0.0092
288
+ [04/09 19:01:39 RAR]: Data (t): 0.0258, 400.84/s/gpu Batch (t): 0.6387 LR: 0.000350 Step: 10900 Loss: 7.6826 Accuracy: 0.0065
289
+ [04/09 19:03:51 RAR]: Data (t): 0.0272, 121.14/s/gpu Batch (t): 2.1133 LR: 0.000350 Step: 11000 Loss: 7.6701 Accuracy: 0.0073
290
+ [04/09 19:06:00 RAR]: Data (t): 0.0283, 296.92/s/gpu Batch (t): 0.8622 LR: 0.000350 Step: 11100 Loss: 7.6169 Accuracy: 0.0106
291
+ [04/09 19:08:12 RAR]: Data (t): 0.0350, 454.38/s/gpu Batch (t): 0.5634 LR: 0.000350 Step: 11200 Loss: 7.6183 Accuracy: 0.0103
292
+ [04/09 19:10:30 RAR]: Data (t): 0.0297, 464.83/s/gpu Batch (t): 0.5507 LR: 0.000350 Step: 11300 Loss: 7.6654 Accuracy: 0.0078
293
+ [04/09 19:12:41 RAR]: Data (t): 0.0185, 229.37/s/gpu Batch (t): 1.1161 LR: 0.000350 Step: 11400 Loss: 7.6608 Accuracy: 0.0079
294
+ [04/09 19:14:55 RAR]: Data (t): 0.0279, 106.44/s/gpu Batch (t): 2.4052 LR: 0.000350 Step: 11500 Loss: 7.6210 Accuracy: 0.0085
295
+ [04/09 19:17:05 RAR]: Data (t): 1.3357, 138.17/s/gpu Batch (t): 1.8528 LR: 0.000350 Step: 11600 Loss: 7.6746 Accuracy: 0.0071
296
+ [04/09 19:19:14 RAR]: Data (t): 0.9504, 147.81/s/gpu Batch (t): 1.7320 LR: 0.000350 Step: 11700 Loss: 7.6023 Accuracy: 0.0089
297
+ [04/09 19:21:25 RAR]: Data (t): 0.0221, 477.52/s/gpu Batch (t): 0.5361 LR: 0.000350 Step: 11800 Loss: 7.6322 Accuracy: 0.0089
298
+ [04/09 19:23:38 RAR]: Data (t): 0.0299, 132.90/s/gpu Batch (t): 1.9262 LR: 0.000350 Step: 11900 Loss: 7.6542 Accuracy: 0.0081
299
+ [04/09 19:25:49 RAR]: Data (t): 0.0187, 113.68/s/gpu Batch (t): 2.2520 LR: 0.000350 Step: 12000 Loss: 7.6159 Accuracy: 0.0092
300
+ [04/09 19:28:01 RAR]: Data (t): 0.0387, 458.26/s/gpu Batch (t): 0.5586 LR: 0.000350 Step: 12100 Loss: 7.6949 Accuracy: 0.0071
301
+ [04/09 19:30:12 RAR]: Data (t): 0.0188, 469.12/s/gpu Batch (t): 0.5457 LR: 0.000350 Step: 12200 Loss: 7.6547 Accuracy: 0.0085
302
+ [04/09 19:32:24 RAR]: Data (t): 0.0267, 467.39/s/gpu Batch (t): 0.5477 LR: 0.000349 Step: 12300 Loss: 7.6668 Accuracy: 0.0077
303
+ [04/09 19:34:35 RAR]: Data (t): 0.0271, 471.15/s/gpu Batch (t): 0.5434 LR: 0.000349 Step: 12400 Loss: 7.5743 Accuracy: 0.0117
304
+ [04/09 19:36:47 RAR]: Data (t): 0.0279, 120.65/s/gpu Batch (t): 2.1219 LR: 0.000349 Step: 12500 Loss: 7.5949 Accuracy: 0.0100
305
+ [04/09 19:39:03 RAR]: Data (t): 0.0183, 109.50/s/gpu Batch (t): 2.3378 LR: 0.000349 Step: 12600 Loss: 7.6420 Accuracy: 0.0089
306
+ [04/09 19:41:09 RAR]: Data (t): 0.0235, 478.35/s/gpu Batch (t): 0.5352 LR: 0.000349 Step: 12700 Loss: 7.5710 Accuracy: 0.0117
307
+ [04/09 19:43:18 RAR]: Data (t): 0.0178, 474.23/s/gpu Batch (t): 0.5398 LR: 0.000349 Step: 12800 Loss: 7.5221 Accuracy: 0.0139
308
+ [04/09 19:45:27 RAR]: Data (t): 1.0628, 160.46/s/gpu Batch (t): 1.5954 LR: 0.000349 Step: 12900 Loss: 7.5700 Accuracy: 0.0114
309
+ [04/09 19:47:41 RAR]: Data (t): 0.0193, 294.49/s/gpu Batch (t): 0.8693 LR: 0.000349 Step: 13000 Loss: 7.5716 Accuracy: 0.0104
310
+ [04/09 19:49:52 RAR]: Data (t): 1.0829, 160.02/s/gpu Batch (t): 1.5998 LR: 0.000349 Step: 13100 Loss: 7.5956 Accuracy: 0.0111
311
+ [04/09 19:52:07 RAR]: Data (t): 0.0308, 120.54/s/gpu Batch (t): 2.1237 LR: 0.000349 Step: 13200 Loss: 7.5196 Accuracy: 0.0136
312
+ [04/09 19:54:19 RAR]: Data (t): 0.0237, 103.74/s/gpu Batch (t): 2.4677 LR: 0.000349 Step: 13300 Loss: 7.5724 Accuracy: 0.0105
313
+ [04/09 19:56:31 RAR]: Data (t): 0.0209, 93.88/s/gpu Batch (t): 2.7269 LR: 0.000349 Step: 13400 Loss: 7.5601 Accuracy: 0.0123
314
+ [04/09 19:58:40 RAR]: Data (t): 0.0183, 232.38/s/gpu Batch (t): 1.1016 LR: 0.000349 Step: 13500 Loss: 7.5515 Accuracy: 0.0130
315
+ [04/09 20:00:52 RAR]: Data (t): 0.0169, 478.25/s/gpu Batch (t): 0.5353 LR: 0.000349 Step: 13600 Loss: 7.5938 Accuracy: 0.0106
316
+ [04/09 20:03:03 RAR]: Data (t): 0.0203, 131.23/s/gpu Batch (t): 1.9508 LR: 0.000349 Step: 13700 Loss: 7.5743 Accuracy: 0.0096
317
+ [04/09 20:05:18 RAR]: Data (t): 0.0308, 465.77/s/gpu Batch (t): 0.5496 LR: 0.000349 Step: 13800 Loss: 7.6002 Accuracy: 0.0110
318
+ [04/09 20:07:29 RAR]: Data (t): 0.0208, 183.60/s/gpu Batch (t): 1.3943 LR: 0.000349 Step: 13900 Loss: 7.5775 Accuracy: 0.0107
319
+ [04/09 20:09:38 RAR]: Data (t): 0.0207, 414.58/s/gpu Batch (t): 0.6175 LR: 0.000349 Step: 14000 Loss: 7.5665 Accuracy: 0.0107
320
+ [04/09 20:11:48 RAR]: Data (t): 0.0261, 474.85/s/gpu Batch (t): 0.5391 LR: 0.000349 Step: 14100 Loss: 7.6009 Accuracy: 0.0090
321
+ [04/09 20:13:56 RAR]: Data (t): 0.2095, 350.60/s/gpu Batch (t): 0.7302 LR: 0.000349 Step: 14200 Loss: 7.5451 Accuracy: 0.0117
322
+ [04/09 20:16:03 RAR]: Data (t): 0.0164, 175.17/s/gpu Batch (t): 1.4614 LR: 0.000349 Step: 14300 Loss: 7.5336 Accuracy: 0.0109
323
+ [04/09 20:18:13 RAR]: Data (t): 0.0264, 152.29/s/gpu Batch (t): 1.6810 LR: 0.000349 Step: 14400 Loss: 7.6186 Accuracy: 0.0085
324
+ [04/09 20:20:21 RAR]: Data (t): 1.5292, 124.79/s/gpu Batch (t): 2.0515 LR: 0.000349 Step: 14500 Loss: 7.5014 Accuracy: 0.0129
325
+ [04/09 20:22:27 RAR]: Data (t): 0.0203, 476.30/s/gpu Batch (t): 0.5375 LR: 0.000349 Step: 14600 Loss: 7.5512 Accuracy: 0.0116
326
+ [04/09 20:24:35 RAR]: Data (t): 0.0252, 478.23/s/gpu Batch (t): 0.5353 LR: 0.000349 Step: 14700 Loss: 7.5559 Accuracy: 0.0121
327
+ [04/09 20:26:43 RAR]: Data (t): 0.0295, 466.32/s/gpu Batch (t): 0.5490 LR: 0.000349 Step: 14800 Loss: 7.5176 Accuracy: 0.0132
328
+ [04/09 20:28:52 RAR]: Data (t): 0.1995, 354.09/s/gpu Batch (t): 0.7230 LR: 0.000349 Step: 14900 Loss: 7.5679 Accuracy: 0.0123
329
+ [04/09 20:30:58 RAR]: Data (t): 0.1902, 287.43/s/gpu Batch (t): 0.8906 LR: 0.000349 Step: 15000 Loss: 7.5758 Accuracy: 0.0110
330
+ [04/09 20:33:11 RAR]: Data (t): 0.0178, 473.38/s/gpu Batch (t): 0.5408 LR: 0.000349 Step: 15100 Loss: 7.5337 Accuracy: 0.0132
331
+ [04/09 20:35:18 RAR]: Data (t): 0.0182, 471.67/s/gpu Batch (t): 0.5428 LR: 0.000349 Step: 15200 Loss: 7.5611 Accuracy: 0.0122
332
+ [04/09 20:37:26 RAR]: Data (t): 0.0197, 458.53/s/gpu Batch (t): 0.5583 LR: 0.000349 Step: 15300 Loss: 7.5166 Accuracy: 0.0116
333
+ [04/09 20:39:35 RAR]: Data (t): 0.0273, 189.32/s/gpu Batch (t): 1.3522 LR: 0.000349 Step: 15400 Loss: 7.5179 Accuracy: 0.0133
334
+ [04/09 20:41:45 RAR]: Data (t): 0.0183, 119.68/s/gpu Batch (t): 2.1389 LR: 0.000349 Step: 15500 Loss: 7.5539 Accuracy: 0.0109
335
+ [04/09 20:43:53 RAR]: Data (t): 0.0229, 145.22/s/gpu Batch (t): 1.7629 LR: 0.000349 Step: 15600 Loss: 7.5216 Accuracy: 0.0122
336
+ [04/09 20:46:00 RAR]: Data (t): 0.0302, 477.56/s/gpu Batch (t): 0.5361 LR: 0.000349 Step: 15700 Loss: 7.4516 Accuracy: 0.0158
337
+ [04/09 20:48:08 RAR]: Data (t): 0.0321, 461.15/s/gpu Batch (t): 0.5551 LR: 0.000349 Step: 15800 Loss: 7.4875 Accuracy: 0.0147
338
+ [04/09 20:50:18 RAR]: Data (t): 0.0191, 479.63/s/gpu Batch (t): 0.5337 LR: 0.000349 Step: 15900 Loss: 7.4933 Accuracy: 0.0125
339
+ [04/09 20:52:25 RAR]: Data (t): 0.0162, 130.70/s/gpu Batch (t): 1.9586 LR: 0.000349 Step: 16000 Loss: 7.5310 Accuracy: 0.0122
340
+ [04/09 20:54:34 RAR]: Data (t): 0.0192, 472.66/s/gpu Batch (t): 0.5416 LR: 0.000349 Step: 16100 Loss: 7.5256 Accuracy: 0.0135
341
+ [04/09 20:56:43 RAR]: Data (t): 0.0273, 161.22/s/gpu Batch (t): 1.5879 LR: 0.000349 Step: 16200 Loss: 7.5086 Accuracy: 0.0139
342
+ [04/09 20:58:54 RAR]: Data (t): 0.0148, 185.58/s/gpu Batch (t): 1.3794 LR: 0.000349 Step: 16300 Loss: 7.4516 Accuracy: 0.0157
343
+ [04/09 21:01:03 RAR]: Data (t): 0.0190, 362.92/s/gpu Batch (t): 0.7054 LR: 0.000349 Step: 16400 Loss: 7.5331 Accuracy: 0.0123
344
+ [04/09 21:03:10 RAR]: Data (t): 1.0213, 101.27/s/gpu Batch (t): 2.5278 LR: 0.000349 Step: 16500 Loss: 7.4461 Accuracy: 0.0168
345
+ [04/09 21:05:18 RAR]: Data (t): 0.0193, 253.79/s/gpu Batch (t): 1.0087 LR: 0.000349 Step: 16600 Loss: 7.4972 Accuracy: 0.0154
346
+ [04/09 21:07:26 RAR]: Data (t): 0.0306, 99.16/s/gpu Batch (t): 2.5818 LR: 0.000349 Step: 16700 Loss: 7.5541 Accuracy: 0.0120
347
+ [04/09 21:09:34 RAR]: Data (t): 0.0177, 275.37/s/gpu Batch (t): 0.9296 LR: 0.000349 Step: 16800 Loss: 7.5800 Accuracy: 0.0115
348
+ [04/09 21:11:42 RAR]: Data (t): 0.0322, 121.95/s/gpu Batch (t): 2.0992 LR: 0.000349 Step: 16900 Loss: 7.4388 Accuracy: 0.0165
349
+ [04/09 21:13:47 RAR]: Data (t): 0.0204, 369.09/s/gpu Batch (t): 0.6936 LR: 0.000349 Step: 17000 Loss: 7.4871 Accuracy: 0.0156
350
+ [04/09 21:15:55 RAR]: Data (t): 0.0193, 477.89/s/gpu Batch (t): 0.5357 LR: 0.000349 Step: 17100 Loss: 7.5073 Accuracy: 0.0132
351
+ [04/09 21:18:05 RAR]: Data (t): 0.0163, 468.58/s/gpu Batch (t): 0.5463 LR: 0.000349 Step: 17200 Loss: 7.4590 Accuracy: 0.0152
352
+ [04/09 21:20:12 RAR]: Data (t): 0.0222, 477.64/s/gpu Batch (t): 0.5360 LR: 0.000349 Step: 17300 Loss: 7.4255 Accuracy: 0.0167
353
+ [04/09 21:22:20 RAR]: Data (t): 1.3375, 137.86/s/gpu Batch (t): 1.8569 LR: 0.000349 Step: 17400 Loss: 7.4961 Accuracy: 0.0154
354
+ [04/09 21:24:24 RAR]: Data (t): 0.0173, 435.59/s/gpu Batch (t): 0.5877 LR: 0.000349 Step: 17500 Loss: 7.4699 Accuracy: 0.0143
355
+ [04/09 21:26:36 RAR]: Data (t): 0.0305, 456.26/s/gpu Batch (t): 0.5611 LR: 0.000349 Step: 17600 Loss: 7.5215 Accuracy: 0.0140
356
+ [04/09 21:28:43 RAR]: Data (t): 0.0169, 205.39/s/gpu Batch (t): 1.2464 LR: 0.000349 Step: 17700 Loss: 7.4158 Accuracy: 0.0191
357
+ [04/09 21:30:51 RAR]: Data (t): 1.1754, 151.73/s/gpu Batch (t): 1.6872 LR: 0.000349 Step: 17800 Loss: 7.4366 Accuracy: 0.0165
358
+ [04/09 21:32:58 RAR]: Data (t): 0.1764, 133.33/s/gpu Batch (t): 1.9201 LR: 0.000349 Step: 17900 Loss: 7.4378 Accuracy: 0.0159
359
+ [04/09 21:35:04 RAR]: Data (t): 0.0249, 185.19/s/gpu Batch (t): 1.3824 LR: 0.000349 Step: 18000 Loss: 7.4615 Accuracy: 0.0170
360
+ [04/09 21:37:13 RAR]: Data (t): 0.0198, 134.90/s/gpu Batch (t): 1.8977 LR: 0.000349 Step: 18100 Loss: 7.4378 Accuracy: 0.0168
361
+ [04/09 21:39:19 RAR]: Data (t): 0.5676, 234.94/s/gpu Batch (t): 1.0896 LR: 0.000349 Step: 18200 Loss: 7.4573 Accuracy: 0.0169
362
+ [04/09 21:41:27 RAR]: Data (t): 0.0267, 211.53/s/gpu Batch (t): 1.2103 LR: 0.000349 Step: 18300 Loss: 7.4127 Accuracy: 0.0175
363
+ [04/09 21:43:37 RAR]: Data (t): 0.0966, 135.10/s/gpu Batch (t): 1.8949 LR: 0.000349 Step: 18400 Loss: 7.5078 Accuracy: 0.0145
364
+ [04/09 21:45:44 RAR]: Data (t): 1.5958, 121.78/s/gpu Batch (t): 2.1021 LR: 0.000349 Step: 18500 Loss: 7.4458 Accuracy: 0.0156
365
+ [04/09 21:47:51 RAR]: Data (t): 0.4248, 274.79/s/gpu Batch (t): 0.9316 LR: 0.000349 Step: 18600 Loss: 7.5041 Accuracy: 0.0129
366
+ [04/09 21:50:00 RAR]: Data (t): 0.0175, 313.76/s/gpu Batch (t): 0.8159 LR: 0.000349 Step: 18700 Loss: 7.4175 Accuracy: 0.0153
367
+ [04/09 21:52:09 RAR]: Data (t): 0.0257, 301.78/s/gpu Batch (t): 0.8483 LR: 0.000349 Step: 18800 Loss: 7.4102 Accuracy: 0.0181
368
+ [04/09 21:54:18 RAR]: Data (t): 1.3182, 139.55/s/gpu Batch (t): 1.8345 LR: 0.000349 Step: 18900 Loss: 7.4618 Accuracy: 0.0143
369
+ [04/09 21:56:24 RAR]: Data (t): 0.7834, 181.30/s/gpu Batch (t): 1.4120 LR: 0.000349 Step: 19000 Loss: 7.3735 Accuracy: 0.0203
370
+ [04/09 21:58:31 RAR]: Data (t): 0.0162, 467.23/s/gpu Batch (t): 0.5479 LR: 0.000349 Step: 19100 Loss: 7.4900 Accuracy: 0.0129
371
+ [04/09 22:00:44 RAR]: Data (t): 0.0181, 125.27/s/gpu Batch (t): 2.0436 LR: 0.000349 Step: 19200 Loss: 7.4067 Accuracy: 0.0188
372
+ [04/09 22:02:52 RAR]: Data (t): 0.0194, 131.64/s/gpu Batch (t): 1.9447 LR: 0.000349 Step: 19300 Loss: 7.4270 Accuracy: 0.0189
373
+ [04/09 22:04:59 RAR]: Data (t): 0.0159, 102.92/s/gpu Batch (t): 2.4873 LR: 0.000349 Step: 19400 Loss: 7.4025 Accuracy: 0.0181
374
+ [04/09 22:07:08 RAR]: Data (t): 0.0160, 133.97/s/gpu Batch (t): 1.9109 LR: 0.000349 Step: 19500 Loss: 7.4646 Accuracy: 0.0151
375
+ [04/09 22:09:12 RAR]: Data (t): 0.0195, 472.05/s/gpu Batch (t): 0.5423 LR: 0.000349 Step: 19600 Loss: 7.3769 Accuracy: 0.0193
376
+ [04/09 22:11:16 RAR]: Data (t): 0.0181, 140.97/s/gpu Batch (t): 1.8159 LR: 0.000349 Step: 19700 Loss: 7.3825 Accuracy: 0.0201
377
+ [04/09 22:13:20 RAR]: Data (t): 0.9219, 119.45/s/gpu Batch (t): 2.1432 LR: 0.000349 Step: 19800 Loss: 7.4473 Accuracy: 0.0169
378
+ [04/09 22:15:24 RAR]: Data (t): 0.0683, 438.75/s/gpu Batch (t): 0.5835 LR: 0.000349 Step: 19900 Loss: 7.3499 Accuracy: 0.0192
379
+ [04/09 22:17:30 RAR]: Data (t): 0.0205, 190.14/s/gpu Batch (t): 1.3464 LR: 0.000349 Step: 20000 Loss: 7.4171 Accuracy: 0.0165
380
+ [04/09 22:17:42 RAR]: Saved state to stage1/rar_baseline/checkpoint-20000
381
+ [04/09 22:20:34 RAR]: Data (t): 0.0176, 286.12/s/gpu Batch (t): 0.8947 LR: 0.000349 Step: 20100 Loss: 7.2654 Accuracy: 0.0245
382
+ [04/09 22:22:40 RAR]: Data (t): 1.0574, 162.63/s/gpu Batch (t): 1.5741 LR: 0.000349 Step: 20200 Loss: 7.3570 Accuracy: 0.0210
383
+ [04/09 22:24:45 RAR]: Data (t): 0.3605, 136.18/s/gpu Batch (t): 1.8798 LR: 0.000349 Step: 20300 Loss: 7.4934 Accuracy: 0.0144
384
+ [04/09 22:26:51 RAR]: Data (t): 1.1197, 144.68/s/gpu Batch (t): 1.7694 LR: 0.000349 Step: 20400 Loss: 7.3943 Accuracy: 0.0188
385
+ [04/09 22:28:56 RAR]: Data (t): 0.0213, 475.28/s/gpu Batch (t): 0.5386 LR: 0.000349 Step: 20500 Loss: 7.3888 Accuracy: 0.0186
386
+ [04/09 22:31:00 RAR]: Data (t): 0.1139, 143.87/s/gpu Batch (t): 1.7793 LR: 0.000349 Step: 20600 Loss: 7.3411 Accuracy: 0.0225
387
+ [04/09 22:33:06 RAR]: Data (t): 0.0300, 461.27/s/gpu Batch (t): 0.5550 LR: 0.000349 Step: 20700 Loss: 7.3853 Accuracy: 0.0174
388
+ [04/09 22:35:12 RAR]: Data (t): 0.0262, 475.26/s/gpu Batch (t): 0.5387 LR: 0.000349 Step: 20800 Loss: 7.3841 Accuracy: 0.0210
389
+ [04/09 22:37:17 RAR]: Data (t): 0.0215, 480.08/s/gpu Batch (t): 0.5332 LR: 0.000349 Step: 20900 Loss: 7.3635 Accuracy: 0.0210
stage1/rar_baseline/log1.txt ADDED
File without changes
stage1/rar_baseline/log2.txt ADDED
File without changes
stage1/rar_baseline/log3.txt ADDED
File without changes
stage1/rar_ordertok/checkpoint-40000/ema_model/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a4ae00c8789beda789bf2d4d15f56bdc100ceaa298a2392a519d601249275fa
3
+ size 1869255086
stage1/rar_ordertok/checkpoint-40000/metadata.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"global_step": 40000}
stage1/rar_ordertok/checkpoint-40000/optimizer.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b33f6ced587f05fb7e3e8f0e11814c437645abe3b719e4530471da25a7b32f5
3
+ size 3738572541
stage1/rar_ordertok/checkpoint-40000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:703c1fce0259a69a125a9454657bfd85a34d5f580906a8fd5859b79b2749cdbc
3
+ size 1869261230
stage1/rar_ordertok/checkpoint-40000/random_states_0.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a392ec6759485c46d5e6f447cb1c55abd77e6ee26be8ee92549214dfd50e4167
3
+ size 15124
stage1/rar_ordertok/checkpoint-40000/random_states_1.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7391e074557ace3491d490a158aca05fd21ac872741d271dfb481b37caf6627
3
+ size 15124
stage1/rar_ordertok/checkpoint-40000/random_states_2.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6e97b279a9d5545147dfd3ea3d378a8029e00c10112d0a05cdbef6a2909ef79
3
+ size 15124
stage1/rar_ordertok/checkpoint-40000/random_states_3.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2f850dd4dd50f1bb4534258c2bb8a31110d4bddbbaad0ea242e66557601fbef
3
+ size 15060
stage1/rar_ordertok/checkpoint-40000/scheduler.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04843554536f8e6800a3c88527835c6046e3124db2ed37d10a954e5a9dff0040
3
+ size 1064
stage1/rar_ordertok/checkpoint-40000/unwrapped_model/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:703c1fce0259a69a125a9454657bfd85a34d5f580906a8fd5859b79b2749cdbc
3
+ size 1869261230
stage1/rar_ordertok/config.yaml ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment:
2
+ project: titok_ca_rar
3
+ name: titok_ca_rar
4
+ max_train_examples: 1281167
5
+ save_every: 10000
6
+ eval_every: 5000000
7
+ generate_every: 10000000
8
+ log_every: 100
9
+ log_grad_norm_every: 1000
10
+ resume: true
11
+ tokenizer_checkpoint: /mnt/yscfs/huangmengqi/projects/visual_tokenization/order-tok/train_stage1/ordertok_simvq/checkpoint-100000/ema_model/pytorch_model.bin
12
+ output_dir: stage1/rar_ordertok
13
+ logging_dir: stage1/rar_ordertok/logs
14
+ model:
15
+ vq_model:
16
+ codebook_size: 4096
17
+ token_size: 256
18
+ use_l2_norm: true
19
+ commitment_cost: 0.25
20
+ vit_enc_model_size: large
21
+ vit_dec_model_size: large
22
+ vit_enc_patch_size: 16
23
+ vit_dec_patch_size: 16
24
+ num_latent_tokens: 32
25
+ layers_x: 18
26
+ layers_token: 2
27
+ embedding_width: 1024
28
+ width: 256
29
+ finetune_decoder: false
30
+ pretrained_tokenizer_weight: maskgit-vqgan-imagenet-f16-256.bin
31
+ generator:
32
+ hidden_size: 1024
33
+ num_hidden_layers: 24
34
+ num_attention_heads: 16
35
+ intermediate_size: 4096
36
+ dropout: 0.1
37
+ attn_drop: 0.1
38
+ class_label_dropout: 0.1
39
+ image_seq_len: 32
40
+ condition_num_classes: 1000
41
+ randomize_temperature: 1.02
42
+ guidance_scale: 15.5
43
+ guidance_scale_pow: 2.5
44
+ use_checkpoint: false
45
+ randomness_anneal_start: 0
46
+ randomness_anneal_end: 0
47
+ dataset:
48
+ params:
49
+ train_shards_path_or_url: imagenet/imagenet1k-train-{0000..1023}.tar
50
+ eval_shards_path_or_url: imagenet/imagenet1k-validation-{00..63}.tar
51
+ num_workers_per_gpu: 2
52
+ preprocessing:
53
+ resize_shorter_edge: 256
54
+ crop_size: 256
55
+ random_crop: false
56
+ random_flip: true
57
+ optimizer:
58
+ name: adamw
59
+ params:
60
+ learning_rate: 0.00035
61
+ beta1: 0.9
62
+ beta2: 0.96
63
+ weight_decay: 0.03
64
+ lr_scheduler:
65
+ scheduler: cosine
66
+ params:
67
+ learning_rate: ${optimizer.params.learning_rate}
68
+ warmup_steps: 0
69
+ end_lr: 1.0e-05
70
+ training:
71
+ gradient_accumulation_steps: 1
72
+ per_gpu_batch_size: 256
73
+ mixed_precision: bf16
74
+ enable_tf32: true
75
+ enable_wandb: true
76
+ use_ema: true
77
+ seed: 42
78
+ max_train_steps: 500000
79
+ max_grad_norm: 1.0
80
+ config: configs/training/generator/rar.yaml
stage1/rar_ordertok/log0.txt ADDED
@@ -0,0 +1,682 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [04/09 19:44:04 RAR]: Saving config to stage1/rar_ordertok/config.yaml
2
+ [04/09 19:44:04 RAR]: Config:
3
+ experiment:
4
+ project: titok_ca_rar
5
+ name: titok_ca_rar
6
+ max_train_examples: 1281167
7
+ save_every: 10000
8
+ eval_every: 5000000
9
+ generate_every: 10000000
10
+ log_every: 100
11
+ log_grad_norm_every: 1000
12
+ resume: true
13
+ tokenizer_checkpoint: /mnt/yscfs/huangmengqi/projects/visual_tokenization/order-tok/train_stage1/ordertok_simvq/checkpoint-100000/ema_model/pytorch_model.bin
14
+ output_dir: stage1/rar_ordertok
15
+ logging_dir: stage1/rar_ordertok/logs
16
+ model:
17
+ vq_model:
18
+ codebook_size: 4096
19
+ token_size: 256
20
+ use_l2_norm: true
21
+ commitment_cost: 0.25
22
+ vit_enc_model_size: large
23
+ vit_dec_model_size: large
24
+ vit_enc_patch_size: 16
25
+ vit_dec_patch_size: 16
26
+ num_latent_tokens: 32
27
+ layers_x: 18
28
+ layers_token: 2
29
+ embedding_width: 1024
30
+ width: 256
31
+ finetune_decoder: false
32
+ pretrained_tokenizer_weight: maskgit-vqgan-imagenet-f16-256.bin
33
+ generator:
34
+ hidden_size: 1024
35
+ num_hidden_layers: 24
36
+ num_attention_heads: 16
37
+ intermediate_size: 4096
38
+ dropout: 0.1
39
+ attn_drop: 0.1
40
+ class_label_dropout: 0.1
41
+ image_seq_len: 32
42
+ condition_num_classes: 1000
43
+ randomize_temperature: 1.02
44
+ guidance_scale: 15.5
45
+ guidance_scale_pow: 2.5
46
+ use_checkpoint: false
47
+ randomness_anneal_start: 0
48
+ randomness_anneal_end: 0
49
+ dataset:
50
+ params:
51
+ train_shards_path_or_url: imagenet/imagenet1k-train-{0000..1023}.tar
52
+ eval_shards_path_or_url: imagenet/imagenet1k-validation-{00..63}.tar
53
+ num_workers_per_gpu: 2
54
+ preprocessing:
55
+ resize_shorter_edge: 256
56
+ crop_size: 256
57
+ random_crop: false
58
+ random_flip: true
59
+ optimizer:
60
+ name: adamw
61
+ params:
62
+ learning_rate: 0.00035
63
+ beta1: 0.9
64
+ beta2: 0.96
65
+ weight_decay: 0.03
66
+ lr_scheduler:
67
+ scheduler: cosine
68
+ params:
69
+ learning_rate: ${optimizer.params.learning_rate}
70
+ warmup_steps: 0
71
+ end_lr: 1.0e-05
72
+ training:
73
+ gradient_accumulation_steps: 1
74
+ per_gpu_batch_size: 256
75
+ mixed_precision: bf16
76
+ enable_tf32: true
77
+ enable_wandb: true
78
+ use_ema: true
79
+ seed: 42
80
+ max_train_steps: 500000
81
+ max_grad_norm: 1.0
82
+ config: configs/training/generator/rar.yaml
83
+
84
+ [04/09 19:44:22 RAR]: Creating model and loss module.
85
+ [04/09 19:44:30 RAR]: Creating optimizers.
86
+ [04/09 19:44:30 RAR]: Creating lr_schedulers.
87
+ [04/09 19:44:30 RAR]: Creating dataloaders.
88
+ [04/09 19:44:30 RAR]: Preparing model, optimizer and dataloaders
89
+ [04/09 19:44:31 RAR]: ***** Running training *****
90
+ [04/09 19:44:31 RAR]:  Num training steps = 500000
91
+ [04/09 19:44:31 RAR]:  Gradient Accumulation steps = 1
92
+ [04/09 19:44:31 RAR]:  Instantaneous batch size per gpu = 256
93
+ [04/09 19:44:31 RAR]:  Total train batch size (w. parallel, distributed & accumulation) = 1024
94
+ [04/09 19:44:31 RAR]: All globbed checkpoints are: []
95
+ [04/09 19:44:31 RAR]: Training from scratch.
96
+ [04/09 19:46:49 RAR]: Data (t): 0.4487, 319.72/s/gpu Batch (t): 0.8007 LR: 0.000350 Step: 100 Loss: 7.7370 Accuracy: 0.0491
97
+ [04/09 19:49:03 RAR]: Data (t): 1.0769, 181.23/s/gpu Batch (t): 1.4126 LR: 0.000350 Step: 200 Loss: 7.6568 Accuracy: 0.0526
98
+ [04/09 19:51:14 RAR]: Data (t): 1.1857, 167.17/s/gpu Batch (t): 1.5314 LR: 0.000350 Step: 300 Loss: 7.6095 Accuracy: 0.0514
99
+ [04/09 19:53:25 RAR]: Data (t): 1.6314, 129.58/s/gpu Batch (t): 1.9756 LR: 0.000350 Step: 400 Loss: 7.5645 Accuracy: 0.0537
100
+ [04/09 19:55:35 RAR]: Data (t): 1.4973, 138.30/s/gpu Batch (t): 1.8511 LR: 0.000350 Step: 500 Loss: 7.5316 Accuracy: 0.0526
101
+ [04/09 19:57:48 RAR]: Data (t): 0.2407, 439.59/s/gpu Batch (t): 0.5824 LR: 0.000350 Step: 600 Loss: 7.5113 Accuracy: 0.0530
102
+ [04/09 20:00:03 RAR]: Data (t): 0.2408, 147.40/s/gpu Batch (t): 1.7367 LR: 0.000350 Step: 700 Loss: 7.4910 Accuracy: 0.0542
103
+ [04/09 20:02:09 RAR]: Data (t): 0.2405, 302.02/s/gpu Batch (t): 0.8476 LR: 0.000350 Step: 800 Loss: 7.4666 Accuracy: 0.0534
104
+ [04/09 20:04:23 RAR]: Data (t): 0.2412, 128.00/s/gpu Batch (t): 2.0000 LR: 0.000350 Step: 900 Loss: 7.4636 Accuracy: 0.0530
105
+ [04/09 20:06:30 RAR]: Data (t): 0.6515, 237.18/s/gpu Batch (t): 1.0794 LR: 0.000350 Step: 1000 Loss: 7.4374 Accuracy: 0.0528
106
+ [04/09 20:08:40 RAR]: Data (t): 0.2405, 176.90/s/gpu Batch (t): 1.4471 LR: 0.000350 Step: 1100 Loss: 7.4485 Accuracy: 0.0531
107
+ [04/09 20:10:50 RAR]: Data (t): 0.3293, 172.75/s/gpu Batch (t): 1.4820 LR: 0.000350 Step: 1200 Loss: 7.3568 Accuracy: 0.0562
108
+ [04/09 20:13:01 RAR]: Data (t): 0.2398, 406.35/s/gpu Batch (t): 0.6300 LR: 0.000350 Step: 1300 Loss: 7.3918 Accuracy: 0.0541
109
+ [04/09 20:15:09 RAR]: Data (t): 1.7374, 122.92/s/gpu Batch (t): 2.0827 LR: 0.000350 Step: 1400 Loss: 7.3811 Accuracy: 0.0552
110
+ [04/09 20:17:14 RAR]: Data (t): 0.2421, 138.35/s/gpu Batch (t): 1.8503 LR: 0.000350 Step: 1500 Loss: 7.3792 Accuracy: 0.0542
111
+ [04/09 20:19:26 RAR]: Data (t): 0.2396, 143.85/s/gpu Batch (t): 1.7797 LR: 0.000350 Step: 1600 Loss: 7.3884 Accuracy: 0.0532
112
+ [04/09 20:21:35 RAR]: Data (t): 0.2420, 404.60/s/gpu Batch (t): 0.6327 LR: 0.000350 Step: 1700 Loss: 7.3565 Accuracy: 0.0541
113
+ [04/09 20:23:43 RAR]: Data (t): 0.2386, 441.29/s/gpu Batch (t): 0.5801 LR: 0.000350 Step: 1800 Loss: 7.3548 Accuracy: 0.0542
114
+ [04/09 20:25:52 RAR]: Data (t): 0.2425, 435.43/s/gpu Batch (t): 0.5879 LR: 0.000350 Step: 1900 Loss: 7.3178 Accuracy: 0.0555
115
+ [04/09 20:28:00 RAR]: Data (t): 0.2429, 136.54/s/gpu Batch (t): 1.8749 LR: 0.000350 Step: 2000 Loss: 7.3259 Accuracy: 0.0549
116
+ [04/09 20:30:08 RAR]: Data (t): 1.1349, 172.82/s/gpu Batch (t): 1.4813 LR: 0.000350 Step: 2100 Loss: 7.3119 Accuracy: 0.0563
117
+ [04/09 20:32:19 RAR]: Data (t): 0.2428, 111.57/s/gpu Batch (t): 2.2945 LR: 0.000350 Step: 2200 Loss: 7.3042 Accuracy: 0.0549
118
+ [04/09 20:34:30 RAR]: Data (t): 0.2418, 144.67/s/gpu Batch (t): 1.7696 LR: 0.000350 Step: 2300 Loss: 7.2698 Accuracy: 0.0565
119
+ [04/09 20:36:38 RAR]: Data (t): 0.2406, 131.07/s/gpu Batch (t): 1.9531 LR: 0.000350 Step: 2400 Loss: 7.2964 Accuracy: 0.0548
120
+ [04/09 20:38:47 RAR]: Data (t): 0.2416, 124.64/s/gpu Batch (t): 2.0539 LR: 0.000350 Step: 2500 Loss: 7.2797 Accuracy: 0.0550
121
+ [04/09 20:41:01 RAR]: Data (t): 0.2407, 423.05/s/gpu Batch (t): 0.6051 LR: 0.000350 Step: 2600 Loss: 7.2481 Accuracy: 0.0582
122
+ [04/09 20:43:12 RAR]: Data (t): 0.2430, 108.57/s/gpu Batch (t): 2.3580 LR: 0.000350 Step: 2700 Loss: 7.2596 Accuracy: 0.0573
123
+ [04/09 20:45:19 RAR]: Data (t): 0.2425, 428.26/s/gpu Batch (t): 0.5978 LR: 0.000350 Step: 2800 Loss: 7.2665 Accuracy: 0.0542
124
+ [04/09 20:47:29 RAR]: Data (t): 1.6807, 125.75/s/gpu Batch (t): 2.0357 LR: 0.000350 Step: 2900 Loss: 7.2275 Accuracy: 0.0576
125
+ [04/09 20:49:40 RAR]: Data (t): 1.6134, 125.35/s/gpu Batch (t): 2.0423 LR: 0.000350 Step: 3000 Loss: 7.2345 Accuracy: 0.0559
126
+ [04/09 20:51:48 RAR]: Data (t): 0.2428, 428.26/s/gpu Batch (t): 0.5978 LR: 0.000350 Step: 3100 Loss: 7.1989 Accuracy: 0.0567
127
+ [04/09 20:53:58 RAR]: Data (t): 0.2404, 441.79/s/gpu Batch (t): 0.5795 LR: 0.000350 Step: 3200 Loss: 7.2079 Accuracy: 0.0564
128
+ [04/09 20:56:07 RAR]: Data (t): 0.2403, 418.98/s/gpu Batch (t): 0.6110 LR: 0.000350 Step: 3300 Loss: 7.1680 Accuracy: 0.0583
129
+ [04/09 20:58:18 RAR]: Data (t): 0.2408, 410.66/s/gpu Batch (t): 0.6234 LR: 0.000350 Step: 3400 Loss: 7.1991 Accuracy: 0.0557
130
+ [04/09 21:00:28 RAR]: Data (t): 0.2406, 162.88/s/gpu Batch (t): 1.5717 LR: 0.000350 Step: 3500 Loss: 7.1996 Accuracy: 0.0582
131
+ [04/09 21:02:34 RAR]: Data (t): 0.2409, 429.21/s/gpu Batch (t): 0.5964 LR: 0.000350 Step: 3600 Loss: 7.1947 Accuracy: 0.0569
132
+ [04/09 21:04:45 RAR]: Data (t): 0.2403, 427.45/s/gpu Batch (t): 0.5989 LR: 0.000350 Step: 3700 Loss: 7.2070 Accuracy: 0.0589
133
+ [04/09 21:06:58 RAR]: Data (t): 0.2427, 434.82/s/gpu Batch (t): 0.5888 LR: 0.000350 Step: 3800 Loss: 7.1157 Accuracy: 0.0593
134
+ [04/09 21:09:04 RAR]: Data (t): 0.2411, 158.39/s/gpu Batch (t): 1.6162 LR: 0.000350 Step: 3900 Loss: 7.1717 Accuracy: 0.0576
135
+ [04/09 21:11:14 RAR]: Data (t): 0.2393, 351.87/s/gpu Batch (t): 0.7275 LR: 0.000350 Step: 4000 Loss: 7.1131 Accuracy: 0.0594
136
+ [04/09 21:13:25 RAR]: Data (t): 0.2421, 426.85/s/gpu Batch (t): 0.5997 LR: 0.000350 Step: 4100 Loss: 7.1558 Accuracy: 0.0574
137
+ [04/09 21:15:34 RAR]: Data (t): 0.2409, 129.27/s/gpu Batch (t): 1.9804 LR: 0.000350 Step: 4200 Loss: 7.1579 Accuracy: 0.0564
138
+ [04/09 21:17:44 RAR]: Data (t): 0.2407, 136.31/s/gpu Batch (t): 1.8780 LR: 0.000350 Step: 4300 Loss: 7.1697 Accuracy: 0.0571
139
+ [04/09 21:19:51 RAR]: Data (t): 1.9347, 112.28/s/gpu Batch (t): 2.2801 LR: 0.000350 Step: 4400 Loss: 7.1645 Accuracy: 0.0573
140
+ [04/09 21:21:57 RAR]: Data (t): 0.2421, 227.82/s/gpu Batch (t): 1.1237 LR: 0.000350 Step: 4500 Loss: 7.1225 Accuracy: 0.0578
141
+ [04/09 21:24:04 RAR]: Data (t): 0.2413, 428.63/s/gpu Batch (t): 0.5973 LR: 0.000350 Step: 4600 Loss: 7.1381 Accuracy: 0.0568
142
+ [04/09 21:26:14 RAR]: Data (t): 0.2401, 430.03/s/gpu Batch (t): 0.5953 LR: 0.000350 Step: 4700 Loss: 7.1175 Accuracy: 0.0582
143
+ [04/09 21:28:23 RAR]: Data (t): 0.2444, 432.44/s/gpu Batch (t): 0.5920 LR: 0.000350 Step: 4800 Loss: 7.1817 Accuracy: 0.0566
144
+ [04/09 21:30:32 RAR]: Data (t): 0.2395, 136.53/s/gpu Batch (t): 1.8750 LR: 0.000350 Step: 4900 Loss: 7.1153 Accuracy: 0.0582
145
+ [04/09 21:32:39 RAR]: Data (t): 0.2412, 361.58/s/gpu Batch (t): 0.7080 LR: 0.000350 Step: 5000 Loss: 7.1364 Accuracy: 0.0586
146
+ [04/09 21:34:51 RAR]: Data (t): 0.2403, 283.39/s/gpu Batch (t): 0.9033 LR: 0.000350 Step: 5100 Loss: 7.0637 Accuracy: 0.0602
147
+ [04/09 21:37:03 RAR]: Data (t): 1.2199, 163.15/s/gpu Batch (t): 1.5691 LR: 0.000350 Step: 5200 Loss: 7.0235 Accuracy: 0.0636
148
+ [04/09 21:39:10 RAR]: Data (t): 1.0205, 186.70/s/gpu Batch (t): 1.3712 LR: 0.000350 Step: 5300 Loss: 7.0862 Accuracy: 0.0587
149
+ [04/09 21:41:16 RAR]: Data (t): 0.2420, 428.57/s/gpu Batch (t): 0.5973 LR: 0.000350 Step: 5400 Loss: 7.0640 Accuracy: 0.0609
150
+ [04/09 21:43:24 RAR]: Data (t): 0.2407, 371.48/s/gpu Batch (t): 0.6891 LR: 0.000350 Step: 5500 Loss: 7.0628 Accuracy: 0.0611
151
+ [04/09 21:45:35 RAR]: Data (t): 0.2408, 130.00/s/gpu Batch (t): 1.9693 LR: 0.000350 Step: 5600 Loss: 7.0531 Accuracy: 0.0625
152
+ [04/09 21:47:42 RAR]: Data (t): 0.2397, 131.43/s/gpu Batch (t): 1.9479 LR: 0.000350 Step: 5700 Loss: 7.0584 Accuracy: 0.0609
153
+ [04/09 21:49:49 RAR]: Data (t): 0.2413, 115.24/s/gpu Batch (t): 2.2215 LR: 0.000350 Step: 5800 Loss: 7.0402 Accuracy: 0.0612
154
+ [04/09 21:51:55 RAR]: Data (t): 0.2405, 422.41/s/gpu Batch (t): 0.6060 LR: 0.000350 Step: 5900 Loss: 7.0414 Accuracy: 0.0611
155
+ [04/09 21:54:03 RAR]: Data (t): 0.2411, 249.39/s/gpu Batch (t): 1.0265 LR: 0.000350 Step: 6000 Loss: 7.0580 Accuracy: 0.0610
156
+ [04/09 21:56:09 RAR]: Data (t): 0.4045, 343.08/s/gpu Batch (t): 0.7462 LR: 0.000350 Step: 6100 Loss: 7.0523 Accuracy: 0.0610
157
+ [04/09 21:58:15 RAR]: Data (t): 0.2417, 439.49/s/gpu Batch (t): 0.5825 LR: 0.000350 Step: 6200 Loss: 7.0774 Accuracy: 0.0597
158
+ [04/09 22:00:28 RAR]: Data (t): 1.7282, 122.68/s/gpu Batch (t): 2.0867 LR: 0.000350 Step: 6300 Loss: 7.0397 Accuracy: 0.0609
159
+ [04/09 22:02:35 RAR]: Data (t): 1.2056, 162.91/s/gpu Batch (t): 1.5714 LR: 0.000350 Step: 6400 Loss: 7.0136 Accuracy: 0.0623
160
+ [04/09 22:04:44 RAR]: Data (t): 0.2426, 110.19/s/gpu Batch (t): 2.3232 LR: 0.000350 Step: 6500 Loss: 7.0225 Accuracy: 0.0597
161
+ [04/09 22:06:49 RAR]: Data (t): 1.5437, 134.59/s/gpu Batch (t): 1.9020 LR: 0.000350 Step: 6600 Loss: 6.9971 Accuracy: 0.0624
162
+ [04/09 22:08:54 RAR]: Data (t): 0.8011, 218.88/s/gpu Batch (t): 1.1696 LR: 0.000350 Step: 6700 Loss: 7.0193 Accuracy: 0.0623
163
+ [04/09 22:11:01 RAR]: Data (t): 0.2411, 133.29/s/gpu Batch (t): 1.9206 LR: 0.000350 Step: 6800 Loss: 6.9986 Accuracy: 0.0627
164
+ [04/09 22:13:06 RAR]: Data (t): 0.9698, 137.20/s/gpu Batch (t): 1.8659 LR: 0.000350 Step: 6900 Loss: 6.9828 Accuracy: 0.0634
165
+ [04/09 22:15:13 RAR]: Data (t): 0.2421, 347.98/s/gpu Batch (t): 0.7357 LR: 0.000350 Step: 7000 Loss: 6.9813 Accuracy: 0.0639
166
+ [04/09 22:17:20 RAR]: Data (t): 0.2418, 404.03/s/gpu Batch (t): 0.6336 LR: 0.000350 Step: 7100 Loss: 7.0194 Accuracy: 0.0625
167
+ [04/09 22:19:22 RAR]: Data (t): 1.9369, 111.86/s/gpu Batch (t): 2.2886 LR: 0.000350 Step: 7200 Loss: 6.9871 Accuracy: 0.0643
168
+ [04/09 22:21:26 RAR]: Data (t): 0.2422, 142.53/s/gpu Batch (t): 1.7961 LR: 0.000350 Step: 7300 Loss: 6.9916 Accuracy: 0.0632
169
+ [04/09 22:23:33 RAR]: Data (t): 0.2437, 264.25/s/gpu Batch (t): 0.9688 LR: 0.000350 Step: 7400 Loss: 6.9267 Accuracy: 0.0660
170
+ [04/09 22:25:40 RAR]: Data (t): 0.2416, 106.36/s/gpu Batch (t): 2.4069 LR: 0.000350 Step: 7500 Loss: 6.9249 Accuracy: 0.0652
171
+ [04/09 22:27:51 RAR]: Data (t): 1.1477, 171.37/s/gpu Batch (t): 1.4939 LR: 0.000350 Step: 7600 Loss: 6.9094 Accuracy: 0.0672
172
+ [04/09 22:30:00 RAR]: Data (t): 0.2416, 434.23/s/gpu Batch (t): 0.5896 LR: 0.000350 Step: 7700 Loss: 6.8849 Accuracy: 0.0692
173
+ [04/09 22:32:04 RAR]: Data (t): 0.4483, 321.82/s/gpu Batch (t): 0.7955 LR: 0.000350 Step: 7800 Loss: 6.9377 Accuracy: 0.0655
174
+ [04/09 22:34:11 RAR]: Data (t): 0.2408, 136.70/s/gpu Batch (t): 1.8727 LR: 0.000350 Step: 7900 Loss: 6.9585 Accuracy: 0.0673
175
+ [04/09 22:36:19 RAR]: Data (t): 0.2423, 113.28/s/gpu Batch (t): 2.2600 LR: 0.000350 Step: 8000 Loss: 6.9123 Accuracy: 0.0677
176
+ [04/09 22:38:26 RAR]: Data (t): 0.2411, 153.56/s/gpu Batch (t): 1.6671 LR: 0.000350 Step: 8100 Loss: 6.9373 Accuracy: 0.0644
177
+ [04/09 22:40:23 RAR]: Data (t): 0.2423, 149.97/s/gpu Batch (t): 1.7070 LR: 0.000350 Step: 8200 Loss: 6.9174 Accuracy: 0.0662
178
+ [04/09 22:42:23 RAR]: Data (t): 0.2424, 429.06/s/gpu Batch (t): 0.5967 LR: 0.000350 Step: 8300 Loss: 6.8878 Accuracy: 0.0683
179
+ [04/09 22:44:19 RAR]: Data (t): 0.2402, 437.44/s/gpu Batch (t): 0.5852 LR: 0.000350 Step: 8400 Loss: 6.9545 Accuracy: 0.0645
180
+ [04/09 22:46:17 RAR]: Data (t): 0.2422, 438.22/s/gpu Batch (t): 0.5842 LR: 0.000350 Step: 8500 Loss: 6.9301 Accuracy: 0.0666
181
+ [04/09 22:48:15 RAR]: Data (t): 0.2440, 414.68/s/gpu Batch (t): 0.6173 LR: 0.000350 Step: 8600 Loss: 6.9468 Accuracy: 0.0650
182
+ [04/09 22:50:14 RAR]: Data (t): 0.2427, 142.32/s/gpu Batch (t): 1.7988 LR: 0.000350 Step: 8700 Loss: 6.9360 Accuracy: 0.0670
183
+ [04/09 22:52:14 RAR]: Data (t): 0.2403, 427.70/s/gpu Batch (t): 0.5986 LR: 0.000350 Step: 8800 Loss: 6.7623 Accuracy: 0.0772
184
+ [04/09 22:54:13 RAR]: Data (t): 0.2437, 432.72/s/gpu Batch (t): 0.5916 LR: 0.000350 Step: 8900 Loss: 6.9327 Accuracy: 0.0658
185
+ [04/09 22:56:12 RAR]: Data (t): 1.2269, 155.19/s/gpu Batch (t): 1.6496 LR: 0.000350 Step: 9000 Loss: 6.9053 Accuracy: 0.0670
186
+ [04/09 22:58:08 RAR]: Data (t): 0.8591, 212.44/s/gpu Batch (t): 1.2050 LR: 0.000350 Step: 9100 Loss: 6.9131 Accuracy: 0.0656
187
+ [04/09 23:00:06 RAR]: Data (t): 0.2420, 433.90/s/gpu Batch (t): 0.5900 LR: 0.000350 Step: 9200 Loss: 6.8654 Accuracy: 0.0680
188
+ [04/09 23:02:02 RAR]: Data (t): 0.2399, 416.51/s/gpu Batch (t): 0.6146 LR: 0.000350 Step: 9300 Loss: 6.8909 Accuracy: 0.0666
189
+ [04/09 23:04:03 RAR]: Data (t): 1.2222, 162.87/s/gpu Batch (t): 1.5718 LR: 0.000350 Step: 9400 Loss: 6.8625 Accuracy: 0.0675
190
+ [04/09 23:05:59 RAR]: Data (t): 0.4683, 163.30/s/gpu Batch (t): 1.5677 LR: 0.000350 Step: 9500 Loss: 6.8425 Accuracy: 0.0697
191
+ [04/09 23:07:57 RAR]: Data (t): 0.2414, 433.74/s/gpu Batch (t): 0.5902 LR: 0.000350 Step: 9600 Loss: 6.8230 Accuracy: 0.0722
192
+ [04/09 23:09:56 RAR]: Data (t): 0.2396, 424.66/s/gpu Batch (t): 0.6028 LR: 0.000350 Step: 9700 Loss: 6.8011 Accuracy: 0.0724
193
+ [04/09 23:11:54 RAR]: Data (t): 1.2448, 160.32/s/gpu Batch (t): 1.5968 LR: 0.000350 Step: 9800 Loss: 6.9288 Accuracy: 0.0665
194
+ [04/09 23:13:50 RAR]: Data (t): 1.2820, 157.14/s/gpu Batch (t): 1.6291 LR: 0.000350 Step: 9900 Loss: 6.9490 Accuracy: 0.0639
195
+ [04/09 23:15:49 RAR]: Data (t): 1.1791, 159.54/s/gpu Batch (t): 1.6047 LR: 0.000350 Step: 10000 Loss: 6.9179 Accuracy: 0.0678
196
+ [04/09 23:16:00 RAR]: Saved state to stage1/rar_ordertok/checkpoint-10000
197
+ [04/09 23:24:12 RAR]: Saving config to stage1/rar_ordertok/config.yaml
198
+ [04/09 23:24:12 RAR]: Config:
199
+ experiment:
200
+ project: titok_ca_rar
201
+ name: titok_ca_rar
202
+ max_train_examples: 1281167
203
+ save_every: 10000
204
+ eval_every: 5000000
205
+ generate_every: 10000000
206
+ log_every: 100
207
+ log_grad_norm_every: 1000
208
+ resume: true
209
+ tokenizer_checkpoint: /mnt/yscfs/huangmengqi/projects/visual_tokenization/order-tok/train_stage1/ordertok_simvq/checkpoint-100000/ema_model/pytorch_model.bin
210
+ output_dir: stage1/rar_ordertok
211
+ logging_dir: stage1/rar_ordertok/logs
212
+ model:
213
+ vq_model:
214
+ codebook_size: 4096
215
+ token_size: 256
216
+ use_l2_norm: true
217
+ commitment_cost: 0.25
218
+ vit_enc_model_size: large
219
+ vit_dec_model_size: large
220
+ vit_enc_patch_size: 16
221
+ vit_dec_patch_size: 16
222
+ num_latent_tokens: 32
223
+ layers_x: 18
224
+ layers_token: 2
225
+ embedding_width: 1024
226
+ width: 256
227
+ finetune_decoder: false
228
+ pretrained_tokenizer_weight: maskgit-vqgan-imagenet-f16-256.bin
229
+ generator:
230
+ hidden_size: 1024
231
+ num_hidden_layers: 24
232
+ num_attention_heads: 16
233
+ intermediate_size: 4096
234
+ dropout: 0.1
235
+ attn_drop: 0.1
236
+ class_label_dropout: 0.1
237
+ image_seq_len: 32
238
+ condition_num_classes: 1000
239
+ randomize_temperature: 1.02
240
+ guidance_scale: 15.5
241
+ guidance_scale_pow: 2.5
242
+ use_checkpoint: false
243
+ randomness_anneal_start: 0
244
+ randomness_anneal_end: 0
245
+ dataset:
246
+ params:
247
+ train_shards_path_or_url: imagenet/imagenet1k-train-{0000..1023}.tar
248
+ eval_shards_path_or_url: imagenet/imagenet1k-validation-{00..63}.tar
249
+ num_workers_per_gpu: 2
250
+ preprocessing:
251
+ resize_shorter_edge: 256
252
+ crop_size: 256
253
+ random_crop: false
254
+ random_flip: true
255
+ optimizer:
256
+ name: adamw
257
+ params:
258
+ learning_rate: 0.00035
259
+ beta1: 0.9
260
+ beta2: 0.96
261
+ weight_decay: 0.03
262
+ lr_scheduler:
263
+ scheduler: cosine
264
+ params:
265
+ learning_rate: ${optimizer.params.learning_rate}
266
+ warmup_steps: 0
267
+ end_lr: 1.0e-05
268
+ training:
269
+ gradient_accumulation_steps: 1
270
+ per_gpu_batch_size: 256
271
+ mixed_precision: bf16
272
+ enable_tf32: true
273
+ enable_wandb: true
274
+ use_ema: true
275
+ seed: 42
276
+ max_train_steps: 500000
277
+ max_grad_norm: 1.0
278
+ config: configs/training/generator/rar.yaml
279
+
280
+ [04/09 23:24:36 RAR]: Creating model and loss module.
281
+ [04/09 23:24:44 RAR]: Creating optimizers.
282
+ [04/09 23:24:44 RAR]: Creating lr_schedulers.
283
+ [04/09 23:24:44 RAR]: Creating dataloaders.
284
+ [04/09 23:24:44 RAR]: Preparing model, optimizer and dataloaders
285
+ [04/09 23:24:45 RAR]: ***** Running training *****
286
+ [04/09 23:24:45 RAR]:  Num training steps = 500000
287
+ [04/09 23:24:45 RAR]:  Gradient Accumulation steps = 1
288
+ [04/09 23:24:45 RAR]:  Instantaneous batch size per gpu = 256
289
+ [04/09 23:24:45 RAR]:  Total train batch size (w. parallel, distributed & accumulation) = 1024
290
+ [04/09 23:24:45 RAR]: All globbed checkpoints are: ['stage1/rar_ordertok/checkpoint-10000']
291
+ [04/09 23:24:45 RAR]: Load checkpoint from stage1/rar_ordertok/checkpoint-10000
292
+ [04/09 23:25:18 RAR]: Resuming at global_step 10000
293
+ [04/09 23:27:22 RAR]: Data (t): 0.2394, 302.35/s/gpu Batch (t): 0.8467 LR: 0.000350 Step: 10100 Loss: 6.7939 Accuracy: 0.0742
294
+ [04/09 23:29:23 RAR]: Data (t): 0.2827, 148.87/s/gpu Batch (t): 1.7197 LR: 0.000350 Step: 10200 Loss: 6.8112 Accuracy: 0.0714
295
+ [04/09 23:31:24 RAR]: Data (t): 0.2391, 174.19/s/gpu Batch (t): 1.4696 LR: 0.000350 Step: 10300 Loss: 6.8923 Accuracy: 0.0669
296
+ [04/09 23:33:20 RAR]: Data (t): 0.2391, 201.44/s/gpu Batch (t): 1.2709 LR: 0.000350 Step: 10400 Loss: 6.8976 Accuracy: 0.0654
297
+ [04/09 23:35:18 RAR]: Data (t): 0.2408, 393.32/s/gpu Batch (t): 0.6509 LR: 0.000350 Step: 10500 Loss: 6.8749 Accuracy: 0.0689
298
+ [04/09 23:37:18 RAR]: Data (t): 0.2396, 411.08/s/gpu Batch (t): 0.6228 LR: 0.000350 Step: 10600 Loss: 6.7787 Accuracy: 0.0739
299
+ [04/09 23:39:15 RAR]: Data (t): 0.2406, 419.98/s/gpu Batch (t): 0.6095 LR: 0.000350 Step: 10700 Loss: 6.8481 Accuracy: 0.0681
300
+ [04/09 23:41:14 RAR]: Data (t): 0.2399, 119.44/s/gpu Batch (t): 2.1433 LR: 0.000350 Step: 10800 Loss: 6.8456 Accuracy: 0.0693
301
+ [04/09 23:43:11 RAR]: Data (t): 1.0869, 134.38/s/gpu Batch (t): 1.9051 LR: 0.000350 Step: 10900 Loss: 6.8538 Accuracy: 0.0685
302
+ [04/09 23:45:02 RAR]: Data (t): 0.2392, 182.20/s/gpu Batch (t): 1.4050 LR: 0.000350 Step: 11000 Loss: 6.8128 Accuracy: 0.0711
303
+ [04/09 23:47:03 RAR]: Data (t): 1.2431, 158.32/s/gpu Batch (t): 1.6169 LR: 0.000350 Step: 11100 Loss: 6.8337 Accuracy: 0.0722
304
+ [04/09 23:49:15 RAR]: Data (t): 0.2409, 409.80/s/gpu Batch (t): 0.6247 LR: 0.000350 Step: 11200 Loss: 6.8296 Accuracy: 0.0716
305
+ [04/09 23:51:30 RAR]: Data (t): 0.2402, 408.05/s/gpu Batch (t): 0.6274 LR: 0.000350 Step: 11300 Loss: 6.8048 Accuracy: 0.0713
306
+ [04/09 23:53:38 RAR]: Data (t): 0.2407, 241.55/s/gpu Batch (t): 1.0598 LR: 0.000350 Step: 11400 Loss: 6.7893 Accuracy: 0.0728
307
+ [04/09 23:55:46 RAR]: Data (t): 1.6834, 124.96/s/gpu Batch (t): 2.0486 LR: 0.000350 Step: 11500 Loss: 6.8478 Accuracy: 0.0695
308
+ [04/09 23:57:49 RAR]: Data (t): 0.2407, 415.18/s/gpu Batch (t): 0.6166 LR: 0.000350 Step: 11600 Loss: 6.7026 Accuracy: 0.0788
309
+ [04/09 23:59:51 RAR]: Data (t): 0.2394, 402.73/s/gpu Batch (t): 0.6357 LR: 0.000350 Step: 11700 Loss: 6.7698 Accuracy: 0.0728
310
+ [04/10 00:01:53 RAR]: Data (t): 0.2440, 424.36/s/gpu Batch (t): 0.6033 LR: 0.000350 Step: 11800 Loss: 6.8780 Accuracy: 0.0670
311
+ [04/10 00:03:55 RAR]: Data (t): 0.2403, 220.63/s/gpu Batch (t): 1.1603 LR: 0.000350 Step: 11900 Loss: 6.8096 Accuracy: 0.0728
312
+ [04/10 00:05:56 RAR]: Data (t): 0.2406, 239.59/s/gpu Batch (t): 1.0685 LR: 0.000350 Step: 12000 Loss: 6.7901 Accuracy: 0.0722
313
+ [04/10 00:07:57 RAR]: Data (t): 0.2404, 418.47/s/gpu Batch (t): 0.6117 LR: 0.000350 Step: 12100 Loss: 6.7951 Accuracy: 0.0732
314
+ [04/10 00:09:58 RAR]: Data (t): 0.2392, 419.58/s/gpu Batch (t): 0.6101 LR: 0.000350 Step: 12200 Loss: 6.6813 Accuracy: 0.0811
315
+ [04/10 00:11:58 RAR]: Data (t): 0.2398, 414.99/s/gpu Batch (t): 0.6169 LR: 0.000349 Step: 12300 Loss: 6.7997 Accuracy: 0.0754
316
+ [04/10 00:13:59 RAR]: Data (t): 0.2390, 422.25/s/gpu Batch (t): 0.6063 LR: 0.000349 Step: 12400 Loss: 6.7223 Accuracy: 0.0775
317
+ [04/10 00:15:58 RAR]: Data (t): 0.2389, 390.46/s/gpu Batch (t): 0.6556 LR: 0.000349 Step: 12500 Loss: 6.7630 Accuracy: 0.0750
318
+ [04/10 00:18:02 RAR]: Data (t): 0.2391, 414.58/s/gpu Batch (t): 0.6175 LR: 0.000349 Step: 12600 Loss: 6.8571 Accuracy: 0.0694
319
+ [04/10 00:20:04 RAR]: Data (t): 0.2424, 322.32/s/gpu Batch (t): 0.7942 LR: 0.000349 Step: 12700 Loss: 6.7317 Accuracy: 0.0754
320
+ [04/10 00:22:06 RAR]: Data (t): 1.3945, 146.50/s/gpu Batch (t): 1.7475 LR: 0.000349 Step: 12800 Loss: 6.7510 Accuracy: 0.0730
321
+ [04/10 00:24:12 RAR]: Data (t): 0.2386, 427.60/s/gpu Batch (t): 0.5987 LR: 0.000349 Step: 12900 Loss: 6.7673 Accuracy: 0.0750
322
+ [04/10 00:26:13 RAR]: Data (t): 0.2409, 271.40/s/gpu Batch (t): 0.9433 LR: 0.000349 Step: 13000 Loss: 6.7291 Accuracy: 0.0780
323
+ [04/10 00:28:15 RAR]: Data (t): 0.2438, 146.08/s/gpu Batch (t): 1.7525 LR: 0.000349 Step: 13100 Loss: 6.6553 Accuracy: 0.0790
324
+ [04/10 00:30:16 RAR]: Data (t): 0.2483, 139.69/s/gpu Batch (t): 1.8326 LR: 0.000349 Step: 13200 Loss: 6.7800 Accuracy: 0.0743
325
+ [04/10 00:32:17 RAR]: Data (t): 0.2401, 134.29/s/gpu Batch (t): 1.9063 LR: 0.000349 Step: 13300 Loss: 6.7193 Accuracy: 0.0751
326
+ [04/10 00:34:16 RAR]: Data (t): 0.2453, 179.33/s/gpu Batch (t): 1.4275 LR: 0.000349 Step: 13400 Loss: 6.8340 Accuracy: 0.0709
327
+ [04/10 00:36:15 RAR]: Data (t): 0.2441, 169.07/s/gpu Batch (t): 1.5141 LR: 0.000349 Step: 13500 Loss: 6.6396 Accuracy: 0.0794
328
+ [04/10 00:38:12 RAR]: Data (t): 0.2450, 156.00/s/gpu Batch (t): 1.6411 LR: 0.000349 Step: 13600 Loss: 6.7173 Accuracy: 0.0768
329
+ [04/10 00:40:12 RAR]: Data (t): 0.2421, 392.56/s/gpu Batch (t): 0.6521 LR: 0.000349 Step: 13700 Loss: 6.7068 Accuracy: 0.0773
330
+ [04/10 00:42:15 RAR]: Data (t): 0.2409, 264.71/s/gpu Batch (t): 0.9671 LR: 0.000349 Step: 13800 Loss: 6.6112 Accuracy: 0.0838
331
+ [04/10 00:44:13 RAR]: Data (t): 0.2451, 352.13/s/gpu Batch (t): 0.7270 LR: 0.000349 Step: 13900 Loss: 6.7261 Accuracy: 0.0761
332
+ [04/10 00:46:13 RAR]: Data (t): 0.2400, 294.76/s/gpu Batch (t): 0.8685 LR: 0.000349 Step: 14000 Loss: 6.7182 Accuracy: 0.0757
333
+ [04/10 00:48:09 RAR]: Data (t): 1.1072, 168.60/s/gpu Batch (t): 1.5184 LR: 0.000349 Step: 14100 Loss: 6.6402 Accuracy: 0.0829
334
+ [04/10 00:50:04 RAR]: Data (t): 1.2534, 148.51/s/gpu Batch (t): 1.7238 LR: 0.000349 Step: 14200 Loss: 6.6917 Accuracy: 0.0809
335
+ [04/10 00:52:00 RAR]: Data (t): 0.2482, 179.70/s/gpu Batch (t): 1.4246 LR: 0.000349 Step: 14300 Loss: 6.6892 Accuracy: 0.0777
336
+ [04/10 00:53:57 RAR]: Data (t): 0.2452, 159.43/s/gpu Batch (t): 1.6057 LR: 0.000349 Step: 14400 Loss: 6.6103 Accuracy: 0.0827
337
+ [04/10 00:55:52 RAR]: Data (t): 0.2438, 384.88/s/gpu Batch (t): 0.6651 LR: 0.000349 Step: 14500 Loss: 6.5876 Accuracy: 0.0834
338
+ [04/10 00:57:49 RAR]: Data (t): 0.2405, 389.01/s/gpu Batch (t): 0.6581 LR: 0.000349 Step: 14600 Loss: 6.7004 Accuracy: 0.0772
339
+ [04/10 00:59:46 RAR]: Data (t): 0.2408, 413.25/s/gpu Batch (t): 0.6195 LR: 0.000349 Step: 14700 Loss: 6.7230 Accuracy: 0.0765
340
+ [04/10 01:01:41 RAR]: Data (t): 0.2425, 134.19/s/gpu Batch (t): 1.9077 LR: 0.000349 Step: 14800 Loss: 6.6798 Accuracy: 0.0797
341
+ [04/10 01:03:36 RAR]: Data (t): 0.2417, 413.39/s/gpu Batch (t): 0.6193 LR: 0.000349 Step: 14900 Loss: 6.7233 Accuracy: 0.0786
342
+ [04/10 01:05:31 RAR]: Data (t): 0.2426, 287.32/s/gpu Batch (t): 0.8910 LR: 0.000349 Step: 15000 Loss: 6.6847 Accuracy: 0.0751
343
+ [04/10 01:07:28 RAR]: Data (t): 0.2409, 374.68/s/gpu Batch (t): 0.6833 LR: 0.000349 Step: 15100 Loss: 6.7235 Accuracy: 0.0779
344
+ [04/10 01:09:23 RAR]: Data (t): 0.2411, 431.09/s/gpu Batch (t): 0.5938 LR: 0.000349 Step: 15200 Loss: 6.6577 Accuracy: 0.0801
345
+ [04/10 01:11:16 RAR]: Data (t): 0.2409, 229.09/s/gpu Batch (t): 1.1175 LR: 0.000349 Step: 15300 Loss: 6.7002 Accuracy: 0.0785
346
+ [04/10 01:13:10 RAR]: Data (t): 0.6592, 249.24/s/gpu Batch (t): 1.0271 LR: 0.000349 Step: 15400 Loss: 6.6358 Accuracy: 0.0821
347
+ [04/10 01:15:02 RAR]: Data (t): 0.2409, 423.55/s/gpu Batch (t): 0.6044 LR: 0.000349 Step: 15500 Loss: 6.5859 Accuracy: 0.0845
348
+ [04/10 01:16:55 RAR]: Data (t): 0.2404, 423.03/s/gpu Batch (t): 0.6052 LR: 0.000349 Step: 15600 Loss: 6.5701 Accuracy: 0.0861
349
+ [04/10 01:18:48 RAR]: Data (t): 0.2407, 179.74/s/gpu Batch (t): 1.4243 LR: 0.000349 Step: 15700 Loss: 6.5720 Accuracy: 0.0838
350
+ [04/10 01:20:41 RAR]: Data (t): 0.2399, 416.97/s/gpu Batch (t): 0.6140 LR: 0.000349 Step: 15800 Loss: 6.5419 Accuracy: 0.0893
351
+ [04/10 01:22:31 RAR]: Data (t): 0.6126, 262.84/s/gpu Batch (t): 0.9740 LR: 0.000349 Step: 15900 Loss: 6.6150 Accuracy: 0.0823
352
+ [04/10 01:24:23 RAR]: Data (t): 0.5884, 251.50/s/gpu Batch (t): 1.0179 LR: 0.000349 Step: 16000 Loss: 6.7369 Accuracy: 0.0768
353
+ [04/10 01:26:13 RAR]: Data (t): 0.2389, 420.02/s/gpu Batch (t): 0.6095 LR: 0.000349 Step: 16100 Loss: 6.6850 Accuracy: 0.0791
354
+ [04/10 01:28:04 RAR]: Data (t): 0.2392, 411.52/s/gpu Batch (t): 0.6221 LR: 0.000349 Step: 16200 Loss: 6.6096 Accuracy: 0.0811
355
+ [04/10 01:29:58 RAR]: Data (t): 0.2391, 429.59/s/gpu Batch (t): 0.5959 LR: 0.000349 Step: 16300 Loss: 6.6119 Accuracy: 0.0858
356
+ [04/10 01:31:45 RAR]: Data (t): 0.2396, 409.34/s/gpu Batch (t): 0.6254 LR: 0.000349 Step: 16400 Loss: 6.5017 Accuracy: 0.0898
357
+ [04/10 01:33:34 RAR]: Data (t): 0.2393, 169.31/s/gpu Batch (t): 1.5121 LR: 0.000349 Step: 16500 Loss: 6.5670 Accuracy: 0.0872
358
+ [04/10 01:35:23 RAR]: Data (t): 0.2408, 133.98/s/gpu Batch (t): 1.9107 LR: 0.000349 Step: 16600 Loss: 6.7087 Accuracy: 0.0767
359
+ [04/10 01:37:12 RAR]: Data (t): 0.2396, 177.02/s/gpu Batch (t): 1.4462 LR: 0.000349 Step: 16700 Loss: 6.7376 Accuracy: 0.0775
360
+ [04/10 01:39:03 RAR]: Data (t): 0.2397, 161.10/s/gpu Batch (t): 1.5891 LR: 0.000349 Step: 16800 Loss: 6.4960 Accuracy: 0.0907
361
+ [04/10 01:40:52 RAR]: Data (t): 0.2407, 158.58/s/gpu Batch (t): 1.6143 LR: 0.000349 Step: 16900 Loss: 6.5817 Accuracy: 0.0857
362
+ [04/10 01:42:41 RAR]: Data (t): 0.2397, 121.38/s/gpu Batch (t): 2.1091 LR: 0.000349 Step: 17000 Loss: 6.6149 Accuracy: 0.0819
363
+ [04/10 01:44:29 RAR]: Data (t): 0.9621, 194.36/s/gpu Batch (t): 1.3171 LR: 0.000349 Step: 17100 Loss: 6.6867 Accuracy: 0.0789
364
+ [04/10 01:46:16 RAR]: Data (t): 1.0876, 175.81/s/gpu Batch (t): 1.4561 LR: 0.000349 Step: 17200 Loss: 6.5490 Accuracy: 0.0879
365
+ [04/10 01:48:05 RAR]: Data (t): 0.2413, 183.53/s/gpu Batch (t): 1.3949 LR: 0.000349 Step: 17300 Loss: 6.5923 Accuracy: 0.0843
366
+ [04/10 01:49:52 RAR]: Data (t): 0.2391, 421.70/s/gpu Batch (t): 0.6071 LR: 0.000349 Step: 17400 Loss: 6.4851 Accuracy: 0.0912
367
+ [04/10 01:51:40 RAR]: Data (t): 0.2408, 425.68/s/gpu Batch (t): 0.6014 LR: 0.000349 Step: 17500 Loss: 6.5454 Accuracy: 0.0852
368
+ [04/10 01:53:31 RAR]: Data (t): 0.2390, 420.43/s/gpu Batch (t): 0.6089 LR: 0.000349 Step: 17600 Loss: 6.5020 Accuracy: 0.0941
369
+ [04/10 01:55:21 RAR]: Data (t): 0.2395, 428.88/s/gpu Batch (t): 0.5969 LR: 0.000349 Step: 17700 Loss: 6.5895 Accuracy: 0.0847
370
+ [04/10 01:57:09 RAR]: Data (t): 0.2410, 199.00/s/gpu Batch (t): 1.2864 LR: 0.000349 Step: 17800 Loss: 6.7317 Accuracy: 0.0743
371
+ [04/10 01:58:57 RAR]: Data (t): 0.2396, 147.60/s/gpu Batch (t): 1.7344 LR: 0.000349 Step: 17900 Loss: 6.5754 Accuracy: 0.0856
372
+ [04/10 02:00:46 RAR]: Data (t): 0.2394, 147.03/s/gpu Batch (t): 1.7412 LR: 0.000349 Step: 18000 Loss: 6.4988 Accuracy: 0.0895
373
+ [04/10 02:02:35 RAR]: Data (t): 0.2403, 191.16/s/gpu Batch (t): 1.3392 LR: 0.000349 Step: 18100 Loss: 6.6187 Accuracy: 0.0821
374
+ [04/10 02:04:23 RAR]: Data (t): 0.2391, 427.66/s/gpu Batch (t): 0.5986 LR: 0.000349 Step: 18200 Loss: 6.5419 Accuracy: 0.0882
375
+ [04/10 02:06:10 RAR]: Data (t): 0.2385, 416.14/s/gpu Batch (t): 0.6152 LR: 0.000349 Step: 18300 Loss: 6.6064 Accuracy: 0.0846
376
+ [04/10 02:07:59 RAR]: Data (t): 0.2396, 414.75/s/gpu Batch (t): 0.6172 LR: 0.000349 Step: 18400 Loss: 6.6029 Accuracy: 0.0831
377
+ [04/10 02:09:46 RAR]: Data (t): 0.2398, 247.09/s/gpu Batch (t): 1.0361 LR: 0.000349 Step: 18500 Loss: 6.6392 Accuracy: 0.0843
378
+ [04/10 02:11:35 RAR]: Data (t): 0.2399, 188.21/s/gpu Batch (t): 1.3602 LR: 0.000349 Step: 18600 Loss: 6.6179 Accuracy: 0.0836
379
+ [04/10 02:13:22 RAR]: Data (t): 0.2392, 171.94/s/gpu Batch (t): 1.4889 LR: 0.000349 Step: 18700 Loss: 6.5997 Accuracy: 0.0837
380
+ [04/10 02:15:14 RAR]: Data (t): 0.2400, 416.19/s/gpu Batch (t): 0.6151 LR: 0.000349 Step: 18800 Loss: 6.6147 Accuracy: 0.0840
381
+ [04/10 02:17:03 RAR]: Data (t): 0.2400, 168.28/s/gpu Batch (t): 1.5212 LR: 0.000349 Step: 18900 Loss: 6.5682 Accuracy: 0.0876
382
+ [04/10 02:18:52 RAR]: Data (t): 0.2395, 150.70/s/gpu Batch (t): 1.6987 LR: 0.000349 Step: 19000 Loss: 6.6053 Accuracy: 0.0839
383
+ [04/10 02:20:40 RAR]: Data (t): 0.2404, 426.11/s/gpu Batch (t): 0.6008 LR: 0.000349 Step: 19100 Loss: 6.4458 Accuracy: 0.0962
384
+ [04/10 02:22:29 RAR]: Data (t): 0.2394, 421.52/s/gpu Batch (t): 0.6073 LR: 0.000349 Step: 19200 Loss: 6.5699 Accuracy: 0.0858
385
+ [04/10 02:24:15 RAR]: Data (t): 0.2399, 384.89/s/gpu Batch (t): 0.6651 LR: 0.000349 Step: 19300 Loss: 6.5726 Accuracy: 0.0854
386
+ [04/10 02:26:03 RAR]: Data (t): 0.2399, 209.02/s/gpu Batch (t): 1.2248 LR: 0.000349 Step: 19400 Loss: 6.6238 Accuracy: 0.0832
387
+ [04/10 02:27:50 RAR]: Data (t): 0.2389, 186.81/s/gpu Batch (t): 1.3704 LR: 0.000349 Step: 19500 Loss: 6.4949 Accuracy: 0.0907
388
+ [04/10 02:29:34 RAR]: Data (t): 0.2398, 380.67/s/gpu Batch (t): 0.6725 LR: 0.000349 Step: 19600 Loss: 6.5179 Accuracy: 0.0889
389
+ [04/10 02:31:21 RAR]: Data (t): 0.2397, 169.09/s/gpu Batch (t): 1.5140 LR: 0.000349 Step: 19700 Loss: 6.3948 Accuracy: 0.0963
390
+ [04/10 02:33:08 RAR]: Data (t): 0.2408, 421.75/s/gpu Batch (t): 0.6070 LR: 0.000349 Step: 19800 Loss: 6.4526 Accuracy: 0.0949
391
+ [04/10 02:34:54 RAR]: Data (t): 0.2390, 256.20/s/gpu Batch (t): 0.9992 LR: 0.000349 Step: 19900 Loss: 6.5613 Accuracy: 0.0859
392
+ [04/10 02:36:40 RAR]: Data (t): 0.2390, 375.41/s/gpu Batch (t): 0.6819 LR: 0.000349 Step: 20000 Loss: 6.6044 Accuracy: 0.0838
393
+ [04/10 02:36:52 RAR]: Saved state to stage1/rar_ordertok/checkpoint-20000
394
+ [04/10 02:39:29 RAR]: Data (t): 1.0652, 179.34/s/gpu Batch (t): 1.4274 LR: 0.000349 Step: 20100 Loss: 6.5849 Accuracy: 0.0829
395
+ [04/10 02:41:15 RAR]: Data (t): 0.5811, 270.38/s/gpu Batch (t): 0.9468 LR: 0.000349 Step: 20200 Loss: 6.5742 Accuracy: 0.0850
396
+ [04/10 02:43:02 RAR]: Data (t): 1.2709, 157.50/s/gpu Batch (t): 1.6254 LR: 0.000349 Step: 20300 Loss: 6.5609 Accuracy: 0.0842
397
+ [04/10 02:44:51 RAR]: Data (t): 1.1979, 164.99/s/gpu Batch (t): 1.5516 LR: 0.000349 Step: 20400 Loss: 6.5873 Accuracy: 0.0844
398
+ [04/10 02:46:34 RAR]: Data (t): 0.8413, 214.24/s/gpu Batch (t): 1.1949 LR: 0.000349 Step: 20500 Loss: 6.5546 Accuracy: 0.0860
399
+ [04/10 02:48:17 RAR]: Data (t): 1.0061, 188.14/s/gpu Batch (t): 1.3607 LR: 0.000349 Step: 20600 Loss: 6.5828 Accuracy: 0.0854
400
+ [04/10 02:50:00 RAR]: Data (t): 0.2394, 423.04/s/gpu Batch (t): 0.6052 LR: 0.000349 Step: 20700 Loss: 6.4910 Accuracy: 0.0898
401
+ [04/10 02:51:44 RAR]: Data (t): 0.2393, 390.25/s/gpu Batch (t): 0.6560 LR: 0.000349 Step: 20800 Loss: 6.4989 Accuracy: 0.0894
402
+ [04/10 02:53:30 RAR]: Data (t): 0.2401, 131.39/s/gpu Batch (t): 1.9485 LR: 0.000349 Step: 20900 Loss: 6.5236 Accuracy: 0.0895
403
+ [04/10 02:55:14 RAR]: Data (t): 0.2407, 189.33/s/gpu Batch (t): 1.3522 LR: 0.000349 Step: 21000 Loss: 6.3643 Accuracy: 0.1031
404
+ [04/10 02:56:58 RAR]: Data (t): 0.2406, 420.13/s/gpu Batch (t): 0.6093 LR: 0.000349 Step: 21100 Loss: 6.5122 Accuracy: 0.0897
405
+ [04/10 02:58:42 RAR]: Data (t): 0.2396, 431.62/s/gpu Batch (t): 0.5931 LR: 0.000348 Step: 21200 Loss: 6.4352 Accuracy: 0.0966
406
+ [04/10 03:00:25 RAR]: Data (t): 0.2396, 414.23/s/gpu Batch (t): 0.6180 LR: 0.000348 Step: 21300 Loss: 6.5085 Accuracy: 0.0922
407
+ [04/10 03:02:04 RAR]: Data (t): 0.2395, 430.50/s/gpu Batch (t): 0.5947 LR: 0.000348 Step: 21400 Loss: 6.4463 Accuracy: 0.0934
408
+ [04/10 03:03:45 RAR]: Data (t): 0.2397, 201.67/s/gpu Batch (t): 1.2694 LR: 0.000348 Step: 21500 Loss: 6.3784 Accuracy: 0.0994
409
+ [04/10 03:05:25 RAR]: Data (t): 0.2389, 430.95/s/gpu Batch (t): 0.5940 LR: 0.000348 Step: 21600 Loss: 6.3921 Accuracy: 0.0987
410
+ [04/10 03:07:04 RAR]: Data (t): 0.2391, 303.00/s/gpu Batch (t): 0.8449 LR: 0.000348 Step: 21700 Loss: 6.5963 Accuracy: 0.0829
411
+ [04/10 03:08:43 RAR]: Data (t): 0.2410, 175.79/s/gpu Batch (t): 1.4563 LR: 0.000348 Step: 21800 Loss: 6.5262 Accuracy: 0.0858
412
+ [04/10 03:10:21 RAR]: Data (t): 0.2397, 430.78/s/gpu Batch (t): 0.5943 LR: 0.000348 Step: 21900 Loss: 6.5149 Accuracy: 0.0878
413
+ [04/10 03:12:01 RAR]: Data (t): 0.2402, 383.73/s/gpu Batch (t): 0.6671 LR: 0.000348 Step: 22000 Loss: 6.3911 Accuracy: 0.0972
414
+ [04/10 03:13:41 RAR]: Data (t): 0.2399, 202.75/s/gpu Batch (t): 1.2627 LR: 0.000348 Step: 22100 Loss: 6.4119 Accuracy: 0.0943
415
+ [04/10 03:15:20 RAR]: Data (t): 0.8012, 221.32/s/gpu Batch (t): 1.1567 LR: 0.000348 Step: 22200 Loss: 6.4710 Accuracy: 0.0916
416
+ [04/10 03:16:59 RAR]: Data (t): 0.2400, 198.01/s/gpu Batch (t): 1.2929 LR: 0.000348 Step: 22300 Loss: 6.4837 Accuracy: 0.0933
417
+ [04/10 03:18:37 RAR]: Data (t): 0.2388, 420.51/s/gpu Batch (t): 0.6088 LR: 0.000348 Step: 22400 Loss: 6.5020 Accuracy: 0.0906
418
+ [04/10 03:20:16 RAR]: Data (t): 0.2393, 230.54/s/gpu Batch (t): 1.1104 LR: 0.000348 Step: 22500 Loss: 6.3576 Accuracy: 0.0981
419
+ [04/10 03:22:00 RAR]: Data (t): 0.2394, 420.41/s/gpu Batch (t): 0.6089 LR: 0.000348 Step: 22600 Loss: 6.5808 Accuracy: 0.0832
420
+ [04/10 03:23:38 RAR]: Data (t): 0.2394, 416.70/s/gpu Batch (t): 0.6143 LR: 0.000348 Step: 22700 Loss: 6.3840 Accuracy: 0.1017
421
+ [04/10 03:25:19 RAR]: Data (t): 0.2391, 207.41/s/gpu Batch (t): 1.2343 LR: 0.000348 Step: 22800 Loss: 6.3529 Accuracy: 0.1032
422
+ [04/10 03:26:58 RAR]: Data (t): 0.2405, 371.53/s/gpu Batch (t): 0.6890 LR: 0.000348 Step: 22900 Loss: 6.4973 Accuracy: 0.0897
423
+ [04/10 03:28:38 RAR]: Data (t): 0.2388, 388.81/s/gpu Batch (t): 0.6584 LR: 0.000348 Step: 23000 Loss: 6.4693 Accuracy: 0.0928
424
+ [04/10 03:30:17 RAR]: Data (t): 0.2391, 281.99/s/gpu Batch (t): 0.9078 LR: 0.000348 Step: 23100 Loss: 6.4065 Accuracy: 0.0964
425
+ [04/10 03:31:56 RAR]: Data (t): 0.2396, 433.88/s/gpu Batch (t): 0.5900 LR: 0.000348 Step: 23200 Loss: 6.6215 Accuracy: 0.0843
426
+ [04/10 03:33:36 RAR]: Data (t): 0.2391, 192.85/s/gpu Batch (t): 1.3275 LR: 0.000348 Step: 23300 Loss: 6.3362 Accuracy: 0.1012
427
+ [04/10 03:35:15 RAR]: Data (t): 0.2399, 172.83/s/gpu Batch (t): 1.4812 LR: 0.000348 Step: 23400 Loss: 6.5375 Accuracy: 0.0835
428
+ [04/10 03:36:54 RAR]: Data (t): 0.2394, 155.82/s/gpu Batch (t): 1.6429 LR: 0.000348 Step: 23500 Loss: 6.3908 Accuracy: 0.0955
429
+ [04/10 03:38:36 RAR]: Data (t): 0.2390, 179.73/s/gpu Batch (t): 1.4244 LR: 0.000348 Step: 23600 Loss: 6.3857 Accuracy: 0.0986
430
+ [04/10 03:40:13 RAR]: Data (t): 0.3466, 193.09/s/gpu Batch (t): 1.3258 LR: 0.000348 Step: 23700 Loss: 6.3799 Accuracy: 0.0978
431
+ [04/10 03:41:55 RAR]: Data (t): 0.2391, 423.46/s/gpu Batch (t): 0.6045 LR: 0.000348 Step: 23800 Loss: 6.4039 Accuracy: 0.0951
432
+ [04/10 03:43:35 RAR]: Data (t): 0.9340, 197.22/s/gpu Batch (t): 1.2981 LR: 0.000348 Step: 23900 Loss: 6.3856 Accuracy: 0.0953
433
+ [04/10 03:45:12 RAR]: Data (t): 0.2392, 382.64/s/gpu Batch (t): 0.6690 LR: 0.000348 Step: 24000 Loss: 6.4392 Accuracy: 0.0956
434
+ [04/10 03:46:51 RAR]: Data (t): 1.0108, 187.90/s/gpu Batch (t): 1.3625 LR: 0.000348 Step: 24100 Loss: 6.4047 Accuracy: 0.0980
435
+ [04/10 03:48:28 RAR]: Data (t): 0.2390, 431.76/s/gpu Batch (t): 0.5929 LR: 0.000348 Step: 24200 Loss: 6.4889 Accuracy: 0.0900
436
+ [04/10 03:50:07 RAR]: Data (t): 0.2391, 432.86/s/gpu Batch (t): 0.5914 LR: 0.000348 Step: 24300 Loss: 6.4581 Accuracy: 0.0960
437
+ [04/10 03:51:45 RAR]: Data (t): 0.9895, 190.44/s/gpu Batch (t): 1.3443 LR: 0.000348 Step: 24400 Loss: 6.3953 Accuracy: 0.0983
438
+ [04/10 03:53:23 RAR]: Data (t): 0.2403, 193.92/s/gpu Batch (t): 1.3202 LR: 0.000348 Step: 24500 Loss: 6.4243 Accuracy: 0.0937
439
+ [04/10 03:54:59 RAR]: Data (t): 0.2394, 168.23/s/gpu Batch (t): 1.5218 LR: 0.000348 Step: 24600 Loss: 6.4958 Accuracy: 0.0914
440
+ [04/10 03:56:36 RAR]: Data (t): 0.2391, 168.03/s/gpu Batch (t): 1.5235 LR: 0.000348 Step: 24700 Loss: 6.4837 Accuracy: 0.0916
441
+ [04/10 03:58:11 RAR]: Data (t): 0.2396, 297.09/s/gpu Batch (t): 0.8617 LR: 0.000348 Step: 24800 Loss: 6.4312 Accuracy: 0.0914
442
+ [04/10 03:59:48 RAR]: Data (t): 0.2394, 430.26/s/gpu Batch (t): 0.5950 LR: 0.000348 Step: 24900 Loss: 6.2954 Accuracy: 0.1048
443
+ [04/10 04:01:23 RAR]: Data (t): 0.2402, 155.72/s/gpu Batch (t): 1.6439 LR: 0.000348 Step: 25000 Loss: 6.4204 Accuracy: 0.0961
444
+ [04/10 04:03:02 RAR]: Data (t): 0.2399, 431.31/s/gpu Batch (t): 0.5935 LR: 0.000348 Step: 25100 Loss: 6.2909 Accuracy: 0.1038
445
+ [04/10 04:04:37 RAR]: Data (t): 0.2389, 419.47/s/gpu Batch (t): 0.6103 LR: 0.000348 Step: 25200 Loss: 6.4417 Accuracy: 0.0943
446
+ [04/10 04:06:12 RAR]: Data (t): 0.2399, 433.81/s/gpu Batch (t): 0.5901 LR: 0.000348 Step: 25300 Loss: 6.4741 Accuracy: 0.0930
447
+ [04/10 04:07:47 RAR]: Data (t): 0.2402, 431.99/s/gpu Batch (t): 0.5926 LR: 0.000348 Step: 25400 Loss: 6.4183 Accuracy: 0.0948
448
+ [04/10 04:09:22 RAR]: Data (t): 0.2394, 433.69/s/gpu Batch (t): 0.5903 LR: 0.000348 Step: 25500 Loss: 6.3612 Accuracy: 0.1003
449
+ [04/10 04:10:58 RAR]: Data (t): 0.2389, 432.20/s/gpu Batch (t): 0.5923 LR: 0.000348 Step: 25600 Loss: 6.3757 Accuracy: 0.1009
450
+ [04/10 04:12:31 RAR]: Data (t): 0.2396, 245.79/s/gpu Batch (t): 1.0415 LR: 0.000348 Step: 25700 Loss: 6.3147 Accuracy: 0.1034
451
+ [04/10 04:14:06 RAR]: Data (t): 0.2391, 423.42/s/gpu Batch (t): 0.6046 LR: 0.000348 Step: 25800 Loss: 6.4155 Accuracy: 0.0947
452
+ [04/10 04:15:41 RAR]: Data (t): 0.2391, 229.03/s/gpu Batch (t): 1.1178 LR: 0.000348 Step: 25900 Loss: 6.3901 Accuracy: 0.0993
453
+ [04/10 04:17:16 RAR]: Data (t): 0.2397, 192.49/s/gpu Batch (t): 1.3300 LR: 0.000348 Step: 26000 Loss: 6.4845 Accuracy: 0.0894
454
+ [04/10 04:18:51 RAR]: Data (t): 0.2396, 193.65/s/gpu Batch (t): 1.3219 LR: 0.000348 Step: 26100 Loss: 6.3177 Accuracy: 0.1029
455
+ [04/10 04:20:25 RAR]: Data (t): 0.2392, 224.22/s/gpu Batch (t): 1.1417 LR: 0.000348 Step: 26200 Loss: 6.4239 Accuracy: 0.0916
456
+ [04/10 04:22:03 RAR]: Data (t): 0.2386, 435.26/s/gpu Batch (t): 0.5881 LR: 0.000348 Step: 26300 Loss: 6.0831 Accuracy: 0.1254
457
+ [04/10 04:23:38 RAR]: Data (t): 0.2393, 321.20/s/gpu Batch (t): 0.7970 LR: 0.000348 Step: 26400 Loss: 6.2721 Accuracy: 0.1077
458
+ [04/10 04:25:13 RAR]: Data (t): 0.2393, 210.24/s/gpu Batch (t): 1.2176 LR: 0.000348 Step: 26500 Loss: 6.4046 Accuracy: 0.0952
459
+ [04/10 04:26:48 RAR]: Data (t): 0.2393, 203.74/s/gpu Batch (t): 1.2565 LR: 0.000348 Step: 26600 Loss: 6.2342 Accuracy: 0.1111
460
+ [04/10 04:28:24 RAR]: Data (t): 0.2395, 195.60/s/gpu Batch (t): 1.3088 LR: 0.000348 Step: 26700 Loss: 6.3112 Accuracy: 0.1022
461
+ [04/10 04:29:58 RAR]: Data (t): 0.2394, 235.75/s/gpu Batch (t): 1.0859 LR: 0.000348 Step: 26800 Loss: 6.3952 Accuracy: 0.0942
462
+ [04/10 04:31:32 RAR]: Data (t): 0.2396, 232.84/s/gpu Batch (t): 1.0995 LR: 0.000348 Step: 26900 Loss: 6.3956 Accuracy: 0.0975
463
+ [04/10 04:33:06 RAR]: Data (t): 0.2394, 184.42/s/gpu Batch (t): 1.3881 LR: 0.000348 Step: 27000 Loss: 6.1997 Accuracy: 0.1134
464
+ [04/10 04:34:39 RAR]: Data (t): 0.2400, 377.91/s/gpu Batch (t): 0.6774 LR: 0.000348 Step: 27100 Loss: 6.2502 Accuracy: 0.1090
465
+ [04/10 04:36:11 RAR]: Data (t): 0.2388, 431.76/s/gpu Batch (t): 0.5929 LR: 0.000348 Step: 27200 Loss: 6.4226 Accuracy: 0.0958
466
+ [04/10 04:37:47 RAR]: Data (t): 0.2392, 409.02/s/gpu Batch (t): 0.6259 LR: 0.000348 Step: 27300 Loss: 6.3918 Accuracy: 0.0999
467
+ [04/10 04:39:20 RAR]: Data (t): 0.2389, 222.03/s/gpu Batch (t): 1.1530 LR: 0.000347 Step: 27400 Loss: 6.2948 Accuracy: 0.1064
468
+ [04/10 04:40:54 RAR]: Data (t): 0.2399, 225.29/s/gpu Batch (t): 1.1363 LR: 0.000347 Step: 27500 Loss: 6.3134 Accuracy: 0.1025
469
+ [04/10 04:42:33 RAR]: Data (t): 0.4456, 319.88/s/gpu Batch (t): 0.8003 LR: 0.000347 Step: 27600 Loss: 6.2982 Accuracy: 0.1044
470
+ [04/10 04:44:06 RAR]: Data (t): 0.2398, 430.40/s/gpu Batch (t): 0.5948 LR: 0.000347 Step: 27700 Loss: 6.1390 Accuracy: 0.1178
471
+ [04/10 04:45:40 RAR]: Data (t): 0.2391, 432.63/s/gpu Batch (t): 0.5917 LR: 0.000347 Step: 27800 Loss: 6.2865 Accuracy: 0.1046
472
+ [04/10 04:47:16 RAR]: Data (t): 0.2398, 434.08/s/gpu Batch (t): 0.5897 LR: 0.000347 Step: 27900 Loss: 6.3434 Accuracy: 0.1018
473
+ [04/10 04:48:49 RAR]: Data (t): 0.2391, 233.95/s/gpu Batch (t): 1.0942 LR: 0.000347 Step: 28000 Loss: 6.2466 Accuracy: 0.1057
474
+ [04/10 04:50:23 RAR]: Data (t): 0.2389, 430.24/s/gpu Batch (t): 0.5950 LR: 0.000347 Step: 28100 Loss: 6.3707 Accuracy: 0.0979
475
+ [04/10 04:51:56 RAR]: Data (t): 0.2401, 176.38/s/gpu Batch (t): 1.4514 LR: 0.000347 Step: 28200 Loss: 6.4386 Accuracy: 0.0948
476
+ [04/10 04:53:31 RAR]: Data (t): 0.2406, 186.75/s/gpu Batch (t): 1.3709 LR: 0.000347 Step: 28300 Loss: 6.3290 Accuracy: 0.1034
477
+ [04/10 04:55:04 RAR]: Data (t): 0.2397, 202.08/s/gpu Batch (t): 1.2668 LR: 0.000347 Step: 28400 Loss: 6.2734 Accuracy: 0.1043
478
+ [04/10 04:56:38 RAR]: Data (t): 0.2391, 201.77/s/gpu Batch (t): 1.2688 LR: 0.000347 Step: 28500 Loss: 6.3208 Accuracy: 0.1009
479
+ [04/10 04:58:12 RAR]: Data (t): 0.2403, 432.71/s/gpu Batch (t): 0.5916 LR: 0.000347 Step: 28600 Loss: 6.2674 Accuracy: 0.1092
480
+ [04/10 04:59:49 RAR]: Data (t): 0.2388, 428.33/s/gpu Batch (t): 0.5977 LR: 0.000347 Step: 28700 Loss: 6.3615 Accuracy: 0.1004
481
+ [04/10 05:01:29 RAR]: Data (t): 0.2392, 432.57/s/gpu Batch (t): 0.5918 LR: 0.000347 Step: 28800 Loss: 6.4387 Accuracy: 0.0912
482
+ [04/10 05:03:03 RAR]: Data (t): 0.2393, 436.00/s/gpu Batch (t): 0.5872 LR: 0.000347 Step: 28900 Loss: 6.2987 Accuracy: 0.1049
483
+ [04/10 05:04:38 RAR]: Data (t): 0.2402, 384.46/s/gpu Batch (t): 0.6659 LR: 0.000347 Step: 29000 Loss: 6.2174 Accuracy: 0.1106
484
+ [04/10 05:06:12 RAR]: Data (t): 0.2399, 428.78/s/gpu Batch (t): 0.5970 LR: 0.000347 Step: 29100 Loss: 6.2687 Accuracy: 0.1064
485
+ [04/10 05:07:46 RAR]: Data (t): 0.2396, 432.70/s/gpu Batch (t): 0.5916 LR: 0.000347 Step: 29200 Loss: 6.3839 Accuracy: 0.0930
486
+ [04/10 05:09:22 RAR]: Data (t): 0.2389, 430.11/s/gpu Batch (t): 0.5952 LR: 0.000347 Step: 29300 Loss: 6.3024 Accuracy: 0.1014
487
+ [04/10 05:10:58 RAR]: Data (t): 0.2398, 433.15/s/gpu Batch (t): 0.5910 LR: 0.000347 Step: 29400 Loss: 6.3020 Accuracy: 0.1014
488
+ [04/10 05:12:33 RAR]: Data (t): 0.2399, 430.18/s/gpu Batch (t): 0.5951 LR: 0.000347 Step: 29500 Loss: 6.1587 Accuracy: 0.1158
489
+ [04/10 05:14:09 RAR]: Data (t): 0.2410, 432.28/s/gpu Batch (t): 0.5922 LR: 0.000347 Step: 29600 Loss: 6.2737 Accuracy: 0.1042
490
+ [04/10 05:15:45 RAR]: Data (t): 0.2395, 427.18/s/gpu Batch (t): 0.5993 LR: 0.000347 Step: 29700 Loss: 6.3301 Accuracy: 0.0991
491
+ [04/10 05:17:20 RAR]: Data (t): 0.2402, 433.96/s/gpu Batch (t): 0.5899 LR: 0.000347 Step: 29800 Loss: 6.2807 Accuracy: 0.1044
492
+ [04/10 05:18:56 RAR]: Data (t): 0.2394, 427.65/s/gpu Batch (t): 0.5986 LR: 0.000347 Step: 29900 Loss: 6.3383 Accuracy: 0.0999
493
+ [04/10 05:20:30 RAR]: Data (t): 0.2391, 277.43/s/gpu Batch (t): 0.9227 LR: 0.000347 Step: 30000 Loss: 6.2117 Accuracy: 0.1111
494
+ [04/10 05:20:41 RAR]: Saved state to stage1/rar_ordertok/checkpoint-30000
495
+ [04/10 05:23:13 RAR]: Data (t): 0.2397, 432.11/s/gpu Batch (t): 0.5924 LR: 0.000347 Step: 30100 Loss: 6.2427 Accuracy: 0.1135
496
+ [04/10 05:24:45 RAR]: Data (t): 0.2390, 245.83/s/gpu Batch (t): 1.0414 LR: 0.000347 Step: 30200 Loss: 6.3080 Accuracy: 0.0999
497
+ [04/10 05:26:18 RAR]: Data (t): 0.2390, 187.59/s/gpu Batch (t): 1.3647 LR: 0.000347 Step: 30300 Loss: 6.2457 Accuracy: 0.1104
498
+ [04/10 05:27:53 RAR]: Data (t): 0.2392, 224.81/s/gpu Batch (t): 1.1388 LR: 0.000347 Step: 30400 Loss: 6.3108 Accuracy: 0.1006
499
+ [04/10 05:29:26 RAR]: Data (t): 0.2391, 218.73/s/gpu Batch (t): 1.1704 LR: 0.000347 Step: 30500 Loss: 6.3121 Accuracy: 0.1023
500
+ [04/10 05:30:58 RAR]: Data (t): 0.2393, 197.27/s/gpu Batch (t): 1.2977 LR: 0.000347 Step: 30600 Loss: 6.2756 Accuracy: 0.1054
501
+ [04/10 05:32:31 RAR]: Data (t): 0.2387, 430.98/s/gpu Batch (t): 0.5940 LR: 0.000347 Step: 30700 Loss: 6.1803 Accuracy: 0.1129
502
+ [04/10 05:34:04 RAR]: Data (t): 0.2393, 188.19/s/gpu Batch (t): 1.3603 LR: 0.000347 Step: 30800 Loss: 6.1768 Accuracy: 0.1129
503
+ [04/10 05:35:37 RAR]: Data (t): 0.2394, 150.37/s/gpu Batch (t): 1.7025 LR: 0.000347 Step: 30900 Loss: 6.2448 Accuracy: 0.1057
504
+ [04/10 05:37:09 RAR]: Data (t): 0.2392, 157.26/s/gpu Batch (t): 1.6279 LR: 0.000347 Step: 31000 Loss: 6.3125 Accuracy: 0.1004
505
+ [04/10 05:38:42 RAR]: Data (t): 0.2402, 246.05/s/gpu Batch (t): 1.0404 LR: 0.000347 Step: 31100 Loss: 6.3228 Accuracy: 0.1021
506
+ [04/10 05:40:15 RAR]: Data (t): 0.2393, 234.36/s/gpu Batch (t): 1.0924 LR: 0.000347 Step: 31200 Loss: 6.3593 Accuracy: 0.0972
507
+ [04/10 05:41:50 RAR]: Data (t): 0.2393, 435.08/s/gpu Batch (t): 0.5884 LR: 0.000347 Step: 31300 Loss: 6.2598 Accuracy: 0.1073
508
+ [04/10 05:43:22 RAR]: Data (t): 0.2393, 433.81/s/gpu Batch (t): 0.5901 LR: 0.000347 Step: 31400 Loss: 6.1868 Accuracy: 0.1132
509
+ [04/10 05:44:58 RAR]: Data (t): 0.2388, 432.51/s/gpu Batch (t): 0.5919 LR: 0.000347 Step: 31500 Loss: 6.2500 Accuracy: 0.1049
510
+ [04/10 05:46:31 RAR]: Data (t): 0.2390, 214.54/s/gpu Batch (t): 1.1933 LR: 0.000347 Step: 31600 Loss: 6.2585 Accuracy: 0.1045
511
+ [04/10 05:48:05 RAR]: Data (t): 0.2394, 171.94/s/gpu Batch (t): 1.4889 LR: 0.000347 Step: 31700 Loss: 6.1824 Accuracy: 0.1139
512
+ [04/10 05:49:38 RAR]: Data (t): 0.2388, 215.20/s/gpu Batch (t): 1.1896 LR: 0.000347 Step: 31800 Loss: 6.1794 Accuracy: 0.1127
513
+ [04/10 05:51:11 RAR]: Data (t): 0.2394, 429.99/s/gpu Batch (t): 0.5954 LR: 0.000347 Step: 31900 Loss: 6.1592 Accuracy: 0.1139
514
+ [04/10 05:52:44 RAR]: Data (t): 0.4398, 207.54/s/gpu Batch (t): 1.2335 LR: 0.000347 Step: 32000 Loss: 6.2938 Accuracy: 0.1022
515
+ [04/10 05:54:15 RAR]: Data (t): 0.7167, 239.67/s/gpu Batch (t): 1.0681 LR: 0.000347 Step: 32100 Loss: 6.3300 Accuracy: 0.0999
516
+ [04/10 05:55:47 RAR]: Data (t): 0.2390, 426.44/s/gpu Batch (t): 0.6003 LR: 0.000347 Step: 32200 Loss: 6.1539 Accuracy: 0.1172
517
+ [04/10 05:57:21 RAR]: Data (t): 0.2391, 430.56/s/gpu Batch (t): 0.5946 LR: 0.000347 Step: 32300 Loss: 6.2648 Accuracy: 0.1055
518
+ [04/10 05:58:53 RAR]: Data (t): 0.2387, 433.86/s/gpu Batch (t): 0.5901 LR: 0.000346 Step: 32400 Loss: 6.1414 Accuracy: 0.1183
519
+ [04/10 06:00:28 RAR]: Data (t): 0.2395, 401.17/s/gpu Batch (t): 0.6381 LR: 0.000346 Step: 32500 Loss: 6.1612 Accuracy: 0.1154
520
+ [04/10 06:02:04 RAR]: Data (t): 0.2386, 290.30/s/gpu Batch (t): 0.8818 LR: 0.000346 Step: 32600 Loss: 6.0809 Accuracy: 0.1242
521
+ [04/10 06:03:37 RAR]: Data (t): 0.8940, 205.37/s/gpu Batch (t): 1.2465 LR: 0.000346 Step: 32700 Loss: 6.1557 Accuracy: 0.1127
522
+ [04/10 06:05:10 RAR]: Data (t): 0.7105, 240.16/s/gpu Batch (t): 1.0660 LR: 0.000346 Step: 32800 Loss: 6.1568 Accuracy: 0.1145
523
+ [04/10 06:06:45 RAR]: Data (t): 0.2392, 211.53/s/gpu Batch (t): 1.2103 LR: 0.000346 Step: 32900 Loss: 6.1729 Accuracy: 0.1123
524
+ [04/10 06:08:18 RAR]: Data (t): 0.2386, 202.34/s/gpu Batch (t): 1.2652 LR: 0.000346 Step: 33000 Loss: 6.1506 Accuracy: 0.1167
525
+ [04/10 06:09:49 RAR]: Data (t): 0.2389, 357.24/s/gpu Batch (t): 0.7166 LR: 0.000346 Step: 33100 Loss: 6.1568 Accuracy: 0.1158
526
+ [04/10 06:11:21 RAR]: Data (t): 0.2389, 433.21/s/gpu Batch (t): 0.5909 LR: 0.000346 Step: 33200 Loss: 6.0311 Accuracy: 0.1257
527
+ [04/10 06:12:53 RAR]: Data (t): 0.2393, 426.14/s/gpu Batch (t): 0.6007 LR: 0.000346 Step: 33300 Loss: 6.2060 Accuracy: 0.1117
528
+ [04/10 06:14:25 RAR]: Data (t): 0.6561, 254.26/s/gpu Batch (t): 1.0068 LR: 0.000346 Step: 33400 Loss: 6.2625 Accuracy: 0.1055
529
+ [04/10 06:15:58 RAR]: Data (t): 1.0286, 185.57/s/gpu Batch (t): 1.3795 LR: 0.000346 Step: 33500 Loss: 6.1826 Accuracy: 0.1091
530
+ [04/10 06:17:29 RAR]: Data (t): 0.2389, 420.10/s/gpu Batch (t): 0.6094 LR: 0.000346 Step: 33600 Loss: 6.1407 Accuracy: 0.1139
531
+ [04/10 06:19:00 RAR]: Data (t): 0.4370, 324.79/s/gpu Batch (t): 0.7882 LR: 0.000346 Step: 33700 Loss: 6.1959 Accuracy: 0.1087
532
+ [04/10 06:20:38 RAR]: Data (t): 0.2397, 415.01/s/gpu Batch (t): 0.6169 LR: 0.000346 Step: 33800 Loss: 6.1552 Accuracy: 0.1106
533
+ [04/10 06:22:11 RAR]: Data (t): 0.2385, 416.00/s/gpu Batch (t): 0.6154 LR: 0.000346 Step: 33900 Loss: 6.1790 Accuracy: 0.1166
534
+ [04/10 06:23:42 RAR]: Data (t): 0.2391, 387.46/s/gpu Batch (t): 0.6607 LR: 0.000346 Step: 34000 Loss: 6.2160 Accuracy: 0.1099
535
+ [04/10 06:25:14 RAR]: Data (t): 0.2393, 432.88/s/gpu Batch (t): 0.5914 LR: 0.000346 Step: 34100 Loss: 6.2531 Accuracy: 0.1033
536
+ [04/10 06:26:47 RAR]: Data (t): 0.2396, 195.37/s/gpu Batch (t): 1.3103 LR: 0.000346 Step: 34200 Loss: 6.1899 Accuracy: 0.1127
537
+ [04/10 06:28:20 RAR]: Data (t): 0.8790, 207.77/s/gpu Batch (t): 1.2322 LR: 0.000346 Step: 34300 Loss: 6.1101 Accuracy: 0.1187
538
+ [04/10 06:29:50 RAR]: Data (t): 0.7144, 240.37/s/gpu Batch (t): 1.0650 LR: 0.000346 Step: 34400 Loss: 5.9886 Accuracy: 0.1258
539
+ [04/10 06:31:22 RAR]: Data (t): 0.2398, 433.75/s/gpu Batch (t): 0.5902 LR: 0.000346 Step: 34500 Loss: 6.1613 Accuracy: 0.1186
540
+ [04/10 06:32:53 RAR]: Data (t): 0.2400, 419.82/s/gpu Batch (t): 0.6098 LR: 0.000346 Step: 34600 Loss: 6.0493 Accuracy: 0.1261
541
+ [04/10 06:34:27 RAR]: Data (t): 0.2395, 430.96/s/gpu Batch (t): 0.5940 LR: 0.000346 Step: 34700 Loss: 6.1655 Accuracy: 0.1130
542
+ [04/10 06:35:59 RAR]: Data (t): 0.2390, 283.11/s/gpu Batch (t): 0.9042 LR: 0.000346 Step: 34800 Loss: 6.0727 Accuracy: 0.1245
543
+ [04/10 06:37:30 RAR]: Data (t): 0.2399, 373.06/s/gpu Batch (t): 0.6862 LR: 0.000346 Step: 34900 Loss: 6.2947 Accuracy: 0.1037
544
+ [04/10 06:39:03 RAR]: Data (t): 0.2389, 351.71/s/gpu Batch (t): 0.7279 LR: 0.000346 Step: 35000 Loss: 6.1764 Accuracy: 0.1139
545
+ [04/10 06:40:39 RAR]: Data (t): 0.2385, 428.28/s/gpu Batch (t): 0.5977 LR: 0.000346 Step: 35100 Loss: 6.0239 Accuracy: 0.1252
546
+ [04/10 06:42:12 RAR]: Data (t): 0.2392, 431.78/s/gpu Batch (t): 0.5929 LR: 0.000346 Step: 35200 Loss: 6.0476 Accuracy: 0.1244
547
+ [04/10 06:43:44 RAR]: Data (t): 0.2401, 212.12/s/gpu Batch (t): 1.2068 LR: 0.000346 Step: 35300 Loss: 6.1415 Accuracy: 0.1158
548
+ [04/10 06:45:15 RAR]: Data (t): 0.2398, 342.23/s/gpu Batch (t): 0.7480 LR: 0.000346 Step: 35400 Loss: 6.1288 Accuracy: 0.1173
549
+ [04/10 06:46:47 RAR]: Data (t): 0.2399, 435.82/s/gpu Batch (t): 0.5874 LR: 0.000346 Step: 35500 Loss: 6.3091 Accuracy: 0.1011
550
+ [04/10 06:48:20 RAR]: Data (t): 0.2394, 431.61/s/gpu Batch (t): 0.5931 LR: 0.000346 Step: 35600 Loss: 6.2807 Accuracy: 0.1050
551
+ [04/10 06:49:54 RAR]: Data (t): 0.5339, 289.59/s/gpu Batch (t): 0.8840 LR: 0.000346 Step: 35700 Loss: 6.1052 Accuracy: 0.1180
552
+ [04/10 06:51:24 RAR]: Data (t): 0.2391, 204.41/s/gpu Batch (t): 1.2524 LR: 0.000346 Step: 35800 Loss: 6.0060 Accuracy: 0.1265
553
+ [04/10 06:52:54 RAR]: Data (t): 0.2388, 433.91/s/gpu Batch (t): 0.5900 LR: 0.000346 Step: 35900 Loss: 6.0701 Accuracy: 0.1224
554
+ [04/10 06:54:25 RAR]: Data (t): 0.2385, 198.47/s/gpu Batch (t): 1.2898 LR: 0.000346 Step: 36000 Loss: 6.0461 Accuracy: 0.1244
555
+ [04/10 06:55:56 RAR]: Data (t): 0.2389, 220.65/s/gpu Batch (t): 1.1602 LR: 0.000346 Step: 36100 Loss: 6.1763 Accuracy: 0.1101
556
+ [04/10 06:57:26 RAR]: Data (t): 0.2386, 347.96/s/gpu Batch (t): 0.7357 LR: 0.000346 Step: 36200 Loss: 6.0757 Accuracy: 0.1165
557
+ [04/10 06:58:58 RAR]: Data (t): 0.2385, 434.96/s/gpu Batch (t): 0.5886 LR: 0.000346 Step: 36300 Loss: 6.1318 Accuracy: 0.1105
558
+ [04/10 07:00:28 RAR]: Data (t): 0.2396, 344.99/s/gpu Batch (t): 0.7421 LR: 0.000346 Step: 36400 Loss: 6.0949 Accuracy: 0.1195
559
+ [04/10 07:02:00 RAR]: Data (t): 0.2402, 192.35/s/gpu Batch (t): 1.3309 LR: 0.000346 Step: 36500 Loss: 6.0180 Accuracy: 0.1254
560
+ [04/10 07:03:31 RAR]: Data (t): 0.2390, 222.68/s/gpu Batch (t): 1.1496 LR: 0.000346 Step: 36600 Loss: 6.2120 Accuracy: 0.1084
561
+ [04/10 07:05:01 RAR]: Data (t): 0.2401, 226.41/s/gpu Batch (t): 1.1307 LR: 0.000346 Step: 36700 Loss: 6.1956 Accuracy: 0.1099
562
+ [04/10 07:06:31 RAR]: Data (t): 0.2397, 218.57/s/gpu Batch (t): 1.1712 LR: 0.000345 Step: 36800 Loss: 6.1731 Accuracy: 0.1103
563
+ [04/10 07:08:02 RAR]: Data (t): 0.2398, 243.34/s/gpu Batch (t): 1.0520 LR: 0.000345 Step: 36900 Loss: 5.9917 Accuracy: 0.1280
564
+ [04/10 07:09:34 RAR]: Data (t): 0.2388, 390.31/s/gpu Batch (t): 0.6559 LR: 0.000345 Step: 37000 Loss: 6.1735 Accuracy: 0.1120
565
+ [04/10 07:11:06 RAR]: Data (t): 0.2389, 230.35/s/gpu Batch (t): 1.1114 LR: 0.000345 Step: 37100 Loss: 6.0965 Accuracy: 0.1176
566
+ [04/10 07:12:36 RAR]: Data (t): 0.2395, 231.65/s/gpu Batch (t): 1.1051 LR: 0.000345 Step: 37200 Loss: 6.2205 Accuracy: 0.1089
567
+ [04/10 07:14:07 RAR]: Data (t): 0.2401, 212.12/s/gpu Batch (t): 1.2069 LR: 0.000345 Step: 37300 Loss: 5.9524 Accuracy: 0.1314
568
+ [04/10 07:15:39 RAR]: Data (t): 0.2392, 432.92/s/gpu Batch (t): 0.5913 LR: 0.000345 Step: 37400 Loss: 6.1586 Accuracy: 0.1133
569
+ [04/10 07:17:08 RAR]: Data (t): 0.2400, 435.92/s/gpu Batch (t): 0.5873 LR: 0.000345 Step: 37500 Loss: 6.0725 Accuracy: 0.1197
570
+ [04/10 07:18:41 RAR]: Data (t): 0.3492, 367.62/s/gpu Batch (t): 0.6964 LR: 0.000345 Step: 37600 Loss: 6.1542 Accuracy: 0.1121
571
+ [04/10 07:20:11 RAR]: Data (t): 0.2397, 375.00/s/gpu Batch (t): 0.6827 LR: 0.000345 Step: 37700 Loss: 6.0379 Accuracy: 0.1197
572
+ [04/10 07:21:41 RAR]: Data (t): 0.2398, 241.26/s/gpu Batch (t): 1.0611 LR: 0.000345 Step: 37800 Loss: 6.0550 Accuracy: 0.1184
573
+ [04/10 07:23:13 RAR]: Data (t): 0.2394, 188.91/s/gpu Batch (t): 1.3551 LR: 0.000345 Step: 37900 Loss: 6.2600 Accuracy: 0.1046
574
+ [04/10 07:24:44 RAR]: Data (t): 0.2396, 390.34/s/gpu Batch (t): 0.6558 LR: 0.000345 Step: 38000 Loss: 6.0059 Accuracy: 0.1243
575
+ [04/10 07:26:14 RAR]: Data (t): 0.2395, 434.67/s/gpu Batch (t): 0.5889 LR: 0.000345 Step: 38100 Loss: 6.1433 Accuracy: 0.1150
576
+ [04/10 07:27:44 RAR]: Data (t): 0.2387, 434.68/s/gpu Batch (t): 0.5889 LR: 0.000345 Step: 38200 Loss: 6.1557 Accuracy: 0.1128
577
+ [04/10 07:29:13 RAR]: Data (t): 0.7342, 236.11/s/gpu Batch (t): 1.0842 LR: 0.000345 Step: 38300 Loss: 6.1228 Accuracy: 0.1147
578
+ [04/10 07:30:45 RAR]: Data (t): 0.2399, 223.28/s/gpu Batch (t): 1.1466 LR: 0.000345 Step: 38400 Loss: 6.1425 Accuracy: 0.1137
579
+ [04/10 07:32:15 RAR]: Data (t): 0.2401, 210.32/s/gpu Batch (t): 1.2172 LR: 0.000345 Step: 38500 Loss: 6.1743 Accuracy: 0.1152
580
+ [04/10 07:33:46 RAR]: Data (t): 0.2396, 217.03/s/gpu Batch (t): 1.1796 LR: 0.000345 Step: 38600 Loss: 6.1220 Accuracy: 0.1154
581
+ [04/10 07:35:18 RAR]: Data (t): 0.2399, 208.26/s/gpu Batch (t): 1.2292 LR: 0.000345 Step: 38700 Loss: 5.9211 Accuracy: 0.1345
582
+ [04/10 07:36:50 RAR]: Data (t): 0.2395, 435.56/s/gpu Batch (t): 0.5877 LR: 0.000345 Step: 38800 Loss: 5.8699 Accuracy: 0.1347
583
+ [04/10 07:38:22 RAR]: Data (t): 0.2398, 436.27/s/gpu Batch (t): 0.5868 LR: 0.000345 Step: 38900 Loss: 6.1489 Accuracy: 0.1147
584
+ [04/10 07:39:52 RAR]: Data (t): 0.2403, 390.74/s/gpu Batch (t): 0.6552 LR: 0.000345 Step: 39000 Loss: 6.1481 Accuracy: 0.1129
585
+ [04/10 07:41:22 RAR]: Data (t): 0.4844, 221.57/s/gpu Batch (t): 1.1554 LR: 0.000345 Step: 39100 Loss: 6.1035 Accuracy: 0.1166
586
+ [04/10 07:42:51 RAR]: Data (t): 0.7720, 228.85/s/gpu Batch (t): 1.1186 LR: 0.000345 Step: 39200 Loss: 6.1624 Accuracy: 0.1128
587
+ [04/10 07:44:22 RAR]: Data (t): 0.2395, 435.33/s/gpu Batch (t): 0.5881 LR: 0.000345 Step: 39300 Loss: 6.0321 Accuracy: 0.1254
588
+ [04/10 07:45:53 RAR]: Data (t): 0.2392, 413.85/s/gpu Batch (t): 0.6186 LR: 0.000345 Step: 39400 Loss: 6.0146 Accuracy: 0.1262
589
+ [04/10 07:47:23 RAR]: Data (t): 0.2394, 433.14/s/gpu Batch (t): 0.5910 LR: 0.000345 Step: 39500 Loss: 6.0300 Accuracy: 0.1214
590
+ [04/10 07:48:55 RAR]: Data (t): 0.2400, 262.61/s/gpu Batch (t): 0.9748 LR: 0.000345 Step: 39600 Loss: 5.9418 Accuracy: 0.1360
591
+ [04/10 07:50:25 RAR]: Data (t): 0.2401, 434.90/s/gpu Batch (t): 0.5886 LR: 0.000345 Step: 39700 Loss: 6.0610 Accuracy: 0.1289
592
+ [04/10 07:51:56 RAR]: Data (t): 0.2400, 436.60/s/gpu Batch (t): 0.5864 LR: 0.000345 Step: 39800 Loss: 6.1456 Accuracy: 0.1148
593
+ [04/10 07:53:27 RAR]: Data (t): 0.2399, 432.44/s/gpu Batch (t): 0.5920 LR: 0.000345 Step: 39900 Loss: 6.0482 Accuracy: 0.1224
594
+ [04/10 07:54:56 RAR]: Data (t): 0.3013, 358.18/s/gpu Batch (t): 0.7147 LR: 0.000345 Step: 40000 Loss: 6.1175 Accuracy: 0.1166
595
+ [04/10 07:55:07 RAR]: Saved state to stage1/rar_ordertok/checkpoint-40000
596
+ [04/10 07:57:28 RAR]: Data (t): 0.2398, 435.15/s/gpu Batch (t): 0.5883 LR: 0.000345 Step: 40100 Loss: 6.1875 Accuracy: 0.1080
597
+ [04/10 07:58:59 RAR]: Data (t): 0.2401, 432.99/s/gpu Batch (t): 0.5912 LR: 0.000345 Step: 40200 Loss: 6.0055 Accuracy: 0.1248
598
+ [04/10 08:00:32 RAR]: Data (t): 0.2401, 432.47/s/gpu Batch (t): 0.5920 LR: 0.000345 Step: 40300 Loss: 6.0142 Accuracy: 0.1230
599
+ [04/10 08:02:02 RAR]: Data (t): 0.2396, 433.52/s/gpu Batch (t): 0.5905 LR: 0.000345 Step: 40400 Loss: 5.9038 Accuracy: 0.1375
600
+ [04/10 08:03:32 RAR]: Data (t): 0.5165, 214.92/s/gpu Batch (t): 1.1911 LR: 0.000345 Step: 40500 Loss: 5.9828 Accuracy: 0.1240
601
+ [04/10 08:05:03 RAR]: Data (t): 0.2407, 205.08/s/gpu Batch (t): 1.2483 LR: 0.000344 Step: 40600 Loss: 5.9923 Accuracy: 0.1271
602
+ [04/10 08:06:34 RAR]: Data (t): 0.2406, 197.20/s/gpu Batch (t): 1.2982 LR: 0.000344 Step: 40700 Loss: 6.0005 Accuracy: 0.1251
603
+ [04/10 08:08:04 RAR]: Data (t): 0.2397, 433.37/s/gpu Batch (t): 0.5907 LR: 0.000344 Step: 40800 Loss: 6.1249 Accuracy: 0.1168
604
+ [04/10 08:09:33 RAR]: Data (t): 0.2399, 433.65/s/gpu Batch (t): 0.5903 LR: 0.000344 Step: 40900 Loss: 6.0299 Accuracy: 0.1208
605
+ [04/10 08:11:04 RAR]: Data (t): 0.2401, 198.55/s/gpu Batch (t): 1.2893 LR: 0.000344 Step: 41000 Loss: 6.0511 Accuracy: 0.1200
606
+ [04/10 08:12:33 RAR]: Data (t): 0.2402, 315.03/s/gpu Batch (t): 0.8126 LR: 0.000344 Step: 41100 Loss: 5.9174 Accuracy: 0.1364
607
+ [04/10 08:14:04 RAR]: Data (t): 0.2400, 234.07/s/gpu Batch (t): 1.0937 LR: 0.000344 Step: 41200 Loss: 6.1024 Accuracy: 0.1172
608
+ [04/10 08:15:33 RAR]: Data (t): 0.2406, 436.97/s/gpu Batch (t): 0.5859 LR: 0.000344 Step: 41300 Loss: 6.0993 Accuracy: 0.1166
609
+ [04/10 08:17:06 RAR]: Data (t): 0.2394, 436.45/s/gpu Batch (t): 0.5865 LR: 0.000344 Step: 41400 Loss: 6.0978 Accuracy: 0.1152
610
+ [04/10 08:18:36 RAR]: Data (t): 0.2401, 435.39/s/gpu Batch (t): 0.5880 LR: 0.000344 Step: 41500 Loss: 6.0822 Accuracy: 0.1199
611
+ [04/10 08:20:06 RAR]: Data (t): 0.3769, 307.19/s/gpu Batch (t): 0.8334 LR: 0.000344 Step: 41600 Loss: 6.0307 Accuracy: 0.1253
612
+ [04/10 08:21:36 RAR]: Data (t): 0.2402, 435.02/s/gpu Batch (t): 0.5885 LR: 0.000344 Step: 41700 Loss: 5.9433 Accuracy: 0.1311
613
+ [04/10 08:23:06 RAR]: Data (t): 0.2398, 435.05/s/gpu Batch (t): 0.5884 LR: 0.000344 Step: 41800 Loss: 6.0109 Accuracy: 0.1250
614
+ [04/10 08:24:36 RAR]: Data (t): 0.4211, 333.20/s/gpu Batch (t): 0.7683 LR: 0.000344 Step: 41900 Loss: 5.9330 Accuracy: 0.1325
615
+ [04/10 08:26:07 RAR]: Data (t): 0.2399, 389.16/s/gpu Batch (t): 0.6578 LR: 0.000344 Step: 42000 Loss: 6.1042 Accuracy: 0.1194
616
+ [04/10 08:27:39 RAR]: Data (t): 0.2401, 435.72/s/gpu Batch (t): 0.5875 LR: 0.000344 Step: 42100 Loss: 5.9540 Accuracy: 0.1280
617
+ [04/10 08:29:11 RAR]: Data (t): 0.2400, 435.33/s/gpu Batch (t): 0.5881 LR: 0.000344 Step: 42200 Loss: 5.9822 Accuracy: 0.1261
618
+ [04/10 08:30:42 RAR]: Data (t): 0.2402, 435.19/s/gpu Batch (t): 0.5882 LR: 0.000344 Step: 42300 Loss: 6.0036 Accuracy: 0.1250
619
+ [04/10 08:32:13 RAR]: Data (t): 0.2403, 435.28/s/gpu Batch (t): 0.5881 LR: 0.000344 Step: 42400 Loss: 6.0794 Accuracy: 0.1189
620
+ [04/10 08:33:45 RAR]: Data (t): 0.2398, 434.29/s/gpu Batch (t): 0.5895 LR: 0.000344 Step: 42500 Loss: 6.0515 Accuracy: 0.1230
621
+ [04/10 08:35:18 RAR]: Data (t): 0.4139, 266.76/s/gpu Batch (t): 0.9597 LR: 0.000344 Step: 42600 Loss: 5.9696 Accuracy: 0.1287
622
+ [04/10 08:36:48 RAR]: Data (t): 0.2400, 328.69/s/gpu Batch (t): 0.7789 LR: 0.000344 Step: 42700 Loss: 6.0245 Accuracy: 0.1205
623
+ [04/10 08:38:18 RAR]: Data (t): 0.2398, 194.71/s/gpu Batch (t): 1.3148 LR: 0.000344 Step: 42800 Loss: 6.0442 Accuracy: 0.1268
624
+ [04/10 08:39:50 RAR]: Data (t): 0.2394, 435.28/s/gpu Batch (t): 0.5881 LR: 0.000344 Step: 42900 Loss: 5.8853 Accuracy: 0.1357
625
+ [04/10 08:41:23 RAR]: Data (t): 0.2399, 387.87/s/gpu Batch (t): 0.6600 LR: 0.000344 Step: 43000 Loss: 6.0098 Accuracy: 0.1247
626
+ [04/10 08:42:52 RAR]: Data (t): 0.2399, 435.45/s/gpu Batch (t): 0.5879 LR: 0.000344 Step: 43100 Loss: 5.8849 Accuracy: 0.1350
627
+ [04/10 08:44:23 RAR]: Data (t): 0.2405, 216.69/s/gpu Batch (t): 1.1814 LR: 0.000344 Step: 43200 Loss: 5.9460 Accuracy: 0.1285
628
+ [04/10 08:45:52 RAR]: Data (t): 0.2400, 186.82/s/gpu Batch (t): 1.3703 LR: 0.000344 Step: 43300 Loss: 5.9736 Accuracy: 0.1276
629
+ [04/10 08:47:22 RAR]: Data (t): 0.2887, 321.74/s/gpu Batch (t): 0.7957 LR: 0.000344 Step: 43400 Loss: 5.9291 Accuracy: 0.1319
630
+ [04/10 08:48:55 RAR]: Data (t): 0.2406, 197.81/s/gpu Batch (t): 1.2942 LR: 0.000344 Step: 43500 Loss: 6.1117 Accuracy: 0.1156
631
+ [04/10 08:50:25 RAR]: Data (t): 0.2396, 213.03/s/gpu Batch (t): 1.2017 LR: 0.000344 Step: 43600 Loss: 5.9462 Accuracy: 0.1273
632
+ [04/10 08:51:55 RAR]: Data (t): 0.2404, 213.45/s/gpu Batch (t): 1.1994 LR: 0.000344 Step: 43700 Loss: 5.9926 Accuracy: 0.1238
633
+ [04/10 08:53:25 RAR]: Data (t): 0.2401, 215.64/s/gpu Batch (t): 1.1871 LR: 0.000344 Step: 43800 Loss: 6.0041 Accuracy: 0.1258
634
+ [04/10 08:55:01 RAR]: Data (t): 0.2395, 434.20/s/gpu Batch (t): 0.5896 LR: 0.000344 Step: 43900 Loss: 6.0272 Accuracy: 0.1219
635
+ [04/10 08:56:32 RAR]: Data (t): 0.2395, 176.17/s/gpu Batch (t): 1.4531 LR: 0.000344 Step: 44000 Loss: 6.0735 Accuracy: 0.1185
636
+ [04/10 08:58:03 RAR]: Data (t): 0.2399, 235.68/s/gpu Batch (t): 1.0862 LR: 0.000344 Step: 44100 Loss: 5.8423 Accuracy: 0.1405
637
+ [04/10 08:59:33 RAR]: Data (t): 0.2862, 199.64/s/gpu Batch (t): 1.2823 LR: 0.000343 Step: 44200 Loss: 5.8970 Accuracy: 0.1391
638
+ [04/10 09:01:04 RAR]: Data (t): 0.2402, 246.19/s/gpu Batch (t): 1.0399 LR: 0.000343 Step: 44300 Loss: 5.8752 Accuracy: 0.1383
639
+ [04/10 09:02:36 RAR]: Data (t): 0.2398, 213.62/s/gpu Batch (t): 1.1984 LR: 0.000343 Step: 44400 Loss: 5.9243 Accuracy: 0.1299
640
+ [04/10 09:04:07 RAR]: Data (t): 0.2401, 429.71/s/gpu Batch (t): 0.5958 LR: 0.000343 Step: 44500 Loss: 5.8496 Accuracy: 0.1352
641
+ [04/10 09:05:37 RAR]: Data (t): 0.2394, 191.06/s/gpu Batch (t): 1.3399 LR: 0.000343 Step: 44600 Loss: 5.9806 Accuracy: 0.1262
642
+ [04/10 09:07:08 RAR]: Data (t): 0.5627, 220.15/s/gpu Batch (t): 1.1628 LR: 0.000343 Step: 44700 Loss: 6.0796 Accuracy: 0.1194
643
+ [04/10 09:08:40 RAR]: Data (t): 0.2405, 229.24/s/gpu Batch (t): 1.1167 LR: 0.000343 Step: 44800 Loss: 5.9253 Accuracy: 0.1308
644
+ [04/10 09:10:11 RAR]: Data (t): 0.2388, 432.40/s/gpu Batch (t): 0.5920 LR: 0.000343 Step: 44900 Loss: 5.9889 Accuracy: 0.1232
645
+ [04/10 09:11:41 RAR]: Data (t): 0.2392, 387.14/s/gpu Batch (t): 0.6613 LR: 0.000343 Step: 45000 Loss: 5.9109 Accuracy: 0.1295
646
+ [04/10 09:13:17 RAR]: Data (t): 0.2389, 432.84/s/gpu Batch (t): 0.5914 LR: 0.000343 Step: 45100 Loss: 6.0543 Accuracy: 0.1190
647
+ [04/10 09:14:50 RAR]: Data (t): 0.2405, 433.19/s/gpu Batch (t): 0.5910 LR: 0.000343 Step: 45200 Loss: 5.7996 Accuracy: 0.1444
648
+ [04/10 09:16:21 RAR]: Data (t): 0.4851, 307.76/s/gpu Batch (t): 0.8318 LR: 0.000343 Step: 45300 Loss: 5.9371 Accuracy: 0.1306
649
+ [04/10 09:17:52 RAR]: Data (t): 0.2395, 433.19/s/gpu Batch (t): 0.5910 LR: 0.000343 Step: 45400 Loss: 6.0728 Accuracy: 0.1202
650
+ [04/10 09:19:24 RAR]: Data (t): 0.2396, 433.30/s/gpu Batch (t): 0.5908 LR: 0.000343 Step: 45500 Loss: 5.8088 Accuracy: 0.1423
651
+ [04/10 09:20:55 RAR]: Data (t): 0.2400, 436.53/s/gpu Batch (t): 0.5864 LR: 0.000343 Step: 45600 Loss: 5.9006 Accuracy: 0.1368
652
+ [04/10 09:22:28 RAR]: Data (t): 0.2400, 206.81/s/gpu Batch (t): 1.2378 LR: 0.000343 Step: 45700 Loss: 6.0159 Accuracy: 0.1223
653
+ [04/10 09:23:58 RAR]: Data (t): 0.2401, 433.69/s/gpu Batch (t): 0.5903 LR: 0.000343 Step: 45800 Loss: 6.0333 Accuracy: 0.1198
654
+ [04/10 09:25:28 RAR]: Data (t): 0.2400, 434.21/s/gpu Batch (t): 0.5896 LR: 0.000343 Step: 45900 Loss: 5.9777 Accuracy: 0.1257
655
+ [04/10 09:27:00 RAR]: Data (t): 0.2401, 390.44/s/gpu Batch (t): 0.6557 LR: 0.000343 Step: 46000 Loss: 5.9376 Accuracy: 0.1304
656
+ [04/10 09:28:30 RAR]: Data (t): 0.2396, 433.84/s/gpu Batch (t): 0.5901 LR: 0.000343 Step: 46100 Loss: 5.8924 Accuracy: 0.1379
657
+ [04/10 09:30:02 RAR]: Data (t): 0.2390, 426.37/s/gpu Batch (t): 0.6004 LR: 0.000343 Step: 46200 Loss: 5.8541 Accuracy: 0.1404
658
+ [04/10 09:31:33 RAR]: Data (t): 0.2400, 192.42/s/gpu Batch (t): 1.3304 LR: 0.000343 Step: 46300 Loss: 5.9814 Accuracy: 0.1227
659
+ [04/10 09:33:07 RAR]: Data (t): 0.2403, 433.57/s/gpu Batch (t): 0.5904 LR: 0.000343 Step: 46400 Loss: 6.0232 Accuracy: 0.1216
660
+ [04/10 09:34:38 RAR]: Data (t): 0.2400, 431.37/s/gpu Batch (t): 0.5935 LR: 0.000343 Step: 46500 Loss: 5.9402 Accuracy: 0.1289
661
+ [04/10 09:36:11 RAR]: Data (t): 0.2397, 194.83/s/gpu Batch (t): 1.3140 LR: 0.000343 Step: 46600 Loss: 6.0304 Accuracy: 0.1195
662
+ [04/10 09:37:42 RAR]: Data (t): 0.2398, 222.66/s/gpu Batch (t): 1.1498 LR: 0.000343 Step: 46700 Loss: 5.9356 Accuracy: 0.1252
663
+ [04/10 09:39:11 RAR]: Data (t): 0.4249, 329.89/s/gpu Batch (t): 0.7760 LR: 0.000343 Step: 46800 Loss: 5.8512 Accuracy: 0.1342
664
+ [04/10 09:40:42 RAR]: Data (t): 0.2386, 432.45/s/gpu Batch (t): 0.5920 LR: 0.000343 Step: 46900 Loss: 6.0111 Accuracy: 0.1239
665
+ [04/10 09:42:15 RAR]: Data (t): 0.2394, 387.13/s/gpu Batch (t): 0.6613 LR: 0.000343 Step: 47000 Loss: 5.9578 Accuracy: 0.1294
666
+ [04/10 09:43:47 RAR]: Data (t): 0.2391, 431.82/s/gpu Batch (t): 0.5928 LR: 0.000343 Step: 47100 Loss: 6.0064 Accuracy: 0.1256
667
+ [04/10 09:45:19 RAR]: Data (t): 0.2389, 431.31/s/gpu Batch (t): 0.5935 LR: 0.000343 Step: 47200 Loss: 6.0723 Accuracy: 0.1202
668
+ [04/10 09:46:51 RAR]: Data (t): 0.2392, 433.87/s/gpu Batch (t): 0.5900 LR: 0.000343 Step: 47300 Loss: 5.9527 Accuracy: 0.1261
669
+ [04/10 09:48:22 RAR]: Data (t): 0.2400, 433.10/s/gpu Batch (t): 0.5911 LR: 0.000343 Step: 47400 Loss: 5.9705 Accuracy: 0.1325
670
+ [04/10 09:49:55 RAR]: Data (t): 0.2389, 431.50/s/gpu Batch (t): 0.5933 LR: 0.000342 Step: 47500 Loss: 5.8672 Accuracy: 0.1360
671
+ [04/10 09:51:30 RAR]: Data (t): 0.2392, 420.75/s/gpu Batch (t): 0.6084 LR: 0.000342 Step: 47600 Loss: 5.6793 Accuracy: 0.1526
672
+ [04/10 09:53:00 RAR]: Data (t): 0.2393, 431.98/s/gpu Batch (t): 0.5926 LR: 0.000342 Step: 47700 Loss: 5.8475 Accuracy: 0.1334
673
+ [04/10 09:54:32 RAR]: Data (t): 0.2391, 432.16/s/gpu Batch (t): 0.5924 LR: 0.000342 Step: 47800 Loss: 5.9315 Accuracy: 0.1320
674
+ [04/10 09:56:04 RAR]: Data (t): 0.2390, 432.23/s/gpu Batch (t): 0.5923 LR: 0.000342 Step: 47900 Loss: 5.8249 Accuracy: 0.1343
675
+ [04/10 09:57:36 RAR]: Data (t): 0.2396, 386.32/s/gpu Batch (t): 0.6627 LR: 0.000342 Step: 48000 Loss: 5.9378 Accuracy: 0.1308
676
+ [04/10 09:59:07 RAR]: Data (t): 0.2386, 432.38/s/gpu Batch (t): 0.5921 LR: 0.000342 Step: 48100 Loss: 5.9428 Accuracy: 0.1322
677
+ [04/10 10:00:38 RAR]: Data (t): 0.2399, 432.41/s/gpu Batch (t): 0.5920 LR: 0.000342 Step: 48200 Loss: 5.9292 Accuracy: 0.1306
678
+ [04/10 10:02:10 RAR]: Data (t): 0.2396, 430.78/s/gpu Batch (t): 0.5943 LR: 0.000342 Step: 48300 Loss: 5.8585 Accuracy: 0.1376
679
+ [04/10 10:03:42 RAR]: Data (t): 0.2398, 433.35/s/gpu Batch (t): 0.5907 LR: 0.000342 Step: 48400 Loss: 5.9468 Accuracy: 0.1309
680
+ [04/10 10:05:14 RAR]: Data (t): 0.2385, 434.61/s/gpu Batch (t): 0.5890 LR: 0.000342 Step: 48500 Loss: 5.9377 Accuracy: 0.1274
681
+ [04/10 10:06:46 RAR]: Data (t): 0.2393, 433.88/s/gpu Batch (t): 0.5900 LR: 0.000342 Step: 48600 Loss: 5.9654 Accuracy: 0.1251
682
+ [04/10 10:08:18 RAR]: Data (t): 0.2399, 432.14/s/gpu Batch (t): 0.5924 LR: 0.000342 Step: 48700 Loss: 5.9275 Accuracy: 0.1265
stage1/rar_ordertok/log1.txt ADDED
File without changes
stage1/rar_ordertok/log2.txt ADDED
File without changes
stage1/rar_ordertok/log3.txt ADDED
File without changes