zhoupeng commited on
Commit ·
15671e7
1
Parent(s): 38a3f50
Add 100000_titok_gen.bin
Browse files- stage1/rar_baseline/checkpoint-20000/ema_model/pytorch_model.bin +3 -0
- stage1/rar_baseline/checkpoint-20000/metadata.json +1 -0
- stage1/rar_baseline/checkpoint-20000/optimizer.bin +3 -0
- stage1/rar_baseline/checkpoint-20000/pytorch_model.bin +3 -0
- stage1/rar_baseline/checkpoint-20000/random_states_0.pkl +3 -0
- stage1/rar_baseline/checkpoint-20000/random_states_1.pkl +3 -0
- stage1/rar_baseline/checkpoint-20000/random_states_2.pkl +3 -0
- stage1/rar_baseline/checkpoint-20000/random_states_3.pkl +3 -0
- stage1/rar_baseline/checkpoint-20000/scheduler.bin +3 -0
- stage1/rar_baseline/checkpoint-20000/unwrapped_model/pytorch_model.bin +3 -0
- stage1/rar_baseline/config.yaml +80 -0
- stage1/rar_baseline/log0.txt +389 -0
- stage1/rar_baseline/log1.txt +0 -0
- stage1/rar_baseline/log2.txt +0 -0
- stage1/rar_baseline/log3.txt +0 -0
- stage1/rar_ordertok/checkpoint-40000/ema_model/pytorch_model.bin +3 -0
- stage1/rar_ordertok/checkpoint-40000/metadata.json +1 -0
- stage1/rar_ordertok/checkpoint-40000/optimizer.bin +3 -0
- stage1/rar_ordertok/checkpoint-40000/pytorch_model.bin +3 -0
- stage1/rar_ordertok/checkpoint-40000/random_states_0.pkl +3 -0
- stage1/rar_ordertok/checkpoint-40000/random_states_1.pkl +3 -0
- stage1/rar_ordertok/checkpoint-40000/random_states_2.pkl +3 -0
- stage1/rar_ordertok/checkpoint-40000/random_states_3.pkl +3 -0
- stage1/rar_ordertok/checkpoint-40000/scheduler.bin +3 -0
- stage1/rar_ordertok/checkpoint-40000/unwrapped_model/pytorch_model.bin +3 -0
- stage1/rar_ordertok/config.yaml +80 -0
- stage1/rar_ordertok/log0.txt +682 -0
- stage1/rar_ordertok/log1.txt +0 -0
- stage1/rar_ordertok/log2.txt +0 -0
- stage1/rar_ordertok/log3.txt +0 -0
stage1/rar_baseline/checkpoint-20000/ema_model/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:efcdfb031d5615d27de63cfe298ab6a9ce6c52e7e8ad3e3a2c8da10376114371
|
| 3 |
+
size 1869255086
|
stage1/rar_baseline/checkpoint-20000/metadata.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"global_step": 20000}
|
stage1/rar_baseline/checkpoint-20000/optimizer.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4faac2b4fc55fe3cd2f0a2f809ceedc47b3d413ea6c9495dfebf45270ac89f00
|
| 3 |
+
size 3738566397
|
stage1/rar_baseline/checkpoint-20000/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:383bf318db84206160363dfc3aae0b07b584fdaa67019c05b088e689c287cb56
|
| 3 |
+
size 1869261230
|
stage1/rar_baseline/checkpoint-20000/random_states_0.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2ac0f4dd2bf5bfca612ce2cac18e01892c868545b7e4bcda7de1257843042b81
|
| 3 |
+
size 15060
|
stage1/rar_baseline/checkpoint-20000/random_states_1.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eb4811e1783783146044fb8bd9032daf7f7799da6381ac819bdf599957290619
|
| 3 |
+
size 15124
|
stage1/rar_baseline/checkpoint-20000/random_states_2.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6ba2163f421466f90ac03c27fc8c171ebc3b98795de43da9a6e4d693ce8b04f9
|
| 3 |
+
size 15124
|
stage1/rar_baseline/checkpoint-20000/random_states_3.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:99e1b77d4f03e9da7262c7941ae8b545f5a3740a1390cd7379621dd25845ebaa
|
| 3 |
+
size 15124
|
stage1/rar_baseline/checkpoint-20000/scheduler.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c2c0f384522475d365d71add24d50110e47dd5ff060fbeea59bc6767b3755835
|
| 3 |
+
size 1064
|
stage1/rar_baseline/checkpoint-20000/unwrapped_model/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:383bf318db84206160363dfc3aae0b07b584fdaa67019c05b088e689c287cb56
|
| 3 |
+
size 1869261230
|
stage1/rar_baseline/config.yaml
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
experiment:
|
| 2 |
+
project: titok_ca_rar
|
| 3 |
+
name: titok_ca_rar
|
| 4 |
+
max_train_examples: 1281167
|
| 5 |
+
save_every: 10000
|
| 6 |
+
eval_every: 5000000
|
| 7 |
+
generate_every: 10000000
|
| 8 |
+
log_every: 100
|
| 9 |
+
log_grad_norm_every: 1000
|
| 10 |
+
resume: true
|
| 11 |
+
tokenizer_checkpoint: /mnt/yscfs/huangmengqi/projects/visual_tokenization/order-tok/train_stage1/baseline_simvq/checkpoint-100000/ema_model/pytorch_model.bin
|
| 12 |
+
output_dir: stage1/rar_baseline
|
| 13 |
+
logging_dir: stage1/rar_baseline/logs
|
| 14 |
+
model:
|
| 15 |
+
vq_model:
|
| 16 |
+
codebook_size: 4096
|
| 17 |
+
token_size: 256
|
| 18 |
+
use_l2_norm: true
|
| 19 |
+
commitment_cost: 0.25
|
| 20 |
+
vit_enc_model_size: large
|
| 21 |
+
vit_dec_model_size: large
|
| 22 |
+
vit_enc_patch_size: 16
|
| 23 |
+
vit_dec_patch_size: 16
|
| 24 |
+
num_latent_tokens: 32
|
| 25 |
+
layers_x: 18
|
| 26 |
+
layers_token: 2
|
| 27 |
+
embedding_width: 1024
|
| 28 |
+
width: 256
|
| 29 |
+
finetune_decoder: false
|
| 30 |
+
pretrained_tokenizer_weight: maskgit-vqgan-imagenet-f16-256.bin
|
| 31 |
+
generator:
|
| 32 |
+
hidden_size: 1024
|
| 33 |
+
num_hidden_layers: 24
|
| 34 |
+
num_attention_heads: 16
|
| 35 |
+
intermediate_size: 4096
|
| 36 |
+
dropout: 0.1
|
| 37 |
+
attn_drop: 0.1
|
| 38 |
+
class_label_dropout: 0.1
|
| 39 |
+
image_seq_len: 32
|
| 40 |
+
condition_num_classes: 1000
|
| 41 |
+
randomize_temperature: 1.02
|
| 42 |
+
guidance_scale: 15.5
|
| 43 |
+
guidance_scale_pow: 2.5
|
| 44 |
+
use_checkpoint: false
|
| 45 |
+
randomness_anneal_start: 0
|
| 46 |
+
randomness_anneal_end: 0
|
| 47 |
+
dataset:
|
| 48 |
+
params:
|
| 49 |
+
train_shards_path_or_url: imagenet/imagenet1k-train-{0000..1023}.tar
|
| 50 |
+
eval_shards_path_or_url: imagenet/imagenet1k-validation-{00..63}.tar
|
| 51 |
+
num_workers_per_gpu: 2
|
| 52 |
+
preprocessing:
|
| 53 |
+
resize_shorter_edge: 256
|
| 54 |
+
crop_size: 256
|
| 55 |
+
random_crop: false
|
| 56 |
+
random_flip: true
|
| 57 |
+
optimizer:
|
| 58 |
+
name: adamw
|
| 59 |
+
params:
|
| 60 |
+
learning_rate: 0.00035
|
| 61 |
+
beta1: 0.9
|
| 62 |
+
beta2: 0.96
|
| 63 |
+
weight_decay: 0.03
|
| 64 |
+
lr_scheduler:
|
| 65 |
+
scheduler: cosine
|
| 66 |
+
params:
|
| 67 |
+
learning_rate: ${optimizer.params.learning_rate}
|
| 68 |
+
warmup_steps: 0
|
| 69 |
+
end_lr: 1.0e-05
|
| 70 |
+
training:
|
| 71 |
+
gradient_accumulation_steps: 1
|
| 72 |
+
per_gpu_batch_size: 256
|
| 73 |
+
mixed_precision: bf16
|
| 74 |
+
enable_tf32: true
|
| 75 |
+
enable_wandb: true
|
| 76 |
+
use_ema: true
|
| 77 |
+
seed: 42
|
| 78 |
+
max_train_steps: 500000
|
| 79 |
+
max_grad_norm: 1.0
|
| 80 |
+
config: configs/training/generator/rar.yaml
|
stage1/rar_baseline/log0.txt
ADDED
|
@@ -0,0 +1,389 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[32m[04/09 14:31:10 RAR]: [0mSaving config to stage1/rar_baseline/config.yaml
|
| 2 |
+
[32m[04/09 14:31:10 RAR]: [0mConfig:
|
| 3 |
+
experiment:
|
| 4 |
+
project: titok_ca_rar
|
| 5 |
+
name: titok_ca_rar
|
| 6 |
+
max_train_examples: 1281167
|
| 7 |
+
save_every: 10000
|
| 8 |
+
eval_every: 5000000
|
| 9 |
+
generate_every: 10000000
|
| 10 |
+
log_every: 100
|
| 11 |
+
log_grad_norm_every: 1000
|
| 12 |
+
resume: true
|
| 13 |
+
tokenizer_checkpoint: /mnt/yscfs/huangmengqi/projects/visual_tokenization/order-tok/train_stage1/baseline_simvq/checkpoint-100000/ema_model/pytorch_model.bin
|
| 14 |
+
output_dir: stage1/rar_baseline
|
| 15 |
+
logging_dir: stage1/rar_baseline/logs
|
| 16 |
+
model:
|
| 17 |
+
vq_model:
|
| 18 |
+
codebook_size: 4096
|
| 19 |
+
token_size: 12
|
| 20 |
+
use_l2_norm: true
|
| 21 |
+
commitment_cost: 0.25
|
| 22 |
+
vit_enc_model_size: large
|
| 23 |
+
vit_dec_model_size: large
|
| 24 |
+
vit_enc_patch_size: 16
|
| 25 |
+
vit_dec_patch_size: 16
|
| 26 |
+
num_latent_tokens: 32
|
| 27 |
+
layers_x: 18
|
| 28 |
+
layers_token: 2
|
| 29 |
+
embedding_width: 1024
|
| 30 |
+
width: 256
|
| 31 |
+
finetune_decoder: false
|
| 32 |
+
pretrained_tokenizer_weight: maskgit-vqgan-imagenet-f16-256.bin
|
| 33 |
+
generator:
|
| 34 |
+
hidden_size: 1024
|
| 35 |
+
num_hidden_layers: 24
|
| 36 |
+
num_attention_heads: 16
|
| 37 |
+
intermediate_size: 4096
|
| 38 |
+
dropout: 0.1
|
| 39 |
+
attn_drop: 0.1
|
| 40 |
+
class_label_dropout: 0.1
|
| 41 |
+
image_seq_len: 32
|
| 42 |
+
condition_num_classes: 1000
|
| 43 |
+
randomize_temperature: 1.02
|
| 44 |
+
guidance_scale: 15.5
|
| 45 |
+
guidance_scale_pow: 2.5
|
| 46 |
+
use_checkpoint: false
|
| 47 |
+
randomness_anneal_start: 0
|
| 48 |
+
randomness_anneal_end: 0
|
| 49 |
+
dataset:
|
| 50 |
+
params:
|
| 51 |
+
train_shards_path_or_url: imagenet/imagenet1k-train-{0000..1023}.tar
|
| 52 |
+
eval_shards_path_or_url: imagenet/imagenet1k-validation-{00..63}.tar
|
| 53 |
+
num_workers_per_gpu: 2
|
| 54 |
+
preprocessing:
|
| 55 |
+
resize_shorter_edge: 256
|
| 56 |
+
crop_size: 256
|
| 57 |
+
random_crop: false
|
| 58 |
+
random_flip: true
|
| 59 |
+
optimizer:
|
| 60 |
+
name: adamw
|
| 61 |
+
params:
|
| 62 |
+
learning_rate: 0.00035
|
| 63 |
+
beta1: 0.9
|
| 64 |
+
beta2: 0.96
|
| 65 |
+
weight_decay: 0.03
|
| 66 |
+
lr_scheduler:
|
| 67 |
+
scheduler: cosine
|
| 68 |
+
params:
|
| 69 |
+
learning_rate: ${optimizer.params.learning_rate}
|
| 70 |
+
warmup_steps: 0
|
| 71 |
+
end_lr: 1.0e-05
|
| 72 |
+
training:
|
| 73 |
+
gradient_accumulation_steps: 1
|
| 74 |
+
per_gpu_batch_size: 256
|
| 75 |
+
mixed_precision: bf16
|
| 76 |
+
enable_tf32: true
|
| 77 |
+
enable_wandb: true
|
| 78 |
+
use_ema: true
|
| 79 |
+
seed: 42
|
| 80 |
+
max_train_steps: 500000
|
| 81 |
+
max_grad_norm: 1.0
|
| 82 |
+
config: configs/training/generator/rar.yaml
|
| 83 |
+
|
| 84 |
+
[32m[04/09 14:32:51 RAR]: [0mSaving config to stage1/rar_baseline/config.yaml
|
| 85 |
+
[32m[04/09 14:32:51 RAR]: [0mConfig:
|
| 86 |
+
experiment:
|
| 87 |
+
project: titok_ca_rar
|
| 88 |
+
name: titok_ca_rar
|
| 89 |
+
max_train_examples: 1281167
|
| 90 |
+
save_every: 10000
|
| 91 |
+
eval_every: 5000000
|
| 92 |
+
generate_every: 10000000
|
| 93 |
+
log_every: 100
|
| 94 |
+
log_grad_norm_every: 1000
|
| 95 |
+
resume: true
|
| 96 |
+
tokenizer_checkpoint: /mnt/yscfs/huangmengqi/projects/visual_tokenization/order-tok/train_stage1/baseline_simvq/checkpoint-100000/ema_model/pytorch_model.bin
|
| 97 |
+
output_dir: stage1/rar_baseline
|
| 98 |
+
logging_dir: stage1/rar_baseline/logs
|
| 99 |
+
model:
|
| 100 |
+
vq_model:
|
| 101 |
+
codebook_size: 4096
|
| 102 |
+
token_size: 256
|
| 103 |
+
use_l2_norm: true
|
| 104 |
+
commitment_cost: 0.25
|
| 105 |
+
vit_enc_model_size: large
|
| 106 |
+
vit_dec_model_size: large
|
| 107 |
+
vit_enc_patch_size: 16
|
| 108 |
+
vit_dec_patch_size: 16
|
| 109 |
+
num_latent_tokens: 32
|
| 110 |
+
layers_x: 18
|
| 111 |
+
layers_token: 2
|
| 112 |
+
embedding_width: 1024
|
| 113 |
+
width: 256
|
| 114 |
+
finetune_decoder: false
|
| 115 |
+
pretrained_tokenizer_weight: maskgit-vqgan-imagenet-f16-256.bin
|
| 116 |
+
generator:
|
| 117 |
+
hidden_size: 1024
|
| 118 |
+
num_hidden_layers: 24
|
| 119 |
+
num_attention_heads: 16
|
| 120 |
+
intermediate_size: 4096
|
| 121 |
+
dropout: 0.1
|
| 122 |
+
attn_drop: 0.1
|
| 123 |
+
class_label_dropout: 0.1
|
| 124 |
+
image_seq_len: 32
|
| 125 |
+
condition_num_classes: 1000
|
| 126 |
+
randomize_temperature: 1.02
|
| 127 |
+
guidance_scale: 15.5
|
| 128 |
+
guidance_scale_pow: 2.5
|
| 129 |
+
use_checkpoint: false
|
| 130 |
+
randomness_anneal_start: 0
|
| 131 |
+
randomness_anneal_end: 0
|
| 132 |
+
dataset:
|
| 133 |
+
params:
|
| 134 |
+
train_shards_path_or_url: imagenet/imagenet1k-train-{0000..1023}.tar
|
| 135 |
+
eval_shards_path_or_url: imagenet/imagenet1k-validation-{00..63}.tar
|
| 136 |
+
num_workers_per_gpu: 2
|
| 137 |
+
preprocessing:
|
| 138 |
+
resize_shorter_edge: 256
|
| 139 |
+
crop_size: 256
|
| 140 |
+
random_crop: false
|
| 141 |
+
random_flip: true
|
| 142 |
+
optimizer:
|
| 143 |
+
name: adamw
|
| 144 |
+
params:
|
| 145 |
+
learning_rate: 0.00035
|
| 146 |
+
beta1: 0.9
|
| 147 |
+
beta2: 0.96
|
| 148 |
+
weight_decay: 0.03
|
| 149 |
+
lr_scheduler:
|
| 150 |
+
scheduler: cosine
|
| 151 |
+
params:
|
| 152 |
+
learning_rate: ${optimizer.params.learning_rate}
|
| 153 |
+
warmup_steps: 0
|
| 154 |
+
end_lr: 1.0e-05
|
| 155 |
+
training:
|
| 156 |
+
gradient_accumulation_steps: 1
|
| 157 |
+
per_gpu_batch_size: 256
|
| 158 |
+
mixed_precision: bf16
|
| 159 |
+
enable_tf32: true
|
| 160 |
+
enable_wandb: true
|
| 161 |
+
use_ema: true
|
| 162 |
+
seed: 42
|
| 163 |
+
max_train_steps: 500000
|
| 164 |
+
max_grad_norm: 1.0
|
| 165 |
+
config: configs/training/generator/rar.yaml
|
| 166 |
+
|
| 167 |
+
[32m[04/09 14:33:11 RAR]: [0mCreating model and loss module.
|
| 168 |
+
[32m[04/09 14:33:22 RAR]: [0mCreating optimizers.
|
| 169 |
+
[32m[04/09 14:33:22 RAR]: [0mCreating lr_schedulers.
|
| 170 |
+
[32m[04/09 14:33:22 RAR]: [0mCreating dataloaders.
|
| 171 |
+
[32m[04/09 14:33:22 RAR]: [0mPreparing model, optimizer and dataloaders
|
| 172 |
+
[32m[04/09 14:33:23 RAR]: [0m***** Running training *****
|
| 173 |
+
[32m[04/09 14:33:23 RAR]: [0m Num training steps = 500000
|
| 174 |
+
[32m[04/09 14:33:23 RAR]: [0m Gradient Accumulation steps = 1
|
| 175 |
+
[32m[04/09 14:33:23 RAR]: [0m Instantaneous batch size per gpu = 256
|
| 176 |
+
[32m[04/09 14:33:23 RAR]: [0m Total train batch size (w. parallel, distributed & accumulation) = 1024
|
| 177 |
+
[32m[04/09 14:33:23 RAR]: [0mAll globbed checkpoints are: []
|
| 178 |
+
[32m[04/09 14:33:23 RAR]: [0mTraining from scratch.
|
| 179 |
+
[32m[04/09 14:36:02 RAR]: [0mData (t): 0.0213, 468.71/s/gpu Batch (t): 0.5462 LR: 0.000350 Step: 100 Loss: 8.2635 Accuracy: 0.0004
|
| 180 |
+
[32m[04/09 14:38:38 RAR]: [0mData (t): 0.0337, 455.24/s/gpu Batch (t): 0.5623 LR: 0.000350 Step: 200 Loss: 8.2532 Accuracy: 0.0007
|
| 181 |
+
[32m[04/09 14:41:10 RAR]: [0mData (t): 0.0298, 95.03/s/gpu Batch (t): 2.6939 LR: 0.000350 Step: 300 Loss: 8.2315 Accuracy: 0.0007
|
| 182 |
+
[32m[04/09 14:43:39 RAR]: [0mData (t): 0.0208, 286.32/s/gpu Batch (t): 0.8941 LR: 0.000350 Step: 400 Loss: 8.2100 Accuracy: 0.0006
|
| 183 |
+
[32m[04/09 14:46:14 RAR]: [0mData (t): 0.0290, 469.80/s/gpu Batch (t): 0.5449 LR: 0.000350 Step: 500 Loss: 8.1891 Accuracy: 0.0009
|
| 184 |
+
[32m[04/09 14:48:44 RAR]: [0mData (t): 0.0243, 462.52/s/gpu Batch (t): 0.5535 LR: 0.000350 Step: 600 Loss: 8.1766 Accuracy: 0.0004
|
| 185 |
+
[32m[04/09 14:51:15 RAR]: [0mData (t): 0.0221, 469.11/s/gpu Batch (t): 0.5457 LR: 0.000350 Step: 700 Loss: 8.1651 Accuracy: 0.0008
|
| 186 |
+
[32m[04/09 14:53:45 RAR]: [0mData (t): 0.0261, 345.31/s/gpu Batch (t): 0.7414 LR: 0.000350 Step: 800 Loss: 8.1420 Accuracy: 0.0015
|
| 187 |
+
[32m[04/09 14:56:15 RAR]: [0mData (t): 0.0321, 200.74/s/gpu Batch (t): 1.2753 LR: 0.000350 Step: 900 Loss: 8.1252 Accuracy: 0.0010
|
| 188 |
+
[32m[04/09 14:58:45 RAR]: [0mData (t): 0.0287, 363.06/s/gpu Batch (t): 0.7051 LR: 0.000350 Step: 1000 Loss: 8.1242 Accuracy: 0.0011
|
| 189 |
+
[32m[04/09 15:01:18 RAR]: [0mData (t): 1.9396, 103.94/s/gpu Batch (t): 2.4629 LR: 0.000350 Step: 1100 Loss: 8.1143 Accuracy: 0.0013
|
| 190 |
+
[32m[04/09 15:03:48 RAR]: [0mData (t): 0.1397, 104.28/s/gpu Batch (t): 2.4548 LR: 0.000350 Step: 1200 Loss: 8.0997 Accuracy: 0.0010
|
| 191 |
+
[32m[04/09 15:06:22 RAR]: [0mData (t): 0.0290, 100.72/s/gpu Batch (t): 2.5418 LR: 0.000350 Step: 1300 Loss: 8.0941 Accuracy: 0.0017
|
| 192 |
+
[32m[04/09 15:08:55 RAR]: [0mData (t): 0.0362, 120.63/s/gpu Batch (t): 2.1222 LR: 0.000350 Step: 1400 Loss: 8.0953 Accuracy: 0.0013
|
| 193 |
+
[32m[04/09 15:11:31 RAR]: [0mData (t): 0.0380, 425.68/s/gpu Batch (t): 0.6014 LR: 0.000350 Step: 1500 Loss: 8.0826 Accuracy: 0.0011
|
| 194 |
+
[32m[04/09 15:14:07 RAR]: [0mData (t): 0.0320, 272.74/s/gpu Batch (t): 0.9386 LR: 0.000350 Step: 1600 Loss: 8.0747 Accuracy: 0.0013
|
| 195 |
+
[32m[04/09 15:16:46 RAR]: [0mData (t): 0.0312, 142.38/s/gpu Batch (t): 1.7980 LR: 0.000350 Step: 1700 Loss: 8.0645 Accuracy: 0.0013
|
| 196 |
+
[32m[04/09 15:19:20 RAR]: [0mData (t): 0.0897, 242.02/s/gpu Batch (t): 1.0577 LR: 0.000350 Step: 1800 Loss: 8.0636 Accuracy: 0.0016
|
| 197 |
+
[32m[04/09 15:21:58 RAR]: [0mData (t): 0.0293, 470.92/s/gpu Batch (t): 0.5436 LR: 0.000350 Step: 1900 Loss: 8.0468 Accuracy: 0.0015
|
| 198 |
+
[32m[04/09 15:24:34 RAR]: [0mData (t): 0.0311, 308.03/s/gpu Batch (t): 0.8311 LR: 0.000350 Step: 2000 Loss: 8.0544 Accuracy: 0.0012
|
| 199 |
+
[32m[04/09 15:27:14 RAR]: [0mData (t): 0.0445, 461.58/s/gpu Batch (t): 0.5546 LR: 0.000350 Step: 2100 Loss: 8.0320 Accuracy: 0.0017
|
| 200 |
+
[32m[04/09 15:29:51 RAR]: [0mData (t): 0.0287, 130.43/s/gpu Batch (t): 1.9628 LR: 0.000350 Step: 2200 Loss: 8.0260 Accuracy: 0.0014
|
| 201 |
+
[32m[04/09 15:32:20 RAR]: [0mData (t): 0.9300, 124.24/s/gpu Batch (t): 2.0606 LR: 0.000350 Step: 2300 Loss: 8.0257 Accuracy: 0.0015
|
| 202 |
+
[32m[04/09 15:34:53 RAR]: [0mData (t): 0.0290, 386.40/s/gpu Batch (t): 0.6625 LR: 0.000350 Step: 2400 Loss: 8.0209 Accuracy: 0.0019
|
| 203 |
+
[32m[04/09 15:37:26 RAR]: [0mData (t): 0.0309, 97.08/s/gpu Batch (t): 2.6369 LR: 0.000350 Step: 2500 Loss: 8.0184 Accuracy: 0.0019
|
| 204 |
+
[32m[04/09 15:40:00 RAR]: [0mData (t): 0.0203, 96.40/s/gpu Batch (t): 2.6557 LR: 0.000350 Step: 2600 Loss: 8.0078 Accuracy: 0.0017
|
| 205 |
+
[32m[04/09 15:42:27 RAR]: [0mData (t): 0.0305, 444.62/s/gpu Batch (t): 0.5758 LR: 0.000350 Step: 2700 Loss: 8.0096 Accuracy: 0.0016
|
| 206 |
+
[32m[04/09 15:45:00 RAR]: [0mData (t): 0.0281, 248.11/s/gpu Batch (t): 1.0318 LR: 0.000350 Step: 2800 Loss: 7.9780 Accuracy: 0.0019
|
| 207 |
+
[32m[04/09 15:47:31 RAR]: [0mData (t): 0.0192, 111.52/s/gpu Batch (t): 2.2955 LR: 0.000350 Step: 2900 Loss: 7.9892 Accuracy: 0.0019
|
| 208 |
+
[32m[04/09 15:49:58 RAR]: [0mData (t): 0.4393, 87.25/s/gpu Batch (t): 2.9341 LR: 0.000350 Step: 3000 Loss: 7.9847 Accuracy: 0.0018
|
| 209 |
+
[32m[04/09 15:52:25 RAR]: [0mData (t): 1.1977, 149.33/s/gpu Batch (t): 1.7144 LR: 0.000350 Step: 3100 Loss: 7.9832 Accuracy: 0.0016
|
| 210 |
+
[32m[04/09 15:54:56 RAR]: [0mData (t): 1.3806, 133.51/s/gpu Batch (t): 1.9175 LR: 0.000350 Step: 3200 Loss: 7.9699 Accuracy: 0.0021
|
| 211 |
+
[32m[04/09 15:57:25 RAR]: [0mData (t): 1.3350, 128.34/s/gpu Batch (t): 1.9947 LR: 0.000350 Step: 3300 Loss: 7.9745 Accuracy: 0.0018
|
| 212 |
+
[32m[04/09 15:59:54 RAR]: [0mData (t): 1.3171, 139.27/s/gpu Batch (t): 1.8382 LR: 0.000350 Step: 3400 Loss: 7.9459 Accuracy: 0.0020
|
| 213 |
+
[32m[04/09 16:02:19 RAR]: [0mData (t): 0.1564, 375.66/s/gpu Batch (t): 0.6815 LR: 0.000350 Step: 3500 Loss: 7.9592 Accuracy: 0.0019
|
| 214 |
+
[32m[04/09 16:04:48 RAR]: [0mData (t): 0.0272, 459.47/s/gpu Batch (t): 0.5572 LR: 0.000350 Step: 3600 Loss: 7.9532 Accuracy: 0.0019
|
| 215 |
+
[32m[04/09 16:07:19 RAR]: [0mData (t): 0.0233, 112.66/s/gpu Batch (t): 2.2723 LR: 0.000350 Step: 3700 Loss: 7.9582 Accuracy: 0.0022
|
| 216 |
+
[32m[04/09 16:09:52 RAR]: [0mData (t): 0.0291, 456.30/s/gpu Batch (t): 0.5610 LR: 0.000350 Step: 3800 Loss: 7.9390 Accuracy: 0.0027
|
| 217 |
+
[32m[04/09 16:12:25 RAR]: [0mData (t): 0.0267, 333.19/s/gpu Batch (t): 0.7683 LR: 0.000350 Step: 3900 Loss: 7.9175 Accuracy: 0.0025
|
| 218 |
+
[32m[04/09 16:14:59 RAR]: [0mData (t): 0.0206, 405.87/s/gpu Batch (t): 0.6307 LR: 0.000350 Step: 4000 Loss: 7.9305 Accuracy: 0.0024
|
| 219 |
+
[32m[04/09 16:17:31 RAR]: [0mData (t): 0.0171, 475.17/s/gpu Batch (t): 0.5388 LR: 0.000350 Step: 4100 Loss: 7.9275 Accuracy: 0.0024
|
| 220 |
+
[32m[04/09 16:20:00 RAR]: [0mData (t): 1.4238, 130.32/s/gpu Batch (t): 1.9644 LR: 0.000350 Step: 4200 Loss: 7.9302 Accuracy: 0.0023
|
| 221 |
+
[32m[04/09 16:22:27 RAR]: [0mData (t): 1.2357, 144.41/s/gpu Batch (t): 1.7727 LR: 0.000350 Step: 4300 Loss: 7.9217 Accuracy: 0.0023
|
| 222 |
+
[32m[04/09 16:25:00 RAR]: [0mData (t): 1.8754, 106.84/s/gpu Batch (t): 2.3962 LR: 0.000350 Step: 4400 Loss: 7.9117 Accuracy: 0.0024
|
| 223 |
+
[32m[04/09 16:27:29 RAR]: [0mData (t): 1.0540, 116.97/s/gpu Batch (t): 2.1886 LR: 0.000350 Step: 4500 Loss: 7.9029 Accuracy: 0.0026
|
| 224 |
+
[32m[04/09 16:29:55 RAR]: [0mData (t): 0.0194, 464.33/s/gpu Batch (t): 0.5513 LR: 0.000350 Step: 4600 Loss: 7.8965 Accuracy: 0.0024
|
| 225 |
+
[32m[04/09 16:32:25 RAR]: [0mData (t): 0.7719, 196.73/s/gpu Batch (t): 1.3012 LR: 0.000350 Step: 4700 Loss: 7.8757 Accuracy: 0.0027
|
| 226 |
+
[32m[04/09 16:34:55 RAR]: [0mData (t): 0.0229, 454.04/s/gpu Batch (t): 0.5638 LR: 0.000350 Step: 4800 Loss: 7.9061 Accuracy: 0.0026
|
| 227 |
+
[32m[04/09 16:37:22 RAR]: [0mData (t): 0.0208, 468.98/s/gpu Batch (t): 0.5459 LR: 0.000350 Step: 4900 Loss: 7.8847 Accuracy: 0.0029
|
| 228 |
+
[32m[04/09 16:39:53 RAR]: [0mData (t): 0.0202, 89.31/s/gpu Batch (t): 2.8663 LR: 0.000350 Step: 5000 Loss: 7.8835 Accuracy: 0.0028
|
| 229 |
+
[32m[04/09 16:42:16 RAR]: [0mData (t): 0.0154, 242.01/s/gpu Batch (t): 1.0578 LR: 0.000350 Step: 5100 Loss: 7.8674 Accuracy: 0.0028
|
| 230 |
+
[32m[04/09 16:44:26 RAR]: [0mData (t): 0.0358, 96.28/s/gpu Batch (t): 2.6590 LR: 0.000350 Step: 5200 Loss: 7.8907 Accuracy: 0.0025
|
| 231 |
+
[32m[04/09 16:47:07 RAR]: [0mData (t): 0.0329, 279.45/s/gpu Batch (t): 0.9161 LR: 0.000350 Step: 5300 Loss: 7.8895 Accuracy: 0.0027
|
| 232 |
+
[32m[04/09 16:49:52 RAR]: [0mData (t): 0.0343, 320.08/s/gpu Batch (t): 0.7998 LR: 0.000350 Step: 5400 Loss: 7.8690 Accuracy: 0.0027
|
| 233 |
+
[32m[04/09 16:52:36 RAR]: [0mData (t): 0.0262, 453.18/s/gpu Batch (t): 0.5649 LR: 0.000350 Step: 5500 Loss: 7.8589 Accuracy: 0.0031
|
| 234 |
+
[32m[04/09 16:55:22 RAR]: [0mData (t): 0.0335, 118.92/s/gpu Batch (t): 2.1527 LR: 0.000350 Step: 5600 Loss: 7.8661 Accuracy: 0.0031
|
| 235 |
+
[32m[04/09 16:58:05 RAR]: [0mData (t): 0.0348, 458.25/s/gpu Batch (t): 0.5587 LR: 0.000350 Step: 5700 Loss: 7.8510 Accuracy: 0.0029
|
| 236 |
+
[32m[04/09 17:00:48 RAR]: [0mData (t): 0.0305, 454.06/s/gpu Batch (t): 0.5638 LR: 0.000350 Step: 5800 Loss: 7.8386 Accuracy: 0.0035
|
| 237 |
+
[32m[04/09 17:03:34 RAR]: [0mData (t): 0.0281, 297.65/s/gpu Batch (t): 0.8601 LR: 0.000350 Step: 5900 Loss: 7.8500 Accuracy: 0.0034
|
| 238 |
+
[32m[04/09 17:06:16 RAR]: [0mData (t): 0.0280, 374.85/s/gpu Batch (t): 0.6829 LR: 0.000350 Step: 6000 Loss: 7.8506 Accuracy: 0.0028
|
| 239 |
+
[32m[04/09 17:08:56 RAR]: [0mData (t): 0.0336, 453.39/s/gpu Batch (t): 0.5646 LR: 0.000350 Step: 6100 Loss: 7.8508 Accuracy: 0.0031
|
| 240 |
+
[32m[04/09 17:11:38 RAR]: [0mData (t): 0.0217, 442.85/s/gpu Batch (t): 0.5781 LR: 0.000350 Step: 6200 Loss: 7.8361 Accuracy: 0.0036
|
| 241 |
+
[32m[04/09 17:14:24 RAR]: [0mData (t): 0.0343, 416.47/s/gpu Batch (t): 0.6147 LR: 0.000350 Step: 6300 Loss: 7.8417 Accuracy: 0.0033
|
| 242 |
+
[32m[04/09 17:17:09 RAR]: [0mData (t): 0.0225, 96.65/s/gpu Batch (t): 2.6488 LR: 0.000350 Step: 6400 Loss: 7.8501 Accuracy: 0.0033
|
| 243 |
+
[32m[04/09 17:19:53 RAR]: [0mData (t): 0.0304, 95.96/s/gpu Batch (t): 2.6679 LR: 0.000350 Step: 6500 Loss: 7.8394 Accuracy: 0.0043
|
| 244 |
+
[32m[04/09 17:22:33 RAR]: [0mData (t): 2.1302, 82.85/s/gpu Batch (t): 3.0901 LR: 0.000350 Step: 6600 Loss: 7.8010 Accuracy: 0.0043
|
| 245 |
+
[32m[04/09 17:25:15 RAR]: [0mData (t): 0.0330, 453.84/s/gpu Batch (t): 0.5641 LR: 0.000350 Step: 6700 Loss: 7.8082 Accuracy: 0.0040
|
| 246 |
+
[32m[04/09 17:27:58 RAR]: [0mData (t): 0.0338, 440.68/s/gpu Batch (t): 0.5809 LR: 0.000350 Step: 6800 Loss: 7.7663 Accuracy: 0.0051
|
| 247 |
+
[32m[04/09 17:30:38 RAR]: [0mData (t): 0.0268, 146.57/s/gpu Batch (t): 1.7466 LR: 0.000350 Step: 6900 Loss: 7.8118 Accuracy: 0.0045
|
| 248 |
+
[32m[04/09 17:33:21 RAR]: [0mData (t): 0.0291, 355.63/s/gpu Batch (t): 0.7198 LR: 0.000350 Step: 7000 Loss: 7.8211 Accuracy: 0.0037
|
| 249 |
+
[32m[04/09 17:36:05 RAR]: [0mData (t): 0.0307, 442.56/s/gpu Batch (t): 0.5785 LR: 0.000350 Step: 7100 Loss: 7.8315 Accuracy: 0.0038
|
| 250 |
+
[32m[04/09 17:38:37 RAR]: [0mData (t): 0.0150, 253.02/s/gpu Batch (t): 1.0118 LR: 0.000350 Step: 7200 Loss: 7.7748 Accuracy: 0.0046
|
| 251 |
+
[32m[04/09 17:40:09 RAR]: [0mData (t): 0.0150, 190.24/s/gpu Batch (t): 1.3457 LR: 0.000350 Step: 7300 Loss: 7.7952 Accuracy: 0.0045
|
| 252 |
+
[32m[04/09 17:42:08 RAR]: [0mData (t): 0.0232, 329.59/s/gpu Batch (t): 0.7767 LR: 0.000350 Step: 7400 Loss: 7.8065 Accuracy: 0.0043
|
| 253 |
+
[32m[04/09 17:44:28 RAR]: [0mData (t): 0.0235, 479.76/s/gpu Batch (t): 0.5336 LR: 0.000350 Step: 7500 Loss: 7.8319 Accuracy: 0.0036
|
| 254 |
+
[32m[04/09 17:46:49 RAR]: [0mData (t): 0.0293, 124.55/s/gpu Batch (t): 2.0554 LR: 0.000350 Step: 7600 Loss: 7.7934 Accuracy: 0.0050
|
| 255 |
+
[32m[04/09 17:49:08 RAR]: [0mData (t): 0.0307, 353.23/s/gpu Batch (t): 0.7247 LR: 0.000350 Step: 7700 Loss: 7.7900 Accuracy: 0.0041
|
| 256 |
+
[32m[04/09 17:51:25 RAR]: [0mData (t): 0.0369, 186.27/s/gpu Batch (t): 1.3743 LR: 0.000350 Step: 7800 Loss: 7.7738 Accuracy: 0.0047
|
| 257 |
+
[32m[04/09 17:53:38 RAR]: [0mData (t): 0.0219, 452.16/s/gpu Batch (t): 0.5662 LR: 0.000350 Step: 7900 Loss: 7.7705 Accuracy: 0.0053
|
| 258 |
+
[32m[04/09 17:55:55 RAR]: [0mData (t): 0.0312, 410.15/s/gpu Batch (t): 0.6242 LR: 0.000350 Step: 8000 Loss: 7.7487 Accuracy: 0.0049
|
| 259 |
+
[32m[04/09 17:58:10 RAR]: [0mData (t): 0.0213, 173.62/s/gpu Batch (t): 1.4745 LR: 0.000350 Step: 8100 Loss: 7.7421 Accuracy: 0.0055
|
| 260 |
+
[32m[04/09 18:00:25 RAR]: [0mData (t): 0.0275, 458.48/s/gpu Batch (t): 0.5584 LR: 0.000350 Step: 8200 Loss: 7.7826 Accuracy: 0.0043
|
| 261 |
+
[32m[04/09 18:02:40 RAR]: [0mData (t): 0.0278, 468.36/s/gpu Batch (t): 0.5466 LR: 0.000350 Step: 8300 Loss: 7.7622 Accuracy: 0.0042
|
| 262 |
+
[32m[04/09 18:04:56 RAR]: [0mData (t): 0.0174, 115.80/s/gpu Batch (t): 2.2108 LR: 0.000350 Step: 8400 Loss: 7.7423 Accuracy: 0.0051
|
| 263 |
+
[32m[04/09 18:07:06 RAR]: [0mData (t): 0.0176, 253.29/s/gpu Batch (t): 1.0107 LR: 0.000350 Step: 8500 Loss: 7.7696 Accuracy: 0.0045
|
| 264 |
+
[32m[04/09 18:09:21 RAR]: [0mData (t): 0.0163, 477.05/s/gpu Batch (t): 0.5366 LR: 0.000350 Step: 8600 Loss: 7.7322 Accuracy: 0.0063
|
| 265 |
+
[32m[04/09 18:11:35 RAR]: [0mData (t): 0.0262, 132.94/s/gpu Batch (t): 1.9257 LR: 0.000350 Step: 8700 Loss: 7.7139 Accuracy: 0.0065
|
| 266 |
+
[32m[04/09 18:13:51 RAR]: [0mData (t): 0.0273, 466.46/s/gpu Batch (t): 0.5488 LR: 0.000350 Step: 8800 Loss: 7.7708 Accuracy: 0.0046
|
| 267 |
+
[32m[04/09 18:16:05 RAR]: [0mData (t): 0.0274, 468.00/s/gpu Batch (t): 0.5470 LR: 0.000350 Step: 8900 Loss: 7.7516 Accuracy: 0.0053
|
| 268 |
+
[32m[04/09 18:18:18 RAR]: [0mData (t): 0.0266, 365.50/s/gpu Batch (t): 0.7004 LR: 0.000350 Step: 9000 Loss: 7.7470 Accuracy: 0.0057
|
| 269 |
+
[32m[04/09 18:20:36 RAR]: [0mData (t): 1.4101, 132.29/s/gpu Batch (t): 1.9351 LR: 0.000350 Step: 9100 Loss: 7.7165 Accuracy: 0.0056
|
| 270 |
+
[32m[04/09 18:22:50 RAR]: [0mData (t): 1.2962, 141.31/s/gpu Batch (t): 1.8116 LR: 0.000350 Step: 9200 Loss: 7.7291 Accuracy: 0.0052
|
| 271 |
+
[32m[04/09 18:25:04 RAR]: [0mData (t): 1.3310, 137.62/s/gpu Batch (t): 1.8602 LR: 0.000350 Step: 9300 Loss: 7.7293 Accuracy: 0.0059
|
| 272 |
+
[32m[04/09 18:27:14 RAR]: [0mData (t): 0.0289, 473.20/s/gpu Batch (t): 0.5410 LR: 0.000350 Step: 9400 Loss: 7.7444 Accuracy: 0.0050
|
| 273 |
+
[32m[04/09 18:29:28 RAR]: [0mData (t): 0.0201, 225.95/s/gpu Batch (t): 1.1330 LR: 0.000350 Step: 9500 Loss: 7.7485 Accuracy: 0.0057
|
| 274 |
+
[32m[04/09 18:31:42 RAR]: [0mData (t): 0.0317, 130.88/s/gpu Batch (t): 1.9559 LR: 0.000350 Step: 9600 Loss: 7.7604 Accuracy: 0.0049
|
| 275 |
+
[32m[04/09 18:33:57 RAR]: [0mData (t): 0.0258, 136.98/s/gpu Batch (t): 1.8689 LR: 0.000350 Step: 9700 Loss: 7.6677 Accuracy: 0.0082
|
| 276 |
+
[32m[04/09 18:36:09 RAR]: [0mData (t): 0.0294, 136.09/s/gpu Batch (t): 1.8811 LR: 0.000350 Step: 9800 Loss: 7.6954 Accuracy: 0.0060
|
| 277 |
+
[32m[04/09 18:38:22 RAR]: [0mData (t): 0.0173, 110.43/s/gpu Batch (t): 2.3183 LR: 0.000350 Step: 9900 Loss: 7.6930 Accuracy: 0.0072
|
| 278 |
+
[32m[04/09 18:40:34 RAR]: [0mData (t): 0.0157, 158.25/s/gpu Batch (t): 1.6177 LR: 0.000350 Step: 10000 Loss: 7.7283 Accuracy: 0.0063
|
| 279 |
+
[32m[04/09 18:40:44 RAR]: [0mSaved state to stage1/rar_baseline/checkpoint-10000
|
| 280 |
+
[32m[04/09 18:43:52 RAR]: [0mData (t): 0.0324, 480.00/s/gpu Batch (t): 0.5333 LR: 0.000350 Step: 10100 Loss: 7.7112 Accuracy: 0.0064
|
| 281 |
+
[32m[04/09 18:46:04 RAR]: [0mData (t): 0.0198, 457.55/s/gpu Batch (t): 0.5595 LR: 0.000350 Step: 10200 Loss: 7.7176 Accuracy: 0.0065
|
| 282 |
+
[32m[04/09 18:48:20 RAR]: [0mData (t): 0.0209, 453.42/s/gpu Batch (t): 0.5646 LR: 0.000350 Step: 10300 Loss: 7.6900 Accuracy: 0.0071
|
| 283 |
+
[32m[04/09 18:50:35 RAR]: [0mData (t): 0.0324, 460.26/s/gpu Batch (t): 0.5562 LR: 0.000350 Step: 10400 Loss: 7.6538 Accuracy: 0.0081
|
| 284 |
+
[32m[04/09 18:52:51 RAR]: [0mData (t): 0.0223, 459.99/s/gpu Batch (t): 0.5565 LR: 0.000350 Step: 10500 Loss: 7.7063 Accuracy: 0.0066
|
| 285 |
+
[32m[04/09 18:55:03 RAR]: [0mData (t): 0.0182, 336.27/s/gpu Batch (t): 0.7613 LR: 0.000350 Step: 10600 Loss: 7.6800 Accuracy: 0.0064
|
| 286 |
+
[32m[04/09 18:57:16 RAR]: [0mData (t): 0.0193, 237.72/s/gpu Batch (t): 1.0769 LR: 0.000350 Step: 10700 Loss: 7.7280 Accuracy: 0.0062
|
| 287 |
+
[32m[04/09 18:59:29 RAR]: [0mData (t): 0.0265, 129.92/s/gpu Batch (t): 1.9704 LR: 0.000350 Step: 10800 Loss: 7.6473 Accuracy: 0.0092
|
| 288 |
+
[32m[04/09 19:01:39 RAR]: [0mData (t): 0.0258, 400.84/s/gpu Batch (t): 0.6387 LR: 0.000350 Step: 10900 Loss: 7.6826 Accuracy: 0.0065
|
| 289 |
+
[32m[04/09 19:03:51 RAR]: [0mData (t): 0.0272, 121.14/s/gpu Batch (t): 2.1133 LR: 0.000350 Step: 11000 Loss: 7.6701 Accuracy: 0.0073
|
| 290 |
+
[32m[04/09 19:06:00 RAR]: [0mData (t): 0.0283, 296.92/s/gpu Batch (t): 0.8622 LR: 0.000350 Step: 11100 Loss: 7.6169 Accuracy: 0.0106
|
| 291 |
+
[32m[04/09 19:08:12 RAR]: [0mData (t): 0.0350, 454.38/s/gpu Batch (t): 0.5634 LR: 0.000350 Step: 11200 Loss: 7.6183 Accuracy: 0.0103
|
| 292 |
+
[32m[04/09 19:10:30 RAR]: [0mData (t): 0.0297, 464.83/s/gpu Batch (t): 0.5507 LR: 0.000350 Step: 11300 Loss: 7.6654 Accuracy: 0.0078
|
| 293 |
+
[32m[04/09 19:12:41 RAR]: [0mData (t): 0.0185, 229.37/s/gpu Batch (t): 1.1161 LR: 0.000350 Step: 11400 Loss: 7.6608 Accuracy: 0.0079
|
| 294 |
+
[32m[04/09 19:14:55 RAR]: [0mData (t): 0.0279, 106.44/s/gpu Batch (t): 2.4052 LR: 0.000350 Step: 11500 Loss: 7.6210 Accuracy: 0.0085
|
| 295 |
+
[32m[04/09 19:17:05 RAR]: [0mData (t): 1.3357, 138.17/s/gpu Batch (t): 1.8528 LR: 0.000350 Step: 11600 Loss: 7.6746 Accuracy: 0.0071
|
| 296 |
+
[32m[04/09 19:19:14 RAR]: [0mData (t): 0.9504, 147.81/s/gpu Batch (t): 1.7320 LR: 0.000350 Step: 11700 Loss: 7.6023 Accuracy: 0.0089
|
| 297 |
+
[32m[04/09 19:21:25 RAR]: [0mData (t): 0.0221, 477.52/s/gpu Batch (t): 0.5361 LR: 0.000350 Step: 11800 Loss: 7.6322 Accuracy: 0.0089
|
| 298 |
+
[32m[04/09 19:23:38 RAR]: [0mData (t): 0.0299, 132.90/s/gpu Batch (t): 1.9262 LR: 0.000350 Step: 11900 Loss: 7.6542 Accuracy: 0.0081
|
| 299 |
+
[32m[04/09 19:25:49 RAR]: [0mData (t): 0.0187, 113.68/s/gpu Batch (t): 2.2520 LR: 0.000350 Step: 12000 Loss: 7.6159 Accuracy: 0.0092
|
| 300 |
+
[32m[04/09 19:28:01 RAR]: [0mData (t): 0.0387, 458.26/s/gpu Batch (t): 0.5586 LR: 0.000350 Step: 12100 Loss: 7.6949 Accuracy: 0.0071
|
| 301 |
+
[32m[04/09 19:30:12 RAR]: [0mData (t): 0.0188, 469.12/s/gpu Batch (t): 0.5457 LR: 0.000350 Step: 12200 Loss: 7.6547 Accuracy: 0.0085
|
| 302 |
+
[32m[04/09 19:32:24 RAR]: [0mData (t): 0.0267, 467.39/s/gpu Batch (t): 0.5477 LR: 0.000349 Step: 12300 Loss: 7.6668 Accuracy: 0.0077
|
| 303 |
+
[32m[04/09 19:34:35 RAR]: [0mData (t): 0.0271, 471.15/s/gpu Batch (t): 0.5434 LR: 0.000349 Step: 12400 Loss: 7.5743 Accuracy: 0.0117
|
| 304 |
+
[32m[04/09 19:36:47 RAR]: [0mData (t): 0.0279, 120.65/s/gpu Batch (t): 2.1219 LR: 0.000349 Step: 12500 Loss: 7.5949 Accuracy: 0.0100
|
| 305 |
+
[32m[04/09 19:39:03 RAR]: [0mData (t): 0.0183, 109.50/s/gpu Batch (t): 2.3378 LR: 0.000349 Step: 12600 Loss: 7.6420 Accuracy: 0.0089
|
| 306 |
+
[32m[04/09 19:41:09 RAR]: [0mData (t): 0.0235, 478.35/s/gpu Batch (t): 0.5352 LR: 0.000349 Step: 12700 Loss: 7.5710 Accuracy: 0.0117
|
| 307 |
+
[32m[04/09 19:43:18 RAR]: [0mData (t): 0.0178, 474.23/s/gpu Batch (t): 0.5398 LR: 0.000349 Step: 12800 Loss: 7.5221 Accuracy: 0.0139
|
| 308 |
+
[32m[04/09 19:45:27 RAR]: [0mData (t): 1.0628, 160.46/s/gpu Batch (t): 1.5954 LR: 0.000349 Step: 12900 Loss: 7.5700 Accuracy: 0.0114
|
| 309 |
+
[32m[04/09 19:47:41 RAR]: [0mData (t): 0.0193, 294.49/s/gpu Batch (t): 0.8693 LR: 0.000349 Step: 13000 Loss: 7.5716 Accuracy: 0.0104
|
| 310 |
+
[32m[04/09 19:49:52 RAR]: [0mData (t): 1.0829, 160.02/s/gpu Batch (t): 1.5998 LR: 0.000349 Step: 13100 Loss: 7.5956 Accuracy: 0.0111
|
| 311 |
+
[32m[04/09 19:52:07 RAR]: [0mData (t): 0.0308, 120.54/s/gpu Batch (t): 2.1237 LR: 0.000349 Step: 13200 Loss: 7.5196 Accuracy: 0.0136
|
| 312 |
+
[32m[04/09 19:54:19 RAR]: [0mData (t): 0.0237, 103.74/s/gpu Batch (t): 2.4677 LR: 0.000349 Step: 13300 Loss: 7.5724 Accuracy: 0.0105
|
| 313 |
+
[32m[04/09 19:56:31 RAR]: [0mData (t): 0.0209, 93.88/s/gpu Batch (t): 2.7269 LR: 0.000349 Step: 13400 Loss: 7.5601 Accuracy: 0.0123
|
| 314 |
+
[32m[04/09 19:58:40 RAR]: [0mData (t): 0.0183, 232.38/s/gpu Batch (t): 1.1016 LR: 0.000349 Step: 13500 Loss: 7.5515 Accuracy: 0.0130
|
| 315 |
+
[32m[04/09 20:00:52 RAR]: [0mData (t): 0.0169, 478.25/s/gpu Batch (t): 0.5353 LR: 0.000349 Step: 13600 Loss: 7.5938 Accuracy: 0.0106
|
| 316 |
+
[32m[04/09 20:03:03 RAR]: [0mData (t): 0.0203, 131.23/s/gpu Batch (t): 1.9508 LR: 0.000349 Step: 13700 Loss: 7.5743 Accuracy: 0.0096
|
| 317 |
+
[32m[04/09 20:05:18 RAR]: [0mData (t): 0.0308, 465.77/s/gpu Batch (t): 0.5496 LR: 0.000349 Step: 13800 Loss: 7.6002 Accuracy: 0.0110
|
| 318 |
+
[32m[04/09 20:07:29 RAR]: [0mData (t): 0.0208, 183.60/s/gpu Batch (t): 1.3943 LR: 0.000349 Step: 13900 Loss: 7.5775 Accuracy: 0.0107
|
| 319 |
+
[32m[04/09 20:09:38 RAR]: [0mData (t): 0.0207, 414.58/s/gpu Batch (t): 0.6175 LR: 0.000349 Step: 14000 Loss: 7.5665 Accuracy: 0.0107
|
| 320 |
+
[32m[04/09 20:11:48 RAR]: [0mData (t): 0.0261, 474.85/s/gpu Batch (t): 0.5391 LR: 0.000349 Step: 14100 Loss: 7.6009 Accuracy: 0.0090
|
| 321 |
+
[32m[04/09 20:13:56 RAR]: [0mData (t): 0.2095, 350.60/s/gpu Batch (t): 0.7302 LR: 0.000349 Step: 14200 Loss: 7.5451 Accuracy: 0.0117
|
| 322 |
+
[32m[04/09 20:16:03 RAR]: [0mData (t): 0.0164, 175.17/s/gpu Batch (t): 1.4614 LR: 0.000349 Step: 14300 Loss: 7.5336 Accuracy: 0.0109
|
| 323 |
+
[32m[04/09 20:18:13 RAR]: [0mData (t): 0.0264, 152.29/s/gpu Batch (t): 1.6810 LR: 0.000349 Step: 14400 Loss: 7.6186 Accuracy: 0.0085
|
| 324 |
+
[32m[04/09 20:20:21 RAR]: [0mData (t): 1.5292, 124.79/s/gpu Batch (t): 2.0515 LR: 0.000349 Step: 14500 Loss: 7.5014 Accuracy: 0.0129
|
| 325 |
+
[32m[04/09 20:22:27 RAR]: [0mData (t): 0.0203, 476.30/s/gpu Batch (t): 0.5375 LR: 0.000349 Step: 14600 Loss: 7.5512 Accuracy: 0.0116
|
| 326 |
+
[32m[04/09 20:24:35 RAR]: [0mData (t): 0.0252, 478.23/s/gpu Batch (t): 0.5353 LR: 0.000349 Step: 14700 Loss: 7.5559 Accuracy: 0.0121
|
| 327 |
+
[32m[04/09 20:26:43 RAR]: [0mData (t): 0.0295, 466.32/s/gpu Batch (t): 0.5490 LR: 0.000349 Step: 14800 Loss: 7.5176 Accuracy: 0.0132
|
| 328 |
+
[32m[04/09 20:28:52 RAR]: [0mData (t): 0.1995, 354.09/s/gpu Batch (t): 0.7230 LR: 0.000349 Step: 14900 Loss: 7.5679 Accuracy: 0.0123
|
| 329 |
+
[32m[04/09 20:30:58 RAR]: [0mData (t): 0.1902, 287.43/s/gpu Batch (t): 0.8906 LR: 0.000349 Step: 15000 Loss: 7.5758 Accuracy: 0.0110
|
| 330 |
+
[32m[04/09 20:33:11 RAR]: [0mData (t): 0.0178, 473.38/s/gpu Batch (t): 0.5408 LR: 0.000349 Step: 15100 Loss: 7.5337 Accuracy: 0.0132
|
| 331 |
+
[32m[04/09 20:35:18 RAR]: [0mData (t): 0.0182, 471.67/s/gpu Batch (t): 0.5428 LR: 0.000349 Step: 15200 Loss: 7.5611 Accuracy: 0.0122
|
| 332 |
+
[32m[04/09 20:37:26 RAR]: [0mData (t): 0.0197, 458.53/s/gpu Batch (t): 0.5583 LR: 0.000349 Step: 15300 Loss: 7.5166 Accuracy: 0.0116
|
| 333 |
+
[32m[04/09 20:39:35 RAR]: [0mData (t): 0.0273, 189.32/s/gpu Batch (t): 1.3522 LR: 0.000349 Step: 15400 Loss: 7.5179 Accuracy: 0.0133
|
| 334 |
+
[32m[04/09 20:41:45 RAR]: [0mData (t): 0.0183, 119.68/s/gpu Batch (t): 2.1389 LR: 0.000349 Step: 15500 Loss: 7.5539 Accuracy: 0.0109
|
| 335 |
+
[32m[04/09 20:43:53 RAR]: [0mData (t): 0.0229, 145.22/s/gpu Batch (t): 1.7629 LR: 0.000349 Step: 15600 Loss: 7.5216 Accuracy: 0.0122
|
| 336 |
+
[32m[04/09 20:46:00 RAR]: [0mData (t): 0.0302, 477.56/s/gpu Batch (t): 0.5361 LR: 0.000349 Step: 15700 Loss: 7.4516 Accuracy: 0.0158
|
| 337 |
+
[32m[04/09 20:48:08 RAR]: [0mData (t): 0.0321, 461.15/s/gpu Batch (t): 0.5551 LR: 0.000349 Step: 15800 Loss: 7.4875 Accuracy: 0.0147
|
| 338 |
+
[32m[04/09 20:50:18 RAR]: [0mData (t): 0.0191, 479.63/s/gpu Batch (t): 0.5337 LR: 0.000349 Step: 15900 Loss: 7.4933 Accuracy: 0.0125
|
| 339 |
+
[32m[04/09 20:52:25 RAR]: [0mData (t): 0.0162, 130.70/s/gpu Batch (t): 1.9586 LR: 0.000349 Step: 16000 Loss: 7.5310 Accuracy: 0.0122
|
| 340 |
+
[32m[04/09 20:54:34 RAR]: [0mData (t): 0.0192, 472.66/s/gpu Batch (t): 0.5416 LR: 0.000349 Step: 16100 Loss: 7.5256 Accuracy: 0.0135
|
| 341 |
+
[32m[04/09 20:56:43 RAR]: [0mData (t): 0.0273, 161.22/s/gpu Batch (t): 1.5879 LR: 0.000349 Step: 16200 Loss: 7.5086 Accuracy: 0.0139
|
| 342 |
+
[32m[04/09 20:58:54 RAR]: [0mData (t): 0.0148, 185.58/s/gpu Batch (t): 1.3794 LR: 0.000349 Step: 16300 Loss: 7.4516 Accuracy: 0.0157
|
| 343 |
+
[32m[04/09 21:01:03 RAR]: [0mData (t): 0.0190, 362.92/s/gpu Batch (t): 0.7054 LR: 0.000349 Step: 16400 Loss: 7.5331 Accuracy: 0.0123
|
| 344 |
+
[32m[04/09 21:03:10 RAR]: [0mData (t): 1.0213, 101.27/s/gpu Batch (t): 2.5278 LR: 0.000349 Step: 16500 Loss: 7.4461 Accuracy: 0.0168
|
| 345 |
+
[32m[04/09 21:05:18 RAR]: [0mData (t): 0.0193, 253.79/s/gpu Batch (t): 1.0087 LR: 0.000349 Step: 16600 Loss: 7.4972 Accuracy: 0.0154
|
| 346 |
+
[32m[04/09 21:07:26 RAR]: [0mData (t): 0.0306, 99.16/s/gpu Batch (t): 2.5818 LR: 0.000349 Step: 16700 Loss: 7.5541 Accuracy: 0.0120
|
| 347 |
+
[32m[04/09 21:09:34 RAR]: [0mData (t): 0.0177, 275.37/s/gpu Batch (t): 0.9296 LR: 0.000349 Step: 16800 Loss: 7.5800 Accuracy: 0.0115
|
| 348 |
+
[32m[04/09 21:11:42 RAR]: [0mData (t): 0.0322, 121.95/s/gpu Batch (t): 2.0992 LR: 0.000349 Step: 16900 Loss: 7.4388 Accuracy: 0.0165
|
| 349 |
+
[32m[04/09 21:13:47 RAR]: [0mData (t): 0.0204, 369.09/s/gpu Batch (t): 0.6936 LR: 0.000349 Step: 17000 Loss: 7.4871 Accuracy: 0.0156
|
| 350 |
+
[32m[04/09 21:15:55 RAR]: [0mData (t): 0.0193, 477.89/s/gpu Batch (t): 0.5357 LR: 0.000349 Step: 17100 Loss: 7.5073 Accuracy: 0.0132
|
| 351 |
+
[32m[04/09 21:18:05 RAR]: [0mData (t): 0.0163, 468.58/s/gpu Batch (t): 0.5463 LR: 0.000349 Step: 17200 Loss: 7.4590 Accuracy: 0.0152
|
| 352 |
+
[32m[04/09 21:20:12 RAR]: [0mData (t): 0.0222, 477.64/s/gpu Batch (t): 0.5360 LR: 0.000349 Step: 17300 Loss: 7.4255 Accuracy: 0.0167
|
| 353 |
+
[32m[04/09 21:22:20 RAR]: [0mData (t): 1.3375, 137.86/s/gpu Batch (t): 1.8569 LR: 0.000349 Step: 17400 Loss: 7.4961 Accuracy: 0.0154
|
| 354 |
+
[32m[04/09 21:24:24 RAR]: [0mData (t): 0.0173, 435.59/s/gpu Batch (t): 0.5877 LR: 0.000349 Step: 17500 Loss: 7.4699 Accuracy: 0.0143
|
| 355 |
+
[32m[04/09 21:26:36 RAR]: [0mData (t): 0.0305, 456.26/s/gpu Batch (t): 0.5611 LR: 0.000349 Step: 17600 Loss: 7.5215 Accuracy: 0.0140
|
| 356 |
+
[32m[04/09 21:28:43 RAR]: [0mData (t): 0.0169, 205.39/s/gpu Batch (t): 1.2464 LR: 0.000349 Step: 17700 Loss: 7.4158 Accuracy: 0.0191
|
| 357 |
+
[32m[04/09 21:30:51 RAR]: [0mData (t): 1.1754, 151.73/s/gpu Batch (t): 1.6872 LR: 0.000349 Step: 17800 Loss: 7.4366 Accuracy: 0.0165
|
| 358 |
+
[32m[04/09 21:32:58 RAR]: [0mData (t): 0.1764, 133.33/s/gpu Batch (t): 1.9201 LR: 0.000349 Step: 17900 Loss: 7.4378 Accuracy: 0.0159
|
| 359 |
+
[32m[04/09 21:35:04 RAR]: [0mData (t): 0.0249, 185.19/s/gpu Batch (t): 1.3824 LR: 0.000349 Step: 18000 Loss: 7.4615 Accuracy: 0.0170
|
| 360 |
+
[32m[04/09 21:37:13 RAR]: [0mData (t): 0.0198, 134.90/s/gpu Batch (t): 1.8977 LR: 0.000349 Step: 18100 Loss: 7.4378 Accuracy: 0.0168
|
| 361 |
+
[32m[04/09 21:39:19 RAR]: [0mData (t): 0.5676, 234.94/s/gpu Batch (t): 1.0896 LR: 0.000349 Step: 18200 Loss: 7.4573 Accuracy: 0.0169
|
| 362 |
+
[32m[04/09 21:41:27 RAR]: [0mData (t): 0.0267, 211.53/s/gpu Batch (t): 1.2103 LR: 0.000349 Step: 18300 Loss: 7.4127 Accuracy: 0.0175
|
| 363 |
+
[32m[04/09 21:43:37 RAR]: [0mData (t): 0.0966, 135.10/s/gpu Batch (t): 1.8949 LR: 0.000349 Step: 18400 Loss: 7.5078 Accuracy: 0.0145
|
| 364 |
+
[32m[04/09 21:45:44 RAR]: [0mData (t): 1.5958, 121.78/s/gpu Batch (t): 2.1021 LR: 0.000349 Step: 18500 Loss: 7.4458 Accuracy: 0.0156
|
| 365 |
+
[32m[04/09 21:47:51 RAR]: [0mData (t): 0.4248, 274.79/s/gpu Batch (t): 0.9316 LR: 0.000349 Step: 18600 Loss: 7.5041 Accuracy: 0.0129
|
| 366 |
+
[32m[04/09 21:50:00 RAR]: [0mData (t): 0.0175, 313.76/s/gpu Batch (t): 0.8159 LR: 0.000349 Step: 18700 Loss: 7.4175 Accuracy: 0.0153
|
| 367 |
+
[32m[04/09 21:52:09 RAR]: [0mData (t): 0.0257, 301.78/s/gpu Batch (t): 0.8483 LR: 0.000349 Step: 18800 Loss: 7.4102 Accuracy: 0.0181
|
| 368 |
+
[32m[04/09 21:54:18 RAR]: [0mData (t): 1.3182, 139.55/s/gpu Batch (t): 1.8345 LR: 0.000349 Step: 18900 Loss: 7.4618 Accuracy: 0.0143
|
| 369 |
+
[32m[04/09 21:56:24 RAR]: [0mData (t): 0.7834, 181.30/s/gpu Batch (t): 1.4120 LR: 0.000349 Step: 19000 Loss: 7.3735 Accuracy: 0.0203
|
| 370 |
+
[32m[04/09 21:58:31 RAR]: [0mData (t): 0.0162, 467.23/s/gpu Batch (t): 0.5479 LR: 0.000349 Step: 19100 Loss: 7.4900 Accuracy: 0.0129
|
| 371 |
+
[32m[04/09 22:00:44 RAR]: [0mData (t): 0.0181, 125.27/s/gpu Batch (t): 2.0436 LR: 0.000349 Step: 19200 Loss: 7.4067 Accuracy: 0.0188
|
| 372 |
+
[32m[04/09 22:02:52 RAR]: [0mData (t): 0.0194, 131.64/s/gpu Batch (t): 1.9447 LR: 0.000349 Step: 19300 Loss: 7.4270 Accuracy: 0.0189
|
| 373 |
+
[32m[04/09 22:04:59 RAR]: [0mData (t): 0.0159, 102.92/s/gpu Batch (t): 2.4873 LR: 0.000349 Step: 19400 Loss: 7.4025 Accuracy: 0.0181
|
| 374 |
+
[32m[04/09 22:07:08 RAR]: [0mData (t): 0.0160, 133.97/s/gpu Batch (t): 1.9109 LR: 0.000349 Step: 19500 Loss: 7.4646 Accuracy: 0.0151
|
| 375 |
+
[32m[04/09 22:09:12 RAR]: [0mData (t): 0.0195, 472.05/s/gpu Batch (t): 0.5423 LR: 0.000349 Step: 19600 Loss: 7.3769 Accuracy: 0.0193
|
| 376 |
+
[32m[04/09 22:11:16 RAR]: [0mData (t): 0.0181, 140.97/s/gpu Batch (t): 1.8159 LR: 0.000349 Step: 19700 Loss: 7.3825 Accuracy: 0.0201
|
| 377 |
+
[32m[04/09 22:13:20 RAR]: [0mData (t): 0.9219, 119.45/s/gpu Batch (t): 2.1432 LR: 0.000349 Step: 19800 Loss: 7.4473 Accuracy: 0.0169
|
| 378 |
+
[32m[04/09 22:15:24 RAR]: [0mData (t): 0.0683, 438.75/s/gpu Batch (t): 0.5835 LR: 0.000349 Step: 19900 Loss: 7.3499 Accuracy: 0.0192
|
| 379 |
+
[32m[04/09 22:17:30 RAR]: [0mData (t): 0.0205, 190.14/s/gpu Batch (t): 1.3464 LR: 0.000349 Step: 20000 Loss: 7.4171 Accuracy: 0.0165
|
| 380 |
+
[32m[04/09 22:17:42 RAR]: [0mSaved state to stage1/rar_baseline/checkpoint-20000
|
| 381 |
+
[32m[04/09 22:20:34 RAR]: [0mData (t): 0.0176, 286.12/s/gpu Batch (t): 0.8947 LR: 0.000349 Step: 20100 Loss: 7.2654 Accuracy: 0.0245
|
| 382 |
+
[32m[04/09 22:22:40 RAR]: [0mData (t): 1.0574, 162.63/s/gpu Batch (t): 1.5741 LR: 0.000349 Step: 20200 Loss: 7.3570 Accuracy: 0.0210
|
| 383 |
+
[32m[04/09 22:24:45 RAR]: [0mData (t): 0.3605, 136.18/s/gpu Batch (t): 1.8798 LR: 0.000349 Step: 20300 Loss: 7.4934 Accuracy: 0.0144
|
| 384 |
+
[32m[04/09 22:26:51 RAR]: [0mData (t): 1.1197, 144.68/s/gpu Batch (t): 1.7694 LR: 0.000349 Step: 20400 Loss: 7.3943 Accuracy: 0.0188
|
| 385 |
+
[32m[04/09 22:28:56 RAR]: [0mData (t): 0.0213, 475.28/s/gpu Batch (t): 0.5386 LR: 0.000349 Step: 20500 Loss: 7.3888 Accuracy: 0.0186
|
| 386 |
+
[32m[04/09 22:31:00 RAR]: [0mData (t): 0.1139, 143.87/s/gpu Batch (t): 1.7793 LR: 0.000349 Step: 20600 Loss: 7.3411 Accuracy: 0.0225
|
| 387 |
+
[32m[04/09 22:33:06 RAR]: [0mData (t): 0.0300, 461.27/s/gpu Batch (t): 0.5550 LR: 0.000349 Step: 20700 Loss: 7.3853 Accuracy: 0.0174
|
| 388 |
+
[32m[04/09 22:35:12 RAR]: [0mData (t): 0.0262, 475.26/s/gpu Batch (t): 0.5387 LR: 0.000349 Step: 20800 Loss: 7.3841 Accuracy: 0.0210
|
| 389 |
+
[32m[04/09 22:37:17 RAR]: [0mData (t): 0.0215, 480.08/s/gpu Batch (t): 0.5332 LR: 0.000349 Step: 20900 Loss: 7.3635 Accuracy: 0.0210
|
stage1/rar_baseline/log1.txt
ADDED
|
File without changes
|
stage1/rar_baseline/log2.txt
ADDED
|
File without changes
|
stage1/rar_baseline/log3.txt
ADDED
|
File without changes
|
stage1/rar_ordertok/checkpoint-40000/ema_model/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4a4ae00c8789beda789bf2d4d15f56bdc100ceaa298a2392a519d601249275fa
|
| 3 |
+
size 1869255086
|
stage1/rar_ordertok/checkpoint-40000/metadata.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"global_step": 40000}
|
stage1/rar_ordertok/checkpoint-40000/optimizer.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3b33f6ced587f05fb7e3e8f0e11814c437645abe3b719e4530471da25a7b32f5
|
| 3 |
+
size 3738572541
|
stage1/rar_ordertok/checkpoint-40000/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:703c1fce0259a69a125a9454657bfd85a34d5f580906a8fd5859b79b2749cdbc
|
| 3 |
+
size 1869261230
|
stage1/rar_ordertok/checkpoint-40000/random_states_0.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a392ec6759485c46d5e6f447cb1c55abd77e6ee26be8ee92549214dfd50e4167
|
| 3 |
+
size 15124
|
stage1/rar_ordertok/checkpoint-40000/random_states_1.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e7391e074557ace3491d490a158aca05fd21ac872741d271dfb481b37caf6627
|
| 3 |
+
size 15124
|
stage1/rar_ordertok/checkpoint-40000/random_states_2.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c6e97b279a9d5545147dfd3ea3d378a8029e00c10112d0a05cdbef6a2909ef79
|
| 3 |
+
size 15124
|
stage1/rar_ordertok/checkpoint-40000/random_states_3.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c2f850dd4dd50f1bb4534258c2bb8a31110d4bddbbaad0ea242e66557601fbef
|
| 3 |
+
size 15060
|
stage1/rar_ordertok/checkpoint-40000/scheduler.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:04843554536f8e6800a3c88527835c6046e3124db2ed37d10a954e5a9dff0040
|
| 3 |
+
size 1064
|
stage1/rar_ordertok/checkpoint-40000/unwrapped_model/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:703c1fce0259a69a125a9454657bfd85a34d5f580906a8fd5859b79b2749cdbc
|
| 3 |
+
size 1869261230
|
stage1/rar_ordertok/config.yaml
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
experiment:
|
| 2 |
+
project: titok_ca_rar
|
| 3 |
+
name: titok_ca_rar
|
| 4 |
+
max_train_examples: 1281167
|
| 5 |
+
save_every: 10000
|
| 6 |
+
eval_every: 5000000
|
| 7 |
+
generate_every: 10000000
|
| 8 |
+
log_every: 100
|
| 9 |
+
log_grad_norm_every: 1000
|
| 10 |
+
resume: true
|
| 11 |
+
tokenizer_checkpoint: /mnt/yscfs/huangmengqi/projects/visual_tokenization/order-tok/train_stage1/ordertok_simvq/checkpoint-100000/ema_model/pytorch_model.bin
|
| 12 |
+
output_dir: stage1/rar_ordertok
|
| 13 |
+
logging_dir: stage1/rar_ordertok/logs
|
| 14 |
+
model:
|
| 15 |
+
vq_model:
|
| 16 |
+
codebook_size: 4096
|
| 17 |
+
token_size: 256
|
| 18 |
+
use_l2_norm: true
|
| 19 |
+
commitment_cost: 0.25
|
| 20 |
+
vit_enc_model_size: large
|
| 21 |
+
vit_dec_model_size: large
|
| 22 |
+
vit_enc_patch_size: 16
|
| 23 |
+
vit_dec_patch_size: 16
|
| 24 |
+
num_latent_tokens: 32
|
| 25 |
+
layers_x: 18
|
| 26 |
+
layers_token: 2
|
| 27 |
+
embedding_width: 1024
|
| 28 |
+
width: 256
|
| 29 |
+
finetune_decoder: false
|
| 30 |
+
pretrained_tokenizer_weight: maskgit-vqgan-imagenet-f16-256.bin
|
| 31 |
+
generator:
|
| 32 |
+
hidden_size: 1024
|
| 33 |
+
num_hidden_layers: 24
|
| 34 |
+
num_attention_heads: 16
|
| 35 |
+
intermediate_size: 4096
|
| 36 |
+
dropout: 0.1
|
| 37 |
+
attn_drop: 0.1
|
| 38 |
+
class_label_dropout: 0.1
|
| 39 |
+
image_seq_len: 32
|
| 40 |
+
condition_num_classes: 1000
|
| 41 |
+
randomize_temperature: 1.02
|
| 42 |
+
guidance_scale: 15.5
|
| 43 |
+
guidance_scale_pow: 2.5
|
| 44 |
+
use_checkpoint: false
|
| 45 |
+
randomness_anneal_start: 0
|
| 46 |
+
randomness_anneal_end: 0
|
| 47 |
+
dataset:
|
| 48 |
+
params:
|
| 49 |
+
train_shards_path_or_url: imagenet/imagenet1k-train-{0000..1023}.tar
|
| 50 |
+
eval_shards_path_or_url: imagenet/imagenet1k-validation-{00..63}.tar
|
| 51 |
+
num_workers_per_gpu: 2
|
| 52 |
+
preprocessing:
|
| 53 |
+
resize_shorter_edge: 256
|
| 54 |
+
crop_size: 256
|
| 55 |
+
random_crop: false
|
| 56 |
+
random_flip: true
|
| 57 |
+
optimizer:
|
| 58 |
+
name: adamw
|
| 59 |
+
params:
|
| 60 |
+
learning_rate: 0.00035
|
| 61 |
+
beta1: 0.9
|
| 62 |
+
beta2: 0.96
|
| 63 |
+
weight_decay: 0.03
|
| 64 |
+
lr_scheduler:
|
| 65 |
+
scheduler: cosine
|
| 66 |
+
params:
|
| 67 |
+
learning_rate: ${optimizer.params.learning_rate}
|
| 68 |
+
warmup_steps: 0
|
| 69 |
+
end_lr: 1.0e-05
|
| 70 |
+
training:
|
| 71 |
+
gradient_accumulation_steps: 1
|
| 72 |
+
per_gpu_batch_size: 256
|
| 73 |
+
mixed_precision: bf16
|
| 74 |
+
enable_tf32: true
|
| 75 |
+
enable_wandb: true
|
| 76 |
+
use_ema: true
|
| 77 |
+
seed: 42
|
| 78 |
+
max_train_steps: 500000
|
| 79 |
+
max_grad_norm: 1.0
|
| 80 |
+
config: configs/training/generator/rar.yaml
|
stage1/rar_ordertok/log0.txt
ADDED
|
@@ -0,0 +1,682 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[32m[04/09 19:44:04 RAR]: [0mSaving config to stage1/rar_ordertok/config.yaml
|
| 2 |
+
[32m[04/09 19:44:04 RAR]: [0mConfig:
|
| 3 |
+
experiment:
|
| 4 |
+
project: titok_ca_rar
|
| 5 |
+
name: titok_ca_rar
|
| 6 |
+
max_train_examples: 1281167
|
| 7 |
+
save_every: 10000
|
| 8 |
+
eval_every: 5000000
|
| 9 |
+
generate_every: 10000000
|
| 10 |
+
log_every: 100
|
| 11 |
+
log_grad_norm_every: 1000
|
| 12 |
+
resume: true
|
| 13 |
+
tokenizer_checkpoint: /mnt/yscfs/huangmengqi/projects/visual_tokenization/order-tok/train_stage1/ordertok_simvq/checkpoint-100000/ema_model/pytorch_model.bin
|
| 14 |
+
output_dir: stage1/rar_ordertok
|
| 15 |
+
logging_dir: stage1/rar_ordertok/logs
|
| 16 |
+
model:
|
| 17 |
+
vq_model:
|
| 18 |
+
codebook_size: 4096
|
| 19 |
+
token_size: 256
|
| 20 |
+
use_l2_norm: true
|
| 21 |
+
commitment_cost: 0.25
|
| 22 |
+
vit_enc_model_size: large
|
| 23 |
+
vit_dec_model_size: large
|
| 24 |
+
vit_enc_patch_size: 16
|
| 25 |
+
vit_dec_patch_size: 16
|
| 26 |
+
num_latent_tokens: 32
|
| 27 |
+
layers_x: 18
|
| 28 |
+
layers_token: 2
|
| 29 |
+
embedding_width: 1024
|
| 30 |
+
width: 256
|
| 31 |
+
finetune_decoder: false
|
| 32 |
+
pretrained_tokenizer_weight: maskgit-vqgan-imagenet-f16-256.bin
|
| 33 |
+
generator:
|
| 34 |
+
hidden_size: 1024
|
| 35 |
+
num_hidden_layers: 24
|
| 36 |
+
num_attention_heads: 16
|
| 37 |
+
intermediate_size: 4096
|
| 38 |
+
dropout: 0.1
|
| 39 |
+
attn_drop: 0.1
|
| 40 |
+
class_label_dropout: 0.1
|
| 41 |
+
image_seq_len: 32
|
| 42 |
+
condition_num_classes: 1000
|
| 43 |
+
randomize_temperature: 1.02
|
| 44 |
+
guidance_scale: 15.5
|
| 45 |
+
guidance_scale_pow: 2.5
|
| 46 |
+
use_checkpoint: false
|
| 47 |
+
randomness_anneal_start: 0
|
| 48 |
+
randomness_anneal_end: 0
|
| 49 |
+
dataset:
|
| 50 |
+
params:
|
| 51 |
+
train_shards_path_or_url: imagenet/imagenet1k-train-{0000..1023}.tar
|
| 52 |
+
eval_shards_path_or_url: imagenet/imagenet1k-validation-{00..63}.tar
|
| 53 |
+
num_workers_per_gpu: 2
|
| 54 |
+
preprocessing:
|
| 55 |
+
resize_shorter_edge: 256
|
| 56 |
+
crop_size: 256
|
| 57 |
+
random_crop: false
|
| 58 |
+
random_flip: true
|
| 59 |
+
optimizer:
|
| 60 |
+
name: adamw
|
| 61 |
+
params:
|
| 62 |
+
learning_rate: 0.00035
|
| 63 |
+
beta1: 0.9
|
| 64 |
+
beta2: 0.96
|
| 65 |
+
weight_decay: 0.03
|
| 66 |
+
lr_scheduler:
|
| 67 |
+
scheduler: cosine
|
| 68 |
+
params:
|
| 69 |
+
learning_rate: ${optimizer.params.learning_rate}
|
| 70 |
+
warmup_steps: 0
|
| 71 |
+
end_lr: 1.0e-05
|
| 72 |
+
training:
|
| 73 |
+
gradient_accumulation_steps: 1
|
| 74 |
+
per_gpu_batch_size: 256
|
| 75 |
+
mixed_precision: bf16
|
| 76 |
+
enable_tf32: true
|
| 77 |
+
enable_wandb: true
|
| 78 |
+
use_ema: true
|
| 79 |
+
seed: 42
|
| 80 |
+
max_train_steps: 500000
|
| 81 |
+
max_grad_norm: 1.0
|
| 82 |
+
config: configs/training/generator/rar.yaml
|
| 83 |
+
|
| 84 |
+
[32m[04/09 19:44:22 RAR]: [0mCreating model and loss module.
|
| 85 |
+
[32m[04/09 19:44:30 RAR]: [0mCreating optimizers.
|
| 86 |
+
[32m[04/09 19:44:30 RAR]: [0mCreating lr_schedulers.
|
| 87 |
+
[32m[04/09 19:44:30 RAR]: [0mCreating dataloaders.
|
| 88 |
+
[32m[04/09 19:44:30 RAR]: [0mPreparing model, optimizer and dataloaders
|
| 89 |
+
[32m[04/09 19:44:31 RAR]: [0m***** Running training *****
|
| 90 |
+
[32m[04/09 19:44:31 RAR]: [0m Num training steps = 500000
|
| 91 |
+
[32m[04/09 19:44:31 RAR]: [0m Gradient Accumulation steps = 1
|
| 92 |
+
[32m[04/09 19:44:31 RAR]: [0m Instantaneous batch size per gpu = 256
|
| 93 |
+
[32m[04/09 19:44:31 RAR]: [0m Total train batch size (w. parallel, distributed & accumulation) = 1024
|
| 94 |
+
[32m[04/09 19:44:31 RAR]: [0mAll globbed checkpoints are: []
|
| 95 |
+
[32m[04/09 19:44:31 RAR]: [0mTraining from scratch.
|
| 96 |
+
[32m[04/09 19:46:49 RAR]: [0mData (t): 0.4487, 319.72/s/gpu Batch (t): 0.8007 LR: 0.000350 Step: 100 Loss: 7.7370 Accuracy: 0.0491
|
| 97 |
+
[32m[04/09 19:49:03 RAR]: [0mData (t): 1.0769, 181.23/s/gpu Batch (t): 1.4126 LR: 0.000350 Step: 200 Loss: 7.6568 Accuracy: 0.0526
|
| 98 |
+
[32m[04/09 19:51:14 RAR]: [0mData (t): 1.1857, 167.17/s/gpu Batch (t): 1.5314 LR: 0.000350 Step: 300 Loss: 7.6095 Accuracy: 0.0514
|
| 99 |
+
[32m[04/09 19:53:25 RAR]: [0mData (t): 1.6314, 129.58/s/gpu Batch (t): 1.9756 LR: 0.000350 Step: 400 Loss: 7.5645 Accuracy: 0.0537
|
| 100 |
+
[32m[04/09 19:55:35 RAR]: [0mData (t): 1.4973, 138.30/s/gpu Batch (t): 1.8511 LR: 0.000350 Step: 500 Loss: 7.5316 Accuracy: 0.0526
|
| 101 |
+
[32m[04/09 19:57:48 RAR]: [0mData (t): 0.2407, 439.59/s/gpu Batch (t): 0.5824 LR: 0.000350 Step: 600 Loss: 7.5113 Accuracy: 0.0530
|
| 102 |
+
[32m[04/09 20:00:03 RAR]: [0mData (t): 0.2408, 147.40/s/gpu Batch (t): 1.7367 LR: 0.000350 Step: 700 Loss: 7.4910 Accuracy: 0.0542
|
| 103 |
+
[32m[04/09 20:02:09 RAR]: [0mData (t): 0.2405, 302.02/s/gpu Batch (t): 0.8476 LR: 0.000350 Step: 800 Loss: 7.4666 Accuracy: 0.0534
|
| 104 |
+
[32m[04/09 20:04:23 RAR]: [0mData (t): 0.2412, 128.00/s/gpu Batch (t): 2.0000 LR: 0.000350 Step: 900 Loss: 7.4636 Accuracy: 0.0530
|
| 105 |
+
[32m[04/09 20:06:30 RAR]: [0mData (t): 0.6515, 237.18/s/gpu Batch (t): 1.0794 LR: 0.000350 Step: 1000 Loss: 7.4374 Accuracy: 0.0528
|
| 106 |
+
[32m[04/09 20:08:40 RAR]: [0mData (t): 0.2405, 176.90/s/gpu Batch (t): 1.4471 LR: 0.000350 Step: 1100 Loss: 7.4485 Accuracy: 0.0531
|
| 107 |
+
[32m[04/09 20:10:50 RAR]: [0mData (t): 0.3293, 172.75/s/gpu Batch (t): 1.4820 LR: 0.000350 Step: 1200 Loss: 7.3568 Accuracy: 0.0562
|
| 108 |
+
[32m[04/09 20:13:01 RAR]: [0mData (t): 0.2398, 406.35/s/gpu Batch (t): 0.6300 LR: 0.000350 Step: 1300 Loss: 7.3918 Accuracy: 0.0541
|
| 109 |
+
[32m[04/09 20:15:09 RAR]: [0mData (t): 1.7374, 122.92/s/gpu Batch (t): 2.0827 LR: 0.000350 Step: 1400 Loss: 7.3811 Accuracy: 0.0552
|
| 110 |
+
[32m[04/09 20:17:14 RAR]: [0mData (t): 0.2421, 138.35/s/gpu Batch (t): 1.8503 LR: 0.000350 Step: 1500 Loss: 7.3792 Accuracy: 0.0542
|
| 111 |
+
[32m[04/09 20:19:26 RAR]: [0mData (t): 0.2396, 143.85/s/gpu Batch (t): 1.7797 LR: 0.000350 Step: 1600 Loss: 7.3884 Accuracy: 0.0532
|
| 112 |
+
[32m[04/09 20:21:35 RAR]: [0mData (t): 0.2420, 404.60/s/gpu Batch (t): 0.6327 LR: 0.000350 Step: 1700 Loss: 7.3565 Accuracy: 0.0541
|
| 113 |
+
[32m[04/09 20:23:43 RAR]: [0mData (t): 0.2386, 441.29/s/gpu Batch (t): 0.5801 LR: 0.000350 Step: 1800 Loss: 7.3548 Accuracy: 0.0542
|
| 114 |
+
[32m[04/09 20:25:52 RAR]: [0mData (t): 0.2425, 435.43/s/gpu Batch (t): 0.5879 LR: 0.000350 Step: 1900 Loss: 7.3178 Accuracy: 0.0555
|
| 115 |
+
[32m[04/09 20:28:00 RAR]: [0mData (t): 0.2429, 136.54/s/gpu Batch (t): 1.8749 LR: 0.000350 Step: 2000 Loss: 7.3259 Accuracy: 0.0549
|
| 116 |
+
[32m[04/09 20:30:08 RAR]: [0mData (t): 1.1349, 172.82/s/gpu Batch (t): 1.4813 LR: 0.000350 Step: 2100 Loss: 7.3119 Accuracy: 0.0563
|
| 117 |
+
[32m[04/09 20:32:19 RAR]: [0mData (t): 0.2428, 111.57/s/gpu Batch (t): 2.2945 LR: 0.000350 Step: 2200 Loss: 7.3042 Accuracy: 0.0549
|
| 118 |
+
[32m[04/09 20:34:30 RAR]: [0mData (t): 0.2418, 144.67/s/gpu Batch (t): 1.7696 LR: 0.000350 Step: 2300 Loss: 7.2698 Accuracy: 0.0565
|
| 119 |
+
[32m[04/09 20:36:38 RAR]: [0mData (t): 0.2406, 131.07/s/gpu Batch (t): 1.9531 LR: 0.000350 Step: 2400 Loss: 7.2964 Accuracy: 0.0548
|
| 120 |
+
[32m[04/09 20:38:47 RAR]: [0mData (t): 0.2416, 124.64/s/gpu Batch (t): 2.0539 LR: 0.000350 Step: 2500 Loss: 7.2797 Accuracy: 0.0550
|
| 121 |
+
[32m[04/09 20:41:01 RAR]: [0mData (t): 0.2407, 423.05/s/gpu Batch (t): 0.6051 LR: 0.000350 Step: 2600 Loss: 7.2481 Accuracy: 0.0582
|
| 122 |
+
[32m[04/09 20:43:12 RAR]: [0mData (t): 0.2430, 108.57/s/gpu Batch (t): 2.3580 LR: 0.000350 Step: 2700 Loss: 7.2596 Accuracy: 0.0573
|
| 123 |
+
[32m[04/09 20:45:19 RAR]: [0mData (t): 0.2425, 428.26/s/gpu Batch (t): 0.5978 LR: 0.000350 Step: 2800 Loss: 7.2665 Accuracy: 0.0542
|
| 124 |
+
[32m[04/09 20:47:29 RAR]: [0mData (t): 1.6807, 125.75/s/gpu Batch (t): 2.0357 LR: 0.000350 Step: 2900 Loss: 7.2275 Accuracy: 0.0576
|
| 125 |
+
[32m[04/09 20:49:40 RAR]: [0mData (t): 1.6134, 125.35/s/gpu Batch (t): 2.0423 LR: 0.000350 Step: 3000 Loss: 7.2345 Accuracy: 0.0559
|
| 126 |
+
[32m[04/09 20:51:48 RAR]: [0mData (t): 0.2428, 428.26/s/gpu Batch (t): 0.5978 LR: 0.000350 Step: 3100 Loss: 7.1989 Accuracy: 0.0567
|
| 127 |
+
[32m[04/09 20:53:58 RAR]: [0mData (t): 0.2404, 441.79/s/gpu Batch (t): 0.5795 LR: 0.000350 Step: 3200 Loss: 7.2079 Accuracy: 0.0564
|
| 128 |
+
[32m[04/09 20:56:07 RAR]: [0mData (t): 0.2403, 418.98/s/gpu Batch (t): 0.6110 LR: 0.000350 Step: 3300 Loss: 7.1680 Accuracy: 0.0583
|
| 129 |
+
[32m[04/09 20:58:18 RAR]: [0mData (t): 0.2408, 410.66/s/gpu Batch (t): 0.6234 LR: 0.000350 Step: 3400 Loss: 7.1991 Accuracy: 0.0557
|
| 130 |
+
[32m[04/09 21:00:28 RAR]: [0mData (t): 0.2406, 162.88/s/gpu Batch (t): 1.5717 LR: 0.000350 Step: 3500 Loss: 7.1996 Accuracy: 0.0582
|
| 131 |
+
[32m[04/09 21:02:34 RAR]: [0mData (t): 0.2409, 429.21/s/gpu Batch (t): 0.5964 LR: 0.000350 Step: 3600 Loss: 7.1947 Accuracy: 0.0569
|
| 132 |
+
[32m[04/09 21:04:45 RAR]: [0mData (t): 0.2403, 427.45/s/gpu Batch (t): 0.5989 LR: 0.000350 Step: 3700 Loss: 7.2070 Accuracy: 0.0589
|
| 133 |
+
[32m[04/09 21:06:58 RAR]: [0mData (t): 0.2427, 434.82/s/gpu Batch (t): 0.5888 LR: 0.000350 Step: 3800 Loss: 7.1157 Accuracy: 0.0593
|
| 134 |
+
[32m[04/09 21:09:04 RAR]: [0mData (t): 0.2411, 158.39/s/gpu Batch (t): 1.6162 LR: 0.000350 Step: 3900 Loss: 7.1717 Accuracy: 0.0576
|
| 135 |
+
[32m[04/09 21:11:14 RAR]: [0mData (t): 0.2393, 351.87/s/gpu Batch (t): 0.7275 LR: 0.000350 Step: 4000 Loss: 7.1131 Accuracy: 0.0594
|
| 136 |
+
[32m[04/09 21:13:25 RAR]: [0mData (t): 0.2421, 426.85/s/gpu Batch (t): 0.5997 LR: 0.000350 Step: 4100 Loss: 7.1558 Accuracy: 0.0574
|
| 137 |
+
[32m[04/09 21:15:34 RAR]: [0mData (t): 0.2409, 129.27/s/gpu Batch (t): 1.9804 LR: 0.000350 Step: 4200 Loss: 7.1579 Accuracy: 0.0564
|
| 138 |
+
[32m[04/09 21:17:44 RAR]: [0mData (t): 0.2407, 136.31/s/gpu Batch (t): 1.8780 LR: 0.000350 Step: 4300 Loss: 7.1697 Accuracy: 0.0571
|
| 139 |
+
[32m[04/09 21:19:51 RAR]: [0mData (t): 1.9347, 112.28/s/gpu Batch (t): 2.2801 LR: 0.000350 Step: 4400 Loss: 7.1645 Accuracy: 0.0573
|
| 140 |
+
[32m[04/09 21:21:57 RAR]: [0mData (t): 0.2421, 227.82/s/gpu Batch (t): 1.1237 LR: 0.000350 Step: 4500 Loss: 7.1225 Accuracy: 0.0578
|
| 141 |
+
[32m[04/09 21:24:04 RAR]: [0mData (t): 0.2413, 428.63/s/gpu Batch (t): 0.5973 LR: 0.000350 Step: 4600 Loss: 7.1381 Accuracy: 0.0568
|
| 142 |
+
[32m[04/09 21:26:14 RAR]: [0mData (t): 0.2401, 430.03/s/gpu Batch (t): 0.5953 LR: 0.000350 Step: 4700 Loss: 7.1175 Accuracy: 0.0582
|
| 143 |
+
[32m[04/09 21:28:23 RAR]: [0mData (t): 0.2444, 432.44/s/gpu Batch (t): 0.5920 LR: 0.000350 Step: 4800 Loss: 7.1817 Accuracy: 0.0566
|
| 144 |
+
[32m[04/09 21:30:32 RAR]: [0mData (t): 0.2395, 136.53/s/gpu Batch (t): 1.8750 LR: 0.000350 Step: 4900 Loss: 7.1153 Accuracy: 0.0582
|
| 145 |
+
[32m[04/09 21:32:39 RAR]: [0mData (t): 0.2412, 361.58/s/gpu Batch (t): 0.7080 LR: 0.000350 Step: 5000 Loss: 7.1364 Accuracy: 0.0586
|
| 146 |
+
[32m[04/09 21:34:51 RAR]: [0mData (t): 0.2403, 283.39/s/gpu Batch (t): 0.9033 LR: 0.000350 Step: 5100 Loss: 7.0637 Accuracy: 0.0602
|
| 147 |
+
[32m[04/09 21:37:03 RAR]: [0mData (t): 1.2199, 163.15/s/gpu Batch (t): 1.5691 LR: 0.000350 Step: 5200 Loss: 7.0235 Accuracy: 0.0636
|
| 148 |
+
[32m[04/09 21:39:10 RAR]: [0mData (t): 1.0205, 186.70/s/gpu Batch (t): 1.3712 LR: 0.000350 Step: 5300 Loss: 7.0862 Accuracy: 0.0587
|
| 149 |
+
[32m[04/09 21:41:16 RAR]: [0mData (t): 0.2420, 428.57/s/gpu Batch (t): 0.5973 LR: 0.000350 Step: 5400 Loss: 7.0640 Accuracy: 0.0609
|
| 150 |
+
[32m[04/09 21:43:24 RAR]: [0mData (t): 0.2407, 371.48/s/gpu Batch (t): 0.6891 LR: 0.000350 Step: 5500 Loss: 7.0628 Accuracy: 0.0611
|
| 151 |
+
[32m[04/09 21:45:35 RAR]: [0mData (t): 0.2408, 130.00/s/gpu Batch (t): 1.9693 LR: 0.000350 Step: 5600 Loss: 7.0531 Accuracy: 0.0625
|
| 152 |
+
[32m[04/09 21:47:42 RAR]: [0mData (t): 0.2397, 131.43/s/gpu Batch (t): 1.9479 LR: 0.000350 Step: 5700 Loss: 7.0584 Accuracy: 0.0609
|
| 153 |
+
[32m[04/09 21:49:49 RAR]: [0mData (t): 0.2413, 115.24/s/gpu Batch (t): 2.2215 LR: 0.000350 Step: 5800 Loss: 7.0402 Accuracy: 0.0612
|
| 154 |
+
[32m[04/09 21:51:55 RAR]: [0mData (t): 0.2405, 422.41/s/gpu Batch (t): 0.6060 LR: 0.000350 Step: 5900 Loss: 7.0414 Accuracy: 0.0611
|
| 155 |
+
[32m[04/09 21:54:03 RAR]: [0mData (t): 0.2411, 249.39/s/gpu Batch (t): 1.0265 LR: 0.000350 Step: 6000 Loss: 7.0580 Accuracy: 0.0610
|
| 156 |
+
[32m[04/09 21:56:09 RAR]: [0mData (t): 0.4045, 343.08/s/gpu Batch (t): 0.7462 LR: 0.000350 Step: 6100 Loss: 7.0523 Accuracy: 0.0610
|
| 157 |
+
[32m[04/09 21:58:15 RAR]: [0mData (t): 0.2417, 439.49/s/gpu Batch (t): 0.5825 LR: 0.000350 Step: 6200 Loss: 7.0774 Accuracy: 0.0597
|
| 158 |
+
[32m[04/09 22:00:28 RAR]: [0mData (t): 1.7282, 122.68/s/gpu Batch (t): 2.0867 LR: 0.000350 Step: 6300 Loss: 7.0397 Accuracy: 0.0609
|
| 159 |
+
[32m[04/09 22:02:35 RAR]: [0mData (t): 1.2056, 162.91/s/gpu Batch (t): 1.5714 LR: 0.000350 Step: 6400 Loss: 7.0136 Accuracy: 0.0623
|
| 160 |
+
[32m[04/09 22:04:44 RAR]: [0mData (t): 0.2426, 110.19/s/gpu Batch (t): 2.3232 LR: 0.000350 Step: 6500 Loss: 7.0225 Accuracy: 0.0597
|
| 161 |
+
[32m[04/09 22:06:49 RAR]: [0mData (t): 1.5437, 134.59/s/gpu Batch (t): 1.9020 LR: 0.000350 Step: 6600 Loss: 6.9971 Accuracy: 0.0624
|
| 162 |
+
[32m[04/09 22:08:54 RAR]: [0mData (t): 0.8011, 218.88/s/gpu Batch (t): 1.1696 LR: 0.000350 Step: 6700 Loss: 7.0193 Accuracy: 0.0623
|
| 163 |
+
[32m[04/09 22:11:01 RAR]: [0mData (t): 0.2411, 133.29/s/gpu Batch (t): 1.9206 LR: 0.000350 Step: 6800 Loss: 6.9986 Accuracy: 0.0627
|
| 164 |
+
[32m[04/09 22:13:06 RAR]: [0mData (t): 0.9698, 137.20/s/gpu Batch (t): 1.8659 LR: 0.000350 Step: 6900 Loss: 6.9828 Accuracy: 0.0634
|
| 165 |
+
[32m[04/09 22:15:13 RAR]: [0mData (t): 0.2421, 347.98/s/gpu Batch (t): 0.7357 LR: 0.000350 Step: 7000 Loss: 6.9813 Accuracy: 0.0639
|
| 166 |
+
[32m[04/09 22:17:20 RAR]: [0mData (t): 0.2418, 404.03/s/gpu Batch (t): 0.6336 LR: 0.000350 Step: 7100 Loss: 7.0194 Accuracy: 0.0625
|
| 167 |
+
[32m[04/09 22:19:22 RAR]: [0mData (t): 1.9369, 111.86/s/gpu Batch (t): 2.2886 LR: 0.000350 Step: 7200 Loss: 6.9871 Accuracy: 0.0643
|
| 168 |
+
[32m[04/09 22:21:26 RAR]: [0mData (t): 0.2422, 142.53/s/gpu Batch (t): 1.7961 LR: 0.000350 Step: 7300 Loss: 6.9916 Accuracy: 0.0632
|
| 169 |
+
[32m[04/09 22:23:33 RAR]: [0mData (t): 0.2437, 264.25/s/gpu Batch (t): 0.9688 LR: 0.000350 Step: 7400 Loss: 6.9267 Accuracy: 0.0660
|
| 170 |
+
[32m[04/09 22:25:40 RAR]: [0mData (t): 0.2416, 106.36/s/gpu Batch (t): 2.4069 LR: 0.000350 Step: 7500 Loss: 6.9249 Accuracy: 0.0652
|
| 171 |
+
[32m[04/09 22:27:51 RAR]: [0mData (t): 1.1477, 171.37/s/gpu Batch (t): 1.4939 LR: 0.000350 Step: 7600 Loss: 6.9094 Accuracy: 0.0672
|
| 172 |
+
[32m[04/09 22:30:00 RAR]: [0mData (t): 0.2416, 434.23/s/gpu Batch (t): 0.5896 LR: 0.000350 Step: 7700 Loss: 6.8849 Accuracy: 0.0692
|
| 173 |
+
[32m[04/09 22:32:04 RAR]: [0mData (t): 0.4483, 321.82/s/gpu Batch (t): 0.7955 LR: 0.000350 Step: 7800 Loss: 6.9377 Accuracy: 0.0655
|
| 174 |
+
[32m[04/09 22:34:11 RAR]: [0mData (t): 0.2408, 136.70/s/gpu Batch (t): 1.8727 LR: 0.000350 Step: 7900 Loss: 6.9585 Accuracy: 0.0673
|
| 175 |
+
[32m[04/09 22:36:19 RAR]: [0mData (t): 0.2423, 113.28/s/gpu Batch (t): 2.2600 LR: 0.000350 Step: 8000 Loss: 6.9123 Accuracy: 0.0677
|
| 176 |
+
[32m[04/09 22:38:26 RAR]: [0mData (t): 0.2411, 153.56/s/gpu Batch (t): 1.6671 LR: 0.000350 Step: 8100 Loss: 6.9373 Accuracy: 0.0644
|
| 177 |
+
[32m[04/09 22:40:23 RAR]: [0mData (t): 0.2423, 149.97/s/gpu Batch (t): 1.7070 LR: 0.000350 Step: 8200 Loss: 6.9174 Accuracy: 0.0662
|
| 178 |
+
[32m[04/09 22:42:23 RAR]: [0mData (t): 0.2424, 429.06/s/gpu Batch (t): 0.5967 LR: 0.000350 Step: 8300 Loss: 6.8878 Accuracy: 0.0683
|
| 179 |
+
[32m[04/09 22:44:19 RAR]: [0mData (t): 0.2402, 437.44/s/gpu Batch (t): 0.5852 LR: 0.000350 Step: 8400 Loss: 6.9545 Accuracy: 0.0645
|
| 180 |
+
[32m[04/09 22:46:17 RAR]: [0mData (t): 0.2422, 438.22/s/gpu Batch (t): 0.5842 LR: 0.000350 Step: 8500 Loss: 6.9301 Accuracy: 0.0666
|
| 181 |
+
[32m[04/09 22:48:15 RAR]: [0mData (t): 0.2440, 414.68/s/gpu Batch (t): 0.6173 LR: 0.000350 Step: 8600 Loss: 6.9468 Accuracy: 0.0650
|
| 182 |
+
[32m[04/09 22:50:14 RAR]: [0mData (t): 0.2427, 142.32/s/gpu Batch (t): 1.7988 LR: 0.000350 Step: 8700 Loss: 6.9360 Accuracy: 0.0670
|
| 183 |
+
[32m[04/09 22:52:14 RAR]: [0mData (t): 0.2403, 427.70/s/gpu Batch (t): 0.5986 LR: 0.000350 Step: 8800 Loss: 6.7623 Accuracy: 0.0772
|
| 184 |
+
[32m[04/09 22:54:13 RAR]: [0mData (t): 0.2437, 432.72/s/gpu Batch (t): 0.5916 LR: 0.000350 Step: 8900 Loss: 6.9327 Accuracy: 0.0658
|
| 185 |
+
[32m[04/09 22:56:12 RAR]: [0mData (t): 1.2269, 155.19/s/gpu Batch (t): 1.6496 LR: 0.000350 Step: 9000 Loss: 6.9053 Accuracy: 0.0670
|
| 186 |
+
[32m[04/09 22:58:08 RAR]: [0mData (t): 0.8591, 212.44/s/gpu Batch (t): 1.2050 LR: 0.000350 Step: 9100 Loss: 6.9131 Accuracy: 0.0656
|
| 187 |
+
[32m[04/09 23:00:06 RAR]: [0mData (t): 0.2420, 433.90/s/gpu Batch (t): 0.5900 LR: 0.000350 Step: 9200 Loss: 6.8654 Accuracy: 0.0680
|
| 188 |
+
[32m[04/09 23:02:02 RAR]: [0mData (t): 0.2399, 416.51/s/gpu Batch (t): 0.6146 LR: 0.000350 Step: 9300 Loss: 6.8909 Accuracy: 0.0666
|
| 189 |
+
[32m[04/09 23:04:03 RAR]: [0mData (t): 1.2222, 162.87/s/gpu Batch (t): 1.5718 LR: 0.000350 Step: 9400 Loss: 6.8625 Accuracy: 0.0675
|
| 190 |
+
[32m[04/09 23:05:59 RAR]: [0mData (t): 0.4683, 163.30/s/gpu Batch (t): 1.5677 LR: 0.000350 Step: 9500 Loss: 6.8425 Accuracy: 0.0697
|
| 191 |
+
[32m[04/09 23:07:57 RAR]: [0mData (t): 0.2414, 433.74/s/gpu Batch (t): 0.5902 LR: 0.000350 Step: 9600 Loss: 6.8230 Accuracy: 0.0722
|
| 192 |
+
[32m[04/09 23:09:56 RAR]: [0mData (t): 0.2396, 424.66/s/gpu Batch (t): 0.6028 LR: 0.000350 Step: 9700 Loss: 6.8011 Accuracy: 0.0724
|
| 193 |
+
[32m[04/09 23:11:54 RAR]: [0mData (t): 1.2448, 160.32/s/gpu Batch (t): 1.5968 LR: 0.000350 Step: 9800 Loss: 6.9288 Accuracy: 0.0665
|
| 194 |
+
[32m[04/09 23:13:50 RAR]: [0mData (t): 1.2820, 157.14/s/gpu Batch (t): 1.6291 LR: 0.000350 Step: 9900 Loss: 6.9490 Accuracy: 0.0639
|
| 195 |
+
[32m[04/09 23:15:49 RAR]: [0mData (t): 1.1791, 159.54/s/gpu Batch (t): 1.6047 LR: 0.000350 Step: 10000 Loss: 6.9179 Accuracy: 0.0678
|
| 196 |
+
[32m[04/09 23:16:00 RAR]: [0mSaved state to stage1/rar_ordertok/checkpoint-10000
|
| 197 |
+
[32m[04/09 23:24:12 RAR]: [0mSaving config to stage1/rar_ordertok/config.yaml
|
| 198 |
+
[32m[04/09 23:24:12 RAR]: [0mConfig:
|
| 199 |
+
experiment:
|
| 200 |
+
project: titok_ca_rar
|
| 201 |
+
name: titok_ca_rar
|
| 202 |
+
max_train_examples: 1281167
|
| 203 |
+
save_every: 10000
|
| 204 |
+
eval_every: 5000000
|
| 205 |
+
generate_every: 10000000
|
| 206 |
+
log_every: 100
|
| 207 |
+
log_grad_norm_every: 1000
|
| 208 |
+
resume: true
|
| 209 |
+
tokenizer_checkpoint: /mnt/yscfs/huangmengqi/projects/visual_tokenization/order-tok/train_stage1/ordertok_simvq/checkpoint-100000/ema_model/pytorch_model.bin
|
| 210 |
+
output_dir: stage1/rar_ordertok
|
| 211 |
+
logging_dir: stage1/rar_ordertok/logs
|
| 212 |
+
model:
|
| 213 |
+
vq_model:
|
| 214 |
+
codebook_size: 4096
|
| 215 |
+
token_size: 256
|
| 216 |
+
use_l2_norm: true
|
| 217 |
+
commitment_cost: 0.25
|
| 218 |
+
vit_enc_model_size: large
|
| 219 |
+
vit_dec_model_size: large
|
| 220 |
+
vit_enc_patch_size: 16
|
| 221 |
+
vit_dec_patch_size: 16
|
| 222 |
+
num_latent_tokens: 32
|
| 223 |
+
layers_x: 18
|
| 224 |
+
layers_token: 2
|
| 225 |
+
embedding_width: 1024
|
| 226 |
+
width: 256
|
| 227 |
+
finetune_decoder: false
|
| 228 |
+
pretrained_tokenizer_weight: maskgit-vqgan-imagenet-f16-256.bin
|
| 229 |
+
generator:
|
| 230 |
+
hidden_size: 1024
|
| 231 |
+
num_hidden_layers: 24
|
| 232 |
+
num_attention_heads: 16
|
| 233 |
+
intermediate_size: 4096
|
| 234 |
+
dropout: 0.1
|
| 235 |
+
attn_drop: 0.1
|
| 236 |
+
class_label_dropout: 0.1
|
| 237 |
+
image_seq_len: 32
|
| 238 |
+
condition_num_classes: 1000
|
| 239 |
+
randomize_temperature: 1.02
|
| 240 |
+
guidance_scale: 15.5
|
| 241 |
+
guidance_scale_pow: 2.5
|
| 242 |
+
use_checkpoint: false
|
| 243 |
+
randomness_anneal_start: 0
|
| 244 |
+
randomness_anneal_end: 0
|
| 245 |
+
dataset:
|
| 246 |
+
params:
|
| 247 |
+
train_shards_path_or_url: imagenet/imagenet1k-train-{0000..1023}.tar
|
| 248 |
+
eval_shards_path_or_url: imagenet/imagenet1k-validation-{00..63}.tar
|
| 249 |
+
num_workers_per_gpu: 2
|
| 250 |
+
preprocessing:
|
| 251 |
+
resize_shorter_edge: 256
|
| 252 |
+
crop_size: 256
|
| 253 |
+
random_crop: false
|
| 254 |
+
random_flip: true
|
| 255 |
+
optimizer:
|
| 256 |
+
name: adamw
|
| 257 |
+
params:
|
| 258 |
+
learning_rate: 0.00035
|
| 259 |
+
beta1: 0.9
|
| 260 |
+
beta2: 0.96
|
| 261 |
+
weight_decay: 0.03
|
| 262 |
+
lr_scheduler:
|
| 263 |
+
scheduler: cosine
|
| 264 |
+
params:
|
| 265 |
+
learning_rate: ${optimizer.params.learning_rate}
|
| 266 |
+
warmup_steps: 0
|
| 267 |
+
end_lr: 1.0e-05
|
| 268 |
+
training:
|
| 269 |
+
gradient_accumulation_steps: 1
|
| 270 |
+
per_gpu_batch_size: 256
|
| 271 |
+
mixed_precision: bf16
|
| 272 |
+
enable_tf32: true
|
| 273 |
+
enable_wandb: true
|
| 274 |
+
use_ema: true
|
| 275 |
+
seed: 42
|
| 276 |
+
max_train_steps: 500000
|
| 277 |
+
max_grad_norm: 1.0
|
| 278 |
+
config: configs/training/generator/rar.yaml
|
| 279 |
+
|
| 280 |
+
[32m[04/09 23:24:36 RAR]: [0mCreating model and loss module.
|
| 281 |
+
[32m[04/09 23:24:44 RAR]: [0mCreating optimizers.
|
| 282 |
+
[32m[04/09 23:24:44 RAR]: [0mCreating lr_schedulers.
|
| 283 |
+
[32m[04/09 23:24:44 RAR]: [0mCreating dataloaders.
|
| 284 |
+
[32m[04/09 23:24:44 RAR]: [0mPreparing model, optimizer and dataloaders
|
| 285 |
+
[32m[04/09 23:24:45 RAR]: [0m***** Running training *****
|
| 286 |
+
[32m[04/09 23:24:45 RAR]: [0m Num training steps = 500000
|
| 287 |
+
[32m[04/09 23:24:45 RAR]: [0m Gradient Accumulation steps = 1
|
| 288 |
+
[32m[04/09 23:24:45 RAR]: [0m Instantaneous batch size per gpu = 256
|
| 289 |
+
[32m[04/09 23:24:45 RAR]: [0m Total train batch size (w. parallel, distributed & accumulation) = 1024
|
| 290 |
+
[32m[04/09 23:24:45 RAR]: [0mAll globbed checkpoints are: ['stage1/rar_ordertok/checkpoint-10000']
|
| 291 |
+
[32m[04/09 23:24:45 RAR]: [0mLoad checkpoint from stage1/rar_ordertok/checkpoint-10000
|
| 292 |
+
[32m[04/09 23:25:18 RAR]: [0mResuming at global_step 10000
|
| 293 |
+
[32m[04/09 23:27:22 RAR]: [0mData (t): 0.2394, 302.35/s/gpu Batch (t): 0.8467 LR: 0.000350 Step: 10100 Loss: 6.7939 Accuracy: 0.0742
|
| 294 |
+
[32m[04/09 23:29:23 RAR]: [0mData (t): 0.2827, 148.87/s/gpu Batch (t): 1.7197 LR: 0.000350 Step: 10200 Loss: 6.8112 Accuracy: 0.0714
|
| 295 |
+
[32m[04/09 23:31:24 RAR]: [0mData (t): 0.2391, 174.19/s/gpu Batch (t): 1.4696 LR: 0.000350 Step: 10300 Loss: 6.8923 Accuracy: 0.0669
|
| 296 |
+
[32m[04/09 23:33:20 RAR]: [0mData (t): 0.2391, 201.44/s/gpu Batch (t): 1.2709 LR: 0.000350 Step: 10400 Loss: 6.8976 Accuracy: 0.0654
|
| 297 |
+
[32m[04/09 23:35:18 RAR]: [0mData (t): 0.2408, 393.32/s/gpu Batch (t): 0.6509 LR: 0.000350 Step: 10500 Loss: 6.8749 Accuracy: 0.0689
|
| 298 |
+
[32m[04/09 23:37:18 RAR]: [0mData (t): 0.2396, 411.08/s/gpu Batch (t): 0.6228 LR: 0.000350 Step: 10600 Loss: 6.7787 Accuracy: 0.0739
|
| 299 |
+
[32m[04/09 23:39:15 RAR]: [0mData (t): 0.2406, 419.98/s/gpu Batch (t): 0.6095 LR: 0.000350 Step: 10700 Loss: 6.8481 Accuracy: 0.0681
|
| 300 |
+
[32m[04/09 23:41:14 RAR]: [0mData (t): 0.2399, 119.44/s/gpu Batch (t): 2.1433 LR: 0.000350 Step: 10800 Loss: 6.8456 Accuracy: 0.0693
|
| 301 |
+
[32m[04/09 23:43:11 RAR]: [0mData (t): 1.0869, 134.38/s/gpu Batch (t): 1.9051 LR: 0.000350 Step: 10900 Loss: 6.8538 Accuracy: 0.0685
|
| 302 |
+
[32m[04/09 23:45:02 RAR]: [0mData (t): 0.2392, 182.20/s/gpu Batch (t): 1.4050 LR: 0.000350 Step: 11000 Loss: 6.8128 Accuracy: 0.0711
|
| 303 |
+
[32m[04/09 23:47:03 RAR]: [0mData (t): 1.2431, 158.32/s/gpu Batch (t): 1.6169 LR: 0.000350 Step: 11100 Loss: 6.8337 Accuracy: 0.0722
|
| 304 |
+
[32m[04/09 23:49:15 RAR]: [0mData (t): 0.2409, 409.80/s/gpu Batch (t): 0.6247 LR: 0.000350 Step: 11200 Loss: 6.8296 Accuracy: 0.0716
|
| 305 |
+
[32m[04/09 23:51:30 RAR]: [0mData (t): 0.2402, 408.05/s/gpu Batch (t): 0.6274 LR: 0.000350 Step: 11300 Loss: 6.8048 Accuracy: 0.0713
|
| 306 |
+
[32m[04/09 23:53:38 RAR]: [0mData (t): 0.2407, 241.55/s/gpu Batch (t): 1.0598 LR: 0.000350 Step: 11400 Loss: 6.7893 Accuracy: 0.0728
|
| 307 |
+
[32m[04/09 23:55:46 RAR]: [0mData (t): 1.6834, 124.96/s/gpu Batch (t): 2.0486 LR: 0.000350 Step: 11500 Loss: 6.8478 Accuracy: 0.0695
|
| 308 |
+
[32m[04/09 23:57:49 RAR]: [0mData (t): 0.2407, 415.18/s/gpu Batch (t): 0.6166 LR: 0.000350 Step: 11600 Loss: 6.7026 Accuracy: 0.0788
|
| 309 |
+
[32m[04/09 23:59:51 RAR]: [0mData (t): 0.2394, 402.73/s/gpu Batch (t): 0.6357 LR: 0.000350 Step: 11700 Loss: 6.7698 Accuracy: 0.0728
|
| 310 |
+
[32m[04/10 00:01:53 RAR]: [0mData (t): 0.2440, 424.36/s/gpu Batch (t): 0.6033 LR: 0.000350 Step: 11800 Loss: 6.8780 Accuracy: 0.0670
|
| 311 |
+
[32m[04/10 00:03:55 RAR]: [0mData (t): 0.2403, 220.63/s/gpu Batch (t): 1.1603 LR: 0.000350 Step: 11900 Loss: 6.8096 Accuracy: 0.0728
|
| 312 |
+
[32m[04/10 00:05:56 RAR]: [0mData (t): 0.2406, 239.59/s/gpu Batch (t): 1.0685 LR: 0.000350 Step: 12000 Loss: 6.7901 Accuracy: 0.0722
|
| 313 |
+
[32m[04/10 00:07:57 RAR]: [0mData (t): 0.2404, 418.47/s/gpu Batch (t): 0.6117 LR: 0.000350 Step: 12100 Loss: 6.7951 Accuracy: 0.0732
|
| 314 |
+
[32m[04/10 00:09:58 RAR]: [0mData (t): 0.2392, 419.58/s/gpu Batch (t): 0.6101 LR: 0.000350 Step: 12200 Loss: 6.6813 Accuracy: 0.0811
|
| 315 |
+
[32m[04/10 00:11:58 RAR]: [0mData (t): 0.2398, 414.99/s/gpu Batch (t): 0.6169 LR: 0.000349 Step: 12300 Loss: 6.7997 Accuracy: 0.0754
|
| 316 |
+
[32m[04/10 00:13:59 RAR]: [0mData (t): 0.2390, 422.25/s/gpu Batch (t): 0.6063 LR: 0.000349 Step: 12400 Loss: 6.7223 Accuracy: 0.0775
|
| 317 |
+
[32m[04/10 00:15:58 RAR]: [0mData (t): 0.2389, 390.46/s/gpu Batch (t): 0.6556 LR: 0.000349 Step: 12500 Loss: 6.7630 Accuracy: 0.0750
|
| 318 |
+
[32m[04/10 00:18:02 RAR]: [0mData (t): 0.2391, 414.58/s/gpu Batch (t): 0.6175 LR: 0.000349 Step: 12600 Loss: 6.8571 Accuracy: 0.0694
|
| 319 |
+
[32m[04/10 00:20:04 RAR]: [0mData (t): 0.2424, 322.32/s/gpu Batch (t): 0.7942 LR: 0.000349 Step: 12700 Loss: 6.7317 Accuracy: 0.0754
|
| 320 |
+
[32m[04/10 00:22:06 RAR]: [0mData (t): 1.3945, 146.50/s/gpu Batch (t): 1.7475 LR: 0.000349 Step: 12800 Loss: 6.7510 Accuracy: 0.0730
|
| 321 |
+
[32m[04/10 00:24:12 RAR]: [0mData (t): 0.2386, 427.60/s/gpu Batch (t): 0.5987 LR: 0.000349 Step: 12900 Loss: 6.7673 Accuracy: 0.0750
|
| 322 |
+
[32m[04/10 00:26:13 RAR]: [0mData (t): 0.2409, 271.40/s/gpu Batch (t): 0.9433 LR: 0.000349 Step: 13000 Loss: 6.7291 Accuracy: 0.0780
|
| 323 |
+
[32m[04/10 00:28:15 RAR]: [0mData (t): 0.2438, 146.08/s/gpu Batch (t): 1.7525 LR: 0.000349 Step: 13100 Loss: 6.6553 Accuracy: 0.0790
|
| 324 |
+
[32m[04/10 00:30:16 RAR]: [0mData (t): 0.2483, 139.69/s/gpu Batch (t): 1.8326 LR: 0.000349 Step: 13200 Loss: 6.7800 Accuracy: 0.0743
|
| 325 |
+
[32m[04/10 00:32:17 RAR]: [0mData (t): 0.2401, 134.29/s/gpu Batch (t): 1.9063 LR: 0.000349 Step: 13300 Loss: 6.7193 Accuracy: 0.0751
|
| 326 |
+
[32m[04/10 00:34:16 RAR]: [0mData (t): 0.2453, 179.33/s/gpu Batch (t): 1.4275 LR: 0.000349 Step: 13400 Loss: 6.8340 Accuracy: 0.0709
|
| 327 |
+
[32m[04/10 00:36:15 RAR]: [0mData (t): 0.2441, 169.07/s/gpu Batch (t): 1.5141 LR: 0.000349 Step: 13500 Loss: 6.6396 Accuracy: 0.0794
|
| 328 |
+
[32m[04/10 00:38:12 RAR]: [0mData (t): 0.2450, 156.00/s/gpu Batch (t): 1.6411 LR: 0.000349 Step: 13600 Loss: 6.7173 Accuracy: 0.0768
|
| 329 |
+
[32m[04/10 00:40:12 RAR]: [0mData (t): 0.2421, 392.56/s/gpu Batch (t): 0.6521 LR: 0.000349 Step: 13700 Loss: 6.7068 Accuracy: 0.0773
|
| 330 |
+
[32m[04/10 00:42:15 RAR]: [0mData (t): 0.2409, 264.71/s/gpu Batch (t): 0.9671 LR: 0.000349 Step: 13800 Loss: 6.6112 Accuracy: 0.0838
|
| 331 |
+
[32m[04/10 00:44:13 RAR]: [0mData (t): 0.2451, 352.13/s/gpu Batch (t): 0.7270 LR: 0.000349 Step: 13900 Loss: 6.7261 Accuracy: 0.0761
|
| 332 |
+
[32m[04/10 00:46:13 RAR]: [0mData (t): 0.2400, 294.76/s/gpu Batch (t): 0.8685 LR: 0.000349 Step: 14000 Loss: 6.7182 Accuracy: 0.0757
|
| 333 |
+
[32m[04/10 00:48:09 RAR]: [0mData (t): 1.1072, 168.60/s/gpu Batch (t): 1.5184 LR: 0.000349 Step: 14100 Loss: 6.6402 Accuracy: 0.0829
|
| 334 |
+
[32m[04/10 00:50:04 RAR]: [0mData (t): 1.2534, 148.51/s/gpu Batch (t): 1.7238 LR: 0.000349 Step: 14200 Loss: 6.6917 Accuracy: 0.0809
|
| 335 |
+
[32m[04/10 00:52:00 RAR]: [0mData (t): 0.2482, 179.70/s/gpu Batch (t): 1.4246 LR: 0.000349 Step: 14300 Loss: 6.6892 Accuracy: 0.0777
|
| 336 |
+
[32m[04/10 00:53:57 RAR]: [0mData (t): 0.2452, 159.43/s/gpu Batch (t): 1.6057 LR: 0.000349 Step: 14400 Loss: 6.6103 Accuracy: 0.0827
|
| 337 |
+
[32m[04/10 00:55:52 RAR]: [0mData (t): 0.2438, 384.88/s/gpu Batch (t): 0.6651 LR: 0.000349 Step: 14500 Loss: 6.5876 Accuracy: 0.0834
|
| 338 |
+
[32m[04/10 00:57:49 RAR]: [0mData (t): 0.2405, 389.01/s/gpu Batch (t): 0.6581 LR: 0.000349 Step: 14600 Loss: 6.7004 Accuracy: 0.0772
|
| 339 |
+
[32m[04/10 00:59:46 RAR]: [0mData (t): 0.2408, 413.25/s/gpu Batch (t): 0.6195 LR: 0.000349 Step: 14700 Loss: 6.7230 Accuracy: 0.0765
|
| 340 |
+
[32m[04/10 01:01:41 RAR]: [0mData (t): 0.2425, 134.19/s/gpu Batch (t): 1.9077 LR: 0.000349 Step: 14800 Loss: 6.6798 Accuracy: 0.0797
|
| 341 |
+
[32m[04/10 01:03:36 RAR]: [0mData (t): 0.2417, 413.39/s/gpu Batch (t): 0.6193 LR: 0.000349 Step: 14900 Loss: 6.7233 Accuracy: 0.0786
|
| 342 |
+
[32m[04/10 01:05:31 RAR]: [0mData (t): 0.2426, 287.32/s/gpu Batch (t): 0.8910 LR: 0.000349 Step: 15000 Loss: 6.6847 Accuracy: 0.0751
|
| 343 |
+
[32m[04/10 01:07:28 RAR]: [0mData (t): 0.2409, 374.68/s/gpu Batch (t): 0.6833 LR: 0.000349 Step: 15100 Loss: 6.7235 Accuracy: 0.0779
|
| 344 |
+
[32m[04/10 01:09:23 RAR]: [0mData (t): 0.2411, 431.09/s/gpu Batch (t): 0.5938 LR: 0.000349 Step: 15200 Loss: 6.6577 Accuracy: 0.0801
|
| 345 |
+
[32m[04/10 01:11:16 RAR]: [0mData (t): 0.2409, 229.09/s/gpu Batch (t): 1.1175 LR: 0.000349 Step: 15300 Loss: 6.7002 Accuracy: 0.0785
|
| 346 |
+
[32m[04/10 01:13:10 RAR]: [0mData (t): 0.6592, 249.24/s/gpu Batch (t): 1.0271 LR: 0.000349 Step: 15400 Loss: 6.6358 Accuracy: 0.0821
|
| 347 |
+
[32m[04/10 01:15:02 RAR]: [0mData (t): 0.2409, 423.55/s/gpu Batch (t): 0.6044 LR: 0.000349 Step: 15500 Loss: 6.5859 Accuracy: 0.0845
|
| 348 |
+
[32m[04/10 01:16:55 RAR]: [0mData (t): 0.2404, 423.03/s/gpu Batch (t): 0.6052 LR: 0.000349 Step: 15600 Loss: 6.5701 Accuracy: 0.0861
|
| 349 |
+
[32m[04/10 01:18:48 RAR]: [0mData (t): 0.2407, 179.74/s/gpu Batch (t): 1.4243 LR: 0.000349 Step: 15700 Loss: 6.5720 Accuracy: 0.0838
|
| 350 |
+
[32m[04/10 01:20:41 RAR]: [0mData (t): 0.2399, 416.97/s/gpu Batch (t): 0.6140 LR: 0.000349 Step: 15800 Loss: 6.5419 Accuracy: 0.0893
|
| 351 |
+
[32m[04/10 01:22:31 RAR]: [0mData (t): 0.6126, 262.84/s/gpu Batch (t): 0.9740 LR: 0.000349 Step: 15900 Loss: 6.6150 Accuracy: 0.0823
|
| 352 |
+
[32m[04/10 01:24:23 RAR]: [0mData (t): 0.5884, 251.50/s/gpu Batch (t): 1.0179 LR: 0.000349 Step: 16000 Loss: 6.7369 Accuracy: 0.0768
|
| 353 |
+
[32m[04/10 01:26:13 RAR]: [0mData (t): 0.2389, 420.02/s/gpu Batch (t): 0.6095 LR: 0.000349 Step: 16100 Loss: 6.6850 Accuracy: 0.0791
|
| 354 |
+
[32m[04/10 01:28:04 RAR]: [0mData (t): 0.2392, 411.52/s/gpu Batch (t): 0.6221 LR: 0.000349 Step: 16200 Loss: 6.6096 Accuracy: 0.0811
|
| 355 |
+
[32m[04/10 01:29:58 RAR]: [0mData (t): 0.2391, 429.59/s/gpu Batch (t): 0.5959 LR: 0.000349 Step: 16300 Loss: 6.6119 Accuracy: 0.0858
|
| 356 |
+
[32m[04/10 01:31:45 RAR]: [0mData (t): 0.2396, 409.34/s/gpu Batch (t): 0.6254 LR: 0.000349 Step: 16400 Loss: 6.5017 Accuracy: 0.0898
|
| 357 |
+
[32m[04/10 01:33:34 RAR]: [0mData (t): 0.2393, 169.31/s/gpu Batch (t): 1.5121 LR: 0.000349 Step: 16500 Loss: 6.5670 Accuracy: 0.0872
|
| 358 |
+
[32m[04/10 01:35:23 RAR]: [0mData (t): 0.2408, 133.98/s/gpu Batch (t): 1.9107 LR: 0.000349 Step: 16600 Loss: 6.7087 Accuracy: 0.0767
|
| 359 |
+
[32m[04/10 01:37:12 RAR]: [0mData (t): 0.2396, 177.02/s/gpu Batch (t): 1.4462 LR: 0.000349 Step: 16700 Loss: 6.7376 Accuracy: 0.0775
|
| 360 |
+
[32m[04/10 01:39:03 RAR]: [0mData (t): 0.2397, 161.10/s/gpu Batch (t): 1.5891 LR: 0.000349 Step: 16800 Loss: 6.4960 Accuracy: 0.0907
|
| 361 |
+
[32m[04/10 01:40:52 RAR]: [0mData (t): 0.2407, 158.58/s/gpu Batch (t): 1.6143 LR: 0.000349 Step: 16900 Loss: 6.5817 Accuracy: 0.0857
|
| 362 |
+
[32m[04/10 01:42:41 RAR]: [0mData (t): 0.2397, 121.38/s/gpu Batch (t): 2.1091 LR: 0.000349 Step: 17000 Loss: 6.6149 Accuracy: 0.0819
|
| 363 |
+
[32m[04/10 01:44:29 RAR]: [0mData (t): 0.9621, 194.36/s/gpu Batch (t): 1.3171 LR: 0.000349 Step: 17100 Loss: 6.6867 Accuracy: 0.0789
|
| 364 |
+
[32m[04/10 01:46:16 RAR]: [0mData (t): 1.0876, 175.81/s/gpu Batch (t): 1.4561 LR: 0.000349 Step: 17200 Loss: 6.5490 Accuracy: 0.0879
|
| 365 |
+
[32m[04/10 01:48:05 RAR]: [0mData (t): 0.2413, 183.53/s/gpu Batch (t): 1.3949 LR: 0.000349 Step: 17300 Loss: 6.5923 Accuracy: 0.0843
|
| 366 |
+
[32m[04/10 01:49:52 RAR]: [0mData (t): 0.2391, 421.70/s/gpu Batch (t): 0.6071 LR: 0.000349 Step: 17400 Loss: 6.4851 Accuracy: 0.0912
|
| 367 |
+
[32m[04/10 01:51:40 RAR]: [0mData (t): 0.2408, 425.68/s/gpu Batch (t): 0.6014 LR: 0.000349 Step: 17500 Loss: 6.5454 Accuracy: 0.0852
|
| 368 |
+
[32m[04/10 01:53:31 RAR]: [0mData (t): 0.2390, 420.43/s/gpu Batch (t): 0.6089 LR: 0.000349 Step: 17600 Loss: 6.5020 Accuracy: 0.0941
|
| 369 |
+
[32m[04/10 01:55:21 RAR]: [0mData (t): 0.2395, 428.88/s/gpu Batch (t): 0.5969 LR: 0.000349 Step: 17700 Loss: 6.5895 Accuracy: 0.0847
|
| 370 |
+
[32m[04/10 01:57:09 RAR]: [0mData (t): 0.2410, 199.00/s/gpu Batch (t): 1.2864 LR: 0.000349 Step: 17800 Loss: 6.7317 Accuracy: 0.0743
|
| 371 |
+
[32m[04/10 01:58:57 RAR]: [0mData (t): 0.2396, 147.60/s/gpu Batch (t): 1.7344 LR: 0.000349 Step: 17900 Loss: 6.5754 Accuracy: 0.0856
|
| 372 |
+
[32m[04/10 02:00:46 RAR]: [0mData (t): 0.2394, 147.03/s/gpu Batch (t): 1.7412 LR: 0.000349 Step: 18000 Loss: 6.4988 Accuracy: 0.0895
|
| 373 |
+
[32m[04/10 02:02:35 RAR]: [0mData (t): 0.2403, 191.16/s/gpu Batch (t): 1.3392 LR: 0.000349 Step: 18100 Loss: 6.6187 Accuracy: 0.0821
|
| 374 |
+
[32m[04/10 02:04:23 RAR]: [0mData (t): 0.2391, 427.66/s/gpu Batch (t): 0.5986 LR: 0.000349 Step: 18200 Loss: 6.5419 Accuracy: 0.0882
|
| 375 |
+
[32m[04/10 02:06:10 RAR]: [0mData (t): 0.2385, 416.14/s/gpu Batch (t): 0.6152 LR: 0.000349 Step: 18300 Loss: 6.6064 Accuracy: 0.0846
|
| 376 |
+
[32m[04/10 02:07:59 RAR]: [0mData (t): 0.2396, 414.75/s/gpu Batch (t): 0.6172 LR: 0.000349 Step: 18400 Loss: 6.6029 Accuracy: 0.0831
|
| 377 |
+
[32m[04/10 02:09:46 RAR]: [0mData (t): 0.2398, 247.09/s/gpu Batch (t): 1.0361 LR: 0.000349 Step: 18500 Loss: 6.6392 Accuracy: 0.0843
|
| 378 |
+
[32m[04/10 02:11:35 RAR]: [0mData (t): 0.2399, 188.21/s/gpu Batch (t): 1.3602 LR: 0.000349 Step: 18600 Loss: 6.6179 Accuracy: 0.0836
|
| 379 |
+
[32m[04/10 02:13:22 RAR]: [0mData (t): 0.2392, 171.94/s/gpu Batch (t): 1.4889 LR: 0.000349 Step: 18700 Loss: 6.5997 Accuracy: 0.0837
|
| 380 |
+
[32m[04/10 02:15:14 RAR]: [0mData (t): 0.2400, 416.19/s/gpu Batch (t): 0.6151 LR: 0.000349 Step: 18800 Loss: 6.6147 Accuracy: 0.0840
|
| 381 |
+
[32m[04/10 02:17:03 RAR]: [0mData (t): 0.2400, 168.28/s/gpu Batch (t): 1.5212 LR: 0.000349 Step: 18900 Loss: 6.5682 Accuracy: 0.0876
|
| 382 |
+
[32m[04/10 02:18:52 RAR]: [0mData (t): 0.2395, 150.70/s/gpu Batch (t): 1.6987 LR: 0.000349 Step: 19000 Loss: 6.6053 Accuracy: 0.0839
|
| 383 |
+
[32m[04/10 02:20:40 RAR]: [0mData (t): 0.2404, 426.11/s/gpu Batch (t): 0.6008 LR: 0.000349 Step: 19100 Loss: 6.4458 Accuracy: 0.0962
|
| 384 |
+
[32m[04/10 02:22:29 RAR]: [0mData (t): 0.2394, 421.52/s/gpu Batch (t): 0.6073 LR: 0.000349 Step: 19200 Loss: 6.5699 Accuracy: 0.0858
|
| 385 |
+
[32m[04/10 02:24:15 RAR]: [0mData (t): 0.2399, 384.89/s/gpu Batch (t): 0.6651 LR: 0.000349 Step: 19300 Loss: 6.5726 Accuracy: 0.0854
|
| 386 |
+
[32m[04/10 02:26:03 RAR]: [0mData (t): 0.2399, 209.02/s/gpu Batch (t): 1.2248 LR: 0.000349 Step: 19400 Loss: 6.6238 Accuracy: 0.0832
|
| 387 |
+
[32m[04/10 02:27:50 RAR]: [0mData (t): 0.2389, 186.81/s/gpu Batch (t): 1.3704 LR: 0.000349 Step: 19500 Loss: 6.4949 Accuracy: 0.0907
|
| 388 |
+
[32m[04/10 02:29:34 RAR]: [0mData (t): 0.2398, 380.67/s/gpu Batch (t): 0.6725 LR: 0.000349 Step: 19600 Loss: 6.5179 Accuracy: 0.0889
|
| 389 |
+
[32m[04/10 02:31:21 RAR]: [0mData (t): 0.2397, 169.09/s/gpu Batch (t): 1.5140 LR: 0.000349 Step: 19700 Loss: 6.3948 Accuracy: 0.0963
|
| 390 |
+
[32m[04/10 02:33:08 RAR]: [0mData (t): 0.2408, 421.75/s/gpu Batch (t): 0.6070 LR: 0.000349 Step: 19800 Loss: 6.4526 Accuracy: 0.0949
|
| 391 |
+
[32m[04/10 02:34:54 RAR]: [0mData (t): 0.2390, 256.20/s/gpu Batch (t): 0.9992 LR: 0.000349 Step: 19900 Loss: 6.5613 Accuracy: 0.0859
|
| 392 |
+
[32m[04/10 02:36:40 RAR]: [0mData (t): 0.2390, 375.41/s/gpu Batch (t): 0.6819 LR: 0.000349 Step: 20000 Loss: 6.6044 Accuracy: 0.0838
|
| 393 |
+
[32m[04/10 02:36:52 RAR]: [0mSaved state to stage1/rar_ordertok/checkpoint-20000
|
| 394 |
+
[32m[04/10 02:39:29 RAR]: [0mData (t): 1.0652, 179.34/s/gpu Batch (t): 1.4274 LR: 0.000349 Step: 20100 Loss: 6.5849 Accuracy: 0.0829
|
| 395 |
+
[32m[04/10 02:41:15 RAR]: [0mData (t): 0.5811, 270.38/s/gpu Batch (t): 0.9468 LR: 0.000349 Step: 20200 Loss: 6.5742 Accuracy: 0.0850
|
| 396 |
+
[32m[04/10 02:43:02 RAR]: [0mData (t): 1.2709, 157.50/s/gpu Batch (t): 1.6254 LR: 0.000349 Step: 20300 Loss: 6.5609 Accuracy: 0.0842
|
| 397 |
+
[32m[04/10 02:44:51 RAR]: [0mData (t): 1.1979, 164.99/s/gpu Batch (t): 1.5516 LR: 0.000349 Step: 20400 Loss: 6.5873 Accuracy: 0.0844
|
| 398 |
+
[32m[04/10 02:46:34 RAR]: [0mData (t): 0.8413, 214.24/s/gpu Batch (t): 1.1949 LR: 0.000349 Step: 20500 Loss: 6.5546 Accuracy: 0.0860
|
| 399 |
+
[32m[04/10 02:48:17 RAR]: [0mData (t): 1.0061, 188.14/s/gpu Batch (t): 1.3607 LR: 0.000349 Step: 20600 Loss: 6.5828 Accuracy: 0.0854
|
| 400 |
+
[32m[04/10 02:50:00 RAR]: [0mData (t): 0.2394, 423.04/s/gpu Batch (t): 0.6052 LR: 0.000349 Step: 20700 Loss: 6.4910 Accuracy: 0.0898
|
| 401 |
+
[32m[04/10 02:51:44 RAR]: [0mData (t): 0.2393, 390.25/s/gpu Batch (t): 0.6560 LR: 0.000349 Step: 20800 Loss: 6.4989 Accuracy: 0.0894
|
| 402 |
+
[32m[04/10 02:53:30 RAR]: [0mData (t): 0.2401, 131.39/s/gpu Batch (t): 1.9485 LR: 0.000349 Step: 20900 Loss: 6.5236 Accuracy: 0.0895
|
| 403 |
+
[32m[04/10 02:55:14 RAR]: [0mData (t): 0.2407, 189.33/s/gpu Batch (t): 1.3522 LR: 0.000349 Step: 21000 Loss: 6.3643 Accuracy: 0.1031
|
| 404 |
+
[32m[04/10 02:56:58 RAR]: [0mData (t): 0.2406, 420.13/s/gpu Batch (t): 0.6093 LR: 0.000349 Step: 21100 Loss: 6.5122 Accuracy: 0.0897
|
| 405 |
+
[32m[04/10 02:58:42 RAR]: [0mData (t): 0.2396, 431.62/s/gpu Batch (t): 0.5931 LR: 0.000348 Step: 21200 Loss: 6.4352 Accuracy: 0.0966
|
| 406 |
+
[32m[04/10 03:00:25 RAR]: [0mData (t): 0.2396, 414.23/s/gpu Batch (t): 0.6180 LR: 0.000348 Step: 21300 Loss: 6.5085 Accuracy: 0.0922
|
| 407 |
+
[32m[04/10 03:02:04 RAR]: [0mData (t): 0.2395, 430.50/s/gpu Batch (t): 0.5947 LR: 0.000348 Step: 21400 Loss: 6.4463 Accuracy: 0.0934
|
| 408 |
+
[32m[04/10 03:03:45 RAR]: [0mData (t): 0.2397, 201.67/s/gpu Batch (t): 1.2694 LR: 0.000348 Step: 21500 Loss: 6.3784 Accuracy: 0.0994
|
| 409 |
+
[32m[04/10 03:05:25 RAR]: [0mData (t): 0.2389, 430.95/s/gpu Batch (t): 0.5940 LR: 0.000348 Step: 21600 Loss: 6.3921 Accuracy: 0.0987
|
| 410 |
+
[32m[04/10 03:07:04 RAR]: [0mData (t): 0.2391, 303.00/s/gpu Batch (t): 0.8449 LR: 0.000348 Step: 21700 Loss: 6.5963 Accuracy: 0.0829
|
| 411 |
+
[32m[04/10 03:08:43 RAR]: [0mData (t): 0.2410, 175.79/s/gpu Batch (t): 1.4563 LR: 0.000348 Step: 21800 Loss: 6.5262 Accuracy: 0.0858
|
| 412 |
+
[32m[04/10 03:10:21 RAR]: [0mData (t): 0.2397, 430.78/s/gpu Batch (t): 0.5943 LR: 0.000348 Step: 21900 Loss: 6.5149 Accuracy: 0.0878
|
| 413 |
+
[32m[04/10 03:12:01 RAR]: [0mData (t): 0.2402, 383.73/s/gpu Batch (t): 0.6671 LR: 0.000348 Step: 22000 Loss: 6.3911 Accuracy: 0.0972
|
| 414 |
+
[32m[04/10 03:13:41 RAR]: [0mData (t): 0.2399, 202.75/s/gpu Batch (t): 1.2627 LR: 0.000348 Step: 22100 Loss: 6.4119 Accuracy: 0.0943
|
| 415 |
+
[32m[04/10 03:15:20 RAR]: [0mData (t): 0.8012, 221.32/s/gpu Batch (t): 1.1567 LR: 0.000348 Step: 22200 Loss: 6.4710 Accuracy: 0.0916
|
| 416 |
+
[32m[04/10 03:16:59 RAR]: [0mData (t): 0.2400, 198.01/s/gpu Batch (t): 1.2929 LR: 0.000348 Step: 22300 Loss: 6.4837 Accuracy: 0.0933
|
| 417 |
+
[32m[04/10 03:18:37 RAR]: [0mData (t): 0.2388, 420.51/s/gpu Batch (t): 0.6088 LR: 0.000348 Step: 22400 Loss: 6.5020 Accuracy: 0.0906
|
| 418 |
+
[32m[04/10 03:20:16 RAR]: [0mData (t): 0.2393, 230.54/s/gpu Batch (t): 1.1104 LR: 0.000348 Step: 22500 Loss: 6.3576 Accuracy: 0.0981
|
| 419 |
+
[32m[04/10 03:22:00 RAR]: [0mData (t): 0.2394, 420.41/s/gpu Batch (t): 0.6089 LR: 0.000348 Step: 22600 Loss: 6.5808 Accuracy: 0.0832
|
| 420 |
+
[32m[04/10 03:23:38 RAR]: [0mData (t): 0.2394, 416.70/s/gpu Batch (t): 0.6143 LR: 0.000348 Step: 22700 Loss: 6.3840 Accuracy: 0.1017
|
| 421 |
+
[32m[04/10 03:25:19 RAR]: [0mData (t): 0.2391, 207.41/s/gpu Batch (t): 1.2343 LR: 0.000348 Step: 22800 Loss: 6.3529 Accuracy: 0.1032
|
| 422 |
+
[32m[04/10 03:26:58 RAR]: [0mData (t): 0.2405, 371.53/s/gpu Batch (t): 0.6890 LR: 0.000348 Step: 22900 Loss: 6.4973 Accuracy: 0.0897
|
| 423 |
+
[32m[04/10 03:28:38 RAR]: [0mData (t): 0.2388, 388.81/s/gpu Batch (t): 0.6584 LR: 0.000348 Step: 23000 Loss: 6.4693 Accuracy: 0.0928
|
| 424 |
+
[32m[04/10 03:30:17 RAR]: [0mData (t): 0.2391, 281.99/s/gpu Batch (t): 0.9078 LR: 0.000348 Step: 23100 Loss: 6.4065 Accuracy: 0.0964
|
| 425 |
+
[32m[04/10 03:31:56 RAR]: [0mData (t): 0.2396, 433.88/s/gpu Batch (t): 0.5900 LR: 0.000348 Step: 23200 Loss: 6.6215 Accuracy: 0.0843
|
| 426 |
+
[32m[04/10 03:33:36 RAR]: [0mData (t): 0.2391, 192.85/s/gpu Batch (t): 1.3275 LR: 0.000348 Step: 23300 Loss: 6.3362 Accuracy: 0.1012
|
| 427 |
+
[32m[04/10 03:35:15 RAR]: [0mData (t): 0.2399, 172.83/s/gpu Batch (t): 1.4812 LR: 0.000348 Step: 23400 Loss: 6.5375 Accuracy: 0.0835
|
| 428 |
+
[32m[04/10 03:36:54 RAR]: [0mData (t): 0.2394, 155.82/s/gpu Batch (t): 1.6429 LR: 0.000348 Step: 23500 Loss: 6.3908 Accuracy: 0.0955
|
| 429 |
+
[32m[04/10 03:38:36 RAR]: [0mData (t): 0.2390, 179.73/s/gpu Batch (t): 1.4244 LR: 0.000348 Step: 23600 Loss: 6.3857 Accuracy: 0.0986
|
| 430 |
+
[32m[04/10 03:40:13 RAR]: [0mData (t): 0.3466, 193.09/s/gpu Batch (t): 1.3258 LR: 0.000348 Step: 23700 Loss: 6.3799 Accuracy: 0.0978
|
| 431 |
+
[32m[04/10 03:41:55 RAR]: [0mData (t): 0.2391, 423.46/s/gpu Batch (t): 0.6045 LR: 0.000348 Step: 23800 Loss: 6.4039 Accuracy: 0.0951
|
| 432 |
+
[32m[04/10 03:43:35 RAR]: [0mData (t): 0.9340, 197.22/s/gpu Batch (t): 1.2981 LR: 0.000348 Step: 23900 Loss: 6.3856 Accuracy: 0.0953
|
| 433 |
+
[32m[04/10 03:45:12 RAR]: [0mData (t): 0.2392, 382.64/s/gpu Batch (t): 0.6690 LR: 0.000348 Step: 24000 Loss: 6.4392 Accuracy: 0.0956
|
| 434 |
+
[32m[04/10 03:46:51 RAR]: [0mData (t): 1.0108, 187.90/s/gpu Batch (t): 1.3625 LR: 0.000348 Step: 24100 Loss: 6.4047 Accuracy: 0.0980
|
| 435 |
+
[32m[04/10 03:48:28 RAR]: [0mData (t): 0.2390, 431.76/s/gpu Batch (t): 0.5929 LR: 0.000348 Step: 24200 Loss: 6.4889 Accuracy: 0.0900
|
| 436 |
+
[32m[04/10 03:50:07 RAR]: [0mData (t): 0.2391, 432.86/s/gpu Batch (t): 0.5914 LR: 0.000348 Step: 24300 Loss: 6.4581 Accuracy: 0.0960
|
| 437 |
+
[32m[04/10 03:51:45 RAR]: [0mData (t): 0.9895, 190.44/s/gpu Batch (t): 1.3443 LR: 0.000348 Step: 24400 Loss: 6.3953 Accuracy: 0.0983
|
| 438 |
+
[32m[04/10 03:53:23 RAR]: [0mData (t): 0.2403, 193.92/s/gpu Batch (t): 1.3202 LR: 0.000348 Step: 24500 Loss: 6.4243 Accuracy: 0.0937
|
| 439 |
+
[32m[04/10 03:54:59 RAR]: [0mData (t): 0.2394, 168.23/s/gpu Batch (t): 1.5218 LR: 0.000348 Step: 24600 Loss: 6.4958 Accuracy: 0.0914
|
| 440 |
+
[32m[04/10 03:56:36 RAR]: [0mData (t): 0.2391, 168.03/s/gpu Batch (t): 1.5235 LR: 0.000348 Step: 24700 Loss: 6.4837 Accuracy: 0.0916
|
| 441 |
+
[32m[04/10 03:58:11 RAR]: [0mData (t): 0.2396, 297.09/s/gpu Batch (t): 0.8617 LR: 0.000348 Step: 24800 Loss: 6.4312 Accuracy: 0.0914
|
| 442 |
+
[32m[04/10 03:59:48 RAR]: [0mData (t): 0.2394, 430.26/s/gpu Batch (t): 0.5950 LR: 0.000348 Step: 24900 Loss: 6.2954 Accuracy: 0.1048
|
| 443 |
+
[32m[04/10 04:01:23 RAR]: [0mData (t): 0.2402, 155.72/s/gpu Batch (t): 1.6439 LR: 0.000348 Step: 25000 Loss: 6.4204 Accuracy: 0.0961
|
| 444 |
+
[32m[04/10 04:03:02 RAR]: [0mData (t): 0.2399, 431.31/s/gpu Batch (t): 0.5935 LR: 0.000348 Step: 25100 Loss: 6.2909 Accuracy: 0.1038
|
| 445 |
+
[32m[04/10 04:04:37 RAR]: [0mData (t): 0.2389, 419.47/s/gpu Batch (t): 0.6103 LR: 0.000348 Step: 25200 Loss: 6.4417 Accuracy: 0.0943
|
| 446 |
+
[32m[04/10 04:06:12 RAR]: [0mData (t): 0.2399, 433.81/s/gpu Batch (t): 0.5901 LR: 0.000348 Step: 25300 Loss: 6.4741 Accuracy: 0.0930
|
| 447 |
+
[32m[04/10 04:07:47 RAR]: [0mData (t): 0.2402, 431.99/s/gpu Batch (t): 0.5926 LR: 0.000348 Step: 25400 Loss: 6.4183 Accuracy: 0.0948
|
| 448 |
+
[32m[04/10 04:09:22 RAR]: [0mData (t): 0.2394, 433.69/s/gpu Batch (t): 0.5903 LR: 0.000348 Step: 25500 Loss: 6.3612 Accuracy: 0.1003
|
| 449 |
+
[32m[04/10 04:10:58 RAR]: [0mData (t): 0.2389, 432.20/s/gpu Batch (t): 0.5923 LR: 0.000348 Step: 25600 Loss: 6.3757 Accuracy: 0.1009
|
| 450 |
+
[32m[04/10 04:12:31 RAR]: [0mData (t): 0.2396, 245.79/s/gpu Batch (t): 1.0415 LR: 0.000348 Step: 25700 Loss: 6.3147 Accuracy: 0.1034
|
| 451 |
+
[32m[04/10 04:14:06 RAR]: [0mData (t): 0.2391, 423.42/s/gpu Batch (t): 0.6046 LR: 0.000348 Step: 25800 Loss: 6.4155 Accuracy: 0.0947
|
| 452 |
+
[32m[04/10 04:15:41 RAR]: [0mData (t): 0.2391, 229.03/s/gpu Batch (t): 1.1178 LR: 0.000348 Step: 25900 Loss: 6.3901 Accuracy: 0.0993
|
| 453 |
+
[32m[04/10 04:17:16 RAR]: [0mData (t): 0.2397, 192.49/s/gpu Batch (t): 1.3300 LR: 0.000348 Step: 26000 Loss: 6.4845 Accuracy: 0.0894
|
| 454 |
+
[32m[04/10 04:18:51 RAR]: [0mData (t): 0.2396, 193.65/s/gpu Batch (t): 1.3219 LR: 0.000348 Step: 26100 Loss: 6.3177 Accuracy: 0.1029
|
| 455 |
+
[32m[04/10 04:20:25 RAR]: [0mData (t): 0.2392, 224.22/s/gpu Batch (t): 1.1417 LR: 0.000348 Step: 26200 Loss: 6.4239 Accuracy: 0.0916
|
| 456 |
+
[32m[04/10 04:22:03 RAR]: [0mData (t): 0.2386, 435.26/s/gpu Batch (t): 0.5881 LR: 0.000348 Step: 26300 Loss: 6.0831 Accuracy: 0.1254
|
| 457 |
+
[32m[04/10 04:23:38 RAR]: [0mData (t): 0.2393, 321.20/s/gpu Batch (t): 0.7970 LR: 0.000348 Step: 26400 Loss: 6.2721 Accuracy: 0.1077
|
| 458 |
+
[32m[04/10 04:25:13 RAR]: [0mData (t): 0.2393, 210.24/s/gpu Batch (t): 1.2176 LR: 0.000348 Step: 26500 Loss: 6.4046 Accuracy: 0.0952
|
| 459 |
+
[32m[04/10 04:26:48 RAR]: [0mData (t): 0.2393, 203.74/s/gpu Batch (t): 1.2565 LR: 0.000348 Step: 26600 Loss: 6.2342 Accuracy: 0.1111
|
| 460 |
+
[32m[04/10 04:28:24 RAR]: [0mData (t): 0.2395, 195.60/s/gpu Batch (t): 1.3088 LR: 0.000348 Step: 26700 Loss: 6.3112 Accuracy: 0.1022
|
| 461 |
+
[32m[04/10 04:29:58 RAR]: [0mData (t): 0.2394, 235.75/s/gpu Batch (t): 1.0859 LR: 0.000348 Step: 26800 Loss: 6.3952 Accuracy: 0.0942
|
| 462 |
+
[32m[04/10 04:31:32 RAR]: [0mData (t): 0.2396, 232.84/s/gpu Batch (t): 1.0995 LR: 0.000348 Step: 26900 Loss: 6.3956 Accuracy: 0.0975
|
| 463 |
+
[32m[04/10 04:33:06 RAR]: [0mData (t): 0.2394, 184.42/s/gpu Batch (t): 1.3881 LR: 0.000348 Step: 27000 Loss: 6.1997 Accuracy: 0.1134
|
| 464 |
+
[32m[04/10 04:34:39 RAR]: [0mData (t): 0.2400, 377.91/s/gpu Batch (t): 0.6774 LR: 0.000348 Step: 27100 Loss: 6.2502 Accuracy: 0.1090
|
| 465 |
+
[32m[04/10 04:36:11 RAR]: [0mData (t): 0.2388, 431.76/s/gpu Batch (t): 0.5929 LR: 0.000348 Step: 27200 Loss: 6.4226 Accuracy: 0.0958
|
| 466 |
+
[32m[04/10 04:37:47 RAR]: [0mData (t): 0.2392, 409.02/s/gpu Batch (t): 0.6259 LR: 0.000348 Step: 27300 Loss: 6.3918 Accuracy: 0.0999
|
| 467 |
+
[32m[04/10 04:39:20 RAR]: [0mData (t): 0.2389, 222.03/s/gpu Batch (t): 1.1530 LR: 0.000347 Step: 27400 Loss: 6.2948 Accuracy: 0.1064
|
| 468 |
+
[32m[04/10 04:40:54 RAR]: [0mData (t): 0.2399, 225.29/s/gpu Batch (t): 1.1363 LR: 0.000347 Step: 27500 Loss: 6.3134 Accuracy: 0.1025
|
| 469 |
+
[32m[04/10 04:42:33 RAR]: [0mData (t): 0.4456, 319.88/s/gpu Batch (t): 0.8003 LR: 0.000347 Step: 27600 Loss: 6.2982 Accuracy: 0.1044
|
| 470 |
+
[32m[04/10 04:44:06 RAR]: [0mData (t): 0.2398, 430.40/s/gpu Batch (t): 0.5948 LR: 0.000347 Step: 27700 Loss: 6.1390 Accuracy: 0.1178
|
| 471 |
+
[32m[04/10 04:45:40 RAR]: [0mData (t): 0.2391, 432.63/s/gpu Batch (t): 0.5917 LR: 0.000347 Step: 27800 Loss: 6.2865 Accuracy: 0.1046
|
| 472 |
+
[32m[04/10 04:47:16 RAR]: [0mData (t): 0.2398, 434.08/s/gpu Batch (t): 0.5897 LR: 0.000347 Step: 27900 Loss: 6.3434 Accuracy: 0.1018
|
| 473 |
+
[32m[04/10 04:48:49 RAR]: [0mData (t): 0.2391, 233.95/s/gpu Batch (t): 1.0942 LR: 0.000347 Step: 28000 Loss: 6.2466 Accuracy: 0.1057
|
| 474 |
+
[32m[04/10 04:50:23 RAR]: [0mData (t): 0.2389, 430.24/s/gpu Batch (t): 0.5950 LR: 0.000347 Step: 28100 Loss: 6.3707 Accuracy: 0.0979
|
| 475 |
+
[32m[04/10 04:51:56 RAR]: [0mData (t): 0.2401, 176.38/s/gpu Batch (t): 1.4514 LR: 0.000347 Step: 28200 Loss: 6.4386 Accuracy: 0.0948
|
| 476 |
+
[32m[04/10 04:53:31 RAR]: [0mData (t): 0.2406, 186.75/s/gpu Batch (t): 1.3709 LR: 0.000347 Step: 28300 Loss: 6.3290 Accuracy: 0.1034
|
| 477 |
+
[32m[04/10 04:55:04 RAR]: [0mData (t): 0.2397, 202.08/s/gpu Batch (t): 1.2668 LR: 0.000347 Step: 28400 Loss: 6.2734 Accuracy: 0.1043
|
| 478 |
+
[32m[04/10 04:56:38 RAR]: [0mData (t): 0.2391, 201.77/s/gpu Batch (t): 1.2688 LR: 0.000347 Step: 28500 Loss: 6.3208 Accuracy: 0.1009
|
| 479 |
+
[32m[04/10 04:58:12 RAR]: [0mData (t): 0.2403, 432.71/s/gpu Batch (t): 0.5916 LR: 0.000347 Step: 28600 Loss: 6.2674 Accuracy: 0.1092
|
| 480 |
+
[32m[04/10 04:59:49 RAR]: [0mData (t): 0.2388, 428.33/s/gpu Batch (t): 0.5977 LR: 0.000347 Step: 28700 Loss: 6.3615 Accuracy: 0.1004
|
| 481 |
+
[32m[04/10 05:01:29 RAR]: [0mData (t): 0.2392, 432.57/s/gpu Batch (t): 0.5918 LR: 0.000347 Step: 28800 Loss: 6.4387 Accuracy: 0.0912
|
| 482 |
+
[32m[04/10 05:03:03 RAR]: [0mData (t): 0.2393, 436.00/s/gpu Batch (t): 0.5872 LR: 0.000347 Step: 28900 Loss: 6.2987 Accuracy: 0.1049
|
| 483 |
+
[32m[04/10 05:04:38 RAR]: [0mData (t): 0.2402, 384.46/s/gpu Batch (t): 0.6659 LR: 0.000347 Step: 29000 Loss: 6.2174 Accuracy: 0.1106
|
| 484 |
+
[32m[04/10 05:06:12 RAR]: [0mData (t): 0.2399, 428.78/s/gpu Batch (t): 0.5970 LR: 0.000347 Step: 29100 Loss: 6.2687 Accuracy: 0.1064
|
| 485 |
+
[32m[04/10 05:07:46 RAR]: [0mData (t): 0.2396, 432.70/s/gpu Batch (t): 0.5916 LR: 0.000347 Step: 29200 Loss: 6.3839 Accuracy: 0.0930
|
| 486 |
+
[32m[04/10 05:09:22 RAR]: [0mData (t): 0.2389, 430.11/s/gpu Batch (t): 0.5952 LR: 0.000347 Step: 29300 Loss: 6.3024 Accuracy: 0.1014
|
| 487 |
+
[32m[04/10 05:10:58 RAR]: [0mData (t): 0.2398, 433.15/s/gpu Batch (t): 0.5910 LR: 0.000347 Step: 29400 Loss: 6.3020 Accuracy: 0.1014
|
| 488 |
+
[32m[04/10 05:12:33 RAR]: [0mData (t): 0.2399, 430.18/s/gpu Batch (t): 0.5951 LR: 0.000347 Step: 29500 Loss: 6.1587 Accuracy: 0.1158
|
| 489 |
+
[32m[04/10 05:14:09 RAR]: [0mData (t): 0.2410, 432.28/s/gpu Batch (t): 0.5922 LR: 0.000347 Step: 29600 Loss: 6.2737 Accuracy: 0.1042
|
| 490 |
+
[32m[04/10 05:15:45 RAR]: [0mData (t): 0.2395, 427.18/s/gpu Batch (t): 0.5993 LR: 0.000347 Step: 29700 Loss: 6.3301 Accuracy: 0.0991
|
| 491 |
+
[32m[04/10 05:17:20 RAR]: [0mData (t): 0.2402, 433.96/s/gpu Batch (t): 0.5899 LR: 0.000347 Step: 29800 Loss: 6.2807 Accuracy: 0.1044
|
| 492 |
+
[32m[04/10 05:18:56 RAR]: [0mData (t): 0.2394, 427.65/s/gpu Batch (t): 0.5986 LR: 0.000347 Step: 29900 Loss: 6.3383 Accuracy: 0.0999
|
| 493 |
+
[32m[04/10 05:20:30 RAR]: [0mData (t): 0.2391, 277.43/s/gpu Batch (t): 0.9227 LR: 0.000347 Step: 30000 Loss: 6.2117 Accuracy: 0.1111
|
| 494 |
+
[32m[04/10 05:20:41 RAR]: [0mSaved state to stage1/rar_ordertok/checkpoint-30000
|
| 495 |
+
[32m[04/10 05:23:13 RAR]: [0mData (t): 0.2397, 432.11/s/gpu Batch (t): 0.5924 LR: 0.000347 Step: 30100 Loss: 6.2427 Accuracy: 0.1135
|
| 496 |
+
[32m[04/10 05:24:45 RAR]: [0mData (t): 0.2390, 245.83/s/gpu Batch (t): 1.0414 LR: 0.000347 Step: 30200 Loss: 6.3080 Accuracy: 0.0999
|
| 497 |
+
[32m[04/10 05:26:18 RAR]: [0mData (t): 0.2390, 187.59/s/gpu Batch (t): 1.3647 LR: 0.000347 Step: 30300 Loss: 6.2457 Accuracy: 0.1104
|
| 498 |
+
[32m[04/10 05:27:53 RAR]: [0mData (t): 0.2392, 224.81/s/gpu Batch (t): 1.1388 LR: 0.000347 Step: 30400 Loss: 6.3108 Accuracy: 0.1006
|
| 499 |
+
[32m[04/10 05:29:26 RAR]: [0mData (t): 0.2391, 218.73/s/gpu Batch (t): 1.1704 LR: 0.000347 Step: 30500 Loss: 6.3121 Accuracy: 0.1023
|
| 500 |
+
[32m[04/10 05:30:58 RAR]: [0mData (t): 0.2393, 197.27/s/gpu Batch (t): 1.2977 LR: 0.000347 Step: 30600 Loss: 6.2756 Accuracy: 0.1054
|
| 501 |
+
[32m[04/10 05:32:31 RAR]: [0mData (t): 0.2387, 430.98/s/gpu Batch (t): 0.5940 LR: 0.000347 Step: 30700 Loss: 6.1803 Accuracy: 0.1129
|
| 502 |
+
[32m[04/10 05:34:04 RAR]: [0mData (t): 0.2393, 188.19/s/gpu Batch (t): 1.3603 LR: 0.000347 Step: 30800 Loss: 6.1768 Accuracy: 0.1129
|
| 503 |
+
[32m[04/10 05:35:37 RAR]: [0mData (t): 0.2394, 150.37/s/gpu Batch (t): 1.7025 LR: 0.000347 Step: 30900 Loss: 6.2448 Accuracy: 0.1057
|
| 504 |
+
[32m[04/10 05:37:09 RAR]: [0mData (t): 0.2392, 157.26/s/gpu Batch (t): 1.6279 LR: 0.000347 Step: 31000 Loss: 6.3125 Accuracy: 0.1004
|
| 505 |
+
[32m[04/10 05:38:42 RAR]: [0mData (t): 0.2402, 246.05/s/gpu Batch (t): 1.0404 LR: 0.000347 Step: 31100 Loss: 6.3228 Accuracy: 0.1021
|
| 506 |
+
[32m[04/10 05:40:15 RAR]: [0mData (t): 0.2393, 234.36/s/gpu Batch (t): 1.0924 LR: 0.000347 Step: 31200 Loss: 6.3593 Accuracy: 0.0972
|
| 507 |
+
[32m[04/10 05:41:50 RAR]: [0mData (t): 0.2393, 435.08/s/gpu Batch (t): 0.5884 LR: 0.000347 Step: 31300 Loss: 6.2598 Accuracy: 0.1073
|
| 508 |
+
[32m[04/10 05:43:22 RAR]: [0mData (t): 0.2393, 433.81/s/gpu Batch (t): 0.5901 LR: 0.000347 Step: 31400 Loss: 6.1868 Accuracy: 0.1132
|
| 509 |
+
[32m[04/10 05:44:58 RAR]: [0mData (t): 0.2388, 432.51/s/gpu Batch (t): 0.5919 LR: 0.000347 Step: 31500 Loss: 6.2500 Accuracy: 0.1049
|
| 510 |
+
[32m[04/10 05:46:31 RAR]: [0mData (t): 0.2390, 214.54/s/gpu Batch (t): 1.1933 LR: 0.000347 Step: 31600 Loss: 6.2585 Accuracy: 0.1045
|
| 511 |
+
[32m[04/10 05:48:05 RAR]: [0mData (t): 0.2394, 171.94/s/gpu Batch (t): 1.4889 LR: 0.000347 Step: 31700 Loss: 6.1824 Accuracy: 0.1139
|
| 512 |
+
[32m[04/10 05:49:38 RAR]: [0mData (t): 0.2388, 215.20/s/gpu Batch (t): 1.1896 LR: 0.000347 Step: 31800 Loss: 6.1794 Accuracy: 0.1127
|
| 513 |
+
[32m[04/10 05:51:11 RAR]: [0mData (t): 0.2394, 429.99/s/gpu Batch (t): 0.5954 LR: 0.000347 Step: 31900 Loss: 6.1592 Accuracy: 0.1139
|
| 514 |
+
[32m[04/10 05:52:44 RAR]: [0mData (t): 0.4398, 207.54/s/gpu Batch (t): 1.2335 LR: 0.000347 Step: 32000 Loss: 6.2938 Accuracy: 0.1022
|
| 515 |
+
[32m[04/10 05:54:15 RAR]: [0mData (t): 0.7167, 239.67/s/gpu Batch (t): 1.0681 LR: 0.000347 Step: 32100 Loss: 6.3300 Accuracy: 0.0999
|
| 516 |
+
[32m[04/10 05:55:47 RAR]: [0mData (t): 0.2390, 426.44/s/gpu Batch (t): 0.6003 LR: 0.000347 Step: 32200 Loss: 6.1539 Accuracy: 0.1172
|
| 517 |
+
[32m[04/10 05:57:21 RAR]: [0mData (t): 0.2391, 430.56/s/gpu Batch (t): 0.5946 LR: 0.000347 Step: 32300 Loss: 6.2648 Accuracy: 0.1055
|
| 518 |
+
[32m[04/10 05:58:53 RAR]: [0mData (t): 0.2387, 433.86/s/gpu Batch (t): 0.5901 LR: 0.000346 Step: 32400 Loss: 6.1414 Accuracy: 0.1183
|
| 519 |
+
[32m[04/10 06:00:28 RAR]: [0mData (t): 0.2395, 401.17/s/gpu Batch (t): 0.6381 LR: 0.000346 Step: 32500 Loss: 6.1612 Accuracy: 0.1154
|
| 520 |
+
[32m[04/10 06:02:04 RAR]: [0mData (t): 0.2386, 290.30/s/gpu Batch (t): 0.8818 LR: 0.000346 Step: 32600 Loss: 6.0809 Accuracy: 0.1242
|
| 521 |
+
[32m[04/10 06:03:37 RAR]: [0mData (t): 0.8940, 205.37/s/gpu Batch (t): 1.2465 LR: 0.000346 Step: 32700 Loss: 6.1557 Accuracy: 0.1127
|
| 522 |
+
[32m[04/10 06:05:10 RAR]: [0mData (t): 0.7105, 240.16/s/gpu Batch (t): 1.0660 LR: 0.000346 Step: 32800 Loss: 6.1568 Accuracy: 0.1145
|
| 523 |
+
[32m[04/10 06:06:45 RAR]: [0mData (t): 0.2392, 211.53/s/gpu Batch (t): 1.2103 LR: 0.000346 Step: 32900 Loss: 6.1729 Accuracy: 0.1123
|
| 524 |
+
[32m[04/10 06:08:18 RAR]: [0mData (t): 0.2386, 202.34/s/gpu Batch (t): 1.2652 LR: 0.000346 Step: 33000 Loss: 6.1506 Accuracy: 0.1167
|
| 525 |
+
[32m[04/10 06:09:49 RAR]: [0mData (t): 0.2389, 357.24/s/gpu Batch (t): 0.7166 LR: 0.000346 Step: 33100 Loss: 6.1568 Accuracy: 0.1158
|
| 526 |
+
[32m[04/10 06:11:21 RAR]: [0mData (t): 0.2389, 433.21/s/gpu Batch (t): 0.5909 LR: 0.000346 Step: 33200 Loss: 6.0311 Accuracy: 0.1257
|
| 527 |
+
[32m[04/10 06:12:53 RAR]: [0mData (t): 0.2393, 426.14/s/gpu Batch (t): 0.6007 LR: 0.000346 Step: 33300 Loss: 6.2060 Accuracy: 0.1117
|
| 528 |
+
[32m[04/10 06:14:25 RAR]: [0mData (t): 0.6561, 254.26/s/gpu Batch (t): 1.0068 LR: 0.000346 Step: 33400 Loss: 6.2625 Accuracy: 0.1055
|
| 529 |
+
[32m[04/10 06:15:58 RAR]: [0mData (t): 1.0286, 185.57/s/gpu Batch (t): 1.3795 LR: 0.000346 Step: 33500 Loss: 6.1826 Accuracy: 0.1091
|
| 530 |
+
[32m[04/10 06:17:29 RAR]: [0mData (t): 0.2389, 420.10/s/gpu Batch (t): 0.6094 LR: 0.000346 Step: 33600 Loss: 6.1407 Accuracy: 0.1139
|
| 531 |
+
[32m[04/10 06:19:00 RAR]: [0mData (t): 0.4370, 324.79/s/gpu Batch (t): 0.7882 LR: 0.000346 Step: 33700 Loss: 6.1959 Accuracy: 0.1087
|
| 532 |
+
[32m[04/10 06:20:38 RAR]: [0mData (t): 0.2397, 415.01/s/gpu Batch (t): 0.6169 LR: 0.000346 Step: 33800 Loss: 6.1552 Accuracy: 0.1106
|
| 533 |
+
[32m[04/10 06:22:11 RAR]: [0mData (t): 0.2385, 416.00/s/gpu Batch (t): 0.6154 LR: 0.000346 Step: 33900 Loss: 6.1790 Accuracy: 0.1166
|
| 534 |
+
[32m[04/10 06:23:42 RAR]: [0mData (t): 0.2391, 387.46/s/gpu Batch (t): 0.6607 LR: 0.000346 Step: 34000 Loss: 6.2160 Accuracy: 0.1099
|
| 535 |
+
[32m[04/10 06:25:14 RAR]: [0mData (t): 0.2393, 432.88/s/gpu Batch (t): 0.5914 LR: 0.000346 Step: 34100 Loss: 6.2531 Accuracy: 0.1033
|
| 536 |
+
[32m[04/10 06:26:47 RAR]: [0mData (t): 0.2396, 195.37/s/gpu Batch (t): 1.3103 LR: 0.000346 Step: 34200 Loss: 6.1899 Accuracy: 0.1127
|
| 537 |
+
[32m[04/10 06:28:20 RAR]: [0mData (t): 0.8790, 207.77/s/gpu Batch (t): 1.2322 LR: 0.000346 Step: 34300 Loss: 6.1101 Accuracy: 0.1187
|
| 538 |
+
[32m[04/10 06:29:50 RAR]: [0mData (t): 0.7144, 240.37/s/gpu Batch (t): 1.0650 LR: 0.000346 Step: 34400 Loss: 5.9886 Accuracy: 0.1258
|
| 539 |
+
[32m[04/10 06:31:22 RAR]: [0mData (t): 0.2398, 433.75/s/gpu Batch (t): 0.5902 LR: 0.000346 Step: 34500 Loss: 6.1613 Accuracy: 0.1186
|
| 540 |
+
[32m[04/10 06:32:53 RAR]: [0mData (t): 0.2400, 419.82/s/gpu Batch (t): 0.6098 LR: 0.000346 Step: 34600 Loss: 6.0493 Accuracy: 0.1261
|
| 541 |
+
[32m[04/10 06:34:27 RAR]: [0mData (t): 0.2395, 430.96/s/gpu Batch (t): 0.5940 LR: 0.000346 Step: 34700 Loss: 6.1655 Accuracy: 0.1130
|
| 542 |
+
[32m[04/10 06:35:59 RAR]: [0mData (t): 0.2390, 283.11/s/gpu Batch (t): 0.9042 LR: 0.000346 Step: 34800 Loss: 6.0727 Accuracy: 0.1245
|
| 543 |
+
[32m[04/10 06:37:30 RAR]: [0mData (t): 0.2399, 373.06/s/gpu Batch (t): 0.6862 LR: 0.000346 Step: 34900 Loss: 6.2947 Accuracy: 0.1037
|
| 544 |
+
[32m[04/10 06:39:03 RAR]: [0mData (t): 0.2389, 351.71/s/gpu Batch (t): 0.7279 LR: 0.000346 Step: 35000 Loss: 6.1764 Accuracy: 0.1139
|
| 545 |
+
[32m[04/10 06:40:39 RAR]: [0mData (t): 0.2385, 428.28/s/gpu Batch (t): 0.5977 LR: 0.000346 Step: 35100 Loss: 6.0239 Accuracy: 0.1252
|
| 546 |
+
[32m[04/10 06:42:12 RAR]: [0mData (t): 0.2392, 431.78/s/gpu Batch (t): 0.5929 LR: 0.000346 Step: 35200 Loss: 6.0476 Accuracy: 0.1244
|
| 547 |
+
[32m[04/10 06:43:44 RAR]: [0mData (t): 0.2401, 212.12/s/gpu Batch (t): 1.2068 LR: 0.000346 Step: 35300 Loss: 6.1415 Accuracy: 0.1158
|
| 548 |
+
[32m[04/10 06:45:15 RAR]: [0mData (t): 0.2398, 342.23/s/gpu Batch (t): 0.7480 LR: 0.000346 Step: 35400 Loss: 6.1288 Accuracy: 0.1173
|
| 549 |
+
[32m[04/10 06:46:47 RAR]: [0mData (t): 0.2399, 435.82/s/gpu Batch (t): 0.5874 LR: 0.000346 Step: 35500 Loss: 6.3091 Accuracy: 0.1011
|
| 550 |
+
[32m[04/10 06:48:20 RAR]: [0mData (t): 0.2394, 431.61/s/gpu Batch (t): 0.5931 LR: 0.000346 Step: 35600 Loss: 6.2807 Accuracy: 0.1050
|
| 551 |
+
[32m[04/10 06:49:54 RAR]: [0mData (t): 0.5339, 289.59/s/gpu Batch (t): 0.8840 LR: 0.000346 Step: 35700 Loss: 6.1052 Accuracy: 0.1180
|
| 552 |
+
[32m[04/10 06:51:24 RAR]: [0mData (t): 0.2391, 204.41/s/gpu Batch (t): 1.2524 LR: 0.000346 Step: 35800 Loss: 6.0060 Accuracy: 0.1265
|
| 553 |
+
[32m[04/10 06:52:54 RAR]: [0mData (t): 0.2388, 433.91/s/gpu Batch (t): 0.5900 LR: 0.000346 Step: 35900 Loss: 6.0701 Accuracy: 0.1224
|
| 554 |
+
[32m[04/10 06:54:25 RAR]: [0mData (t): 0.2385, 198.47/s/gpu Batch (t): 1.2898 LR: 0.000346 Step: 36000 Loss: 6.0461 Accuracy: 0.1244
|
| 555 |
+
[32m[04/10 06:55:56 RAR]: [0mData (t): 0.2389, 220.65/s/gpu Batch (t): 1.1602 LR: 0.000346 Step: 36100 Loss: 6.1763 Accuracy: 0.1101
|
| 556 |
+
[32m[04/10 06:57:26 RAR]: [0mData (t): 0.2386, 347.96/s/gpu Batch (t): 0.7357 LR: 0.000346 Step: 36200 Loss: 6.0757 Accuracy: 0.1165
|
| 557 |
+
[32m[04/10 06:58:58 RAR]: [0mData (t): 0.2385, 434.96/s/gpu Batch (t): 0.5886 LR: 0.000346 Step: 36300 Loss: 6.1318 Accuracy: 0.1105
|
| 558 |
+
[32m[04/10 07:00:28 RAR]: [0mData (t): 0.2396, 344.99/s/gpu Batch (t): 0.7421 LR: 0.000346 Step: 36400 Loss: 6.0949 Accuracy: 0.1195
|
| 559 |
+
[32m[04/10 07:02:00 RAR]: [0mData (t): 0.2402, 192.35/s/gpu Batch (t): 1.3309 LR: 0.000346 Step: 36500 Loss: 6.0180 Accuracy: 0.1254
|
| 560 |
+
[32m[04/10 07:03:31 RAR]: [0mData (t): 0.2390, 222.68/s/gpu Batch (t): 1.1496 LR: 0.000346 Step: 36600 Loss: 6.2120 Accuracy: 0.1084
|
| 561 |
+
[32m[04/10 07:05:01 RAR]: [0mData (t): 0.2401, 226.41/s/gpu Batch (t): 1.1307 LR: 0.000346 Step: 36700 Loss: 6.1956 Accuracy: 0.1099
|
| 562 |
+
[32m[04/10 07:06:31 RAR]: [0mData (t): 0.2397, 218.57/s/gpu Batch (t): 1.1712 LR: 0.000345 Step: 36800 Loss: 6.1731 Accuracy: 0.1103
|
| 563 |
+
[32m[04/10 07:08:02 RAR]: [0mData (t): 0.2398, 243.34/s/gpu Batch (t): 1.0520 LR: 0.000345 Step: 36900 Loss: 5.9917 Accuracy: 0.1280
|
| 564 |
+
[32m[04/10 07:09:34 RAR]: [0mData (t): 0.2388, 390.31/s/gpu Batch (t): 0.6559 LR: 0.000345 Step: 37000 Loss: 6.1735 Accuracy: 0.1120
|
| 565 |
+
[32m[04/10 07:11:06 RAR]: [0mData (t): 0.2389, 230.35/s/gpu Batch (t): 1.1114 LR: 0.000345 Step: 37100 Loss: 6.0965 Accuracy: 0.1176
|
| 566 |
+
[32m[04/10 07:12:36 RAR]: [0mData (t): 0.2395, 231.65/s/gpu Batch (t): 1.1051 LR: 0.000345 Step: 37200 Loss: 6.2205 Accuracy: 0.1089
|
| 567 |
+
[32m[04/10 07:14:07 RAR]: [0mData (t): 0.2401, 212.12/s/gpu Batch (t): 1.2069 LR: 0.000345 Step: 37300 Loss: 5.9524 Accuracy: 0.1314
|
| 568 |
+
[32m[04/10 07:15:39 RAR]: [0mData (t): 0.2392, 432.92/s/gpu Batch (t): 0.5913 LR: 0.000345 Step: 37400 Loss: 6.1586 Accuracy: 0.1133
|
| 569 |
+
[32m[04/10 07:17:08 RAR]: [0mData (t): 0.2400, 435.92/s/gpu Batch (t): 0.5873 LR: 0.000345 Step: 37500 Loss: 6.0725 Accuracy: 0.1197
|
| 570 |
+
[32m[04/10 07:18:41 RAR]: [0mData (t): 0.3492, 367.62/s/gpu Batch (t): 0.6964 LR: 0.000345 Step: 37600 Loss: 6.1542 Accuracy: 0.1121
|
| 571 |
+
[32m[04/10 07:20:11 RAR]: [0mData (t): 0.2397, 375.00/s/gpu Batch (t): 0.6827 LR: 0.000345 Step: 37700 Loss: 6.0379 Accuracy: 0.1197
|
| 572 |
+
[32m[04/10 07:21:41 RAR]: [0mData (t): 0.2398, 241.26/s/gpu Batch (t): 1.0611 LR: 0.000345 Step: 37800 Loss: 6.0550 Accuracy: 0.1184
|
| 573 |
+
[32m[04/10 07:23:13 RAR]: [0mData (t): 0.2394, 188.91/s/gpu Batch (t): 1.3551 LR: 0.000345 Step: 37900 Loss: 6.2600 Accuracy: 0.1046
|
| 574 |
+
[32m[04/10 07:24:44 RAR]: [0mData (t): 0.2396, 390.34/s/gpu Batch (t): 0.6558 LR: 0.000345 Step: 38000 Loss: 6.0059 Accuracy: 0.1243
|
| 575 |
+
[32m[04/10 07:26:14 RAR]: [0mData (t): 0.2395, 434.67/s/gpu Batch (t): 0.5889 LR: 0.000345 Step: 38100 Loss: 6.1433 Accuracy: 0.1150
|
| 576 |
+
[32m[04/10 07:27:44 RAR]: [0mData (t): 0.2387, 434.68/s/gpu Batch (t): 0.5889 LR: 0.000345 Step: 38200 Loss: 6.1557 Accuracy: 0.1128
|
| 577 |
+
[32m[04/10 07:29:13 RAR]: [0mData (t): 0.7342, 236.11/s/gpu Batch (t): 1.0842 LR: 0.000345 Step: 38300 Loss: 6.1228 Accuracy: 0.1147
|
| 578 |
+
[32m[04/10 07:30:45 RAR]: [0mData (t): 0.2399, 223.28/s/gpu Batch (t): 1.1466 LR: 0.000345 Step: 38400 Loss: 6.1425 Accuracy: 0.1137
|
| 579 |
+
[32m[04/10 07:32:15 RAR]: [0mData (t): 0.2401, 210.32/s/gpu Batch (t): 1.2172 LR: 0.000345 Step: 38500 Loss: 6.1743 Accuracy: 0.1152
|
| 580 |
+
[32m[04/10 07:33:46 RAR]: [0mData (t): 0.2396, 217.03/s/gpu Batch (t): 1.1796 LR: 0.000345 Step: 38600 Loss: 6.1220 Accuracy: 0.1154
|
| 581 |
+
[32m[04/10 07:35:18 RAR]: [0mData (t): 0.2399, 208.26/s/gpu Batch (t): 1.2292 LR: 0.000345 Step: 38700 Loss: 5.9211 Accuracy: 0.1345
|
| 582 |
+
[32m[04/10 07:36:50 RAR]: [0mData (t): 0.2395, 435.56/s/gpu Batch (t): 0.5877 LR: 0.000345 Step: 38800 Loss: 5.8699 Accuracy: 0.1347
|
| 583 |
+
[32m[04/10 07:38:22 RAR]: [0mData (t): 0.2398, 436.27/s/gpu Batch (t): 0.5868 LR: 0.000345 Step: 38900 Loss: 6.1489 Accuracy: 0.1147
|
| 584 |
+
[32m[04/10 07:39:52 RAR]: [0mData (t): 0.2403, 390.74/s/gpu Batch (t): 0.6552 LR: 0.000345 Step: 39000 Loss: 6.1481 Accuracy: 0.1129
|
| 585 |
+
[32m[04/10 07:41:22 RAR]: [0mData (t): 0.4844, 221.57/s/gpu Batch (t): 1.1554 LR: 0.000345 Step: 39100 Loss: 6.1035 Accuracy: 0.1166
|
| 586 |
+
[32m[04/10 07:42:51 RAR]: [0mData (t): 0.7720, 228.85/s/gpu Batch (t): 1.1186 LR: 0.000345 Step: 39200 Loss: 6.1624 Accuracy: 0.1128
|
| 587 |
+
[32m[04/10 07:44:22 RAR]: [0mData (t): 0.2395, 435.33/s/gpu Batch (t): 0.5881 LR: 0.000345 Step: 39300 Loss: 6.0321 Accuracy: 0.1254
|
| 588 |
+
[32m[04/10 07:45:53 RAR]: [0mData (t): 0.2392, 413.85/s/gpu Batch (t): 0.6186 LR: 0.000345 Step: 39400 Loss: 6.0146 Accuracy: 0.1262
|
| 589 |
+
[32m[04/10 07:47:23 RAR]: [0mData (t): 0.2394, 433.14/s/gpu Batch (t): 0.5910 LR: 0.000345 Step: 39500 Loss: 6.0300 Accuracy: 0.1214
|
| 590 |
+
[32m[04/10 07:48:55 RAR]: [0mData (t): 0.2400, 262.61/s/gpu Batch (t): 0.9748 LR: 0.000345 Step: 39600 Loss: 5.9418 Accuracy: 0.1360
|
| 591 |
+
[32m[04/10 07:50:25 RAR]: [0mData (t): 0.2401, 434.90/s/gpu Batch (t): 0.5886 LR: 0.000345 Step: 39700 Loss: 6.0610 Accuracy: 0.1289
|
| 592 |
+
[32m[04/10 07:51:56 RAR]: [0mData (t): 0.2400, 436.60/s/gpu Batch (t): 0.5864 LR: 0.000345 Step: 39800 Loss: 6.1456 Accuracy: 0.1148
|
| 593 |
+
[32m[04/10 07:53:27 RAR]: [0mData (t): 0.2399, 432.44/s/gpu Batch (t): 0.5920 LR: 0.000345 Step: 39900 Loss: 6.0482 Accuracy: 0.1224
|
| 594 |
+
[32m[04/10 07:54:56 RAR]: [0mData (t): 0.3013, 358.18/s/gpu Batch (t): 0.7147 LR: 0.000345 Step: 40000 Loss: 6.1175 Accuracy: 0.1166
|
| 595 |
+
[32m[04/10 07:55:07 RAR]: [0mSaved state to stage1/rar_ordertok/checkpoint-40000
|
| 596 |
+
[32m[04/10 07:57:28 RAR]: [0mData (t): 0.2398, 435.15/s/gpu Batch (t): 0.5883 LR: 0.000345 Step: 40100 Loss: 6.1875 Accuracy: 0.1080
|
| 597 |
+
[32m[04/10 07:58:59 RAR]: [0mData (t): 0.2401, 432.99/s/gpu Batch (t): 0.5912 LR: 0.000345 Step: 40200 Loss: 6.0055 Accuracy: 0.1248
|
| 598 |
+
[32m[04/10 08:00:32 RAR]: [0mData (t): 0.2401, 432.47/s/gpu Batch (t): 0.5920 LR: 0.000345 Step: 40300 Loss: 6.0142 Accuracy: 0.1230
|
| 599 |
+
[32m[04/10 08:02:02 RAR]: [0mData (t): 0.2396, 433.52/s/gpu Batch (t): 0.5905 LR: 0.000345 Step: 40400 Loss: 5.9038 Accuracy: 0.1375
|
| 600 |
+
[32m[04/10 08:03:32 RAR]: [0mData (t): 0.5165, 214.92/s/gpu Batch (t): 1.1911 LR: 0.000345 Step: 40500 Loss: 5.9828 Accuracy: 0.1240
|
| 601 |
+
[32m[04/10 08:05:03 RAR]: [0mData (t): 0.2407, 205.08/s/gpu Batch (t): 1.2483 LR: 0.000344 Step: 40600 Loss: 5.9923 Accuracy: 0.1271
|
| 602 |
+
[32m[04/10 08:06:34 RAR]: [0mData (t): 0.2406, 197.20/s/gpu Batch (t): 1.2982 LR: 0.000344 Step: 40700 Loss: 6.0005 Accuracy: 0.1251
|
| 603 |
+
[32m[04/10 08:08:04 RAR]: [0mData (t): 0.2397, 433.37/s/gpu Batch (t): 0.5907 LR: 0.000344 Step: 40800 Loss: 6.1249 Accuracy: 0.1168
|
| 604 |
+
[32m[04/10 08:09:33 RAR]: [0mData (t): 0.2399, 433.65/s/gpu Batch (t): 0.5903 LR: 0.000344 Step: 40900 Loss: 6.0299 Accuracy: 0.1208
|
| 605 |
+
[32m[04/10 08:11:04 RAR]: [0mData (t): 0.2401, 198.55/s/gpu Batch (t): 1.2893 LR: 0.000344 Step: 41000 Loss: 6.0511 Accuracy: 0.1200
|
| 606 |
+
[32m[04/10 08:12:33 RAR]: [0mData (t): 0.2402, 315.03/s/gpu Batch (t): 0.8126 LR: 0.000344 Step: 41100 Loss: 5.9174 Accuracy: 0.1364
|
| 607 |
+
[32m[04/10 08:14:04 RAR]: [0mData (t): 0.2400, 234.07/s/gpu Batch (t): 1.0937 LR: 0.000344 Step: 41200 Loss: 6.1024 Accuracy: 0.1172
|
| 608 |
+
[32m[04/10 08:15:33 RAR]: [0mData (t): 0.2406, 436.97/s/gpu Batch (t): 0.5859 LR: 0.000344 Step: 41300 Loss: 6.0993 Accuracy: 0.1166
|
| 609 |
+
[32m[04/10 08:17:06 RAR]: [0mData (t): 0.2394, 436.45/s/gpu Batch (t): 0.5865 LR: 0.000344 Step: 41400 Loss: 6.0978 Accuracy: 0.1152
|
| 610 |
+
[32m[04/10 08:18:36 RAR]: [0mData (t): 0.2401, 435.39/s/gpu Batch (t): 0.5880 LR: 0.000344 Step: 41500 Loss: 6.0822 Accuracy: 0.1199
|
| 611 |
+
[32m[04/10 08:20:06 RAR]: [0mData (t): 0.3769, 307.19/s/gpu Batch (t): 0.8334 LR: 0.000344 Step: 41600 Loss: 6.0307 Accuracy: 0.1253
|
| 612 |
+
[32m[04/10 08:21:36 RAR]: [0mData (t): 0.2402, 435.02/s/gpu Batch (t): 0.5885 LR: 0.000344 Step: 41700 Loss: 5.9433 Accuracy: 0.1311
|
| 613 |
+
[32m[04/10 08:23:06 RAR]: [0mData (t): 0.2398, 435.05/s/gpu Batch (t): 0.5884 LR: 0.000344 Step: 41800 Loss: 6.0109 Accuracy: 0.1250
|
| 614 |
+
[32m[04/10 08:24:36 RAR]: [0mData (t): 0.4211, 333.20/s/gpu Batch (t): 0.7683 LR: 0.000344 Step: 41900 Loss: 5.9330 Accuracy: 0.1325
|
| 615 |
+
[32m[04/10 08:26:07 RAR]: [0mData (t): 0.2399, 389.16/s/gpu Batch (t): 0.6578 LR: 0.000344 Step: 42000 Loss: 6.1042 Accuracy: 0.1194
|
| 616 |
+
[32m[04/10 08:27:39 RAR]: [0mData (t): 0.2401, 435.72/s/gpu Batch (t): 0.5875 LR: 0.000344 Step: 42100 Loss: 5.9540 Accuracy: 0.1280
|
| 617 |
+
[32m[04/10 08:29:11 RAR]: [0mData (t): 0.2400, 435.33/s/gpu Batch (t): 0.5881 LR: 0.000344 Step: 42200 Loss: 5.9822 Accuracy: 0.1261
|
| 618 |
+
[32m[04/10 08:30:42 RAR]: [0mData (t): 0.2402, 435.19/s/gpu Batch (t): 0.5882 LR: 0.000344 Step: 42300 Loss: 6.0036 Accuracy: 0.1250
|
| 619 |
+
[32m[04/10 08:32:13 RAR]: [0mData (t): 0.2403, 435.28/s/gpu Batch (t): 0.5881 LR: 0.000344 Step: 42400 Loss: 6.0794 Accuracy: 0.1189
|
| 620 |
+
[32m[04/10 08:33:45 RAR]: [0mData (t): 0.2398, 434.29/s/gpu Batch (t): 0.5895 LR: 0.000344 Step: 42500 Loss: 6.0515 Accuracy: 0.1230
|
| 621 |
+
[32m[04/10 08:35:18 RAR]: [0mData (t): 0.4139, 266.76/s/gpu Batch (t): 0.9597 LR: 0.000344 Step: 42600 Loss: 5.9696 Accuracy: 0.1287
|
| 622 |
+
[32m[04/10 08:36:48 RAR]: [0mData (t): 0.2400, 328.69/s/gpu Batch (t): 0.7789 LR: 0.000344 Step: 42700 Loss: 6.0245 Accuracy: 0.1205
|
| 623 |
+
[32m[04/10 08:38:18 RAR]: [0mData (t): 0.2398, 194.71/s/gpu Batch (t): 1.3148 LR: 0.000344 Step: 42800 Loss: 6.0442 Accuracy: 0.1268
|
| 624 |
+
[32m[04/10 08:39:50 RAR]: [0mData (t): 0.2394, 435.28/s/gpu Batch (t): 0.5881 LR: 0.000344 Step: 42900 Loss: 5.8853 Accuracy: 0.1357
|
| 625 |
+
[32m[04/10 08:41:23 RAR]: [0mData (t): 0.2399, 387.87/s/gpu Batch (t): 0.6600 LR: 0.000344 Step: 43000 Loss: 6.0098 Accuracy: 0.1247
|
| 626 |
+
[32m[04/10 08:42:52 RAR]: [0mData (t): 0.2399, 435.45/s/gpu Batch (t): 0.5879 LR: 0.000344 Step: 43100 Loss: 5.8849 Accuracy: 0.1350
|
| 627 |
+
[32m[04/10 08:44:23 RAR]: [0mData (t): 0.2405, 216.69/s/gpu Batch (t): 1.1814 LR: 0.000344 Step: 43200 Loss: 5.9460 Accuracy: 0.1285
|
| 628 |
+
[32m[04/10 08:45:52 RAR]: [0mData (t): 0.2400, 186.82/s/gpu Batch (t): 1.3703 LR: 0.000344 Step: 43300 Loss: 5.9736 Accuracy: 0.1276
|
| 629 |
+
[32m[04/10 08:47:22 RAR]: [0mData (t): 0.2887, 321.74/s/gpu Batch (t): 0.7957 LR: 0.000344 Step: 43400 Loss: 5.9291 Accuracy: 0.1319
|
| 630 |
+
[32m[04/10 08:48:55 RAR]: [0mData (t): 0.2406, 197.81/s/gpu Batch (t): 1.2942 LR: 0.000344 Step: 43500 Loss: 6.1117 Accuracy: 0.1156
|
| 631 |
+
[32m[04/10 08:50:25 RAR]: [0mData (t): 0.2396, 213.03/s/gpu Batch (t): 1.2017 LR: 0.000344 Step: 43600 Loss: 5.9462 Accuracy: 0.1273
|
| 632 |
+
[32m[04/10 08:51:55 RAR]: [0mData (t): 0.2404, 213.45/s/gpu Batch (t): 1.1994 LR: 0.000344 Step: 43700 Loss: 5.9926 Accuracy: 0.1238
|
| 633 |
+
[32m[04/10 08:53:25 RAR]: [0mData (t): 0.2401, 215.64/s/gpu Batch (t): 1.1871 LR: 0.000344 Step: 43800 Loss: 6.0041 Accuracy: 0.1258
|
| 634 |
+
[32m[04/10 08:55:01 RAR]: [0mData (t): 0.2395, 434.20/s/gpu Batch (t): 0.5896 LR: 0.000344 Step: 43900 Loss: 6.0272 Accuracy: 0.1219
|
| 635 |
+
[32m[04/10 08:56:32 RAR]: [0mData (t): 0.2395, 176.17/s/gpu Batch (t): 1.4531 LR: 0.000344 Step: 44000 Loss: 6.0735 Accuracy: 0.1185
|
| 636 |
+
[32m[04/10 08:58:03 RAR]: [0mData (t): 0.2399, 235.68/s/gpu Batch (t): 1.0862 LR: 0.000344 Step: 44100 Loss: 5.8423 Accuracy: 0.1405
|
| 637 |
+
[32m[04/10 08:59:33 RAR]: [0mData (t): 0.2862, 199.64/s/gpu Batch (t): 1.2823 LR: 0.000343 Step: 44200 Loss: 5.8970 Accuracy: 0.1391
|
| 638 |
+
[32m[04/10 09:01:04 RAR]: [0mData (t): 0.2402, 246.19/s/gpu Batch (t): 1.0399 LR: 0.000343 Step: 44300 Loss: 5.8752 Accuracy: 0.1383
|
| 639 |
+
[32m[04/10 09:02:36 RAR]: [0mData (t): 0.2398, 213.62/s/gpu Batch (t): 1.1984 LR: 0.000343 Step: 44400 Loss: 5.9243 Accuracy: 0.1299
|
| 640 |
+
[32m[04/10 09:04:07 RAR]: [0mData (t): 0.2401, 429.71/s/gpu Batch (t): 0.5958 LR: 0.000343 Step: 44500 Loss: 5.8496 Accuracy: 0.1352
|
| 641 |
+
[32m[04/10 09:05:37 RAR]: [0mData (t): 0.2394, 191.06/s/gpu Batch (t): 1.3399 LR: 0.000343 Step: 44600 Loss: 5.9806 Accuracy: 0.1262
|
| 642 |
+
[32m[04/10 09:07:08 RAR]: [0mData (t): 0.5627, 220.15/s/gpu Batch (t): 1.1628 LR: 0.000343 Step: 44700 Loss: 6.0796 Accuracy: 0.1194
|
| 643 |
+
[32m[04/10 09:08:40 RAR]: [0mData (t): 0.2405, 229.24/s/gpu Batch (t): 1.1167 LR: 0.000343 Step: 44800 Loss: 5.9253 Accuracy: 0.1308
|
| 644 |
+
[32m[04/10 09:10:11 RAR]: [0mData (t): 0.2388, 432.40/s/gpu Batch (t): 0.5920 LR: 0.000343 Step: 44900 Loss: 5.9889 Accuracy: 0.1232
|
| 645 |
+
[32m[04/10 09:11:41 RAR]: [0mData (t): 0.2392, 387.14/s/gpu Batch (t): 0.6613 LR: 0.000343 Step: 45000 Loss: 5.9109 Accuracy: 0.1295
|
| 646 |
+
[32m[04/10 09:13:17 RAR]: [0mData (t): 0.2389, 432.84/s/gpu Batch (t): 0.5914 LR: 0.000343 Step: 45100 Loss: 6.0543 Accuracy: 0.1190
|
| 647 |
+
[32m[04/10 09:14:50 RAR]: [0mData (t): 0.2405, 433.19/s/gpu Batch (t): 0.5910 LR: 0.000343 Step: 45200 Loss: 5.7996 Accuracy: 0.1444
|
| 648 |
+
[32m[04/10 09:16:21 RAR]: [0mData (t): 0.4851, 307.76/s/gpu Batch (t): 0.8318 LR: 0.000343 Step: 45300 Loss: 5.9371 Accuracy: 0.1306
|
| 649 |
+
[32m[04/10 09:17:52 RAR]: [0mData (t): 0.2395, 433.19/s/gpu Batch (t): 0.5910 LR: 0.000343 Step: 45400 Loss: 6.0728 Accuracy: 0.1202
|
| 650 |
+
[32m[04/10 09:19:24 RAR]: [0mData (t): 0.2396, 433.30/s/gpu Batch (t): 0.5908 LR: 0.000343 Step: 45500 Loss: 5.8088 Accuracy: 0.1423
|
| 651 |
+
[32m[04/10 09:20:55 RAR]: [0mData (t): 0.2400, 436.53/s/gpu Batch (t): 0.5864 LR: 0.000343 Step: 45600 Loss: 5.9006 Accuracy: 0.1368
|
| 652 |
+
[32m[04/10 09:22:28 RAR]: [0mData (t): 0.2400, 206.81/s/gpu Batch (t): 1.2378 LR: 0.000343 Step: 45700 Loss: 6.0159 Accuracy: 0.1223
|
| 653 |
+
[32m[04/10 09:23:58 RAR]: [0mData (t): 0.2401, 433.69/s/gpu Batch (t): 0.5903 LR: 0.000343 Step: 45800 Loss: 6.0333 Accuracy: 0.1198
|
| 654 |
+
[32m[04/10 09:25:28 RAR]: [0mData (t): 0.2400, 434.21/s/gpu Batch (t): 0.5896 LR: 0.000343 Step: 45900 Loss: 5.9777 Accuracy: 0.1257
|
| 655 |
+
[32m[04/10 09:27:00 RAR]: [0mData (t): 0.2401, 390.44/s/gpu Batch (t): 0.6557 LR: 0.000343 Step: 46000 Loss: 5.9376 Accuracy: 0.1304
|
| 656 |
+
[32m[04/10 09:28:30 RAR]: [0mData (t): 0.2396, 433.84/s/gpu Batch (t): 0.5901 LR: 0.000343 Step: 46100 Loss: 5.8924 Accuracy: 0.1379
|
| 657 |
+
[32m[04/10 09:30:02 RAR]: [0mData (t): 0.2390, 426.37/s/gpu Batch (t): 0.6004 LR: 0.000343 Step: 46200 Loss: 5.8541 Accuracy: 0.1404
|
| 658 |
+
[32m[04/10 09:31:33 RAR]: [0mData (t): 0.2400, 192.42/s/gpu Batch (t): 1.3304 LR: 0.000343 Step: 46300 Loss: 5.9814 Accuracy: 0.1227
|
| 659 |
+
[32m[04/10 09:33:07 RAR]: [0mData (t): 0.2403, 433.57/s/gpu Batch (t): 0.5904 LR: 0.000343 Step: 46400 Loss: 6.0232 Accuracy: 0.1216
|
| 660 |
+
[32m[04/10 09:34:38 RAR]: [0mData (t): 0.2400, 431.37/s/gpu Batch (t): 0.5935 LR: 0.000343 Step: 46500 Loss: 5.9402 Accuracy: 0.1289
|
| 661 |
+
[32m[04/10 09:36:11 RAR]: [0mData (t): 0.2397, 194.83/s/gpu Batch (t): 1.3140 LR: 0.000343 Step: 46600 Loss: 6.0304 Accuracy: 0.1195
|
| 662 |
+
[32m[04/10 09:37:42 RAR]: [0mData (t): 0.2398, 222.66/s/gpu Batch (t): 1.1498 LR: 0.000343 Step: 46700 Loss: 5.9356 Accuracy: 0.1252
|
| 663 |
+
[32m[04/10 09:39:11 RAR]: [0mData (t): 0.4249, 329.89/s/gpu Batch (t): 0.7760 LR: 0.000343 Step: 46800 Loss: 5.8512 Accuracy: 0.1342
|
| 664 |
+
[32m[04/10 09:40:42 RAR]: [0mData (t): 0.2386, 432.45/s/gpu Batch (t): 0.5920 LR: 0.000343 Step: 46900 Loss: 6.0111 Accuracy: 0.1239
|
| 665 |
+
[32m[04/10 09:42:15 RAR]: [0mData (t): 0.2394, 387.13/s/gpu Batch (t): 0.6613 LR: 0.000343 Step: 47000 Loss: 5.9578 Accuracy: 0.1294
|
| 666 |
+
[32m[04/10 09:43:47 RAR]: [0mData (t): 0.2391, 431.82/s/gpu Batch (t): 0.5928 LR: 0.000343 Step: 47100 Loss: 6.0064 Accuracy: 0.1256
|
| 667 |
+
[32m[04/10 09:45:19 RAR]: [0mData (t): 0.2389, 431.31/s/gpu Batch (t): 0.5935 LR: 0.000343 Step: 47200 Loss: 6.0723 Accuracy: 0.1202
|
| 668 |
+
[32m[04/10 09:46:51 RAR]: [0mData (t): 0.2392, 433.87/s/gpu Batch (t): 0.5900 LR: 0.000343 Step: 47300 Loss: 5.9527 Accuracy: 0.1261
|
| 669 |
+
[32m[04/10 09:48:22 RAR]: [0mData (t): 0.2400, 433.10/s/gpu Batch (t): 0.5911 LR: 0.000343 Step: 47400 Loss: 5.9705 Accuracy: 0.1325
|
| 670 |
+
[32m[04/10 09:49:55 RAR]: [0mData (t): 0.2389, 431.50/s/gpu Batch (t): 0.5933 LR: 0.000342 Step: 47500 Loss: 5.8672 Accuracy: 0.1360
|
| 671 |
+
[32m[04/10 09:51:30 RAR]: [0mData (t): 0.2392, 420.75/s/gpu Batch (t): 0.6084 LR: 0.000342 Step: 47600 Loss: 5.6793 Accuracy: 0.1526
|
| 672 |
+
[32m[04/10 09:53:00 RAR]: [0mData (t): 0.2393, 431.98/s/gpu Batch (t): 0.5926 LR: 0.000342 Step: 47700 Loss: 5.8475 Accuracy: 0.1334
|
| 673 |
+
[32m[04/10 09:54:32 RAR]: [0mData (t): 0.2391, 432.16/s/gpu Batch (t): 0.5924 LR: 0.000342 Step: 47800 Loss: 5.9315 Accuracy: 0.1320
|
| 674 |
+
[32m[04/10 09:56:04 RAR]: [0mData (t): 0.2390, 432.23/s/gpu Batch (t): 0.5923 LR: 0.000342 Step: 47900 Loss: 5.8249 Accuracy: 0.1343
|
| 675 |
+
[32m[04/10 09:57:36 RAR]: [0mData (t): 0.2396, 386.32/s/gpu Batch (t): 0.6627 LR: 0.000342 Step: 48000 Loss: 5.9378 Accuracy: 0.1308
|
| 676 |
+
[32m[04/10 09:59:07 RAR]: [0mData (t): 0.2386, 432.38/s/gpu Batch (t): 0.5921 LR: 0.000342 Step: 48100 Loss: 5.9428 Accuracy: 0.1322
|
| 677 |
+
[32m[04/10 10:00:38 RAR]: [0mData (t): 0.2399, 432.41/s/gpu Batch (t): 0.5920 LR: 0.000342 Step: 48200 Loss: 5.9292 Accuracy: 0.1306
|
| 678 |
+
[32m[04/10 10:02:10 RAR]: [0mData (t): 0.2396, 430.78/s/gpu Batch (t): 0.5943 LR: 0.000342 Step: 48300 Loss: 5.8585 Accuracy: 0.1376
|
| 679 |
+
[32m[04/10 10:03:42 RAR]: [0mData (t): 0.2398, 433.35/s/gpu Batch (t): 0.5907 LR: 0.000342 Step: 48400 Loss: 5.9468 Accuracy: 0.1309
|
| 680 |
+
[32m[04/10 10:05:14 RAR]: [0mData (t): 0.2385, 434.61/s/gpu Batch (t): 0.5890 LR: 0.000342 Step: 48500 Loss: 5.9377 Accuracy: 0.1274
|
| 681 |
+
[32m[04/10 10:06:46 RAR]: [0mData (t): 0.2393, 433.88/s/gpu Batch (t): 0.5900 LR: 0.000342 Step: 48600 Loss: 5.9654 Accuracy: 0.1251
|
| 682 |
+
[32m[04/10 10:08:18 RAR]: [0mData (t): 0.2399, 432.14/s/gpu Batch (t): 0.5924 LR: 0.000342 Step: 48700 Loss: 5.9275 Accuracy: 0.1265
|
stage1/rar_ordertok/log1.txt
ADDED
|
File without changes
|
stage1/rar_ordertok/log2.txt
ADDED
|
File without changes
|
stage1/rar_ordertok/log3.txt
ADDED
|
File without changes
|