Upload folder using huggingface_hub
Browse files- debug.log +2 -0
- eval.log +22 -0
- model.json +1 -0
- model.safetensors +3 -0
- train.log +503 -0
debug.log
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name tiny_32x4 | device cuda | compile True | data_dir data/tiny_stories_10m | should_randomize True | log_interval 10 | eval_interval 250 | eval_steps 100 | batch_size 128 | gradient_accumulation_steps 8 | learning_rate 0.001 | warmup_steps 0 | max_steps 5000 | decay_lr False | min_lr 0 | weight_decay 0.1 | grad_clip 1.0 | gpt_config {'name': 'tiktoken_32x4', 'device': device(type='cuda'), 'compile': True, 'block_size': 128, 'vocab_size': 50257, 'n_layer': 4, 'n_head': 16, 'n_embd': 32}
|
| 2 |
+
name tiny_32x4 | device cuda | compile True | data_dir data/tiny_stories_10m | should_randomize True | log_interval 10 | eval_interval 250 | eval_steps 100 | batch_size 128 | gradient_accumulation_steps 8 | learning_rate 0.001 | warmup_steps 0 | max_steps 5000 | decay_lr False | min_lr 0 | weight_decay 0.1 | grad_clip 1.0 | gpt_config {'name': 'tiktoken_32x4', 'device': device(type='cuda'), 'compile': True, 'block_size': 128, 'vocab_size': 50257, 'n_layer': 4, 'n_head': 16, 'n_embd': 32}
|
eval.log
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
type eval | step 0 | loss 10.8265 | checkpoint False
|
| 2 |
+
type eval | step 0 | loss 10.8273 | checkpoint False
|
| 3 |
+
type eval | step 250 | loss 4.5234 | checkpoint True
|
| 4 |
+
type eval | step 500 | loss 3.7377 | checkpoint True
|
| 5 |
+
type eval | step 750 | loss 3.4481 | checkpoint True
|
| 6 |
+
type eval | step 1000 | loss 3.2734 | checkpoint True
|
| 7 |
+
type eval | step 1250 | loss 3.1417 | checkpoint True
|
| 8 |
+
type eval | step 1500 | loss 3.0562 | checkpoint True
|
| 9 |
+
type eval | step 1750 | loss 3.0011 | checkpoint True
|
| 10 |
+
type eval | step 2000 | loss 2.9519 | checkpoint True
|
| 11 |
+
type eval | step 2250 | loss 2.9140 | checkpoint True
|
| 12 |
+
type eval | step 2500 | loss 2.8882 | checkpoint True
|
| 13 |
+
type eval | step 2750 | loss 2.8661 | checkpoint True
|
| 14 |
+
type eval | step 3000 | loss 2.8455 | checkpoint True
|
| 15 |
+
type eval | step 3250 | loss 2.8208 | checkpoint True
|
| 16 |
+
type eval | step 3500 | loss 2.8082 | checkpoint True
|
| 17 |
+
type eval | step 3750 | loss 2.7947 | checkpoint True
|
| 18 |
+
type eval | step 4000 | loss 2.7789 | checkpoint True
|
| 19 |
+
type eval | step 4250 | loss 2.7625 | checkpoint True
|
| 20 |
+
type eval | step 4500 | loss 2.7484 | checkpoint True
|
| 21 |
+
type eval | step 4750 | loss 2.7381 | checkpoint True
|
| 22 |
+
type eval | step 5000 | loss 2.7314 | checkpoint True
|
model.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"block_size": 128, "vocab_size": 50257, "n_layer": 4, "n_head": 16, "n_embd": 32}
|
model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1b8622299196d6027e4f97612593af474116dae9250add08131bf25e2770fefe
|
| 3 |
+
size 6657832
|
train.log
ADDED
|
@@ -0,0 +1,503 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
type train | step 10 | loss 10.3330 | lr 1.0e-03 | norm 0.7072 | dt 0.417
|
| 2 |
+
type train | step 20 | loss 9.7623 | lr 1.0e-03 | norm 0.7508 | dt 0.417
|
| 3 |
+
type train | step 30 | loss 9.1839 | lr 1.0e-03 | norm 0.8107 | dt 0.418
|
| 4 |
+
type train | step 10 | loss 10.3383 | lr 1.0e-03 | norm 0.7068 | dt 0.111
|
| 5 |
+
type train | step 20 | loss 9.7668 | lr 1.0e-03 | norm 0.7496 | dt 0.111
|
| 6 |
+
type train | step 30 | loss 9.1860 | lr 1.0e-03 | norm 0.8091 | dt 0.111
|
| 7 |
+
type train | step 40 | loss 8.6266 | lr 1.0e-03 | norm 0.8568 | dt 0.111
|
| 8 |
+
type train | step 50 | loss 8.1013 | lr 1.0e-03 | norm 0.8972 | dt 0.110
|
| 9 |
+
type train | step 60 | loss 7.6199 | lr 1.0e-03 | norm 0.9090 | dt 0.111
|
| 10 |
+
type train | step 70 | loss 7.1879 | lr 1.0e-03 | norm 0.9057 | dt 0.111
|
| 11 |
+
type train | step 80 | loss 6.8313 | lr 1.0e-03 | norm 0.8720 | dt 0.111
|
| 12 |
+
type train | step 90 | loss 6.5357 | lr 1.0e-03 | norm 0.8382 | dt 0.111
|
| 13 |
+
type train | step 100 | loss 6.3160 | lr 1.0e-03 | norm 0.7600 | dt 0.111
|
| 14 |
+
type train | step 110 | loss 6.1447 | lr 1.0e-03 | norm 0.7295 | dt 0.111
|
| 15 |
+
type train | step 120 | loss 6.0097 | lr 1.0e-03 | norm 0.7348 | dt 0.111
|
| 16 |
+
type train | step 130 | loss 5.8503 | lr 1.0e-03 | norm 0.8594 | dt 0.111
|
| 17 |
+
type train | step 140 | loss 5.7640 | lr 1.0e-03 | norm 1.0842 | dt 0.111
|
| 18 |
+
type train | step 150 | loss 5.6639 | lr 1.0e-03 | norm 1.7299 | dt 0.111
|
| 19 |
+
type train | step 160 | loss 5.5513 | lr 1.0e-03 | norm 1.7778 | dt 0.111
|
| 20 |
+
type train | step 170 | loss 5.4143 | lr 1.0e-03 | norm 0.6508 | dt 0.111
|
| 21 |
+
type train | step 180 | loss 5.3034 | lr 1.0e-03 | norm 1.6037 | dt 0.111
|
| 22 |
+
type train | step 190 | loss 5.1707 | lr 1.0e-03 | norm 2.4459 | dt 0.111
|
| 23 |
+
type train | step 200 | loss 5.0737 | lr 1.0e-03 | norm 2.2686 | dt 0.111
|
| 24 |
+
type train | step 210 | loss 4.9398 | lr 1.0e-03 | norm 2.9354 | dt 0.111
|
| 25 |
+
type train | step 220 | loss 4.8236 | lr 1.0e-03 | norm 1.6275 | dt 0.111
|
| 26 |
+
type train | step 230 | loss 4.7232 | lr 1.0e-03 | norm 1.8814 | dt 0.111
|
| 27 |
+
type train | step 240 | loss 4.6439 | lr 1.0e-03 | norm 1.5637 | dt 0.111
|
| 28 |
+
type train | step 250 | loss 4.5500 | lr 1.0e-03 | norm 0.8494 | dt 0.111
|
| 29 |
+
type train | step 260 | loss 4.4530 | lr 1.0e-03 | norm 1.4317 | dt 0.111
|
| 30 |
+
type train | step 270 | loss 4.4119 | lr 1.0e-03 | norm 0.9960 | dt 0.111
|
| 31 |
+
type train | step 280 | loss 4.3221 | lr 1.0e-03 | norm 0.7116 | dt 0.112
|
| 32 |
+
type train | step 290 | loss 4.2935 | lr 1.0e-03 | norm 1.2159 | dt 0.111
|
| 33 |
+
type train | step 300 | loss 4.2530 | lr 1.0e-03 | norm 2.5565 | dt 0.111
|
| 34 |
+
type train | step 310 | loss 4.2130 | lr 1.0e-03 | norm 1.2535 | dt 0.112
|
| 35 |
+
type train | step 320 | loss 4.1495 | lr 1.0e-03 | norm 1.6140 | dt 0.111
|
| 36 |
+
type train | step 330 | loss 4.0959 | lr 1.0e-03 | norm 1.8347 | dt 0.111
|
| 37 |
+
type train | step 340 | loss 4.0825 | lr 1.0e-03 | norm 1.1320 | dt 0.111
|
| 38 |
+
type train | step 350 | loss 4.0471 | lr 1.0e-03 | norm 1.4113 | dt 0.111
|
| 39 |
+
type train | step 360 | loss 4.0109 | lr 1.0e-03 | norm 1.1816 | dt 0.111
|
| 40 |
+
type train | step 370 | loss 3.9523 | lr 1.0e-03 | norm 1.0637 | dt 0.111
|
| 41 |
+
type train | step 380 | loss 3.9676 | lr 1.0e-03 | norm 1.3457 | dt 0.119
|
| 42 |
+
type train | step 390 | loss 3.9231 | lr 1.0e-03 | norm 1.5495 | dt 0.111
|
| 43 |
+
type train | step 400 | loss 3.9146 | lr 1.0e-03 | norm 1.7005 | dt 0.111
|
| 44 |
+
type train | step 410 | loss 3.8805 | lr 1.0e-03 | norm 1.6612 | dt 0.112
|
| 45 |
+
type train | step 420 | loss 3.8791 | lr 1.0e-03 | norm 1.4267 | dt 0.111
|
| 46 |
+
type train | step 430 | loss 3.8615 | lr 1.0e-03 | norm 1.7027 | dt 0.111
|
| 47 |
+
type train | step 440 | loss 3.8344 | lr 1.0e-03 | norm 1.9053 | dt 0.112
|
| 48 |
+
type train | step 450 | loss 3.8084 | lr 1.0e-03 | norm 1.8223 | dt 0.112
|
| 49 |
+
type train | step 460 | loss 3.8010 | lr 1.0e-03 | norm 1.9712 | dt 0.112
|
| 50 |
+
type train | step 470 | loss 3.7612 | lr 1.0e-03 | norm 1.8497 | dt 0.112
|
| 51 |
+
type train | step 480 | loss 3.7730 | lr 1.0e-03 | norm 1.7918 | dt 0.111
|
| 52 |
+
type train | step 490 | loss 3.7459 | lr 1.0e-03 | norm 1.9049 | dt 0.112
|
| 53 |
+
type train | step 500 | loss 3.7482 | lr 1.0e-03 | norm 1.8751 | dt 0.111
|
| 54 |
+
type train | step 510 | loss 3.6855 | lr 1.0e-03 | norm 2.7669 | dt 0.112
|
| 55 |
+
type train | step 520 | loss 3.6997 | lr 1.0e-03 | norm 1.9023 | dt 0.111
|
| 56 |
+
type train | step 530 | loss 3.7013 | lr 1.0e-03 | norm 2.7672 | dt 0.112
|
| 57 |
+
type train | step 540 | loss 3.6742 | lr 1.0e-03 | norm 1.8523 | dt 0.112
|
| 58 |
+
type train | step 550 | loss 3.6391 | lr 1.0e-03 | norm 1.9280 | dt 0.112
|
| 59 |
+
type train | step 560 | loss 3.6567 | lr 1.0e-03 | norm 1.8137 | dt 0.113
|
| 60 |
+
type train | step 570 | loss 3.6269 | lr 1.0e-03 | norm 2.7995 | dt 0.112
|
| 61 |
+
type train | step 580 | loss 3.6258 | lr 1.0e-03 | norm 2.6514 | dt 0.112
|
| 62 |
+
type train | step 590 | loss 3.6018 | lr 1.0e-03 | norm 2.5518 | dt 0.112
|
| 63 |
+
type train | step 600 | loss 3.5883 | lr 1.0e-03 | norm 3.9457 | dt 0.112
|
| 64 |
+
type train | step 610 | loss 3.5783 | lr 1.0e-03 | norm 2.1407 | dt 0.113
|
| 65 |
+
type train | step 620 | loss 3.5874 | lr 1.0e-03 | norm 2.6008 | dt 0.111
|
| 66 |
+
type train | step 630 | loss 3.5825 | lr 1.0e-03 | norm 3.5026 | dt 0.113
|
| 67 |
+
type train | step 640 | loss 3.5508 | lr 1.0e-03 | norm 4.6949 | dt 0.113
|
| 68 |
+
type train | step 650 | loss 3.5597 | lr 1.0e-03 | norm 1.5705 | dt 0.113
|
| 69 |
+
type train | step 660 | loss 3.5296 | lr 1.0e-03 | norm 2.8740 | dt 0.112
|
| 70 |
+
type train | step 670 | loss 3.5351 | lr 1.0e-03 | norm 1.4937 | dt 0.112
|
| 71 |
+
type train | step 680 | loss 3.5293 | lr 1.0e-03 | norm 2.2113 | dt 0.111
|
| 72 |
+
type train | step 690 | loss 3.5269 | lr 1.0e-03 | norm 2.9841 | dt 0.112
|
| 73 |
+
type train | step 700 | loss 3.4906 | lr 1.0e-03 | norm 2.4684 | dt 0.112
|
| 74 |
+
type train | step 710 | loss 3.4646 | lr 1.0e-03 | norm 2.3474 | dt 0.113
|
| 75 |
+
type train | step 720 | loss 3.4617 | lr 1.0e-03 | norm 2.7813 | dt 0.113
|
| 76 |
+
type train | step 730 | loss 3.4459 | lr 1.0e-03 | norm 1.5406 | dt 0.112
|
| 77 |
+
type train | step 740 | loss 3.4371 | lr 1.0e-03 | norm 2.6166 | dt 0.113
|
| 78 |
+
type train | step 750 | loss 3.4039 | lr 1.0e-03 | norm 1.5664 | dt 0.112
|
| 79 |
+
type train | step 760 | loss 3.4434 | lr 1.0e-03 | norm 3.0861 | dt 0.119
|
| 80 |
+
type train | step 770 | loss 3.4065 | lr 1.0e-03 | norm 3.5232 | dt 0.112
|
| 81 |
+
type train | step 780 | loss 3.4096 | lr 1.0e-03 | norm 2.8305 | dt 0.112
|
| 82 |
+
type train | step 790 | loss 3.3894 | lr 1.0e-03 | norm 1.8813 | dt 0.112
|
| 83 |
+
type train | step 800 | loss 3.4055 | lr 1.0e-03 | norm 2.5700 | dt 0.112
|
| 84 |
+
type train | step 810 | loss 3.3972 | lr 1.0e-03 | norm 2.5834 | dt 0.112
|
| 85 |
+
type train | step 820 | loss 3.3792 | lr 1.0e-03 | norm 2.5971 | dt 0.113
|
| 86 |
+
type train | step 830 | loss 3.3618 | lr 1.0e-03 | norm 1.8595 | dt 0.113
|
| 87 |
+
type train | step 840 | loss 3.3678 | lr 1.0e-03 | norm 2.5004 | dt 0.112
|
| 88 |
+
type train | step 850 | loss 3.3409 | lr 1.0e-03 | norm 2.9880 | dt 0.112
|
| 89 |
+
type train | step 860 | loss 3.3568 | lr 1.0e-03 | norm 3.4849 | dt 0.114
|
| 90 |
+
type train | step 870 | loss 3.3403 | lr 1.0e-03 | norm 2.4013 | dt 0.113
|
| 91 |
+
type train | step 880 | loss 3.3492 | lr 1.0e-03 | norm 2.1356 | dt 0.114
|
| 92 |
+
type train | step 890 | loss 3.2926 | lr 1.0e-03 | norm 1.9721 | dt 0.113
|
| 93 |
+
type train | step 900 | loss 3.3159 | lr 1.0e-03 | norm 2.1230 | dt 0.113
|
| 94 |
+
type train | step 910 | loss 3.3261 | lr 1.0e-03 | norm 2.9547 | dt 0.114
|
| 95 |
+
type train | step 920 | loss 3.2953 | lr 1.0e-03 | norm 1.7801 | dt 0.114
|
| 96 |
+
type train | step 930 | loss 3.2854 | lr 1.0e-03 | norm 3.0788 | dt 0.113
|
| 97 |
+
type train | step 940 | loss 3.3010 | lr 1.0e-03 | norm 2.4363 | dt 0.113
|
| 98 |
+
type train | step 950 | loss 3.2744 | lr 1.0e-03 | norm 1.8727 | dt 0.114
|
| 99 |
+
type train | step 960 | loss 3.2844 | lr 1.0e-03 | norm 3.1843 | dt 0.113
|
| 100 |
+
type train | step 970 | loss 3.2700 | lr 1.0e-03 | norm 2.9397 | dt 0.113
|
| 101 |
+
type train | step 980 | loss 3.2513 | lr 1.0e-03 | norm 2.6933 | dt 0.113
|
| 102 |
+
type train | step 990 | loss 3.2500 | lr 1.0e-03 | norm 2.4030 | dt 0.114
|
| 103 |
+
type train | step 1000 | loss 3.2668 | lr 1.0e-03 | norm 2.4430 | dt 0.113
|
| 104 |
+
type train | step 1010 | loss 3.2617 | lr 1.0e-03 | norm 2.4673 | dt 0.112
|
| 105 |
+
type train | step 1020 | loss 3.2231 | lr 1.0e-03 | norm 2.5424 | dt 0.113
|
| 106 |
+
type train | step 1030 | loss 3.2535 | lr 1.0e-03 | norm 1.9049 | dt 0.113
|
| 107 |
+
type train | step 1040 | loss 3.2289 | lr 1.0e-03 | norm 2.8275 | dt 0.112
|
| 108 |
+
type train | step 1050 | loss 3.2416 | lr 1.0e-03 | norm 2.1650 | dt 0.113
|
| 109 |
+
type train | step 1060 | loss 3.2383 | lr 1.0e-03 | norm 2.3707 | dt 0.113
|
| 110 |
+
type train | step 1070 | loss 3.2356 | lr 1.0e-03 | norm 1.7848 | dt 0.119
|
| 111 |
+
type train | step 1080 | loss 3.2029 | lr 1.0e-03 | norm 2.6397 | dt 0.114
|
| 112 |
+
type train | step 1090 | loss 3.1909 | lr 1.0e-03 | norm 3.0345 | dt 0.115
|
| 113 |
+
type train | step 1100 | loss 3.1876 | lr 1.0e-03 | norm 3.1054 | dt 0.114
|
| 114 |
+
type train | step 1110 | loss 3.1949 | lr 1.0e-03 | norm 3.2054 | dt 0.113
|
| 115 |
+
type train | step 1120 | loss 3.1793 | lr 1.0e-03 | norm 3.1650 | dt 0.113
|
| 116 |
+
type train | step 1130 | loss 3.1449 | lr 1.0e-03 | norm 2.2814 | dt 0.113
|
| 117 |
+
type train | step 1140 | loss 3.1763 | lr 1.0e-03 | norm 2.7021 | dt 0.519
|
| 118 |
+
type train | step 1150 | loss 3.1516 | lr 1.0e-03 | norm 3.2957 | dt 0.113
|
| 119 |
+
type train | step 1160 | loss 3.1611 | lr 1.0e-03 | norm 2.5074 | dt 0.112
|
| 120 |
+
type train | step 1170 | loss 3.1548 | lr 1.0e-03 | norm 3.4140 | dt 0.112
|
| 121 |
+
type train | step 1180 | loss 3.1679 | lr 1.0e-03 | norm 2.9607 | dt 0.114
|
| 122 |
+
type train | step 1190 | loss 3.1651 | lr 1.0e-03 | norm 3.6046 | dt 0.114
|
| 123 |
+
type train | step 1200 | loss 3.1426 | lr 1.0e-03 | norm 2.5556 | dt 0.114
|
| 124 |
+
type train | step 1210 | loss 3.1257 | lr 1.0e-03 | norm 2.6682 | dt 0.112
|
| 125 |
+
type train | step 1220 | loss 3.1413 | lr 1.0e-03 | norm 2.7121 | dt 0.115
|
| 126 |
+
type train | step 1230 | loss 3.1098 | lr 1.0e-03 | norm 2.8892 | dt 0.113
|
| 127 |
+
type train | step 1240 | loss 3.1332 | lr 1.0e-03 | norm 3.3983 | dt 0.114
|
| 128 |
+
type train | step 1250 | loss 3.1184 | lr 1.0e-03 | norm 2.7903 | dt 0.113
|
| 129 |
+
type train | step 1260 | loss 3.1315 | lr 1.0e-03 | norm 2.8927 | dt 0.115
|
| 130 |
+
type train | step 1270 | loss 3.0778 | lr 1.0e-03 | norm 2.3560 | dt 0.115
|
| 131 |
+
type train | step 1280 | loss 3.1146 | lr 1.0e-03 | norm 3.5064 | dt 0.115
|
| 132 |
+
type train | step 1290 | loss 3.1136 | lr 1.0e-03 | norm 2.8561 | dt 0.113
|
| 133 |
+
type train | step 1300 | loss 3.0911 | lr 1.0e-03 | norm 2.2730 | dt 0.115
|
| 134 |
+
type train | step 1310 | loss 3.0748 | lr 1.0e-03 | norm 2.5766 | dt 0.114
|
| 135 |
+
type train | step 1320 | loss 3.1009 | lr 1.0e-03 | norm 3.9068 | dt 0.114
|
| 136 |
+
type train | step 1330 | loss 3.0803 | lr 1.0e-03 | norm 3.1166 | dt 0.113
|
| 137 |
+
type train | step 1340 | loss 3.0926 | lr 1.0e-03 | norm 2.7389 | dt 0.115
|
| 138 |
+
type train | step 1350 | loss 3.0744 | lr 1.0e-03 | norm 2.0786 | dt 0.114
|
| 139 |
+
type train | step 1360 | loss 3.0577 | lr 1.0e-03 | norm 2.5219 | dt 0.113
|
| 140 |
+
type train | step 1370 | loss 3.0669 | lr 1.0e-03 | norm 3.3838 | dt 0.112
|
| 141 |
+
type train | step 1380 | loss 3.0778 | lr 1.0e-03 | norm 2.2342 | dt 0.112
|
| 142 |
+
type train | step 1390 | loss 3.0739 | lr 1.0e-03 | norm 2.5469 | dt 0.112
|
| 143 |
+
type train | step 1400 | loss 3.0546 | lr 1.0e-03 | norm 4.2186 | dt 0.113
|
| 144 |
+
type train | step 1410 | loss 3.0820 | lr 1.0e-03 | norm 2.8416 | dt 0.113
|
| 145 |
+
type train | step 1420 | loss 3.0504 | lr 1.0e-03 | norm 2.4169 | dt 0.113
|
| 146 |
+
type train | step 1430 | loss 3.0668 | lr 1.0e-03 | norm 2.1773 | dt 0.112
|
| 147 |
+
type train | step 1440 | loss 3.0626 | lr 1.0e-03 | norm 1.9690 | dt 0.113
|
| 148 |
+
type train | step 1450 | loss 3.0819 | lr 1.0e-03 | norm 3.4426 | dt 0.112
|
| 149 |
+
type train | step 1460 | loss 3.0320 | lr 1.0e-03 | norm 2.5645 | dt 0.112
|
| 150 |
+
type train | step 1470 | loss 3.0254 | lr 1.0e-03 | norm 3.3960 | dt 0.112
|
| 151 |
+
type train | step 1480 | loss 3.0238 | lr 1.0e-03 | norm 2.7148 | dt 0.112
|
| 152 |
+
type train | step 1490 | loss 3.0299 | lr 1.0e-03 | norm 2.7401 | dt 0.111
|
| 153 |
+
type train | step 1500 | loss 3.0184 | lr 1.0e-03 | norm 2.9799 | dt 0.111
|
| 154 |
+
type train | step 1510 | loss 2.9895 | lr 1.0e-03 | norm 2.7538 | dt 0.112
|
| 155 |
+
type train | step 1520 | loss 3.0257 | lr 1.0e-03 | norm 2.9352 | dt 0.786
|
| 156 |
+
type train | step 1530 | loss 2.9938 | lr 1.0e-03 | norm 3.5523 | dt 0.119
|
| 157 |
+
type train | step 1540 | loss 3.0104 | lr 1.0e-03 | norm 2.2947 | dt 0.113
|
| 158 |
+
type train | step 1550 | loss 3.0030 | lr 1.0e-03 | norm 2.8988 | dt 0.113
|
| 159 |
+
type train | step 1560 | loss 3.0205 | lr 1.0e-03 | norm 2.8917 | dt 0.113
|
| 160 |
+
type train | step 1570 | loss 3.0170 | lr 1.0e-03 | norm 2.9276 | dt 0.112
|
| 161 |
+
type train | step 1580 | loss 3.0038 | lr 1.0e-03 | norm 3.3706 | dt 0.113
|
| 162 |
+
type train | step 1590 | loss 2.9851 | lr 1.0e-03 | norm 3.3408 | dt 0.113
|
| 163 |
+
type train | step 1600 | loss 3.0001 | lr 1.0e-03 | norm 3.3847 | dt 0.111
|
| 164 |
+
type train | step 1610 | loss 2.9764 | lr 1.0e-03 | norm 3.2374 | dt 0.112
|
| 165 |
+
type train | step 1620 | loss 2.9924 | lr 1.0e-03 | norm 2.8452 | dt 0.112
|
| 166 |
+
type train | step 1630 | loss 2.9834 | lr 1.0e-03 | norm 2.5053 | dt 0.111
|
| 167 |
+
type train | step 1640 | loss 3.0062 | lr 1.0e-03 | norm 2.9985 | dt 0.115
|
| 168 |
+
type train | step 1650 | loss 2.9527 | lr 1.0e-03 | norm 2.7376 | dt 0.112
|
| 169 |
+
type train | step 1660 | loss 2.9800 | lr 1.0e-03 | norm 2.2764 | dt 0.112
|
| 170 |
+
type train | step 1670 | loss 2.9875 | lr 1.0e-03 | norm 2.8126 | dt 0.112
|
| 171 |
+
type train | step 1680 | loss 2.9827 | lr 1.0e-03 | norm 4.0496 | dt 0.113
|
| 172 |
+
type train | step 1690 | loss 2.9564 | lr 1.0e-03 | norm 3.7922 | dt 0.113
|
| 173 |
+
type train | step 1700 | loss 2.9789 | lr 1.0e-03 | norm 3.6865 | dt 0.113
|
| 174 |
+
type train | step 1710 | loss 2.9598 | lr 1.0e-03 | norm 2.8699 | dt 0.116
|
| 175 |
+
type train | step 1720 | loss 2.9697 | lr 1.0e-03 | norm 2.7418 | dt 0.113
|
| 176 |
+
type train | step 1730 | loss 2.9698 | lr 1.0e-03 | norm 4.6787 | dt 0.114
|
| 177 |
+
type train | step 1740 | loss 2.9466 | lr 1.0e-03 | norm 2.6091 | dt 0.114
|
| 178 |
+
type train | step 1750 | loss 2.9490 | lr 1.0e-03 | norm 2.7180 | dt 0.113
|
| 179 |
+
type train | step 1760 | loss 2.9675 | lr 1.0e-03 | norm 3.4603 | dt 0.112
|
| 180 |
+
type train | step 1770 | loss 2.9694 | lr 1.0e-03 | norm 3.8823 | dt 0.112
|
| 181 |
+
type train | step 1780 | loss 2.9294 | lr 1.0e-03 | norm 2.1583 | dt 0.113
|
| 182 |
+
type train | step 1790 | loss 2.9758 | lr 1.0e-03 | norm 3.1254 | dt 0.114
|
| 183 |
+
type train | step 1800 | loss 2.9418 | lr 1.0e-03 | norm 2.3019 | dt 0.113
|
| 184 |
+
type train | step 1810 | loss 2.9595 | lr 1.0e-03 | norm 2.3203 | dt 0.114
|
| 185 |
+
type train | step 1820 | loss 2.9645 | lr 1.0e-03 | norm 2.7078 | dt 0.112
|
| 186 |
+
type train | step 1830 | loss 2.9791 | lr 1.0e-03 | norm 4.0801 | dt 0.112
|
| 187 |
+
type train | step 1840 | loss 2.9322 | lr 1.0e-03 | norm 2.5803 | dt 0.112
|
| 188 |
+
type train | step 1850 | loss 2.9238 | lr 1.0e-03 | norm 3.4429 | dt 0.113
|
| 189 |
+
type train | step 1860 | loss 2.9231 | lr 1.0e-03 | norm 2.9536 | dt 0.113
|
| 190 |
+
type train | step 1870 | loss 2.9300 | lr 1.0e-03 | norm 2.7063 | dt 0.112
|
| 191 |
+
type train | step 1880 | loss 2.9261 | lr 1.0e-03 | norm 3.2127 | dt 0.114
|
| 192 |
+
type train | step 1890 | loss 2.8983 | lr 1.0e-03 | norm 3.4601 | dt 0.113
|
| 193 |
+
type train | step 1900 | loss 2.9265 | lr 1.0e-03 | norm 2.7227 | dt 1.548
|
| 194 |
+
type train | step 1910 | loss 2.8953 | lr 1.0e-03 | norm 2.7074 | dt 0.112
|
| 195 |
+
type train | step 1920 | loss 2.9190 | lr 1.0e-03 | norm 2.8995 | dt 0.113
|
| 196 |
+
type train | step 1930 | loss 2.9167 | lr 1.0e-03 | norm 3.8481 | dt 0.113
|
| 197 |
+
type train | step 1940 | loss 2.9281 | lr 1.0e-03 | norm 2.8103 | dt 0.114
|
| 198 |
+
type train | step 1950 | loss 2.9268 | lr 1.0e-03 | norm 2.5927 | dt 0.114
|
| 199 |
+
type train | step 1960 | loss 2.9142 | lr 1.0e-03 | norm 3.0433 | dt 0.113
|
| 200 |
+
type train | step 1970 | loss 2.8983 | lr 1.0e-03 | norm 3.2740 | dt 0.112
|
| 201 |
+
type train | step 1980 | loss 2.9161 | lr 1.0e-03 | norm 3.9185 | dt 0.112
|
| 202 |
+
type train | step 1990 | loss 2.8895 | lr 1.0e-03 | norm 3.0155 | dt 0.112
|
| 203 |
+
type train | step 2000 | loss 2.9092 | lr 1.0e-03 | norm 3.4781 | dt 0.112
|
| 204 |
+
type train | step 2010 | loss 2.9025 | lr 1.0e-03 | norm 3.0084 | dt 0.112
|
| 205 |
+
type train | step 2020 | loss 2.9196 | lr 1.0e-03 | norm 3.2020 | dt 0.114
|
| 206 |
+
type train | step 2030 | loss 2.8721 | lr 1.0e-03 | norm 2.9734 | dt 0.113
|
| 207 |
+
type train | step 2040 | loss 2.9046 | lr 1.0e-03 | norm 3.6180 | dt 0.112
|
| 208 |
+
type train | step 2050 | loss 2.9086 | lr 1.0e-03 | norm 2.7358 | dt 0.112
|
| 209 |
+
type train | step 2060 | loss 2.8910 | lr 1.0e-03 | norm 4.0219 | dt 0.112
|
| 210 |
+
type train | step 2070 | loss 2.8788 | lr 1.0e-03 | norm 3.4502 | dt 0.112
|
| 211 |
+
type train | step 2080 | loss 2.8967 | lr 1.0e-03 | norm 3.7852 | dt 0.112
|
| 212 |
+
type train | step 2090 | loss 2.8824 | lr 1.0e-03 | norm 2.7029 | dt 0.112
|
| 213 |
+
type train | step 2100 | loss 2.8959 | lr 1.0e-03 | norm 2.6283 | dt 0.112
|
| 214 |
+
type train | step 2110 | loss 2.8875 | lr 1.0e-03 | norm 3.4177 | dt 0.112
|
| 215 |
+
type train | step 2120 | loss 2.8799 | lr 1.0e-03 | norm 3.5287 | dt 0.112
|
| 216 |
+
type train | step 2130 | loss 2.8750 | lr 1.0e-03 | norm 2.8981 | dt 0.113
|
| 217 |
+
type train | step 2140 | loss 2.8971 | lr 1.0e-03 | norm 4.3269 | dt 0.115
|
| 218 |
+
type train | step 2150 | loss 2.8941 | lr 1.0e-03 | norm 3.3644 | dt 0.112
|
| 219 |
+
type train | step 2160 | loss 2.8595 | lr 1.0e-03 | norm 3.3858 | dt 0.112
|
| 220 |
+
type train | step 2170 | loss 2.9046 | lr 1.0e-03 | norm 3.0400 | dt 0.113
|
| 221 |
+
type train | step 2180 | loss 2.8755 | lr 1.0e-03 | norm 2.7293 | dt 0.114
|
| 222 |
+
type train | step 2190 | loss 2.8909 | lr 1.0e-03 | norm 2.9052 | dt 0.113
|
| 223 |
+
type train | step 2200 | loss 2.8960 | lr 1.0e-03 | norm 3.4143 | dt 0.114
|
| 224 |
+
type train | step 2210 | loss 2.9062 | lr 1.0e-03 | norm 3.6502 | dt 0.113
|
| 225 |
+
type train | step 2220 | loss 2.8667 | lr 1.0e-03 | norm 4.0320 | dt 0.113
|
| 226 |
+
type train | step 2230 | loss 2.8529 | lr 1.0e-03 | norm 2.5836 | dt 0.114
|
| 227 |
+
type train | step 2240 | loss 2.8551 | lr 1.0e-03 | norm 2.9113 | dt 0.113
|
| 228 |
+
type train | step 2250 | loss 2.8683 | lr 1.0e-03 | norm 3.0616 | dt 0.114
|
| 229 |
+
type train | step 2260 | loss 2.8626 | lr 1.0e-03 | norm 3.6118 | dt 0.113
|
| 230 |
+
type train | step 2270 | loss 2.8310 | lr 1.0e-03 | norm 2.9135 | dt 0.113
|
| 231 |
+
type train | step 2280 | loss 2.8622 | lr 1.0e-03 | norm 2.8750 | dt 0.121
|
| 232 |
+
type train | step 2290 | loss 2.8356 | lr 1.0e-03 | norm 3.8494 | dt 0.113
|
| 233 |
+
type train | step 2300 | loss 2.8605 | lr 1.0e-03 | norm 3.9284 | dt 0.113
|
| 234 |
+
type train | step 2310 | loss 2.8499 | lr 1.0e-03 | norm 3.0800 | dt 0.112
|
| 235 |
+
type train | step 2320 | loss 2.8685 | lr 1.0e-03 | norm 3.8296 | dt 0.113
|
| 236 |
+
type train | step 2330 | loss 2.8656 | lr 1.0e-03 | norm 3.0852 | dt 0.113
|
| 237 |
+
type train | step 2340 | loss 2.8557 | lr 1.0e-03 | norm 3.1466 | dt 0.114
|
| 238 |
+
type train | step 2350 | loss 2.8373 | lr 1.0e-03 | norm 2.8554 | dt 0.112
|
| 239 |
+
type train | step 2360 | loss 2.8624 | lr 1.0e-03 | norm 4.2768 | dt 0.113
|
| 240 |
+
type train | step 2370 | loss 2.8275 | lr 1.0e-03 | norm 2.8592 | dt 0.112
|
| 241 |
+
type train | step 2380 | loss 2.8514 | lr 1.0e-03 | norm 3.2689 | dt 0.112
|
| 242 |
+
type train | step 2390 | loss 2.8451 | lr 1.0e-03 | norm 2.7075 | dt 0.112
|
| 243 |
+
type train | step 2400 | loss 2.8721 | lr 1.0e-03 | norm 4.6623 | dt 0.113
|
| 244 |
+
type train | step 2410 | loss 2.8171 | lr 1.0e-03 | norm 2.7415 | dt 0.114
|
| 245 |
+
type train | step 2420 | loss 2.8505 | lr 1.0e-03 | norm 3.7496 | dt 0.114
|
| 246 |
+
type train | step 2430 | loss 2.8556 | lr 1.0e-03 | norm 3.3305 | dt 0.113
|
| 247 |
+
type train | step 2440 | loss 2.8453 | lr 1.0e-03 | norm 3.7107 | dt 0.114
|
| 248 |
+
type train | step 2450 | loss 2.8160 | lr 1.0e-03 | norm 2.5289 | dt 0.113
|
| 249 |
+
type train | step 2460 | loss 2.8412 | lr 1.0e-03 | norm 3.0190 | dt 0.113
|
| 250 |
+
type train | step 2470 | loss 2.8300 | lr 1.0e-03 | norm 3.0272 | dt 0.114
|
| 251 |
+
type train | step 2480 | loss 2.8461 | lr 1.0e-03 | norm 4.2926 | dt 0.113
|
| 252 |
+
type train | step 2490 | loss 2.8300 | lr 1.0e-03 | norm 3.4930 | dt 0.113
|
| 253 |
+
type train | step 2500 | loss 2.8174 | lr 1.0e-03 | norm 3.0242 | dt 0.112
|
| 254 |
+
type train | step 2510 | loss 2.8407 | lr 1.0e-03 | norm 6.4129 | dt 0.112
|
| 255 |
+
type train | step 2520 | loss 2.8393 | lr 1.0e-03 | norm 3.7974 | dt 0.112
|
| 256 |
+
type train | step 2530 | loss 2.8411 | lr 1.0e-03 | norm 2.9628 | dt 0.113
|
| 257 |
+
type train | step 2540 | loss 2.8201 | lr 1.0e-03 | norm 3.6036 | dt 0.112
|
| 258 |
+
type train | step 2550 | loss 2.8549 | lr 1.0e-03 | norm 4.0269 | dt 0.113
|
| 259 |
+
type train | step 2560 | loss 2.8308 | lr 1.0e-03 | norm 3.6912 | dt 0.114
|
| 260 |
+
type train | step 2570 | loss 2.8413 | lr 1.0e-03 | norm 2.8608 | dt 0.114
|
| 261 |
+
type train | step 2580 | loss 2.8525 | lr 1.0e-03 | norm 3.1250 | dt 0.113
|
| 262 |
+
type train | step 2590 | loss 2.8555 | lr 1.0e-03 | norm 2.9232 | dt 0.112
|
| 263 |
+
type train | step 2600 | loss 2.8209 | lr 1.0e-03 | norm 3.4581 | dt 0.112
|
| 264 |
+
type train | step 2610 | loss 2.8085 | lr 1.0e-03 | norm 3.6581 | dt 0.113
|
| 265 |
+
type train | step 2620 | loss 2.8089 | lr 1.0e-03 | norm 3.5684 | dt 0.113
|
| 266 |
+
type train | step 2630 | loss 2.8268 | lr 1.0e-03 | norm 3.6094 | dt 0.113
|
| 267 |
+
type train | step 2640 | loss 2.8174 | lr 1.0e-03 | norm 3.3872 | dt 0.113
|
| 268 |
+
type train | step 2650 | loss 2.7831 | lr 1.0e-03 | norm 2.9816 | dt 0.113
|
| 269 |
+
type train | step 2660 | loss 2.8178 | lr 1.0e-03 | norm 2.8023 | dt 0.119
|
| 270 |
+
type train | step 2670 | loss 2.7875 | lr 1.0e-03 | norm 3.4217 | dt 0.115
|
| 271 |
+
type train | step 2680 | loss 2.8154 | lr 1.0e-03 | norm 3.6919 | dt 0.111
|
| 272 |
+
type train | step 2690 | loss 2.8023 | lr 1.0e-03 | norm 3.2433 | dt 0.114
|
| 273 |
+
type train | step 2700 | loss 2.8197 | lr 1.0e-03 | norm 2.8322 | dt 0.115
|
| 274 |
+
type train | step 2710 | loss 2.8159 | lr 1.0e-03 | norm 3.1906 | dt 0.113
|
| 275 |
+
type train | step 2720 | loss 2.8026 | lr 1.0e-03 | norm 2.5616 | dt 0.113
|
| 276 |
+
type train | step 2730 | loss 2.7919 | lr 1.0e-03 | norm 3.0122 | dt 0.114
|
| 277 |
+
type train | step 2740 | loss 2.8215 | lr 1.0e-03 | norm 4.5926 | dt 0.114
|
| 278 |
+
type train | step 2750 | loss 2.7926 | lr 1.0e-03 | norm 4.4416 | dt 0.113
|
| 279 |
+
type train | step 2760 | loss 2.8127 | lr 1.0e-03 | norm 4.3600 | dt 0.113
|
| 280 |
+
type train | step 2770 | loss 2.8033 | lr 1.0e-03 | norm 3.2910 | dt 0.113
|
| 281 |
+
type train | step 2780 | loss 2.8248 | lr 1.0e-03 | norm 4.1132 | dt 0.113
|
| 282 |
+
type train | step 2790 | loss 2.7778 | lr 1.0e-03 | norm 3.6617 | dt 0.113
|
| 283 |
+
type train | step 2800 | loss 2.8080 | lr 1.0e-03 | norm 3.5671 | dt 0.113
|
| 284 |
+
type train | step 2810 | loss 2.8120 | lr 1.0e-03 | norm 2.8360 | dt 0.112
|
| 285 |
+
type train | step 2820 | loss 2.7959 | lr 1.0e-03 | norm 3.4689 | dt 0.113
|
| 286 |
+
type train | step 2830 | loss 2.7751 | lr 1.0e-03 | norm 3.1263 | dt 0.113
|
| 287 |
+
type train | step 2840 | loss 2.8040 | lr 1.0e-03 | norm 4.7723 | dt 0.113
|
| 288 |
+
type train | step 2850 | loss 2.7878 | lr 1.0e-03 | norm 2.8504 | dt 0.113
|
| 289 |
+
type train | step 2860 | loss 2.8069 | lr 1.0e-03 | norm 3.9886 | dt 0.112
|
| 290 |
+
type train | step 2870 | loss 2.7915 | lr 1.0e-03 | norm 3.3547 | dt 0.113
|
| 291 |
+
type train | step 2880 | loss 2.7795 | lr 1.0e-03 | norm 3.8351 | dt 0.112
|
| 292 |
+
type train | step 2890 | loss 2.7911 | lr 1.0e-03 | norm 3.7898 | dt 0.111
|
| 293 |
+
type train | step 2900 | loss 2.7968 | lr 1.0e-03 | norm 3.3697 | dt 0.112
|
| 294 |
+
type train | step 2910 | loss 2.7973 | lr 1.0e-03 | norm 2.7783 | dt 0.113
|
| 295 |
+
type train | step 2920 | loss 2.7721 | lr 1.0e-03 | norm 4.2394 | dt 0.112
|
| 296 |
+
type train | step 2930 | loss 2.8144 | lr 1.0e-03 | norm 3.7734 | dt 0.114
|
| 297 |
+
type train | step 2940 | loss 2.7893 | lr 1.0e-03 | norm 3.3477 | dt 0.113
|
| 298 |
+
type train | step 2950 | loss 2.8055 | lr 1.0e-03 | norm 4.1091 | dt 0.113
|
| 299 |
+
type train | step 2960 | loss 2.8119 | lr 1.0e-03 | norm 3.3889 | dt 0.114
|
| 300 |
+
type train | step 2970 | loss 2.8116 | lr 1.0e-03 | norm 2.3539 | dt 0.112
|
| 301 |
+
type train | step 2980 | loss 2.7820 | lr 1.0e-03 | norm 3.1976 | dt 0.114
|
| 302 |
+
type train | step 2990 | loss 2.7685 | lr 1.0e-03 | norm 3.1358 | dt 0.112
|
| 303 |
+
type train | step 3000 | loss 2.7698 | lr 1.0e-03 | norm 3.3466 | dt 0.114
|
| 304 |
+
type train | step 3010 | loss 2.7901 | lr 1.0e-03 | norm 4.0134 | dt 0.112
|
| 305 |
+
type train | step 3020 | loss 2.7774 | lr 1.0e-03 | norm 3.3120 | dt 0.112
|
| 306 |
+
type train | step 3030 | loss 2.7468 | lr 1.0e-03 | norm 3.5089 | dt 0.112
|
| 307 |
+
type train | step 3040 | loss 2.7845 | lr 1.0e-03 | norm 3.5996 | dt 0.683
|
| 308 |
+
type train | step 3050 | loss 2.7484 | lr 1.0e-03 | norm 3.3160 | dt 0.113
|
| 309 |
+
type train | step 3060 | loss 2.7802 | lr 1.0e-03 | norm 3.5098 | dt 0.112
|
| 310 |
+
type train | step 3070 | loss 2.7648 | lr 1.0e-03 | norm 2.9626 | dt 0.113
|
| 311 |
+
type train | step 3080 | loss 2.7928 | lr 1.0e-03 | norm 4.9214 | dt 0.112
|
| 312 |
+
type train | step 3090 | loss 2.7792 | lr 1.0e-03 | norm 3.0415 | dt 0.114
|
| 313 |
+
type train | step 3100 | loss 2.7703 | lr 1.0e-03 | norm 3.8194 | dt 0.112
|
| 314 |
+
type train | step 3110 | loss 2.7536 | lr 1.0e-03 | norm 2.6179 | dt 0.113
|
| 315 |
+
type train | step 3120 | loss 2.7777 | lr 1.0e-03 | norm 3.8597 | dt 0.111
|
| 316 |
+
type train | step 3130 | loss 2.7586 | lr 1.0e-03 | norm 3.5598 | dt 0.113
|
| 317 |
+
type train | step 3140 | loss 2.7790 | lr 1.0e-03 | norm 3.6057 | dt 0.112
|
| 318 |
+
type train | step 3150 | loss 2.7643 | lr 1.0e-03 | norm 2.9176 | dt 0.113
|
| 319 |
+
type train | step 3160 | loss 2.7912 | lr 1.0e-03 | norm 3.6541 | dt 0.113
|
| 320 |
+
type train | step 3170 | loss 2.7408 | lr 1.0e-03 | norm 3.4311 | dt 0.113
|
| 321 |
+
type train | step 3180 | loss 2.7720 | lr 1.0e-03 | norm 3.8333 | dt 0.112
|
| 322 |
+
type train | step 3190 | loss 2.7741 | lr 1.0e-03 | norm 2.7587 | dt 0.113
|
| 323 |
+
type train | step 3200 | loss 2.7577 | lr 1.0e-03 | norm 3.2884 | dt 0.112
|
| 324 |
+
type train | step 3210 | loss 2.7489 | lr 1.0e-03 | norm 4.7644 | dt 0.113
|
| 325 |
+
type train | step 3220 | loss 2.7650 | lr 1.0e-03 | norm 4.1145 | dt 0.111
|
| 326 |
+
type train | step 3230 | loss 2.7511 | lr 1.0e-03 | norm 3.1436 | dt 0.112
|
| 327 |
+
type train | step 3240 | loss 2.7716 | lr 1.0e-03 | norm 4.0496 | dt 0.112
|
| 328 |
+
type train | step 3250 | loss 2.7592 | lr 1.0e-03 | norm 3.4712 | dt 0.112
|
| 329 |
+
type train | step 3260 | loss 2.7435 | lr 1.0e-03 | norm 3.2489 | dt 0.112
|
| 330 |
+
type train | step 3270 | loss 2.7635 | lr 1.0e-03 | norm 5.3592 | dt 0.111
|
| 331 |
+
type train | step 3280 | loss 2.7653 | lr 1.0e-03 | norm 3.9390 | dt 0.115
|
| 332 |
+
type train | step 3290 | loss 2.7688 | lr 1.0e-03 | norm 3.1874 | dt 0.113
|
| 333 |
+
type train | step 3300 | loss 2.7344 | lr 1.0e-03 | norm 2.8381 | dt 0.114
|
| 334 |
+
type train | step 3310 | loss 2.7841 | lr 1.0e-03 | norm 4.4371 | dt 0.113
|
| 335 |
+
type train | step 3320 | loss 2.7544 | lr 1.0e-03 | norm 3.4362 | dt 0.113
|
| 336 |
+
type train | step 3330 | loss 2.7671 | lr 1.0e-03 | norm 2.6081 | dt 0.114
|
| 337 |
+
type train | step 3340 | loss 2.7799 | lr 1.0e-03 | norm 2.9720 | dt 0.113
|
| 338 |
+
type train | step 3350 | loss 2.7861 | lr 1.0e-03 | norm 4.1959 | dt 0.112
|
| 339 |
+
type train | step 3360 | loss 2.7476 | lr 1.0e-03 | norm 3.6431 | dt 0.112
|
| 340 |
+
type train | step 3370 | loss 2.7421 | lr 1.0e-03 | norm 3.7062 | dt 0.113
|
| 341 |
+
type train | step 3380 | loss 2.7359 | lr 1.0e-03 | norm 3.2727 | dt 0.112
|
| 342 |
+
type train | step 3390 | loss 2.7536 | lr 1.0e-03 | norm 4.0304 | dt 0.112
|
| 343 |
+
type train | step 3400 | loss 2.7492 | lr 1.0e-03 | norm 4.5609 | dt 0.112
|
| 344 |
+
type train | step 3410 | loss 2.7132 | lr 1.0e-03 | norm 2.9332 | dt 0.111
|
| 345 |
+
type train | step 3420 | loss 2.7632 | lr 1.0e-03 | norm 3.8717 | dt 0.453
|
| 346 |
+
type train | step 3430 | loss 2.7204 | lr 1.0e-03 | norm 3.7906 | dt 0.114
|
| 347 |
+
type train | step 3440 | loss 2.7436 | lr 1.0e-03 | norm 3.1095 | dt 0.112
|
| 348 |
+
type train | step 3450 | loss 2.7374 | lr 1.0e-03 | norm 3.4169 | dt 0.112
|
| 349 |
+
type train | step 3460 | loss 2.7563 | lr 1.0e-03 | norm 3.8623 | dt 0.113
|
| 350 |
+
type train | step 3470 | loss 2.7534 | lr 1.0e-03 | norm 4.0683 | dt 0.112
|
| 351 |
+
type train | step 3480 | loss 2.7455 | lr 1.0e-03 | norm 4.5492 | dt 0.112
|
| 352 |
+
type train | step 3490 | loss 2.7274 | lr 1.0e-03 | norm 3.2147 | dt 0.112
|
| 353 |
+
type train | step 3500 | loss 2.7516 | lr 1.0e-03 | norm 4.0523 | dt 0.113
|
| 354 |
+
type train | step 3510 | loss 2.7194 | lr 1.0e-03 | norm 3.0837 | dt 0.112
|
| 355 |
+
type train | step 3520 | loss 2.7470 | lr 1.0e-03 | norm 4.2621 | dt 0.113
|
| 356 |
+
type train | step 3530 | loss 2.7400 | lr 1.0e-03 | norm 3.6261 | dt 0.112
|
| 357 |
+
type train | step 3540 | loss 2.7652 | lr 1.0e-03 | norm 4.2756 | dt 0.112
|
| 358 |
+
type train | step 3550 | loss 2.7168 | lr 1.0e-03 | norm 4.0585 | dt 0.112
|
| 359 |
+
type train | step 3560 | loss 2.7429 | lr 1.0e-03 | norm 3.2045 | dt 0.112
|
| 360 |
+
type train | step 3570 | loss 2.7464 | lr 1.0e-03 | norm 2.7687 | dt 0.113
|
| 361 |
+
type train | step 3580 | loss 2.7373 | lr 1.0e-03 | norm 5.4374 | dt 0.115
|
| 362 |
+
type train | step 3590 | loss 2.7237 | lr 1.0e-03 | norm 4.9567 | dt 0.112
|
| 363 |
+
type train | step 3600 | loss 2.7365 | lr 1.0e-03 | norm 3.8587 | dt 0.113
|
| 364 |
+
type train | step 3610 | loss 2.7276 | lr 1.0e-03 | norm 3.8897 | dt 0.112
|
| 365 |
+
type train | step 3620 | loss 2.7446 | lr 1.0e-03 | norm 3.1910 | dt 0.113
|
| 366 |
+
type train | step 3630 | loss 2.7278 | lr 1.0e-03 | norm 2.7554 | dt 0.112
|
| 367 |
+
type train | step 3640 | loss 2.7135 | lr 1.0e-03 | norm 2.9128 | dt 0.112
|
| 368 |
+
type train | step 3650 | loss 2.7271 | lr 1.0e-03 | norm 3.9133 | dt 0.111
|
| 369 |
+
type train | step 3660 | loss 2.7397 | lr 1.0e-03 | norm 4.2219 | dt 0.117
|
| 370 |
+
type train | step 3670 | loss 2.7378 | lr 1.0e-03 | norm 2.5150 | dt 0.112
|
| 371 |
+
type train | step 3680 | loss 2.7068 | lr 1.0e-03 | norm 2.8761 | dt 0.112
|
| 372 |
+
type train | step 3690 | loss 2.7525 | lr 1.0e-03 | norm 3.2987 | dt 0.112
|
| 373 |
+
type train | step 3700 | loss 2.7258 | lr 1.0e-03 | norm 3.2104 | dt 0.112
|
| 374 |
+
type train | step 3710 | loss 2.7404 | lr 1.0e-03 | norm 3.2426 | dt 0.114
|
| 375 |
+
type train | step 3720 | loss 2.7449 | lr 1.0e-03 | norm 2.9995 | dt 0.113
|
| 376 |
+
type train | step 3730 | loss 2.7604 | lr 1.0e-03 | norm 4.3234 | dt 0.112
|
| 377 |
+
type train | step 3740 | loss 2.7211 | lr 1.0e-03 | norm 2.9728 | dt 0.112
|
| 378 |
+
type train | step 3750 | loss 2.7178 | lr 1.0e-03 | norm 4.3165 | dt 0.113
|
| 379 |
+
type train | step 3760 | loss 2.7066 | lr 1.0e-03 | norm 2.5637 | dt 0.114
|
| 380 |
+
type train | step 3770 | loss 2.7282 | lr 1.0e-03 | norm 3.4858 | dt 0.114
|
| 381 |
+
type train | step 3780 | loss 2.7243 | lr 1.0e-03 | norm 4.1968 | dt 0.112
|
| 382 |
+
type train | step 3790 | loss 2.6911 | lr 1.0e-03 | norm 3.7193 | dt 0.114
|
| 383 |
+
type train | step 3800 | loss 2.7315 | lr 1.0e-03 | norm 3.7593 | dt 0.274
|
| 384 |
+
type train | step 3810 | loss 2.6951 | lr 1.0e-03 | norm 3.5749 | dt 0.113
|
| 385 |
+
type train | step 3820 | loss 2.7177 | lr 1.0e-03 | norm 2.8342 | dt 0.112
|
| 386 |
+
type train | step 3830 | loss 2.7106 | lr 1.0e-03 | norm 3.6803 | dt 0.113
|
| 387 |
+
type train | step 3840 | loss 2.7366 | lr 1.0e-03 | norm 3.8993 | dt 0.112
|
| 388 |
+
type train | step 3850 | loss 2.7305 | lr 1.0e-03 | norm 3.5593 | dt 0.114
|
| 389 |
+
type train | step 3860 | loss 2.7142 | lr 1.0e-03 | norm 3.3496 | dt 0.112
|
| 390 |
+
type train | step 3870 | loss 2.7017 | lr 1.0e-03 | norm 3.7996 | dt 0.113
|
| 391 |
+
type train | step 3880 | loss 2.7246 | lr 1.0e-03 | norm 2.9687 | dt 0.115
|
| 392 |
+
type train | step 3890 | loss 2.6996 | lr 1.0e-03 | norm 4.3324 | dt 0.112
|
| 393 |
+
type train | step 3900 | loss 2.7299 | lr 1.0e-03 | norm 4.7540 | dt 0.112
|
| 394 |
+
type train | step 3910 | loss 2.7175 | lr 1.0e-03 | norm 4.5622 | dt 0.112
|
| 395 |
+
type train | step 3920 | loss 2.7445 | lr 1.0e-03 | norm 4.5210 | dt 0.112
|
| 396 |
+
type train | step 3930 | loss 2.6917 | lr 1.0e-03 | norm 4.2401 | dt 0.113
|
| 397 |
+
type train | step 3940 | loss 2.7196 | lr 1.0e-03 | norm 3.9838 | dt 0.111
|
| 398 |
+
type train | step 3950 | loss 2.7259 | lr 1.0e-03 | norm 3.5923 | dt 0.112
|
| 399 |
+
type train | step 3960 | loss 2.7024 | lr 1.0e-03 | norm 2.9689 | dt 0.112
|
| 400 |
+
type train | step 3970 | loss 2.6920 | lr 1.0e-03 | norm 5.0040 | dt 0.112
|
| 401 |
+
type train | step 3980 | loss 2.7100 | lr 1.0e-03 | norm 3.7985 | dt 0.113
|
| 402 |
+
type train | step 3990 | loss 2.7032 | lr 1.0e-03 | norm 3.4739 | dt 0.113
|
| 403 |
+
type train | step 4000 | loss 2.7203 | lr 1.0e-03 | norm 3.7162 | dt 0.112
|
| 404 |
+
type train | step 4010 | loss 2.7059 | lr 1.0e-03 | norm 3.3635 | dt 0.112
|
| 405 |
+
type train | step 4020 | loss 2.6927 | lr 1.0e-03 | norm 3.5211 | dt 0.112
|
| 406 |
+
type train | step 4030 | loss 2.6982 | lr 1.0e-03 | norm 3.4169 | dt 0.114
|
| 407 |
+
type train | step 4040 | loss 2.7185 | lr 1.0e-03 | norm 3.5633 | dt 0.112
|
| 408 |
+
type train | step 4050 | loss 2.7151 | lr 1.0e-03 | norm 3.2189 | dt 0.112
|
| 409 |
+
type train | step 4060 | loss 2.6830 | lr 1.0e-03 | norm 3.1005 | dt 0.113
|
| 410 |
+
type train | step 4070 | loss 2.7358 | lr 1.0e-03 | norm 4.2720 | dt 0.112
|
| 411 |
+
type train | step 4080 | loss 2.7034 | lr 1.0e-03 | norm 2.6812 | dt 0.112
|
| 412 |
+
type train | step 4090 | loss 2.7200 | lr 1.0e-03 | norm 3.8335 | dt 0.112
|
| 413 |
+
type train | step 4100 | loss 2.7263 | lr 1.0e-03 | norm 3.0014 | dt 0.113
|
| 414 |
+
type train | step 4110 | loss 2.7304 | lr 1.0e-03 | norm 3.1252 | dt 0.112
|
| 415 |
+
type train | step 4120 | loss 2.6995 | lr 1.0e-03 | norm 3.2771 | dt 0.112
|
| 416 |
+
type train | step 4130 | loss 2.6983 | lr 1.0e-03 | norm 4.1201 | dt 0.112
|
| 417 |
+
type train | step 4140 | loss 2.6936 | lr 1.0e-03 | norm 3.9117 | dt 0.113
|
| 418 |
+
type train | step 4150 | loss 2.7068 | lr 1.0e-03 | norm 3.6974 | dt 0.113
|
| 419 |
+
type train | step 4160 | loss 2.6984 | lr 1.0e-03 | norm 3.1602 | dt 0.113
|
| 420 |
+
type train | step 4170 | loss 2.6660 | lr 1.0e-03 | norm 3.7614 | dt 0.114
|
| 421 |
+
type train | step 4180 | loss 2.7068 | lr 1.0e-03 | norm 3.5035 | dt 0.122
|
| 422 |
+
type train | step 4190 | loss 2.6706 | lr 1.0e-03 | norm 3.4399 | dt 0.113
|
| 423 |
+
type train | step 4200 | loss 2.7023 | lr 1.0e-03 | norm 3.7420 | dt 0.112
|
| 424 |
+
type train | step 4210 | loss 2.6893 | lr 1.0e-03 | norm 3.5744 | dt 0.113
|
| 425 |
+
type train | step 4220 | loss 2.7063 | lr 1.0e-03 | norm 2.9496 | dt 0.114
|
| 426 |
+
type train | step 4230 | loss 2.7079 | lr 1.0e-03 | norm 4.1317 | dt 0.112
|
| 427 |
+
type train | step 4240 | loss 2.7058 | lr 1.0e-03 | norm 5.3741 | dt 0.113
|
| 428 |
+
type train | step 4250 | loss 2.6881 | lr 1.0e-03 | norm 3.9840 | dt 0.112
|
| 429 |
+
type train | step 4260 | loss 2.7151 | lr 1.0e-03 | norm 5.0553 | dt 0.111
|
| 430 |
+
type train | step 4270 | loss 2.6837 | lr 1.0e-03 | norm 5.0900 | dt 0.112
|
| 431 |
+
type train | step 4280 | loss 2.7110 | lr 1.0e-03 | norm 3.9984 | dt 0.112
|
| 432 |
+
type train | step 4290 | loss 2.6954 | lr 1.0e-03 | norm 3.2105 | dt 0.112
|
| 433 |
+
type train | step 4300 | loss 2.7243 | lr 1.0e-03 | norm 4.0479 | dt 0.112
|
| 434 |
+
type train | step 4310 | loss 2.6692 | lr 1.0e-03 | norm 3.1193 | dt 0.112
|
| 435 |
+
type train | step 4320 | loss 2.7000 | lr 1.0e-03 | norm 4.1347 | dt 0.112
|
| 436 |
+
type train | step 4330 | loss 2.7062 | lr 1.0e-03 | norm 3.1431 | dt 0.112
|
| 437 |
+
type train | step 4340 | loss 2.6915 | lr 1.0e-03 | norm 3.9019 | dt 0.113
|
| 438 |
+
type train | step 4350 | loss 2.6757 | lr 1.0e-03 | norm 3.2795 | dt 0.113
|
| 439 |
+
type train | step 4360 | loss 2.6951 | lr 1.0e-03 | norm 3.9674 | dt 0.113
|
| 440 |
+
type train | step 4370 | loss 2.6855 | lr 1.0e-03 | norm 4.0404 | dt 0.112
|
| 441 |
+
type train | step 4380 | loss 2.7024 | lr 1.0e-03 | norm 4.5539 | dt 0.113
|
| 442 |
+
type train | step 4390 | loss 2.6917 | lr 1.0e-03 | norm 4.2215 | dt 0.113
|
| 443 |
+
type train | step 4400 | loss 2.6675 | lr 1.0e-03 | norm 2.7754 | dt 0.112
|
| 444 |
+
type train | step 4410 | loss 2.6809 | lr 1.0e-03 | norm 3.2600 | dt 0.112
|
| 445 |
+
type train | step 4420 | loss 2.6956 | lr 1.0e-03 | norm 3.2976 | dt 0.112
|
| 446 |
+
type train | step 4430 | loss 2.6984 | lr 1.0e-03 | norm 3.1306 | dt 0.113
|
| 447 |
+
type train | step 4440 | loss 2.6770 | lr 1.0e-03 | norm 4.3158 | dt 0.113
|
| 448 |
+
type train | step 4450 | loss 2.7173 | lr 1.0e-03 | norm 3.8809 | dt 0.113
|
| 449 |
+
type train | step 4460 | loss 2.6844 | lr 1.0e-03 | norm 3.6960 | dt 0.112
|
| 450 |
+
type train | step 4470 | loss 2.6986 | lr 1.0e-03 | norm 3.2030 | dt 0.113
|
| 451 |
+
type train | step 4480 | loss 2.7112 | lr 1.0e-03 | norm 3.9093 | dt 0.113
|
| 452 |
+
type train | step 4490 | loss 2.7174 | lr 1.0e-03 | norm 4.0407 | dt 0.113
|
| 453 |
+
type train | step 4500 | loss 2.6825 | lr 1.0e-03 | norm 4.1231 | dt 0.112
|
| 454 |
+
type train | step 4510 | loss 2.6809 | lr 1.0e-03 | norm 4.3415 | dt 0.112
|
| 455 |
+
type train | step 4520 | loss 2.6655 | lr 1.0e-03 | norm 2.8773 | dt 0.115
|
| 456 |
+
type train | step 4530 | loss 2.6912 | lr 1.0e-03 | norm 4.1305 | dt 0.115
|
| 457 |
+
type train | step 4540 | loss 2.6796 | lr 1.0e-03 | norm 3.3092 | dt 0.114
|
| 458 |
+
type train | step 4550 | loss 2.6493 | lr 1.0e-03 | norm 3.4483 | dt 0.113
|
| 459 |
+
type train | step 4560 | loss 2.6926 | lr 1.0e-03 | norm 3.4501 | dt 0.294
|
| 460 |
+
type train | step 4570 | loss 2.6629 | lr 1.0e-03 | norm 4.3711 | dt 0.112
|
| 461 |
+
type train | step 4580 | loss 2.6822 | lr 1.0e-03 | norm 4.2044 | dt 0.112
|
| 462 |
+
type train | step 4590 | loss 2.6727 | lr 1.0e-03 | norm 3.6775 | dt 0.113
|
| 463 |
+
type train | step 4600 | loss 2.6843 | lr 1.0e-03 | norm 2.6381 | dt 0.113
|
| 464 |
+
type train | step 4610 | loss 2.6898 | lr 1.0e-03 | norm 4.4385 | dt 0.113
|
| 465 |
+
type train | step 4620 | loss 2.6811 | lr 1.0e-03 | norm 4.5569 | dt 0.113
|
| 466 |
+
type train | step 4630 | loss 2.6692 | lr 1.0e-03 | norm 4.3784 | dt 0.114
|
| 467 |
+
type train | step 4640 | loss 2.6894 | lr 1.0e-03 | norm 4.3346 | dt 0.112
|
| 468 |
+
type train | step 4650 | loss 2.6616 | lr 1.0e-03 | norm 4.1270 | dt 0.114
|
| 469 |
+
type train | step 4660 | loss 2.6857 | lr 1.0e-03 | norm 3.7124 | dt 0.114
|
| 470 |
+
type train | step 4670 | loss 2.6729 | lr 1.0e-03 | norm 2.9445 | dt 0.114
|
| 471 |
+
type train | step 4680 | loss 2.7061 | lr 1.0e-03 | norm 5.1870 | dt 0.113
|
| 472 |
+
type train | step 4690 | loss 2.6547 | lr 1.0e-03 | norm 4.0145 | dt 0.112
|
| 473 |
+
type train | step 4700 | loss 2.6832 | lr 1.0e-03 | norm 3.6089 | dt 0.113
|
| 474 |
+
type train | step 4710 | loss 2.6875 | lr 1.0e-03 | norm 3.4779 | dt 0.113
|
| 475 |
+
type train | step 4720 | loss 2.6775 | lr 1.0e-03 | norm 4.6578 | dt 0.113
|
| 476 |
+
type train | step 4730 | loss 2.6581 | lr 1.0e-03 | norm 3.5623 | dt 0.114
|
| 477 |
+
type train | step 4740 | loss 2.6719 | lr 1.0e-03 | norm 3.3082 | dt 0.113
|
| 478 |
+
type train | step 4750 | loss 2.6682 | lr 1.0e-03 | norm 3.8716 | dt 0.113
|
| 479 |
+
type train | step 4760 | loss 2.6786 | lr 1.0e-03 | norm 3.2089 | dt 0.114
|
| 480 |
+
type train | step 4770 | loss 2.6733 | lr 1.0e-03 | norm 3.6031 | dt 0.114
|
| 481 |
+
type train | step 4780 | loss 2.6551 | lr 1.0e-03 | norm 3.8391 | dt 0.114
|
| 482 |
+
type train | step 4790 | loss 2.6726 | lr 1.0e-03 | norm 5.0322 | dt 0.112
|
| 483 |
+
type train | step 4800 | loss 2.6790 | lr 1.0e-03 | norm 3.8612 | dt 0.113
|
| 484 |
+
type train | step 4810 | loss 2.6857 | lr 1.0e-03 | norm 3.5971 | dt 0.113
|
| 485 |
+
type train | step 4820 | loss 2.6557 | lr 1.0e-03 | norm 3.6249 | dt 0.113
|
| 486 |
+
type train | step 4830 | loss 2.6996 | lr 1.0e-03 | norm 4.0842 | dt 0.112
|
| 487 |
+
type train | step 4840 | loss 2.6734 | lr 1.0e-03 | norm 3.9741 | dt 0.112
|
| 488 |
+
type train | step 4850 | loss 2.6854 | lr 1.0e-03 | norm 3.1812 | dt 0.112
|
| 489 |
+
type train | step 4860 | loss 2.6908 | lr 1.0e-03 | norm 3.2802 | dt 0.112
|
| 490 |
+
type train | step 4870 | loss 2.6986 | lr 1.0e-03 | norm 4.2077 | dt 0.112
|
| 491 |
+
type train | step 4880 | loss 2.6561 | lr 1.0e-03 | norm 2.2364 | dt 0.112
|
| 492 |
+
type train | step 4890 | loss 2.6601 | lr 1.0e-03 | norm 4.0356 | dt 0.112
|
| 493 |
+
type train | step 4900 | loss 2.6486 | lr 1.0e-03 | norm 3.2067 | dt 0.112
|
| 494 |
+
type train | step 4910 | loss 2.6725 | lr 1.0e-03 | norm 3.2824 | dt 0.113
|
| 495 |
+
type train | step 4920 | loss 2.6629 | lr 1.0e-03 | norm 3.1649 | dt 0.112
|
| 496 |
+
type train | step 4930 | loss 2.6344 | lr 1.0e-03 | norm 3.8950 | dt 0.112
|
| 497 |
+
type train | step 4940 | loss 2.6770 | lr 1.0e-03 | norm 3.6144 | dt 0.121
|
| 498 |
+
type train | step 4950 | loss 2.6364 | lr 1.0e-03 | norm 3.4820 | dt 0.113
|
| 499 |
+
type train | step 4960 | loss 2.6633 | lr 1.0e-03 | norm 2.8684 | dt 0.112
|
| 500 |
+
type train | step 4970 | loss 2.6570 | lr 1.0e-03 | norm 3.7896 | dt 0.113
|
| 501 |
+
type train | step 4980 | loss 2.6794 | lr 1.0e-03 | norm 5.2916 | dt 0.111
|
| 502 |
+
type train | step 4990 | loss 2.6667 | lr 1.0e-03 | norm 3.1418 | dt 0.112
|
| 503 |
+
type train | step 5000 | loss 2.6611 | lr 1.0e-03 | norm 3.3038 | dt 0.112
|