Upload folder using huggingface_hub
Browse files- debug.log +1 -0
- eval.log +31 -0
- model.json +1 -0
- model.safetensors +3 -0
- sae.0.safetensors +3 -0
- sae.1.safetensors +3 -0
- sae.2.safetensors +3 -0
- sae.3.safetensors +3 -0
- sae.4.safetensors +3 -0
- sae.json +1 -0
- train.log +750 -0
debug.log
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
name regularized.shakespeare_64x4 | device cuda | compile True | data_dir data/shakespeare | should_randomize True | log_interval 10 | eval_interval 250 | eval_steps 100 | batch_size 128 | gradient_accumulation_steps 1 | learning_rate 0.001 | warmup_steps 750 | max_steps 7500 | decay_lr True | min_lr 0.0001 | weight_decay 0.1 | grad_clip 1.0 | sae_config {'name': 'standardx8.shakespeare_64x4', 'device': device(type='cuda'), 'compile': True, 'gpt_config': {'name': 'ascii_64x4', 'device': device(type='cuda'), 'compile': True, 'block_size': 128, 'vocab_size': 128, 'n_layer': 4, 'n_head': 4, 'n_embd': 64}, 'n_features': (512, 512, 512, 512, 512), 'sae_variant': <SAEVariant.STANDARD: 'standard'>} | trainable_layers None | loss_coefficients {'sparsity': (0.02, 0.035, 0.085, 0.07, 0.075), 'regularization': tensor(3.), 'downstream': None, 'bandwidth': None}
|
eval.log
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
type eval | step 0 | loss 69.9351 227.3715 251.4663 262.0019 1158.8053 | checkpoint False | ce_loss 1.5683 | sae_losses 69.9351 227.3715 251.4663 262.0019 1158.8053 | ce_loss_increases 9.5531 6.2093 3.0940 1.4725 0.2427 | compound_ce_loss_increase 7.8380 | l0s 255.7843 246.1082 261.4560 257.6235 253.3834 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
|
| 2 |
+
type eval | step 250 | loss 2.7938 4.6736 9.2002 19.7001 65.0648 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 2.7938 4.6736 9.2002 19.7001 65.0648 | ce_loss_increases 2.4624 2.5027 1.4391 0.8839 0.5587 | compound_ce_loss_increase 15.9712 | l0s 109.0341 98.4950 101.4176 119.2250 157.3766 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
|
| 3 |
+
type eval | step 500 | loss 0.0672 0.2048 0.6036 0.8582 1.7478 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0672 0.2048 0.6036 0.8582 1.7478 | ce_loss_increases 0.1324 0.6147 0.6956 0.5069 0.1164 | compound_ce_loss_increase 2.1451 | l0s 33.4035 41.5548 42.0688 54.1596 55.7794 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
|
| 4 |
+
type eval | step 750 | loss 0.0335 0.1020 0.3495 0.5447 1.0117 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0335 0.1020 0.3495 0.5447 1.0117 | ce_loss_increases 0.0313 0.3622 0.3207 0.1837 0.0290 | compound_ce_loss_increase 1.0494 | l0s 20.5222 24.9453 34.0170 69.1310 94.0110 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
|
| 5 |
+
type eval | step 1000 | loss 0.0291 0.0836 0.3035 0.4791 0.8924 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0291 0.0836 0.3035 0.4791 0.8924 | ce_loss_increases 0.0188 0.3401 0.2667 0.1451 0.0183 | compound_ce_loss_increase 1.0156 | l0s 16.9708 21.7072 29.8823 67.8966 112.0945 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
|
| 6 |
+
type eval | step 1250 | loss 0.0278 0.0777 0.2887 0.4584 0.8595 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0278 0.0777 0.2887 0.4584 0.8595 | ce_loss_increases 0.0136 0.3101 0.2397 0.1342 0.0155 | compound_ce_loss_increase 1.0264 | l0s 14.0052 20.0144 27.8973 65.0998 115.1745 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
|
| 7 |
+
type eval | step 1500 | loss 0.0270 0.0749 0.2814 0.4484 0.8443 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0270 0.0749 0.2814 0.4484 0.8443 | ce_loss_increases 0.0125 0.2840 0.2297 0.1284 0.0145 | compound_ce_loss_increase 1.0387 | l0s 12.3140 19.0326 26.4638 63.0203 114.9442 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
|
| 8 |
+
type eval | step 1750 | loss 0.0265 0.0733 0.2776 0.4432 0.8359 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0265 0.0733 0.2776 0.4432 0.8359 | ce_loss_increases 0.0123 0.2665 0.2284 0.1277 0.0140 | compound_ce_loss_increase 1.0452 | l0s 11.0394 18.3555 25.7191 61.4684 113.9065 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
|
| 9 |
+
type eval | step 2000 | loss 0.0261 0.0721 0.2746 0.4398 0.8302 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0261 0.0721 0.2746 0.4398 0.8302 | ce_loss_increases 0.0120 0.2507 0.2298 0.1308 0.0139 | compound_ce_loss_increase 1.0642 | l0s 10.2797 17.5832 25.1559 60.1170 113.0572 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
|
| 10 |
+
type eval | step 2250 | loss 0.0258 0.0712 0.2725 0.4370 0.8258 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0258 0.0712 0.2725 0.4370 0.8258 | ce_loss_increases 0.0125 0.2354 0.2288 0.1284 0.0134 | compound_ce_loss_increase 1.0967 | l0s 9.5582 17.2766 24.6476 59.3353 112.1152 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
|
| 11 |
+
type eval | step 2500 | loss 0.0255 0.0704 0.2709 0.4348 0.8225 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0255 0.0704 0.2709 0.4348 0.8225 | ce_loss_increases 0.0127 0.2308 0.2220 0.1267 0.0130 | compound_ce_loss_increase 1.0747 | l0s 9.0886 16.7897 24.2651 58.6772 111.6607 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
|
| 12 |
+
type eval | step 2750 | loss 0.0253 0.0698 0.2695 0.4327 0.8196 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0253 0.0698 0.2695 0.4327 0.8196 | ce_loss_increases 0.0112 0.2156 0.2130 0.1292 0.0135 | compound_ce_loss_increase 1.0413 | l0s 8.6659 16.6079 24.0919 57.5640 110.8334 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
|
| 13 |
+
type eval | step 3000 | loss 0.0252 0.0693 0.2686 0.4317 0.8183 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0252 0.0693 0.2686 0.4317 0.8183 | ce_loss_increases 0.0124 0.2080 0.2110 0.1272 0.0142 | compound_ce_loss_increase 1.0826 | l0s 8.4159 16.3360 23.7989 57.3684 110.3676 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
|
| 14 |
+
type eval | step 3250 | loss 0.0251 0.0690 0.2681 0.4309 0.8167 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0251 0.0690 0.2681 0.4309 0.8167 | ce_loss_increases 0.0108 0.2129 0.2096 0.1288 0.0142 | compound_ce_loss_increase 1.0807 | l0s 8.2436 16.0343 23.4633 57.0171 110.1526 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
|
| 15 |
+
type eval | step 3500 | loss 0.0250 0.0688 0.2678 0.4304 0.8164 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0250 0.0688 0.2678 0.4304 0.8164 | ce_loss_increases 0.0113 0.2076 0.2122 0.1260 0.0147 | compound_ce_loss_increase 1.0646 | l0s 8.1357 15.6935 23.2728 56.9503 109.8648 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
|
| 16 |
+
type eval | step 3750 | loss 0.0249 0.0684 0.2672 0.4299 0.8151 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0249 0.0684 0.2672 0.4299 0.8151 | ce_loss_increases 0.0107 0.2024 0.2087 0.1245 0.0148 | compound_ce_loss_increase 1.0792 | l0s 7.9486 15.5286 23.0477 56.5439 109.6589 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
|
| 17 |
+
type eval | step 4000 | loss 0.0248 0.0681 0.2667 0.4289 0.8137 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0248 0.0681 0.2667 0.4289 0.8137 | ce_loss_increases 0.0101 0.1978 0.2083 0.1243 0.0150 | compound_ce_loss_increase 1.0657 | l0s 7.8210 15.4135 22.9380 56.2839 109.3188 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
|
| 18 |
+
type eval | step 4250 | loss 0.0248 0.0679 0.2663 0.4285 0.8130 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0248 0.0679 0.2663 0.4285 0.8130 | ce_loss_increases 0.0107 0.1952 0.2055 0.1236 0.0144 | compound_ce_loss_increase 1.0770 | l0s 7.7570 15.3227 22.8371 56.1585 109.3503 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
|
| 19 |
+
type eval | step 4500 | loss 0.0247 0.0677 0.2660 0.4280 0.8121 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0247 0.0677 0.2660 0.4280 0.8121 | ce_loss_increases 0.0098 0.1911 0.2064 0.1243 0.0144 | compound_ce_loss_increase 1.0759 | l0s 7.6724 15.1929 22.7772 55.8330 109.2679 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
|
| 20 |
+
type eval | step 4750 | loss 0.0247 0.0675 0.2657 0.4275 0.8112 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0247 0.0675 0.2657 0.4275 0.8112 | ce_loss_increases 0.0099 0.1907 0.2019 0.1237 0.0143 | compound_ce_loss_increase 1.0667 | l0s 7.6310 15.1614 22.8068 55.7218 108.9904 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
|
| 21 |
+
type eval | step 5000 | loss 0.0247 0.0673 0.2653 0.4270 0.8105 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0247 0.0673 0.2653 0.4270 0.8105 | ce_loss_increases 0.0099 0.1891 0.2036 0.1217 0.0141 | compound_ce_loss_increase 1.0497 | l0s 7.6042 15.0103 22.7263 55.7531 108.8672 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
|
| 22 |
+
type eval | step 5250 | loss 0.0246 0.0672 0.2650 0.4264 0.8097 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0246 0.0672 0.2650 0.4264 0.8097 | ce_loss_increases 0.0093 0.1852 0.2020 0.1222 0.0148 | compound_ce_loss_increase 1.0509 | l0s 7.5453 14.9759 22.7354 55.5485 108.6608 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
|
| 23 |
+
type eval | step 5500 | loss 0.0246 0.0671 0.2648 0.4262 0.8094 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0246 0.0671 0.2648 0.4262 0.8094 | ce_loss_increases 0.0094 0.1837 0.2016 0.1229 0.0146 | compound_ce_loss_increase 1.0697 | l0s 7.5389 14.8933 22.6347 55.4255 108.4745 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
|
| 24 |
+
type eval | step 5750 | loss 0.0246 0.0670 0.2648 0.4261 0.8090 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0246 0.0670 0.2648 0.4261 0.8090 | ce_loss_increases 0.0093 0.1828 0.2045 0.1233 0.0149 | compound_ce_loss_increase 1.0857 | l0s 7.4760 14.8114 22.4662 55.3199 108.4992 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
|
| 25 |
+
type eval | step 6000 | loss 0.0246 0.0670 0.2648 0.4260 0.8088 | checkpoint True True False True True | ce_loss 1.5683 | sae_losses 0.0246 0.0670 0.2648 0.4260 0.8088 | ce_loss_increases 0.0095 0.1846 0.2058 0.1227 0.0150 | compound_ce_loss_increase 1.0818 | l0s 7.4760 14.7987 22.5016 55.3805 108.5362 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
|
| 26 |
+
type eval | step 6250 | loss 0.0245 0.0670 0.2648 0.4261 0.8088 | checkpoint True True False False True | ce_loss 1.5683 | sae_losses 0.0245 0.0670 0.2648 0.4261 0.8088 | ce_loss_increases 0.0089 0.1797 0.2100 0.1226 0.0150 | compound_ce_loss_increase 1.0657 | l0s 7.4739 14.7209 22.4140 55.3752 108.6182 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
|
| 27 |
+
type eval | step 6500 | loss 0.0245 0.0669 0.2647 0.4258 0.8084 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0245 0.0669 0.2647 0.4258 0.8084 | ce_loss_increases 0.0095 0.1806 0.2065 0.1222 0.0151 | compound_ce_loss_increase 1.0924 | l0s 7.4278 14.6543 22.3796 55.2960 108.6694 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
|
| 28 |
+
type eval | step 6750 | loss 0.0245 0.0668 0.2646 0.4257 0.8082 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0245 0.0668 0.2646 0.4257 0.8082 | ce_loss_increases 0.0088 0.1803 0.2037 0.1218 0.0150 | compound_ce_loss_increase 1.0682 | l0s 7.4555 14.6546 22.4456 55.3179 108.4709 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
|
| 29 |
+
type eval | step 7000 | loss 0.0245 0.0668 0.2645 0.4257 0.8082 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0245 0.0668 0.2645 0.4257 0.8082 | ce_loss_increases 0.0089 0.1797 0.2041 0.1220 0.0149 | compound_ce_loss_increase 1.0774 | l0s 7.3995 14.5176 22.3972 55.2673 108.3490 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
|
| 30 |
+
type eval | step 7250 | loss 0.0245 0.0667 0.2644 0.4256 0.8080 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0245 0.0667 0.2644 0.4256 0.8080 | ce_loss_increases 0.0084 0.1774 0.2050 0.1220 0.0149 | compound_ce_loss_increase 1.0624 | l0s 7.3754 14.5630 22.3671 55.1606 108.3959 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
|
| 31 |
+
type eval | step 7500 | loss 0.0245 0.0667 0.2643 0.4255 0.8078 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0245 0.0667 0.2643 0.4255 0.8078 | ce_loss_increases 0.0085 0.1788 0.2038 0.1217 0.0146 | compound_ce_loss_increase 1.0660 | l0s 7.4011 14.4701 22.3721 55.1387 108.2986 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
|
model.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"block_size": 128, "vocab_size": 128, "n_layer": 4, "n_head": 4, "n_embd": 64}
|
model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3b7a990722c440fdbd1e79a78b08a6f65f74fe96acfa49e5ef4cd5c8936a3c85
|
| 3 |
+
size 870720
|
sae.0.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1a9760aa72b062f34c9e45c37a4f7d4a131b0ff9d62c274f94b5a2b7e0561451
|
| 3 |
+
size 264736
|
sae.1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f1ad3ce0f5188d6a8af0c91eb54690dec1a12de067342266ee06629d4bdbe00d
|
| 3 |
+
size 264736
|
sae.2.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b1eaf452e932443af916c0b2df6c24121c301f6a6c8f2aa945902d7212f5832c
|
| 3 |
+
size 264736
|
sae.3.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:62a81b951825174b796b0f9a11b2546edebd8164ff486faefe2d0145f5fdc437
|
| 3 |
+
size 264736
|
sae.4.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d9932d87075ce7388db9196450f1b045ad9fbb4646860a2c8cfaee9240707d02
|
| 3 |
+
size 264736
|
sae.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"n_features": [512, 512, 512, 512, 512], "sae_variant": "standard"}
|
train.log
ADDED
|
@@ -0,0 +1,750 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
type train | step 10 | loss 69.4636 226.5789 250.8517 256.6940 1153.1455 | lr 1.3e-05 | norm 886.2820 | dt 0.018
|
| 2 |
+
type train | step 20 | loss 68.1997 222.8179 247.9711 253.0499 1149.1156 | lr 2.7e-05 | norm 894.3831 | dt 0.019
|
| 3 |
+
type train | step 30 | loss 66.6820 216.2963 242.8411 251.0308 1125.3365 | lr 4.0e-05 | norm 868.0057 | dt 0.018
|
| 4 |
+
type train | step 40 | loss 66.3617 208.2336 235.0398 244.6176 1050.3451 | lr 5.3e-05 | norm 796.7020 | dt 0.018
|
| 5 |
+
type train | step 50 | loss 62.1938 197.4808 224.5412 236.7062 1034.7133 | lr 6.7e-05 | norm 794.4216 | dt 0.018
|
| 6 |
+
type train | step 60 | loss 59.2433 185.0247 211.9988 224.8304 956.9526 | lr 8.0e-05 | norm 733.3275 | dt 0.018
|
| 7 |
+
type train | step 70 | loss 56.1140 171.2373 199.1852 215.3797 903.0059 | lr 9.3e-05 | norm 696.8870 | dt 0.018
|
| 8 |
+
type train | step 80 | loss 53.7543 158.4786 188.2167 203.7323 817.3013 | lr 1.1e-04 | norm 617.6252 | dt 0.018
|
| 9 |
+
type train | step 90 | loss 49.1172 142.7348 172.4434 193.9439 769.2914 | lr 1.2e-04 | norm 581.0894 | dt 0.018
|
| 10 |
+
type train | step 100 | loss 44.8693 128.0235 157.2219 177.9275 680.1750 | lr 1.3e-04 | norm 524.4868 | dt 0.018
|
| 11 |
+
type train | step 110 | loss 41.6606 113.5900 142.0942 167.6646 614.4323 | lr 1.5e-04 | norm 459.5229 | dt 0.018
|
| 12 |
+
type train | step 120 | loss 39.1014 99.7290 128.9482 156.4952 538.9907 | lr 1.6e-04 | norm 391.7111 | dt 0.018
|
| 13 |
+
type train | step 130 | loss 34.2333 85.3391 114.0419 142.0932 476.9624 | lr 1.7e-04 | norm 347.3186 | dt 0.018
|
| 14 |
+
type train | step 140 | loss 32.3965 74.5033 102.4195 132.5119 411.1675 | lr 1.9e-04 | norm 297.0230 | dt 0.018
|
| 15 |
+
type train | step 150 | loss 26.6403 61.2950 87.1429 117.0333 363.1526 | lr 2.0e-04 | norm 261.2342 | dt 0.018
|
| 16 |
+
type train | step 160 | loss 22.6446 51.0130 75.2246 102.4038 301.4481 | lr 2.1e-04 | norm 223.7403 | dt 0.018
|
| 17 |
+
type train | step 170 | loss 18.4608 40.8693 63.0367 88.4909 253.7374 | lr 2.3e-04 | norm 190.5051 | dt 0.018
|
| 18 |
+
type train | step 180 | loss 16.9705 33.6819 53.5488 80.8746 221.5518 | lr 2.4e-04 | norm 162.3525 | dt 0.018
|
| 19 |
+
type train | step 190 | loss 13.2541 26.1239 44.0511 69.7721 190.0946 | lr 2.5e-04 | norm 142.4153 | dt 0.018
|
| 20 |
+
type train | step 200 | loss 11.7273 20.8617 36.2357 58.9314 160.5240 | lr 2.7e-04 | norm 126.1539 | dt 0.018
|
| 21 |
+
type train | step 210 | loss 9.1706 15.9394 28.7063 49.1267 135.3334 | lr 2.8e-04 | norm 112.9733 | dt 0.018
|
| 22 |
+
type train | step 220 | loss 6.3784 11.5876 21.9930 39.4931 108.4306 | lr 2.9e-04 | norm 98.0527 | dt 0.018
|
| 23 |
+
type train | step 230 | loss 4.5263 8.4531 16.3871 30.2598 86.2431 | lr 3.1e-04 | norm 84.8507 | dt 0.018
|
| 24 |
+
type train | step 240 | loss 3.8490 6.5626 13.0430 27.0773 79.8424 | lr 3.2e-04 | norm 82.5597 | dt 0.018
|
| 25 |
+
type train | step 250 | loss 2.9099 4.7822 9.4725 19.9634 66.4382 | lr 3.3e-04 | norm 71.3396 | dt 0.018
|
| 26 |
+
type train | step 260 | loss 2.0588 3.5635 7.0585 15.0222 51.2331 | lr 3.5e-04 | norm 60.0523 | dt 0.018
|
| 27 |
+
type train | step 270 | loss 1.5776 2.7857 5.3007 11.4343 43.0835 | lr 3.6e-04 | norm 52.4717 | dt 0.018
|
| 28 |
+
type train | step 280 | loss 1.2177 2.2190 4.0215 9.5068 37.3215 | lr 3.7e-04 | norm 46.9859 | dt 0.018
|
| 29 |
+
type train | step 290 | loss 0.7455 1.6687 2.9226 6.0741 25.5075 | lr 3.9e-04 | norm 33.5766 | dt 0.018
|
| 30 |
+
type train | step 300 | loss 0.5948 1.4152 2.4800 4.9125 22.3764 | lr 4.0e-04 | norm 29.6173 | dt 0.018
|
| 31 |
+
type train | step 310 | loss 0.4870 1.2102 2.1670 3.7058 19.2448 | lr 4.1e-04 | norm 24.6486 | dt 0.018
|
| 32 |
+
type train | step 320 | loss 0.3746 1.0141 1.9373 3.1380 14.3225 | lr 4.3e-04 | norm 18.3827 | dt 0.018
|
| 33 |
+
type train | step 330 | loss 0.2919 0.8577 1.7706 2.5948 10.5112 | lr 4.4e-04 | norm 12.4146 | dt 0.018
|
| 34 |
+
type train | step 340 | loss 0.2257 0.7246 1.5766 2.3074 8.2138 | lr 4.5e-04 | norm 8.1415 | dt 0.018
|
| 35 |
+
type train | step 350 | loss 0.1994 0.6405 1.4454 2.0842 6.8487 | lr 4.7e-04 | norm 6.2130 | dt 0.018
|
| 36 |
+
type train | step 360 | loss 0.1723 0.5604 1.3191 1.8661 5.9103 | lr 4.8e-04 | norm 4.8704 | dt 0.018
|
| 37 |
+
type train | step 370 | loss 0.1559 0.4981 1.2278 1.7410 5.0767 | lr 4.9e-04 | norm 3.4502 | dt 0.018
|
| 38 |
+
type train | step 380 | loss 0.1488 0.4646 1.1538 1.6798 4.8698 | lr 5.1e-04 | norm 3.8159 | dt 0.023
|
| 39 |
+
type train | step 390 | loss 0.1277 0.4035 1.0497 1.4549 3.6398 | lr 5.2e-04 | norm 1.4677 | dt 0.019
|
| 40 |
+
type train | step 400 | loss 0.1177 0.3710 0.9695 1.3665 3.3093 | lr 5.3e-04 | norm 1.0679 | dt 0.018
|
| 41 |
+
type train | step 410 | loss 0.1104 0.3417 0.9191 1.2880 2.9529 | lr 5.5e-04 | norm 0.7334 | dt 0.018
|
| 42 |
+
type train | step 420 | loss 0.1025 0.3170 0.8595 1.2216 2.8003 | lr 5.6e-04 | norm 0.8519 | dt 0.018
|
| 43 |
+
type train | step 430 | loss 0.0953 0.2916 0.8185 1.1490 2.5262 | lr 5.7e-04 | norm 0.5294 | dt 0.018
|
| 44 |
+
type train | step 440 | loss 0.0904 0.2748 0.7711 1.0751 2.3714 | lr 5.9e-04 | norm 0.4445 | dt 0.019
|
| 45 |
+
type train | step 450 | loss 0.0859 0.2614 0.7437 1.0359 2.2252 | lr 6.0e-04 | norm 0.3785 | dt 0.018
|
| 46 |
+
type train | step 460 | loss 0.0811 0.2460 0.6982 0.9798 2.0998 | lr 6.1e-04 | norm 0.3374 | dt 0.019
|
| 47 |
+
type train | step 470 | loss 0.0790 0.2378 0.6846 0.9601 1.9977 | lr 6.3e-04 | norm 0.3118 | dt 0.018
|
| 48 |
+
type train | step 480 | loss 0.0740 0.2241 0.6524 0.9228 1.9176 | lr 6.4e-04 | norm 0.3128 | dt 0.018
|
| 49 |
+
type train | step 490 | loss 0.0701 0.2136 0.6277 0.8918 1.8196 | lr 6.5e-04 | norm 0.3199 | dt 0.018
|
| 50 |
+
type train | step 500 | loss 0.0676 0.2055 0.6086 0.8596 1.7490 | lr 6.7e-04 | norm 0.2709 | dt 0.018
|
| 51 |
+
type train | step 510 | loss 0.0640 0.1964 0.5873 0.8365 1.6768 | lr 6.8e-04 | norm 0.3042 | dt 0.018
|
| 52 |
+
type train | step 520 | loss 0.0611 0.1875 0.5639 0.8139 1.6041 | lr 6.9e-04 | norm 0.2596 | dt 0.018
|
| 53 |
+
type train | step 530 | loss 0.0606 0.1842 0.5559 0.7943 1.5613 | lr 7.1e-04 | norm 0.2156 | dt 0.018
|
| 54 |
+
type train | step 540 | loss 0.0559 0.1723 0.5272 0.7619 1.4930 | lr 7.2e-04 | norm 0.1883 | dt 0.018
|
| 55 |
+
type train | step 550 | loss 0.0538 0.1665 0.5176 0.7531 1.4499 | lr 7.3e-04 | norm 0.2875 | dt 0.018
|
| 56 |
+
type train | step 560 | loss 0.0534 0.1641 0.5058 0.7313 1.4123 | lr 7.5e-04 | norm 0.2460 | dt 0.018
|
| 57 |
+
type train | step 570 | loss 0.0505 0.1567 0.4917 0.7192 1.3694 | lr 7.6e-04 | norm 0.2039 | dt 0.018
|
| 58 |
+
type train | step 580 | loss 0.0482 0.1516 0.4714 0.6964 1.3353 | lr 7.7e-04 | norm 0.1957 | dt 0.018
|
| 59 |
+
type train | step 590 | loss 0.0476 0.1479 0.4661 0.6865 1.2945 | lr 7.9e-04 | norm 0.1735 | dt 0.018
|
| 60 |
+
type train | step 600 | loss 0.0448 0.1401 0.4472 0.6638 1.2605 | lr 8.0e-04 | norm 0.1817 | dt 0.018
|
| 61 |
+
type train | step 610 | loss 0.0438 0.1376 0.4396 0.6592 1.2431 | lr 8.1e-04 | norm 0.2034 | dt 0.019
|
| 62 |
+
type train | step 620 | loss 0.0427 0.1333 0.4312 0.6423 1.2101 | lr 8.3e-04 | norm 0.2978 | dt 0.018
|
| 63 |
+
type train | step 630 | loss 0.0414 0.1295 0.4235 0.6374 1.1932 | lr 8.4e-04 | norm 0.2064 | dt 0.018
|
| 64 |
+
type train | step 640 | loss 0.0403 0.1263 0.4073 0.6205 1.1666 | lr 8.5e-04 | norm 0.2201 | dt 0.018
|
| 65 |
+
type train | step 650 | loss 0.0410 0.1273 0.4121 0.6271 1.1645 | lr 8.7e-04 | norm 0.1735 | dt 0.018
|
| 66 |
+
type train | step 660 | loss 0.0385 0.1200 0.3961 0.5986 1.1275 | lr 8.8e-04 | norm 0.1498 | dt 0.018
|
| 67 |
+
type train | step 670 | loss 0.0371 0.1164 0.3851 0.5909 1.1092 | lr 8.9e-04 | norm 0.1515 | dt 0.018
|
| 68 |
+
type train | step 680 | loss 0.0367 0.1138 0.3803 0.5826 1.0908 | lr 9.1e-04 | norm 0.2868 | dt 0.018
|
| 69 |
+
type train | step 690 | loss 0.0369 0.1140 0.3835 0.5924 1.0937 | lr 9.2e-04 | norm 0.1878 | dt 0.018
|
| 70 |
+
type train | step 700 | loss 0.0357 0.1106 0.3694 0.5723 1.0684 | lr 9.3e-04 | norm 0.3153 | dt 0.018
|
| 71 |
+
type train | step 710 | loss 0.0350 0.1072 0.3616 0.5592 1.0431 | lr 9.5e-04 | norm 0.1431 | dt 0.018
|
| 72 |
+
type train | step 720 | loss 0.0347 0.1068 0.3607 0.5571 1.0391 | lr 9.6e-04 | norm 0.1545 | dt 0.018
|
| 73 |
+
type train | step 730 | loss 0.0346 0.1057 0.3598 0.5542 1.0308 | lr 9.7e-04 | norm 0.1393 | dt 0.018
|
| 74 |
+
type train | step 740 | loss 0.0338 0.1026 0.3517 0.5476 1.0176 | lr 9.9e-04 | norm 0.2018 | dt 0.018
|
| 75 |
+
type train | step 750 | loss 0.0346 0.1048 0.3608 0.5531 1.0136 | lr 1.0e-03 | norm 0.3024 | dt 0.018
|
| 76 |
+
type train | step 760 | loss 0.0332 0.1007 0.3462 0.5398 1.0070 | lr 1.0e-03 | norm 0.1674 | dt 0.018
|
| 77 |
+
type train | step 770 | loss 0.0327 0.0982 0.3425 0.5328 0.9883 | lr 1.0e-03 | norm 0.1503 | dt 0.018
|
| 78 |
+
type train | step 780 | loss 0.0320 0.0964 0.3376 0.5223 0.9753 | lr 1.0e-03 | norm 0.2171 | dt 0.018
|
| 79 |
+
type train | step 790 | loss 0.0323 0.0964 0.3391 0.5261 0.9738 | lr 1.0e-03 | norm 0.1257 | dt 0.018
|
| 80 |
+
type train | step 800 | loss 0.0317 0.0949 0.3327 0.5201 0.9673 | lr 1.0e-03 | norm 0.1716 | dt 0.018
|
| 81 |
+
type train | step 810 | loss 0.0321 0.0955 0.3367 0.5272 0.9673 | lr 1.0e-03 | norm 0.1156 | dt 0.018
|
| 82 |
+
type train | step 820 | loss 0.0317 0.0939 0.3289 0.5142 0.9521 | lr 1.0e-03 | norm 0.1073 | dt 0.018
|
| 83 |
+
type train | step 830 | loss 0.0311 0.0916 0.3256 0.5094 0.9415 | lr 1.0e-03 | norm 0.1762 | dt 0.018
|
| 84 |
+
type train | step 840 | loss 0.0306 0.0900 0.3225 0.5011 0.9338 | lr 1.0e-03 | norm 0.2765 | dt 0.018
|
| 85 |
+
type train | step 850 | loss 0.0309 0.0911 0.3284 0.5082 0.9395 | lr 1.0e-03 | norm 0.1067 | dt 0.018
|
| 86 |
+
type train | step 860 | loss 0.0306 0.0897 0.3183 0.4968 0.9312 | lr 1.0e-03 | norm 0.2106 | dt 0.018
|
| 87 |
+
type train | step 870 | loss 0.0306 0.0886 0.3186 0.4964 0.9252 | lr 1.0e-03 | norm 0.1065 | dt 0.018
|
| 88 |
+
type train | step 880 | loss 0.0305 0.0892 0.3164 0.4939 0.9167 | lr 1.0e-03 | norm 0.1697 | dt 0.018
|
| 89 |
+
type train | step 890 | loss 0.0306 0.0890 0.3174 0.4981 0.9259 | lr 1.0e-03 | norm 0.1404 | dt 0.018
|
| 90 |
+
type train | step 900 | loss 0.0299 0.0872 0.3114 0.4870 0.9140 | lr 1.0e-03 | norm 0.2454 | dt 0.018
|
| 91 |
+
type train | step 910 | loss 0.0300 0.0868 0.3174 0.4915 0.9141 | lr 1.0e-03 | norm 0.1200 | dt 0.018
|
| 92 |
+
type train | step 920 | loss 0.0302 0.0871 0.3113 0.4892 0.9152 | lr 1.0e-03 | norm 0.2026 | dt 0.018
|
| 93 |
+
type train | step 930 | loss 0.0298 0.0860 0.3090 0.4855 0.9092 | lr 1.0e-03 | norm 0.1073 | dt 0.018
|
| 94 |
+
type train | step 940 | loss 0.0297 0.0854 0.3101 0.4864 0.9008 | lr 1.0e-03 | norm 0.2066 | dt 0.018
|
| 95 |
+
type train | step 950 | loss 0.0294 0.0844 0.3066 0.4812 0.8966 | lr 1.0e-03 | norm 0.1644 | dt 0.018
|
| 96 |
+
type train | step 960 | loss 0.0293 0.0846 0.3070 0.4804 0.9002 | lr 1.0e-03 | norm 0.1300 | dt 0.018
|
| 97 |
+
type train | step 970 | loss 0.0293 0.0838 0.3024 0.4727 0.8890 | lr 1.0e-03 | norm 0.0967 | dt 0.018
|
| 98 |
+
type train | step 980 | loss 0.0292 0.0836 0.3055 0.4788 0.8960 | lr 1.0e-03 | norm 0.1529 | dt 0.018
|
| 99 |
+
type train | step 990 | loss 0.0296 0.0843 0.3067 0.4863 0.9027 | lr 1.0e-03 | norm 0.1585 | dt 0.018
|
| 100 |
+
type train | step 1000 | loss 0.0290 0.0824 0.3028 0.4741 0.8827 | lr 1.0e-03 | norm 0.1293 | dt 0.018
|
| 101 |
+
type train | step 1010 | loss 0.0289 0.0831 0.2987 0.4699 0.8815 | lr 1.0e-03 | norm 0.1887 | dt 0.018
|
| 102 |
+
type train | step 1020 | loss 0.0290 0.0826 0.3015 0.4733 0.8835 | lr 1.0e-03 | norm 0.1759 | dt 0.018
|
| 103 |
+
type train | step 1030 | loss 0.0289 0.0823 0.2997 0.4718 0.8867 | lr 1.0e-03 | norm 0.1432 | dt 0.018
|
| 104 |
+
type train | step 1040 | loss 0.0287 0.0816 0.3016 0.4747 0.8849 | lr 1.0e-03 | norm 0.1084 | dt 0.018
|
| 105 |
+
type train | step 1050 | loss 0.0287 0.0812 0.2963 0.4667 0.8794 | lr 1.0e-03 | norm 0.1330 | dt 0.018
|
| 106 |
+
type train | step 1060 | loss 0.0287 0.0814 0.2989 0.4708 0.8769 | lr 1.0e-03 | norm 0.1368 | dt 0.018
|
| 107 |
+
type train | step 1070 | loss 0.0285 0.0806 0.2923 0.4602 0.8679 | lr 1.0e-03 | norm 0.0953 | dt 0.018
|
| 108 |
+
type train | step 1080 | loss 0.0288 0.0810 0.2972 0.4678 0.8692 | lr 9.9e-04 | norm 0.1065 | dt 0.018
|
| 109 |
+
type train | step 1090 | loss 0.0285 0.0805 0.2962 0.4662 0.8766 | lr 9.9e-04 | norm 0.2667 | dt 0.018
|
| 110 |
+
type train | step 1100 | loss 0.0283 0.0801 0.2956 0.4674 0.8741 | lr 9.9e-04 | norm 0.1118 | dt 0.018
|
| 111 |
+
type train | step 1110 | loss 0.0285 0.0801 0.2956 0.4663 0.8721 | lr 9.9e-04 | norm 0.1035 | dt 0.018
|
| 112 |
+
type train | step 1120 | loss 0.0283 0.0795 0.2957 0.4664 0.8680 | lr 9.9e-04 | norm 0.1478 | dt 0.018
|
| 113 |
+
type train | step 1130 | loss 0.0282 0.0794 0.2933 0.4662 0.8652 | lr 9.9e-04 | norm 0.2210 | dt 0.018
|
| 114 |
+
type train | step 1140 | loss 0.0287 0.0800 0.2951 0.4666 0.8696 | lr 9.9e-04 | norm 0.1183 | dt 0.018
|
| 115 |
+
type train | step 1150 | loss 0.0280 0.0782 0.2904 0.4572 0.8621 | lr 9.9e-04 | norm 0.0950 | dt 0.018
|
| 116 |
+
type train | step 1160 | loss 0.0280 0.0786 0.2945 0.4642 0.8652 | lr 9.9e-04 | norm 0.1461 | dt 0.018
|
| 117 |
+
type train | step 1170 | loss 0.0284 0.0793 0.2930 0.4634 0.8696 | lr 9.9e-04 | norm 0.1252 | dt 0.018
|
| 118 |
+
type train | step 1180 | loss 0.0281 0.0783 0.2930 0.4662 0.8656 | lr 9.9e-04 | norm 0.1411 | dt 0.018
|
| 119 |
+
type train | step 1190 | loss 0.0278 0.0786 0.2888 0.4564 0.8612 | lr 9.9e-04 | norm 0.1097 | dt 0.018
|
| 120 |
+
type train | step 1200 | loss 0.0281 0.0782 0.2909 0.4605 0.8579 | lr 9.9e-04 | norm 0.1376 | dt 0.018
|
| 121 |
+
type train | step 1210 | loss 0.0277 0.0769 0.2881 0.4524 0.8536 | lr 9.9e-04 | norm 0.1107 | dt 0.018
|
| 122 |
+
type train | step 1220 | loss 0.0277 0.0774 0.2889 0.4593 0.8585 | lr 9.9e-04 | norm 0.1439 | dt 0.022
|
| 123 |
+
type train | step 1230 | loss 0.0277 0.0770 0.2891 0.4545 0.8555 | lr 9.9e-04 | norm 0.1200 | dt 0.018
|
| 124 |
+
type train | step 1240 | loss 0.0277 0.0770 0.2901 0.4583 0.8586 | lr 9.9e-04 | norm 0.1661 | dt 0.018
|
| 125 |
+
type train | step 1250 | loss 0.0276 0.0769 0.2845 0.4516 0.8516 | lr 9.9e-04 | norm 0.2225 | dt 0.018
|
| 126 |
+
type train | step 1260 | loss 0.0281 0.0783 0.2905 0.4659 0.8656 | lr 9.9e-04 | norm 0.1148 | dt 0.018
|
| 127 |
+
type train | step 1270 | loss 0.0275 0.0765 0.2867 0.4472 0.8503 | lr 9.9e-04 | norm 0.2198 | dt 0.018
|
| 128 |
+
type train | step 1280 | loss 0.0273 0.0762 0.2843 0.4490 0.8484 | lr 9.9e-04 | norm 0.1443 | dt 0.018
|
| 129 |
+
type train | step 1290 | loss 0.0274 0.0760 0.2847 0.4482 0.8476 | lr 9.9e-04 | norm 0.1802 | dt 0.018
|
| 130 |
+
type train | step 1300 | loss 0.0277 0.0770 0.2894 0.4633 0.8625 | lr 9.9e-04 | norm 0.0969 | dt 0.018
|
| 131 |
+
type train | step 1310 | loss 0.0274 0.0763 0.2837 0.4505 0.8515 | lr 9.8e-04 | norm 0.2014 | dt 0.018
|
| 132 |
+
type train | step 1320 | loss 0.0273 0.0755 0.2812 0.4442 0.8398 | lr 9.8e-04 | norm 0.1201 | dt 0.018
|
| 133 |
+
type train | step 1330 | loss 0.0274 0.0762 0.2842 0.4474 0.8472 | lr 9.8e-04 | norm 0.0912 | dt 0.018
|
| 134 |
+
type train | step 1340 | loss 0.0276 0.0768 0.2860 0.4492 0.8521 | lr 9.8e-04 | norm 0.0993 | dt 0.018
|
| 135 |
+
type train | step 1350 | loss 0.0274 0.0757 0.2829 0.4484 0.8482 | lr 9.8e-04 | norm 0.0936 | dt 0.018
|
| 136 |
+
type train | step 1360 | loss 0.0280 0.0779 0.2909 0.4599 0.8562 | lr 9.8e-04 | norm 0.2049 | dt 0.018
|
| 137 |
+
type train | step 1370 | loss 0.0273 0.0761 0.2839 0.4508 0.8537 | lr 9.8e-04 | norm 0.1813 | dt 0.018
|
| 138 |
+
type train | step 1380 | loss 0.0273 0.0754 0.2834 0.4482 0.8449 | lr 9.8e-04 | norm 0.0986 | dt 0.018
|
| 139 |
+
type train | step 1390 | loss 0.0270 0.0750 0.2828 0.4424 0.8405 | lr 9.8e-04 | norm 0.2460 | dt 0.018
|
| 140 |
+
type train | step 1400 | loss 0.0274 0.0756 0.2848 0.4494 0.8477 | lr 9.8e-04 | norm 0.2080 | dt 0.018
|
| 141 |
+
type train | step 1410 | loss 0.0271 0.0753 0.2822 0.4464 0.8457 | lr 9.8e-04 | norm 0.1807 | dt 0.019
|
| 142 |
+
type train | step 1420 | loss 0.0274 0.0763 0.2863 0.4574 0.8541 | lr 9.8e-04 | norm 0.1084 | dt 0.018
|
| 143 |
+
type train | step 1430 | loss 0.0273 0.0756 0.2814 0.4481 0.8454 | lr 9.8e-04 | norm 0.1572 | dt 0.018
|
| 144 |
+
type train | step 1440 | loss 0.0271 0.0745 0.2814 0.4460 0.8382 | lr 9.8e-04 | norm 0.0923 | dt 0.018
|
| 145 |
+
type train | step 1450 | loss 0.0268 0.0739 0.2809 0.4416 0.8363 | lr 9.8e-04 | norm 0.2386 | dt 0.018
|
| 146 |
+
type train | step 1460 | loss 0.0271 0.0752 0.2864 0.4498 0.8461 | lr 9.8e-04 | norm 0.1115 | dt 0.018
|
| 147 |
+
type train | step 1470 | loss 0.0270 0.0746 0.2795 0.4415 0.8414 | lr 9.7e-04 | norm 0.1573 | dt 0.018
|
| 148 |
+
type train | step 1480 | loss 0.0271 0.0741 0.2808 0.4425 0.8395 | lr 9.7e-04 | norm 0.1882 | dt 0.018
|
| 149 |
+
type train | step 1490 | loss 0.0270 0.0748 0.2797 0.4429 0.8364 | lr 9.7e-04 | norm 0.1861 | dt 0.018
|
| 150 |
+
type train | step 1500 | loss 0.0273 0.0750 0.2818 0.4491 0.8484 | lr 9.7e-04 | norm 0.1673 | dt 0.018
|
| 151 |
+
type train | step 1510 | loss 0.0269 0.0742 0.2785 0.4394 0.8391 | lr 9.7e-04 | norm 0.1711 | dt 0.018
|
| 152 |
+
type train | step 1520 | loss 0.0269 0.0742 0.2846 0.4453 0.8425 | lr 9.7e-04 | norm 0.1103 | dt 0.018
|
| 153 |
+
type train | step 1530 | loss 0.0272 0.0749 0.2797 0.4447 0.8459 | lr 9.7e-04 | norm 0.2115 | dt 0.018
|
| 154 |
+
type train | step 1540 | loss 0.0269 0.0742 0.2784 0.4426 0.8421 | lr 9.7e-04 | norm 0.1256 | dt 0.018
|
| 155 |
+
type train | step 1550 | loss 0.0269 0.0741 0.2805 0.4459 0.8381 | lr 9.7e-04 | norm 0.0932 | dt 0.018
|
| 156 |
+
type train | step 1560 | loss 0.0267 0.0734 0.2784 0.4426 0.8355 | lr 9.7e-04 | norm 0.1850 | dt 0.018
|
| 157 |
+
type train | step 1570 | loss 0.0268 0.0739 0.2795 0.4420 0.8405 | lr 9.7e-04 | norm 0.1793 | dt 0.018
|
| 158 |
+
type train | step 1580 | loss 0.0268 0.0736 0.2761 0.4365 0.8324 | lr 9.7e-04 | norm 0.1044 | dt 0.018
|
| 159 |
+
type train | step 1590 | loss 0.0267 0.0735 0.2798 0.4417 0.8396 | lr 9.7e-04 | norm 0.1356 | dt 0.018
|
| 160 |
+
type train | step 1600 | loss 0.0271 0.0744 0.2807 0.4513 0.8488 | lr 9.7e-04 | norm 0.0916 | dt 0.018
|
| 161 |
+
type train | step 1610 | loss 0.0266 0.0730 0.2786 0.4407 0.8323 | lr 9.6e-04 | norm 0.1403 | dt 0.018
|
| 162 |
+
type train | step 1620 | loss 0.0266 0.0738 0.2757 0.4385 0.8317 | lr 9.6e-04 | norm 0.1411 | dt 0.018
|
| 163 |
+
type train | step 1630 | loss 0.0267 0.0734 0.2787 0.4419 0.8352 | lr 9.6e-04 | norm 0.1060 | dt 0.018
|
| 164 |
+
type train | step 1640 | loss 0.0267 0.0737 0.2776 0.4412 0.8401 | lr 9.6e-04 | norm 0.1146 | dt 0.018
|
| 165 |
+
type train | step 1650 | loss 0.0266 0.0732 0.2803 0.4445 0.8395 | lr 9.6e-04 | norm 0.1714 | dt 0.018
|
| 166 |
+
type train | step 1660 | loss 0.0266 0.0729 0.2754 0.4379 0.8346 | lr 9.6e-04 | norm 0.1015 | dt 0.018
|
| 167 |
+
type train | step 1670 | loss 0.0266 0.0734 0.2784 0.4424 0.8337 | lr 9.6e-04 | norm 0.1191 | dt 0.018
|
| 168 |
+
type train | step 1680 | loss 0.0265 0.0729 0.2728 0.4335 0.8265 | lr 9.6e-04 | norm 0.1011 | dt 0.018
|
| 169 |
+
type train | step 1690 | loss 0.0268 0.0733 0.2775 0.4415 0.8298 | lr 9.6e-04 | norm 0.0986 | dt 0.018
|
| 170 |
+
type train | step 1700 | loss 0.0266 0.0731 0.2775 0.4400 0.8374 | lr 9.6e-04 | norm 0.2354 | dt 0.018
|
| 171 |
+
type train | step 1710 | loss 0.0264 0.0729 0.2776 0.4423 0.8359 | lr 9.6e-04 | norm 0.1153 | dt 0.018
|
| 172 |
+
type train | step 1720 | loss 0.0266 0.0730 0.2772 0.4418 0.8345 | lr 9.5e-04 | norm 0.1104 | dt 0.018
|
| 173 |
+
type train | step 1730 | loss 0.0265 0.0727 0.2781 0.4430 0.8325 | lr 9.5e-04 | norm 0.1494 | dt 0.018
|
| 174 |
+
type train | step 1740 | loss 0.0264 0.0726 0.2766 0.4437 0.8312 | lr 9.5e-04 | norm 0.1823 | dt 0.018
|
| 175 |
+
type train | step 1750 | loss 0.0268 0.0732 0.2781 0.4445 0.8358 | lr 9.5e-04 | norm 0.1073 | dt 0.018
|
| 176 |
+
type train | step 1760 | loss 0.0263 0.0720 0.2748 0.4353 0.8287 | lr 9.5e-04 | norm 0.1822 | dt 0.018
|
| 177 |
+
type train | step 1770 | loss 0.0263 0.0724 0.2786 0.4425 0.8330 | lr 9.5e-04 | norm 0.1370 | dt 0.018
|
| 178 |
+
type train | step 1780 | loss 0.0267 0.0731 0.2770 0.4424 0.8361 | lr 9.5e-04 | norm 0.1296 | dt 0.018
|
| 179 |
+
type train | step 1790 | loss 0.0264 0.0724 0.2779 0.4458 0.8355 | lr 9.5e-04 | norm 0.1097 | dt 0.018
|
| 180 |
+
type train | step 1800 | loss 0.0262 0.0729 0.2744 0.4368 0.8323 | lr 9.5e-04 | norm 0.2461 | dt 0.018
|
| 181 |
+
type train | step 1810 | loss 0.0265 0.0725 0.2764 0.4412 0.8287 | lr 9.5e-04 | norm 0.1623 | dt 0.018
|
| 182 |
+
type train | step 1820 | loss 0.0261 0.0714 0.2742 0.4332 0.8248 | lr 9.5e-04 | norm 0.1356 | dt 0.018
|
| 183 |
+
type train | step 1830 | loss 0.0261 0.0721 0.2755 0.4411 0.8306 | lr 9.4e-04 | norm 0.1211 | dt 0.020
|
| 184 |
+
type train | step 1840 | loss 0.0262 0.0718 0.2759 0.4364 0.8279 | lr 9.4e-04 | norm 0.1491 | dt 0.019
|
| 185 |
+
type train | step 1850 | loss 0.0261 0.0718 0.2772 0.4409 0.8324 | lr 9.4e-04 | norm 0.1092 | dt 0.018
|
| 186 |
+
type train | step 1860 | loss 0.0261 0.0718 0.2718 0.4344 0.8253 | lr 9.4e-04 | norm 0.1200 | dt 0.018
|
| 187 |
+
type train | step 1870 | loss 0.0266 0.0732 0.2775 0.4485 0.8397 | lr 9.4e-04 | norm 0.0993 | dt 0.018
|
| 188 |
+
type train | step 1880 | loss 0.0261 0.0717 0.2745 0.4309 0.8246 | lr 9.4e-04 | norm 0.1920 | dt 0.018
|
| 189 |
+
type train | step 1890 | loss 0.0259 0.0715 0.2730 0.4332 0.8239 | lr 9.4e-04 | norm 0.1072 | dt 0.019
|
| 190 |
+
type train | step 1900 | loss 0.0260 0.0714 0.2732 0.4326 0.8240 | lr 9.4e-04 | norm 0.1930 | dt 0.018
|
| 191 |
+
type train | step 1910 | loss 0.0263 0.0724 0.2780 0.4484 0.8387 | lr 9.4e-04 | norm 0.1038 | dt 0.018
|
| 192 |
+
type train | step 1920 | loss 0.0260 0.0718 0.2725 0.4359 0.8284 | lr 9.3e-04 | norm 0.1042 | dt 0.019
|
| 193 |
+
type train | step 1930 | loss 0.0260 0.0711 0.2704 0.4296 0.8177 | lr 9.3e-04 | norm 0.0915 | dt 0.019
|
| 194 |
+
type train | step 1940 | loss 0.0261 0.0719 0.2738 0.4332 0.8253 | lr 9.3e-04 | norm 0.1354 | dt 0.019
|
| 195 |
+
type train | step 1950 | loss 0.0263 0.0725 0.2752 0.4348 0.8297 | lr 9.3e-04 | norm 0.1444 | dt 0.018
|
| 196 |
+
type train | step 1960 | loss 0.0261 0.0716 0.2730 0.4345 0.8267 | lr 9.3e-04 | norm 0.0957 | dt 0.019
|
| 197 |
+
type train | step 1970 | loss 0.0266 0.0736 0.2797 0.4462 0.8348 | lr 9.3e-04 | norm 0.1127 | dt 0.018
|
| 198 |
+
type train | step 1980 | loss 0.0261 0.0721 0.2739 0.4379 0.8328 | lr 9.3e-04 | norm 0.1137 | dt 0.019
|
| 199 |
+
type train | step 1990 | loss 0.0261 0.0714 0.2735 0.4353 0.8253 | lr 9.3e-04 | norm 0.1640 | dt 0.018
|
| 200 |
+
type train | step 2000 | loss 0.0257 0.0711 0.2736 0.4300 0.8209 | lr 9.3e-04 | norm 0.1911 | dt 0.021
|
| 201 |
+
type train | step 2010 | loss 0.0261 0.0719 0.2752 0.4366 0.8279 | lr 9.2e-04 | norm 0.1168 | dt 0.018
|
| 202 |
+
type train | step 2020 | loss 0.0259 0.0717 0.2731 0.4337 0.8267 | lr 9.2e-04 | norm 0.1561 | dt 0.018
|
| 203 |
+
type train | step 2030 | loss 0.0262 0.0725 0.2769 0.4453 0.8354 | lr 9.2e-04 | norm 0.1745 | dt 0.018
|
| 204 |
+
type train | step 2040 | loss 0.0261 0.0719 0.2722 0.4365 0.8273 | lr 9.2e-04 | norm 0.2309 | dt 0.018
|
| 205 |
+
type train | step 2050 | loss 0.0259 0.0710 0.2727 0.4348 0.8202 | lr 9.2e-04 | norm 0.0933 | dt 0.018
|
| 206 |
+
type train | step 2060 | loss 0.0256 0.0704 0.2724 0.4305 0.8182 | lr 9.2e-04 | norm 0.1374 | dt 0.018
|
| 207 |
+
type train | step 2070 | loss 0.0260 0.0717 0.2776 0.4388 0.8282 | lr 9.2e-04 | norm 0.0903 | dt 0.018
|
| 208 |
+
type train | step 2080 | loss 0.0259 0.0712 0.2713 0.4308 0.8239 | lr 9.2e-04 | norm 0.0952 | dt 0.018
|
| 209 |
+
type train | step 2090 | loss 0.0259 0.0708 0.2725 0.4316 0.8219 | lr 9.2e-04 | norm 0.1069 | dt 0.018
|
| 210 |
+
type train | step 2100 | loss 0.0259 0.0715 0.2716 0.4326 0.8196 | lr 9.1e-04 | norm 0.1125 | dt 0.018
|
| 211 |
+
type train | step 2110 | loss 0.0261 0.0717 0.2733 0.4389 0.8321 | lr 9.1e-04 | norm 0.1456 | dt 0.018
|
| 212 |
+
type train | step 2120 | loss 0.0257 0.0710 0.2710 0.4294 0.8225 | lr 9.1e-04 | norm 0.1261 | dt 0.018
|
| 213 |
+
type train | step 2130 | loss 0.0258 0.0712 0.2768 0.4353 0.8269 | lr 9.1e-04 | norm 0.1233 | dt 0.018
|
| 214 |
+
type train | step 2140 | loss 0.0261 0.0717 0.2722 0.4352 0.8298 | lr 9.1e-04 | norm 0.2011 | dt 0.018
|
| 215 |
+
type train | step 2150 | loss 0.0259 0.0712 0.2709 0.4328 0.8267 | lr 9.1e-04 | norm 0.1329 | dt 0.018
|
| 216 |
+
type train | step 2160 | loss 0.0258 0.0710 0.2733 0.4365 0.8233 | lr 9.1e-04 | norm 0.1517 | dt 0.019
|
| 217 |
+
type train | step 2170 | loss 0.0256 0.0704 0.2711 0.4333 0.8209 | lr 9.1e-04 | norm 0.0977 | dt 0.018
|
| 218 |
+
type train | step 2180 | loss 0.0258 0.0709 0.2725 0.4325 0.8250 | lr 9.0e-04 | norm 0.1066 | dt 0.018
|
| 219 |
+
type train | step 2190 | loss 0.0258 0.0708 0.2692 0.4273 0.8182 | lr 9.0e-04 | norm 0.1150 | dt 0.018
|
| 220 |
+
type train | step 2200 | loss 0.0257 0.0707 0.2728 0.4333 0.8250 | lr 9.0e-04 | norm 0.0986 | dt 0.018
|
| 221 |
+
type train | step 2210 | loss 0.0261 0.0715 0.2735 0.4421 0.8348 | lr 9.0e-04 | norm 0.1336 | dt 0.018
|
| 222 |
+
type train | step 2220 | loss 0.0257 0.0702 0.2721 0.4322 0.8190 | lr 9.0e-04 | norm 0.1991 | dt 0.018
|
| 223 |
+
type train | step 2230 | loss 0.0256 0.0711 0.2694 0.4299 0.8182 | lr 9.0e-04 | norm 0.1596 | dt 0.018
|
| 224 |
+
type train | step 2240 | loss 0.0258 0.0706 0.2722 0.4333 0.8215 | lr 9.0e-04 | norm 0.1374 | dt 0.018
|
| 225 |
+
type train | step 2250 | loss 0.0257 0.0710 0.2714 0.4325 0.8265 | lr 8.9e-04 | norm 0.1457 | dt 0.018
|
| 226 |
+
type train | step 2260 | loss 0.0257 0.0706 0.2740 0.4365 0.8260 | lr 8.9e-04 | norm 0.1279 | dt 0.018
|
| 227 |
+
type train | step 2270 | loss 0.0256 0.0704 0.2693 0.4296 0.8209 | lr 8.9e-04 | norm 0.1127 | dt 0.018
|
| 228 |
+
type train | step 2280 | loss 0.0257 0.0708 0.2722 0.4344 0.8207 | lr 8.9e-04 | norm 0.0964 | dt 0.018
|
| 229 |
+
type train | step 2290 | loss 0.0256 0.0704 0.2667 0.4255 0.8140 | lr 8.9e-04 | norm 0.1023 | dt 0.018
|
| 230 |
+
type train | step 2300 | loss 0.0258 0.0706 0.2713 0.4341 0.8180 | lr 8.9e-04 | norm 0.0889 | dt 0.018
|
| 231 |
+
type train | step 2310 | loss 0.0257 0.0707 0.2716 0.4324 0.8248 | lr 8.9e-04 | norm 0.1102 | dt 0.018
|
| 232 |
+
type train | step 2320 | loss 0.0256 0.0705 0.2718 0.4347 0.8239 | lr 8.9e-04 | norm 0.1677 | dt 0.018
|
| 233 |
+
type train | step 2330 | loss 0.0257 0.0706 0.2713 0.4341 0.8219 | lr 8.8e-04 | norm 0.1642 | dt 0.018
|
| 234 |
+
type train | step 2340 | loss 0.0256 0.0702 0.2724 0.4355 0.8213 | lr 8.8e-04 | norm 0.1954 | dt 0.019
|
| 235 |
+
type train | step 2350 | loss 0.0255 0.0702 0.2710 0.4366 0.8197 | lr 8.8e-04 | norm 0.1230 | dt 0.018
|
| 236 |
+
type train | step 2360 | loss 0.0260 0.0707 0.2723 0.4376 0.8244 | lr 8.8e-04 | norm 0.0928 | dt 0.018
|
| 237 |
+
type train | step 2370 | loss 0.0254 0.0697 0.2695 0.4282 0.8173 | lr 8.8e-04 | norm 0.1077 | dt 0.018
|
| 238 |
+
type train | step 2380 | loss 0.0254 0.0700 0.2731 0.4355 0.8220 | lr 8.8e-04 | norm 0.1314 | dt 0.018
|
| 239 |
+
type train | step 2390 | loss 0.0258 0.0708 0.2713 0.4353 0.8243 | lr 8.8e-04 | norm 0.1653 | dt 0.018
|
| 240 |
+
type train | step 2400 | loss 0.0256 0.0701 0.2725 0.4392 0.8249 | lr 8.7e-04 | norm 0.0943 | dt 0.018
|
| 241 |
+
type train | step 2410 | loss 0.0254 0.0706 0.2693 0.4301 0.8216 | lr 8.7e-04 | norm 0.2082 | dt 0.019
|
| 242 |
+
type train | step 2420 | loss 0.0257 0.0702 0.2712 0.4346 0.8185 | lr 8.7e-04 | norm 0.1128 | dt 0.018
|
| 243 |
+
type train | step 2430 | loss 0.0253 0.0693 0.2694 0.4266 0.8144 | lr 8.7e-04 | norm 0.1954 | dt 0.018
|
| 244 |
+
type train | step 2440 | loss 0.0254 0.0700 0.2708 0.4343 0.8206 | lr 8.7e-04 | norm 0.1513 | dt 0.021
|
| 245 |
+
type train | step 2450 | loss 0.0254 0.0697 0.2708 0.4302 0.8177 | lr 8.7e-04 | norm 0.1328 | dt 0.018
|
| 246 |
+
type train | step 2460 | loss 0.0254 0.0697 0.2723 0.4347 0.8225 | lr 8.6e-04 | norm 0.1179 | dt 0.018
|
| 247 |
+
type train | step 2470 | loss 0.0253 0.0697 0.2669 0.4281 0.8157 | lr 8.6e-04 | norm 0.1201 | dt 0.020
|
| 248 |
+
type train | step 2480 | loss 0.0259 0.0711 0.2724 0.4425 0.8299 | lr 8.6e-04 | norm 0.1326 | dt 0.020
|
| 249 |
+
type train | step 2490 | loss 0.0253 0.0697 0.2699 0.4250 0.8149 | lr 8.6e-04 | norm 0.1458 | dt 0.018
|
| 250 |
+
type train | step 2500 | loss 0.0251 0.0694 0.2685 0.4272 0.8147 | lr 8.6e-04 | norm 0.0996 | dt 0.018
|
| 251 |
+
type train | step 2510 | loss 0.0253 0.0694 0.2688 0.4271 0.8143 | lr 8.6e-04 | norm 0.1289 | dt 0.018
|
| 252 |
+
type train | step 2520 | loss 0.0255 0.0704 0.2734 0.4424 0.8297 | lr 8.6e-04 | norm 0.1029 | dt 0.018
|
| 253 |
+
type train | step 2530 | loss 0.0253 0.0698 0.2680 0.4299 0.8194 | lr 8.5e-04 | norm 0.2135 | dt 0.018
|
| 254 |
+
type train | step 2540 | loss 0.0253 0.0691 0.2661 0.4239 0.8094 | lr 8.5e-04 | norm 0.2041 | dt 0.018
|
| 255 |
+
type train | step 2550 | loss 0.0254 0.0699 0.2695 0.4276 0.8163 | lr 8.5e-04 | norm 0.1220 | dt 0.018
|
| 256 |
+
type train | step 2560 | loss 0.0256 0.0705 0.2708 0.4291 0.8204 | lr 8.5e-04 | norm 0.0921 | dt 0.018
|
| 257 |
+
type train | step 2570 | loss 0.0254 0.0697 0.2687 0.4292 0.8179 | lr 8.5e-04 | norm 0.0840 | dt 0.018
|
| 258 |
+
type train | step 2580 | loss 0.0259 0.0715 0.2748 0.4401 0.8264 | lr 8.5e-04 | norm 0.1731 | dt 0.018
|
| 259 |
+
type train | step 2590 | loss 0.0254 0.0702 0.2698 0.4324 0.8243 | lr 8.4e-04 | norm 0.1832 | dt 0.018
|
| 260 |
+
type train | step 2600 | loss 0.0254 0.0696 0.2693 0.4300 0.8172 | lr 8.4e-04 | norm 0.0977 | dt 0.018
|
| 261 |
+
type train | step 2610 | loss 0.0251 0.0694 0.2696 0.4247 0.8125 | lr 8.4e-04 | norm 0.1714 | dt 0.025
|
| 262 |
+
type train | step 2620 | loss 0.0255 0.0700 0.2712 0.4314 0.8195 | lr 8.4e-04 | norm 0.0990 | dt 0.025
|
| 263 |
+
type train | step 2630 | loss 0.0252 0.0697 0.2691 0.4286 0.8192 | lr 8.4e-04 | norm 0.1553 | dt 0.019
|
| 264 |
+
type train | step 2640 | loss 0.0256 0.0705 0.2728 0.4396 0.8272 | lr 8.4e-04 | norm 0.0845 | dt 0.018
|
| 265 |
+
type train | step 2650 | loss 0.0255 0.0701 0.2681 0.4313 0.8192 | lr 8.4e-04 | norm 0.1207 | dt 0.018
|
| 266 |
+
type train | step 2660 | loss 0.0253 0.0692 0.2687 0.4297 0.8127 | lr 8.3e-04 | norm 0.1622 | dt 0.018
|
| 267 |
+
type train | step 2670 | loss 0.0250 0.0687 0.2685 0.4257 0.8105 | lr 8.3e-04 | norm 0.1102 | dt 0.018
|
| 268 |
+
type train | step 2680 | loss 0.0253 0.0700 0.2736 0.4340 0.8206 | lr 8.3e-04 | norm 0.0987 | dt 0.018
|
| 269 |
+
type train | step 2690 | loss 0.0253 0.0695 0.2677 0.4261 0.8162 | lr 8.3e-04 | norm 0.0765 | dt 0.018
|
| 270 |
+
type train | step 2700 | loss 0.0253 0.0691 0.2687 0.4267 0.8141 | lr 8.3e-04 | norm 0.1273 | dt 0.018
|
| 271 |
+
type train | step 2710 | loss 0.0254 0.0698 0.2678 0.4277 0.8127 | lr 8.3e-04 | norm 0.2020 | dt 0.018
|
| 272 |
+
type train | step 2720 | loss 0.0255 0.0700 0.2694 0.4336 0.8249 | lr 8.2e-04 | norm 0.1021 | dt 0.018
|
| 273 |
+
type train | step 2730 | loss 0.0252 0.0693 0.2674 0.4244 0.8149 | lr 8.2e-04 | norm 0.1465 | dt 0.018
|
| 274 |
+
type train | step 2740 | loss 0.0252 0.0695 0.2732 0.4307 0.8195 | lr 8.2e-04 | norm 0.1198 | dt 0.018
|
| 275 |
+
type train | step 2750 | loss 0.0256 0.0700 0.2687 0.4306 0.8225 | lr 8.2e-04 | norm 0.1885 | dt 0.018
|
| 276 |
+
type train | step 2760 | loss 0.0253 0.0695 0.2675 0.4282 0.8197 | lr 8.2e-04 | norm 0.1270 | dt 0.018
|
| 277 |
+
type train | step 2770 | loss 0.0253 0.0694 0.2699 0.4321 0.8163 | lr 8.2e-04 | norm 0.1093 | dt 0.018
|
| 278 |
+
type train | step 2780 | loss 0.0251 0.0689 0.2678 0.4290 0.8147 | lr 8.1e-04 | norm 0.1623 | dt 0.018
|
| 279 |
+
type train | step 2790 | loss 0.0252 0.0693 0.2690 0.4278 0.8180 | lr 8.1e-04 | norm 0.1654 | dt 0.018
|
| 280 |
+
type train | step 2800 | loss 0.0252 0.0692 0.2658 0.4229 0.8114 | lr 8.1e-04 | norm 0.0865 | dt 0.018
|
| 281 |
+
type train | step 2810 | loss 0.0252 0.0692 0.2693 0.4290 0.8180 | lr 8.1e-04 | norm 0.0871 | dt 0.018
|
| 282 |
+
type train | step 2820 | loss 0.0255 0.0700 0.2699 0.4373 0.8281 | lr 8.1e-04 | norm 0.1485 | dt 0.018
|
| 283 |
+
type train | step 2830 | loss 0.0251 0.0687 0.2688 0.4283 0.8128 | lr 8.1e-04 | norm 0.1920 | dt 0.018
|
| 284 |
+
type train | step 2840 | loss 0.0251 0.0696 0.2662 0.4256 0.8118 | lr 8.0e-04 | norm 0.0801 | dt 0.018
|
| 285 |
+
type train | step 2850 | loss 0.0253 0.0691 0.2689 0.4290 0.8150 | lr 8.0e-04 | norm 0.1063 | dt 0.019
|
| 286 |
+
type train | step 2860 | loss 0.0252 0.0695 0.2683 0.4283 0.8195 | lr 8.0e-04 | norm 0.1148 | dt 0.019
|
| 287 |
+
type train | step 2870 | loss 0.0251 0.0692 0.2709 0.4324 0.8198 | lr 8.0e-04 | norm 0.1824 | dt 0.019
|
| 288 |
+
type train | step 2880 | loss 0.0251 0.0690 0.2661 0.4251 0.8146 | lr 8.0e-04 | norm 0.1378 | dt 0.018
|
| 289 |
+
type train | step 2890 | loss 0.0252 0.0693 0.2692 0.4302 0.8149 | lr 7.9e-04 | norm 0.1870 | dt 0.018
|
| 290 |
+
type train | step 2900 | loss 0.0251 0.0690 0.2639 0.4215 0.8079 | lr 7.9e-04 | norm 0.0900 | dt 0.018
|
| 291 |
+
type train | step 2910 | loss 0.0254 0.0692 0.2682 0.4302 0.8127 | lr 7.9e-04 | norm 0.1197 | dt 0.018
|
| 292 |
+
type train | step 2920 | loss 0.0252 0.0692 0.2685 0.4285 0.8189 | lr 7.9e-04 | norm 0.1070 | dt 0.018
|
| 293 |
+
type train | step 2930 | loss 0.0251 0.0691 0.2689 0.4311 0.8179 | lr 7.9e-04 | norm 0.1005 | dt 0.018
|
| 294 |
+
type train | step 2940 | loss 0.0252 0.0692 0.2683 0.4299 0.8159 | lr 7.9e-04 | norm 0.1133 | dt 0.018
|
| 295 |
+
type train | step 2950 | loss 0.0251 0.0689 0.2695 0.4317 0.8155 | lr 7.8e-04 | norm 0.0951 | dt 0.018
|
| 296 |
+
type train | step 2960 | loss 0.0251 0.0689 0.2681 0.4329 0.8143 | lr 7.8e-04 | norm 0.1428 | dt 0.018
|
| 297 |
+
type train | step 2970 | loss 0.0255 0.0693 0.2694 0.4338 0.8191 | lr 7.8e-04 | norm 0.0981 | dt 0.018
|
| 298 |
+
type train | step 2980 | loss 0.0250 0.0684 0.2667 0.4244 0.8117 | lr 7.8e-04 | norm 0.1399 | dt 0.018
|
| 299 |
+
type train | step 2990 | loss 0.0250 0.0687 0.2702 0.4317 0.8164 | lr 7.8e-04 | norm 0.1413 | dt 0.018
|
| 300 |
+
type train | step 3000 | loss 0.0254 0.0694 0.2683 0.4312 0.8183 | lr 7.8e-04 | norm 0.1084 | dt 0.018
|
| 301 |
+
type train | step 3010 | loss 0.0251 0.0688 0.2697 0.4354 0.8198 | lr 7.7e-04 | norm 0.1497 | dt 0.018
|
| 302 |
+
type train | step 3020 | loss 0.0250 0.0694 0.2668 0.4265 0.8164 | lr 7.7e-04 | norm 0.2567 | dt 0.018
|
| 303 |
+
type train | step 3030 | loss 0.0253 0.0689 0.2683 0.4309 0.8134 | lr 7.7e-04 | norm 0.1822 | dt 0.018
|
| 304 |
+
type train | step 3040 | loss 0.0249 0.0680 0.2669 0.4230 0.8088 | lr 7.7e-04 | norm 0.0975 | dt 0.018
|
| 305 |
+
type train | step 3050 | loss 0.0250 0.0687 0.2684 0.4307 0.8152 | lr 7.7e-04 | norm 0.0959 | dt 0.020
|
| 306 |
+
type train | step 3060 | loss 0.0251 0.0685 0.2681 0.4268 0.8120 | lr 7.6e-04 | norm 0.0814 | dt 0.018
|
| 307 |
+
type train | step 3070 | loss 0.0250 0.0684 0.2696 0.4311 0.8172 | lr 7.6e-04 | norm 0.1092 | dt 0.018
|
| 308 |
+
type train | step 3080 | loss 0.0249 0.0685 0.2644 0.4247 0.8105 | lr 7.6e-04 | norm 0.1025 | dt 0.018
|
| 309 |
+
type train | step 3090 | loss 0.0255 0.0697 0.2697 0.4387 0.8247 | lr 7.6e-04 | norm 0.1004 | dt 0.019
|
| 310 |
+
type train | step 3100 | loss 0.0249 0.0685 0.2675 0.4216 0.8094 | lr 7.6e-04 | norm 0.1086 | dt 0.018
|
| 311 |
+
type train | step 3110 | loss 0.0248 0.0683 0.2662 0.4238 0.8098 | lr 7.5e-04 | norm 0.1094 | dt 0.018
|
| 312 |
+
type train | step 3120 | loss 0.0249 0.0682 0.2664 0.4241 0.8094 | lr 7.5e-04 | norm 0.1350 | dt 0.018
|
| 313 |
+
type train | step 3130 | loss 0.0252 0.0692 0.2709 0.4389 0.8249 | lr 7.5e-04 | norm 0.1336 | dt 0.018
|
| 314 |
+
type train | step 3140 | loss 0.0249 0.0687 0.2655 0.4264 0.8141 | lr 7.5e-04 | norm 0.1558 | dt 0.018
|
| 315 |
+
type train | step 3150 | loss 0.0249 0.0680 0.2638 0.4205 0.8044 | lr 7.5e-04 | norm 0.0772 | dt 0.018
|
| 316 |
+
type train | step 3160 | loss 0.0250 0.0687 0.2671 0.4243 0.8115 | lr 7.5e-04 | norm 0.1289 | dt 0.018
|
| 317 |
+
type train | step 3170 | loss 0.0252 0.0693 0.2684 0.4259 0.8152 | lr 7.4e-04 | norm 0.0840 | dt 0.018
|
| 318 |
+
type train | step 3180 | loss 0.0251 0.0685 0.2664 0.4261 0.8131 | lr 7.4e-04 | norm 0.1025 | dt 0.018
|
| 319 |
+
type train | step 3190 | loss 0.0256 0.0703 0.2722 0.4365 0.8212 | lr 7.4e-04 | norm 0.1584 | dt 0.018
|
| 320 |
+
type train | step 3200 | loss 0.0251 0.0690 0.2676 0.4292 0.8194 | lr 7.4e-04 | norm 0.1608 | dt 0.018
|
| 321 |
+
type train | step 3210 | loss 0.0251 0.0684 0.2669 0.4267 0.8126 | lr 7.4e-04 | norm 0.0853 | dt 0.018
|
| 322 |
+
type train | step 3220 | loss 0.0247 0.0681 0.2674 0.4215 0.8075 | lr 7.3e-04 | norm 0.1150 | dt 0.018
|
| 323 |
+
type train | step 3230 | loss 0.0252 0.0689 0.2688 0.4283 0.8148 | lr 7.3e-04 | norm 0.0977 | dt 0.018
|
| 324 |
+
type train | step 3240 | loss 0.0249 0.0686 0.2670 0.4258 0.8146 | lr 7.3e-04 | norm 0.1344 | dt 0.018
|
| 325 |
+
type train | step 3250 | loss 0.0253 0.0695 0.2706 0.4365 0.8225 | lr 7.3e-04 | norm 0.0960 | dt 0.018
|
| 326 |
+
type train | step 3260 | loss 0.0252 0.0690 0.2660 0.4282 0.8148 | lr 7.3e-04 | norm 0.1764 | dt 0.018
|
| 327 |
+
type train | step 3270 | loss 0.0249 0.0682 0.2666 0.4266 0.8082 | lr 7.2e-04 | norm 0.1259 | dt 0.018
|
| 328 |
+
type train | step 3280 | loss 0.0247 0.0677 0.2664 0.4227 0.8059 | lr 7.2e-04 | norm 0.1430 | dt 0.018
|
| 329 |
+
type train | step 3290 | loss 0.0250 0.0689 0.2714 0.4310 0.8160 | lr 7.2e-04 | norm 0.0881 | dt 0.018
|
| 330 |
+
type train | step 3300 | loss 0.0250 0.0684 0.2656 0.4233 0.8116 | lr 7.2e-04 | norm 0.0803 | dt 0.018
|
| 331 |
+
type train | step 3310 | loss 0.0250 0.0681 0.2666 0.4239 0.8096 | lr 7.2e-04 | norm 0.1290 | dt 0.018
|
| 332 |
+
type train | step 3320 | loss 0.0250 0.0688 0.2658 0.4249 0.8081 | lr 7.1e-04 | norm 0.1021 | dt 0.018
|
| 333 |
+
type train | step 3330 | loss 0.0252 0.0689 0.2671 0.4307 0.8203 | lr 7.1e-04 | norm 0.1142 | dt 0.018
|
| 334 |
+
type train | step 3340 | loss 0.0249 0.0684 0.2652 0.4215 0.8101 | lr 7.1e-04 | norm 0.1542 | dt 0.018
|
| 335 |
+
type train | step 3350 | loss 0.0249 0.0686 0.2710 0.4278 0.8150 | lr 7.1e-04 | norm 0.1147 | dt 0.018
|
| 336 |
+
type train | step 3360 | loss 0.0253 0.0689 0.2666 0.4278 0.8180 | lr 7.1e-04 | norm 0.1288 | dt 0.018
|
| 337 |
+
type train | step 3370 | loss 0.0250 0.0686 0.2655 0.4254 0.8155 | lr 7.0e-04 | norm 0.1106 | dt 0.018
|
| 338 |
+
type train | step 3380 | loss 0.0250 0.0684 0.2678 0.4293 0.8125 | lr 7.0e-04 | norm 0.1442 | dt 0.018
|
| 339 |
+
type train | step 3390 | loss 0.0248 0.0679 0.2659 0.4265 0.8106 | lr 7.0e-04 | norm 0.1856 | dt 0.018
|
| 340 |
+
type train | step 3400 | loss 0.0250 0.0682 0.2670 0.4249 0.8136 | lr 7.0e-04 | norm 0.1398 | dt 0.018
|
| 341 |
+
type train | step 3410 | loss 0.0249 0.0682 0.2638 0.4202 0.8072 | lr 7.0e-04 | norm 0.1031 | dt 0.018
|
| 342 |
+
type train | step 3420 | loss 0.0249 0.0682 0.2673 0.4263 0.8137 | lr 7.0e-04 | norm 0.0992 | dt 0.019
|
| 343 |
+
type train | step 3430 | loss 0.0252 0.0690 0.2678 0.4344 0.8236 | lr 6.9e-04 | norm 0.1117 | dt 0.019
|
| 344 |
+
type train | step 3440 | loss 0.0249 0.0678 0.2670 0.4259 0.8088 | lr 6.9e-04 | norm 0.1669 | dt 0.019
|
| 345 |
+
type train | step 3450 | loss 0.0248 0.0687 0.2644 0.4230 0.8080 | lr 6.9e-04 | norm 0.0955 | dt 0.020
|
| 346 |
+
type train | step 3460 | loss 0.0250 0.0683 0.2670 0.4265 0.8110 | lr 6.9e-04 | norm 0.1354 | dt 0.018
|
| 347 |
+
type train | step 3470 | loss 0.0250 0.0686 0.2664 0.4258 0.8152 | lr 6.9e-04 | norm 0.1174 | dt 0.018
|
| 348 |
+
type train | step 3480 | loss 0.0249 0.0682 0.2690 0.4299 0.8156 | lr 6.8e-04 | norm 0.1128 | dt 0.018
|
| 349 |
+
type train | step 3490 | loss 0.0249 0.0681 0.2643 0.4224 0.8107 | lr 6.8e-04 | norm 0.1033 | dt 0.018
|
| 350 |
+
type train | step 3500 | loss 0.0250 0.0684 0.2674 0.4277 0.8109 | lr 6.8e-04 | norm 0.0960 | dt 0.019
|
| 351 |
+
type train | step 3510 | loss 0.0248 0.0681 0.2621 0.4189 0.8043 | lr 6.8e-04 | norm 0.1435 | dt 0.018
|
| 352 |
+
type train | step 3520 | loss 0.0251 0.0683 0.2664 0.4279 0.8093 | lr 6.8e-04 | norm 0.1367 | dt 0.018
|
| 353 |
+
type train | step 3530 | loss 0.0250 0.0683 0.2666 0.4262 0.8149 | lr 6.7e-04 | norm 0.1138 | dt 0.018
|
| 354 |
+
type train | step 3540 | loss 0.0249 0.0682 0.2670 0.4288 0.8139 | lr 6.7e-04 | norm 0.0875 | dt 0.018
|
| 355 |
+
type train | step 3550 | loss 0.0250 0.0683 0.2666 0.4273 0.8120 | lr 6.7e-04 | norm 0.1535 | dt 0.018
|
| 356 |
+
type train | step 3560 | loss 0.0249 0.0681 0.2677 0.4294 0.8120 | lr 6.7e-04 | norm 0.1111 | dt 0.018
|
| 357 |
+
type train | step 3570 | loss 0.0248 0.0680 0.2665 0.4306 0.8107 | lr 6.6e-04 | norm 0.0973 | dt 0.018
|
| 358 |
+
type train | step 3580 | loss 0.0252 0.0684 0.2677 0.4316 0.8157 | lr 6.6e-04 | norm 0.1057 | dt 0.018
|
| 359 |
+
type train | step 3590 | loss 0.0247 0.0675 0.2650 0.4222 0.8078 | lr 6.6e-04 | norm 0.0912 | dt 0.019
|
| 360 |
+
type train | step 3600 | loss 0.0248 0.0678 0.2684 0.4293 0.8126 | lr 6.6e-04 | norm 0.1332 | dt 0.018
|
| 361 |
+
type train | step 3610 | loss 0.0251 0.0685 0.2666 0.4286 0.8143 | lr 6.6e-04 | norm 0.1239 | dt 0.018
|
| 362 |
+
type train | step 3620 | loss 0.0249 0.0680 0.2680 0.4331 0.8162 | lr 6.5e-04 | norm 0.1015 | dt 0.018
|
| 363 |
+
type train | step 3630 | loss 0.0247 0.0685 0.2652 0.4243 0.8125 | lr 6.5e-04 | norm 0.1435 | dt 0.018
|
| 364 |
+
type train | step 3640 | loss 0.0250 0.0681 0.2668 0.4288 0.8101 | lr 6.5e-04 | norm 0.1151 | dt 0.018
|
| 365 |
+
type train | step 3650 | loss 0.0246 0.0672 0.2653 0.4211 0.8053 | lr 6.5e-04 | norm 0.1028 | dt 0.018
|
| 366 |
+
type train | step 3660 | loss 0.0248 0.0678 0.2668 0.4285 0.8122 | lr 6.5e-04 | norm 0.0916 | dt 0.021
|
| 367 |
+
type train | step 3670 | loss 0.0248 0.0677 0.2665 0.4245 0.8088 | lr 6.4e-04 | norm 0.1152 | dt 0.018
|
| 368 |
+
type train | step 3680 | loss 0.0248 0.0677 0.2679 0.4290 0.8140 | lr 6.4e-04 | norm 0.1248 | dt 0.018
|
| 369 |
+
type train | step 3690 | loss 0.0247 0.0678 0.2629 0.4228 0.8072 | lr 6.4e-04 | norm 0.1256 | dt 0.018
|
| 370 |
+
type train | step 3700 | loss 0.0252 0.0689 0.2682 0.4366 0.8214 | lr 6.4e-04 | norm 0.1068 | dt 0.018
|
| 371 |
+
type train | step 3710 | loss 0.0247 0.0677 0.2659 0.4197 0.8061 | lr 6.4e-04 | norm 0.0995 | dt 0.018
|
| 372 |
+
type train | step 3720 | loss 0.0246 0.0675 0.2647 0.4218 0.8066 | lr 6.3e-04 | norm 0.1095 | dt 0.018
|
| 373 |
+
type train | step 3730 | loss 0.0247 0.0675 0.2650 0.4223 0.8065 | lr 6.3e-04 | norm 0.2110 | dt 0.018
|
| 374 |
+
type train | step 3740 | loss 0.0250 0.0684 0.2694 0.4369 0.8217 | lr 6.3e-04 | norm 0.1188 | dt 0.018
|
| 375 |
+
type train | step 3750 | loss 0.0247 0.0679 0.2640 0.4243 0.8107 | lr 6.3e-04 | norm 0.1007 | dt 0.019
|
| 376 |
+
type train | step 3760 | loss 0.0247 0.0673 0.2624 0.4186 0.8014 | lr 6.3e-04 | norm 0.0949 | dt 0.018
|
| 377 |
+
type train | step 3770 | loss 0.0248 0.0680 0.2656 0.4224 0.8082 | lr 6.2e-04 | norm 0.0986 | dt 0.018
|
| 378 |
+
type train | step 3780 | loss 0.0250 0.0686 0.2668 0.4239 0.8119 | lr 6.2e-04 | norm 0.0794 | dt 0.019
|
| 379 |
+
type train | step 3790 | loss 0.0249 0.0678 0.2650 0.4244 0.8099 | lr 6.2e-04 | norm 0.0873 | dt 0.018
|
| 380 |
+
type train | step 3800 | loss 0.0253 0.0694 0.2704 0.4344 0.8176 | lr 6.2e-04 | norm 0.1139 | dt 0.018
|
| 381 |
+
type train | step 3810 | loss 0.0249 0.0683 0.2662 0.4272 0.8162 | lr 6.2e-04 | norm 0.1321 | dt 0.018
|
| 382 |
+
type train | step 3820 | loss 0.0249 0.0677 0.2655 0.4249 0.8096 | lr 6.1e-04 | norm 0.1207 | dt 0.018
|
| 383 |
+
type train | step 3830 | loss 0.0245 0.0674 0.2659 0.4196 0.8044 | lr 6.1e-04 | norm 0.1425 | dt 0.018
|
| 384 |
+
type train | step 3840 | loss 0.0249 0.0681 0.2673 0.4265 0.8117 | lr 6.1e-04 | norm 0.0868 | dt 0.018
|
| 385 |
+
type train | step 3850 | loss 0.0247 0.0679 0.2657 0.4240 0.8117 | lr 6.1e-04 | norm 0.1476 | dt 0.018
|
| 386 |
+
type train | step 3860 | loss 0.0251 0.0687 0.2691 0.4346 0.8194 | lr 6.1e-04 | norm 0.1355 | dt 0.018
|
| 387 |
+
type train | step 3870 | loss 0.0250 0.0683 0.2646 0.4265 0.8118 | lr 6.0e-04 | norm 0.0915 | dt 0.018
|
| 388 |
+
type train | step 3880 | loss 0.0247 0.0674 0.2652 0.4248 0.8052 | lr 6.0e-04 | norm 0.0786 | dt 0.018
|
| 389 |
+
type train | step 3890 | loss 0.0246 0.0670 0.2650 0.4208 0.8027 | lr 6.0e-04 | norm 0.0845 | dt 0.018
|
| 390 |
+
type train | step 3900 | loss 0.0248 0.0682 0.2699 0.4291 0.8130 | lr 6.0e-04 | norm 0.0866 | dt 0.018
|
| 391 |
+
type train | step 3910 | loss 0.0248 0.0677 0.2642 0.4215 0.8087 | lr 5.9e-04 | norm 0.0762 | dt 0.018
|
| 392 |
+
type train | step 3920 | loss 0.0248 0.0673 0.2653 0.4222 0.8066 | lr 5.9e-04 | norm 0.1002 | dt 0.018
|
| 393 |
+
type train | step 3930 | loss 0.0249 0.0681 0.2645 0.4232 0.8054 | lr 5.9e-04 | norm 0.0887 | dt 0.018
|
| 394 |
+
type train | step 3940 | loss 0.0250 0.0682 0.2657 0.4289 0.8174 | lr 5.9e-04 | norm 0.1069 | dt 0.018
|
| 395 |
+
type train | step 3950 | loss 0.0247 0.0677 0.2637 0.4197 0.8069 | lr 5.9e-04 | norm 0.1500 | dt 0.018
|
| 396 |
+
type train | step 3960 | loss 0.0248 0.0679 0.2696 0.4260 0.8121 | lr 5.8e-04 | norm 0.1103 | dt 0.018
|
| 397 |
+
type train | step 3970 | loss 0.0251 0.0682 0.2652 0.4260 0.8150 | lr 5.8e-04 | norm 0.1289 | dt 0.018
|
| 398 |
+
type train | step 3980 | loss 0.0249 0.0679 0.2642 0.4235 0.8125 | lr 5.8e-04 | norm 0.1020 | dt 0.018
|
| 399 |
+
type train | step 3990 | loss 0.0248 0.0677 0.2666 0.4276 0.8098 | lr 5.8e-04 | norm 0.0951 | dt 0.018
|
| 400 |
+
type train | step 4000 | loss 0.0246 0.0672 0.2647 0.4248 0.8077 | lr 5.8e-04 | norm 0.0750 | dt 0.018
|
| 401 |
+
type train | step 4010 | loss 0.0248 0.0676 0.2656 0.4231 0.8107 | lr 5.7e-04 | norm 0.0877 | dt 0.018
|
| 402 |
+
type train | step 4020 | loss 0.0248 0.0676 0.2625 0.4184 0.8044 | lr 5.7e-04 | norm 0.0742 | dt 0.019
|
| 403 |
+
type train | step 4030 | loss 0.0247 0.0675 0.2659 0.4245 0.8109 | lr 5.7e-04 | norm 0.0814 | dt 0.018
|
| 404 |
+
type train | step 4040 | loss 0.0251 0.0683 0.2665 0.4325 0.8205 | lr 5.7e-04 | norm 0.1041 | dt 0.018
|
| 405 |
+
type train | step 4050 | loss 0.0247 0.0672 0.2656 0.4243 0.8061 | lr 5.7e-04 | norm 0.0868 | dt 0.018
|
| 406 |
+
type train | step 4060 | loss 0.0246 0.0680 0.2632 0.4214 0.8054 | lr 5.6e-04 | norm 0.0861 | dt 0.018
|
| 407 |
+
type train | step 4070 | loss 0.0248 0.0676 0.2656 0.4250 0.8081 | lr 5.6e-04 | norm 0.0970 | dt 0.018
|
| 408 |
+
type train | step 4080 | loss 0.0248 0.0679 0.2652 0.4242 0.8123 | lr 5.6e-04 | norm 0.0984 | dt 0.020
|
| 409 |
+
type train | step 4090 | loss 0.0247 0.0676 0.2678 0.4282 0.8130 | lr 5.6e-04 | norm 0.1264 | dt 0.019
|
| 410 |
+
type train | step 4100 | loss 0.0247 0.0675 0.2631 0.4206 0.8082 | lr 5.6e-04 | norm 0.1411 | dt 0.032
|
| 411 |
+
type train | step 4110 | loss 0.0248 0.0677 0.2662 0.4260 0.8085 | lr 5.5e-04 | norm 0.1314 | dt 0.018
|
| 412 |
+
type train | step 4120 | loss 0.0247 0.0675 0.2611 0.4173 0.8016 | lr 5.5e-04 | norm 0.0841 | dt 0.018
|
| 413 |
+
type train | step 4130 | loss 0.0249 0.0676 0.2653 0.4266 0.8068 | lr 5.5e-04 | norm 0.1351 | dt 0.018
|
| 414 |
+
type train | step 4140 | loss 0.0248 0.0677 0.2655 0.4246 0.8124 | lr 5.5e-04 | norm 0.1007 | dt 0.018
|
| 415 |
+
type train | step 4150 | loss 0.0247 0.0676 0.2658 0.4273 0.8114 | lr 5.4e-04 | norm 0.0805 | dt 0.018
|
| 416 |
+
type train | step 4160 | loss 0.0248 0.0677 0.2654 0.4256 0.8091 | lr 5.4e-04 | norm 0.1037 | dt 0.018
|
| 417 |
+
type train | step 4170 | loss 0.0247 0.0675 0.2666 0.4279 0.8095 | lr 5.4e-04 | norm 0.1050 | dt 0.018
|
| 418 |
+
type train | step 4180 | loss 0.0247 0.0674 0.2653 0.4292 0.8084 | lr 5.4e-04 | norm 0.1262 | dt 0.018
|
| 419 |
+
type train | step 4190 | loss 0.0251 0.0677 0.2667 0.4301 0.8133 | lr 5.4e-04 | norm 0.0800 | dt 0.020
|
| 420 |
+
type train | step 4200 | loss 0.0246 0.0669 0.2639 0.4208 0.8053 | lr 5.3e-04 | norm 0.0777 | dt 0.018
|
| 421 |
+
type train | step 4210 | loss 0.0246 0.0672 0.2672 0.4277 0.8100 | lr 5.3e-04 | norm 0.1233 | dt 0.018
|
| 422 |
+
type train | step 4220 | loss 0.0249 0.0679 0.2654 0.4267 0.8116 | lr 5.3e-04 | norm 0.1500 | dt 0.018
|
| 423 |
+
type train | step 4230 | loss 0.0248 0.0673 0.2670 0.4316 0.8138 | lr 5.3e-04 | norm 0.1110 | dt 0.018
|
| 424 |
+
type train | step 4240 | loss 0.0246 0.0679 0.2641 0.4228 0.8096 | lr 5.3e-04 | norm 0.0961 | dt 0.018
|
| 425 |
+
type train | step 4250 | loss 0.0249 0.0675 0.2658 0.4272 0.8078 | lr 5.2e-04 | norm 0.1170 | dt 0.018
|
| 426 |
+
type train | step 4260 | loss 0.0245 0.0666 0.2643 0.4196 0.8027 | lr 5.2e-04 | norm 0.0820 | dt 0.018
|
| 427 |
+
type train | step 4270 | loss 0.0247 0.0672 0.2658 0.4270 0.8099 | lr 5.2e-04 | norm 0.0795 | dt 0.020
|
| 428 |
+
type train | step 4280 | loss 0.0247 0.0672 0.2655 0.4229 0.8065 | lr 5.2e-04 | norm 0.1829 | dt 0.018
|
| 429 |
+
type train | step 4290 | loss 0.0246 0.0671 0.2667 0.4274 0.8116 | lr 5.2e-04 | norm 0.1070 | dt 0.018
|
| 430 |
+
type train | step 4300 | loss 0.0246 0.0673 0.2619 0.4213 0.8046 | lr 5.1e-04 | norm 0.1116 | dt 0.018
|
| 431 |
+
type train | step 4310 | loss 0.0251 0.0683 0.2671 0.4351 0.8189 | lr 5.1e-04 | norm 0.1031 | dt 0.018
|
| 432 |
+
type train | step 4320 | loss 0.0246 0.0671 0.2649 0.4181 0.8039 | lr 5.1e-04 | norm 0.0903 | dt 0.018
|
| 433 |
+
type train | step 4330 | loss 0.0244 0.0669 0.2637 0.4203 0.8043 | lr 5.1e-04 | norm 0.1022 | dt 0.018
|
| 434 |
+
type train | step 4340 | loss 0.0246 0.0669 0.2641 0.4211 0.8041 | lr 5.1e-04 | norm 0.2018 | dt 0.018
|
| 435 |
+
type train | step 4350 | loss 0.0248 0.0678 0.2684 0.4353 0.8193 | lr 5.0e-04 | norm 0.1131 | dt 0.018
|
| 436 |
+
type train | step 4360 | loss 0.0246 0.0674 0.2630 0.4227 0.8080 | lr 5.0e-04 | norm 0.0864 | dt 0.018
|
| 437 |
+
type train | step 4370 | loss 0.0245 0.0667 0.2614 0.4173 0.7991 | lr 5.0e-04 | norm 0.0848 | dt 0.018
|
| 438 |
+
type train | step 4380 | loss 0.0247 0.0675 0.2646 0.4210 0.8058 | lr 5.0e-04 | norm 0.0902 | dt 0.018
|
| 439 |
+
type train | step 4390 | loss 0.0249 0.0680 0.2658 0.4225 0.8096 | lr 4.9e-04 | norm 0.0826 | dt 0.018
|
| 440 |
+
type train | step 4400 | loss 0.0247 0.0673 0.2640 0.4231 0.8078 | lr 4.9e-04 | norm 0.1302 | dt 0.018
|
| 441 |
+
type train | step 4410 | loss 0.0252 0.0688 0.2691 0.4328 0.8152 | lr 4.9e-04 | norm 0.1419 | dt 0.018
|
| 442 |
+
type train | step 4420 | loss 0.0247 0.0678 0.2653 0.4257 0.8134 | lr 4.9e-04 | norm 0.0992 | dt 0.018
|
| 443 |
+
type train | step 4430 | loss 0.0247 0.0672 0.2645 0.4236 0.8073 | lr 4.9e-04 | norm 0.0933 | dt 0.018
|
| 444 |
+
type train | step 4440 | loss 0.0244 0.0668 0.2649 0.4183 0.8021 | lr 4.8e-04 | norm 0.1133 | dt 0.018
|
| 445 |
+
type train | step 4450 | loss 0.0248 0.0676 0.2663 0.4251 0.8095 | lr 4.8e-04 | norm 0.0796 | dt 0.018
|
| 446 |
+
type train | step 4460 | loss 0.0246 0.0674 0.2648 0.4226 0.8096 | lr 4.8e-04 | norm 0.1488 | dt 0.018
|
| 447 |
+
type train | step 4470 | loss 0.0249 0.0682 0.2681 0.4331 0.8170 | lr 4.8e-04 | norm 0.1116 | dt 0.018
|
| 448 |
+
type train | step 4480 | loss 0.0249 0.0678 0.2638 0.4252 0.8095 | lr 4.8e-04 | norm 0.0845 | dt 0.018
|
| 449 |
+
type train | step 4490 | loss 0.0246 0.0669 0.2643 0.4235 0.8029 | lr 4.7e-04 | norm 0.0780 | dt 0.018
|
| 450 |
+
type train | step 4500 | loss 0.0244 0.0665 0.2642 0.4194 0.8005 | lr 4.7e-04 | norm 0.1204 | dt 0.018
|
| 451 |
+
type train | step 4510 | loss 0.0247 0.0677 0.2689 0.4277 0.8109 | lr 4.7e-04 | norm 0.1049 | dt 0.018
|
| 452 |
+
type train | step 4520 | loss 0.0247 0.0672 0.2633 0.4201 0.8065 | lr 4.7e-04 | norm 0.0785 | dt 0.018
|
| 453 |
+
type train | step 4530 | loss 0.0247 0.0668 0.2643 0.4209 0.8045 | lr 4.7e-04 | norm 0.1488 | dt 0.018
|
| 454 |
+
type train | step 4540 | loss 0.0247 0.0676 0.2637 0.4219 0.8032 | lr 4.6e-04 | norm 0.0723 | dt 0.018
|
| 455 |
+
type train | step 4550 | loss 0.0249 0.0677 0.2648 0.4276 0.8150 | lr 4.6e-04 | norm 0.0864 | dt 0.018
|
| 456 |
+
type train | step 4560 | loss 0.0246 0.0671 0.2629 0.4184 0.8045 | lr 4.6e-04 | norm 0.1605 | dt 0.018
|
| 457 |
+
type train | step 4570 | loss 0.0246 0.0673 0.2687 0.4246 0.8099 | lr 4.6e-04 | norm 0.0874 | dt 0.018
|
| 458 |
+
type train | step 4580 | loss 0.0250 0.0676 0.2643 0.4246 0.8128 | lr 4.6e-04 | norm 0.1439 | dt 0.019
|
| 459 |
+
type train | step 4590 | loss 0.0247 0.0674 0.2633 0.4221 0.8102 | lr 4.5e-04 | norm 0.1090 | dt 0.018
|
| 460 |
+
type train | step 4600 | loss 0.0247 0.0672 0.2658 0.4263 0.8077 | lr 4.5e-04 | norm 0.0854 | dt 0.018
|
| 461 |
+
type train | step 4610 | loss 0.0245 0.0667 0.2638 0.4236 0.8056 | lr 4.5e-04 | norm 0.0693 | dt 0.018
|
| 462 |
+
type train | step 4620 | loss 0.0247 0.0671 0.2647 0.4217 0.8084 | lr 4.5e-04 | norm 0.0992 | dt 0.018
|
| 463 |
+
type train | step 4630 | loss 0.0247 0.0671 0.2617 0.4171 0.8023 | lr 4.5e-04 | norm 0.0688 | dt 0.018
|
| 464 |
+
type train | step 4640 | loss 0.0246 0.0671 0.2651 0.4232 0.8088 | lr 4.4e-04 | norm 0.0800 | dt 0.018
|
| 465 |
+
type train | step 4650 | loss 0.0249 0.0678 0.2656 0.4311 0.8182 | lr 4.4e-04 | norm 0.1399 | dt 0.018
|
| 466 |
+
type train | step 4660 | loss 0.0246 0.0667 0.2649 0.4231 0.8041 | lr 4.4e-04 | norm 0.0713 | dt 0.018
|
| 467 |
+
type train | step 4670 | loss 0.0245 0.0676 0.2625 0.4202 0.8033 | lr 4.4e-04 | norm 0.0737 | dt 0.018
|
| 468 |
+
type train | step 4680 | loss 0.0247 0.0671 0.2648 0.4238 0.8060 | lr 4.4e-04 | norm 0.1079 | dt 0.018
|
| 469 |
+
type train | step 4690 | loss 0.0247 0.0675 0.2644 0.4231 0.8102 | lr 4.3e-04 | norm 0.0948 | dt 0.018
|
| 470 |
+
type train | step 4700 | loss 0.0246 0.0671 0.2670 0.4268 0.8109 | lr 4.3e-04 | norm 0.1103 | dt 0.018
|
| 471 |
+
type train | step 4710 | loss 0.0246 0.0670 0.2623 0.4192 0.8060 | lr 4.3e-04 | norm 0.1020 | dt 0.018
|
| 472 |
+
type train | step 4720 | loss 0.0247 0.0673 0.2655 0.4247 0.8063 | lr 4.3e-04 | norm 0.0794 | dt 0.018
|
| 473 |
+
type train | step 4730 | loss 0.0246 0.0670 0.2605 0.4163 0.7996 | lr 4.2e-04 | norm 0.0718 | dt 0.018
|
| 474 |
+
type train | step 4740 | loss 0.0248 0.0671 0.2646 0.4256 0.8050 | lr 4.2e-04 | norm 0.0970 | dt 0.018
|
| 475 |
+
type train | step 4750 | loss 0.0247 0.0672 0.2647 0.4233 0.8104 | lr 4.2e-04 | norm 0.0694 | dt 0.018
|
| 476 |
+
type train | step 4760 | loss 0.0246 0.0671 0.2650 0.4261 0.8096 | lr 4.2e-04 | norm 0.0930 | dt 0.018
|
| 477 |
+
type train | step 4770 | loss 0.0247 0.0672 0.2646 0.4243 0.8070 | lr 4.2e-04 | norm 0.1049 | dt 0.018
|
| 478 |
+
type train | step 4780 | loss 0.0246 0.0670 0.2658 0.4268 0.8075 | lr 4.1e-04 | norm 0.0971 | dt 0.018
|
| 479 |
+
type train | step 4790 | loss 0.0246 0.0669 0.2645 0.4280 0.8064 | lr 4.1e-04 | norm 0.0798 | dt 0.018
|
| 480 |
+
type train | step 4800 | loss 0.0250 0.0673 0.2661 0.4290 0.8115 | lr 4.1e-04 | norm 0.0919 | dt 0.018
|
| 481 |
+
type train | step 4810 | loss 0.0245 0.0665 0.2632 0.4197 0.8035 | lr 4.1e-04 | norm 0.1002 | dt 0.018
|
| 482 |
+
type train | step 4820 | loss 0.0245 0.0667 0.2664 0.4264 0.8080 | lr 4.1e-04 | norm 0.1121 | dt 0.018
|
| 483 |
+
type train | step 4830 | loss 0.0248 0.0675 0.2646 0.4252 0.8092 | lr 4.0e-04 | norm 0.1167 | dt 0.018
|
| 484 |
+
type train | step 4840 | loss 0.0247 0.0669 0.2663 0.4304 0.8118 | lr 4.0e-04 | norm 0.0842 | dt 0.018
|
| 485 |
+
type train | step 4850 | loss 0.0245 0.0675 0.2634 0.4215 0.8075 | lr 4.0e-04 | norm 0.1050 | dt 0.018
|
| 486 |
+
type train | step 4860 | loss 0.0248 0.0670 0.2652 0.4261 0.8059 | lr 4.0e-04 | norm 0.1164 | dt 0.018
|
| 487 |
+
type train | step 4870 | loss 0.0244 0.0662 0.2636 0.4185 0.8008 | lr 4.0e-04 | norm 0.0734 | dt 0.018
|
| 488 |
+
type train | step 4880 | loss 0.0246 0.0668 0.2652 0.4259 0.8082 | lr 4.0e-04 | norm 0.0832 | dt 0.019
|
| 489 |
+
type train | step 4890 | loss 0.0246 0.0667 0.2649 0.4218 0.8045 | lr 3.9e-04 | norm 0.1420 | dt 0.018
|
| 490 |
+
type train | step 4900 | loss 0.0245 0.0666 0.2660 0.4263 0.8097 | lr 3.9e-04 | norm 0.0881 | dt 0.018
|
| 491 |
+
type train | step 4910 | loss 0.0245 0.0668 0.2613 0.4201 0.8026 | lr 3.9e-04 | norm 0.0748 | dt 0.018
|
| 492 |
+
type train | step 4920 | loss 0.0250 0.0678 0.2664 0.4340 0.8171 | lr 3.9e-04 | norm 0.0944 | dt 0.018
|
| 493 |
+
type train | step 4930 | loss 0.0245 0.0667 0.2642 0.4170 0.8020 | lr 3.9e-04 | norm 0.0747 | dt 0.018
|
| 494 |
+
type train | step 4940 | loss 0.0243 0.0665 0.2630 0.4193 0.8023 | lr 3.8e-04 | norm 0.1016 | dt 0.018
|
| 495 |
+
type train | step 4950 | loss 0.0245 0.0665 0.2634 0.4202 0.8024 | lr 3.8e-04 | norm 0.0966 | dt 0.018
|
| 496 |
+
type train | step 4960 | loss 0.0247 0.0674 0.2676 0.4342 0.8175 | lr 3.8e-04 | norm 0.0884 | dt 0.018
|
| 497 |
+
type train | step 4970 | loss 0.0245 0.0670 0.2623 0.4216 0.8061 | lr 3.8e-04 | norm 0.0804 | dt 0.018
|
| 498 |
+
type train | step 4980 | loss 0.0244 0.0663 0.2607 0.4164 0.7975 | lr 3.8e-04 | norm 0.1127 | dt 0.018
|
| 499 |
+
type train | step 4990 | loss 0.0246 0.0671 0.2638 0.4200 0.8041 | lr 3.7e-04 | norm 0.1030 | dt 0.018
|
| 500 |
+
type train | step 5000 | loss 0.0248 0.0676 0.2652 0.4216 0.8080 | lr 3.7e-04 | norm 0.0718 | dt 0.018
|
| 501 |
+
type train | step 5010 | loss 0.0247 0.0669 0.2634 0.4222 0.8063 | lr 3.7e-04 | norm 0.1279 | dt 0.018
|
| 502 |
+
type train | step 5020 | loss 0.0251 0.0684 0.2682 0.4314 0.8132 | lr 3.7e-04 | norm 0.1176 | dt 0.018
|
| 503 |
+
type train | step 5030 | loss 0.0247 0.0673 0.2647 0.4245 0.8116 | lr 3.7e-04 | norm 0.0933 | dt 0.018
|
| 504 |
+
type train | step 5040 | loss 0.0247 0.0667 0.2639 0.4228 0.8057 | lr 3.6e-04 | norm 0.0863 | dt 0.018
|
| 505 |
+
type train | step 5050 | loss 0.0243 0.0664 0.2642 0.4174 0.8003 | lr 3.6e-04 | norm 0.0745 | dt 0.018
|
| 506 |
+
type train | step 5060 | loss 0.0247 0.0672 0.2657 0.4241 0.8080 | lr 3.6e-04 | norm 0.0795 | dt 0.018
|
| 507 |
+
type train | step 5070 | loss 0.0245 0.0669 0.2642 0.4217 0.8081 | lr 3.6e-04 | norm 0.0957 | dt 0.018
|
| 508 |
+
type train | step 5080 | loss 0.0248 0.0678 0.2674 0.4320 0.8153 | lr 3.6e-04 | norm 0.1075 | dt 0.018
|
| 509 |
+
type train | step 5090 | loss 0.0248 0.0673 0.2632 0.4243 0.8081 | lr 3.5e-04 | norm 0.1007 | dt 0.018
|
| 510 |
+
type train | step 5100 | loss 0.0245 0.0665 0.2637 0.4227 0.8014 | lr 3.5e-04 | norm 0.1066 | dt 0.018
|
| 511 |
+
type train | step 5110 | loss 0.0243 0.0661 0.2636 0.4185 0.7990 | lr 3.5e-04 | norm 0.0798 | dt 0.018
|
| 512 |
+
type train | step 5120 | loss 0.0246 0.0673 0.2683 0.4268 0.8093 | lr 3.5e-04 | norm 0.0937 | dt 0.018
|
| 513 |
+
type train | step 5130 | loss 0.0246 0.0669 0.2627 0.4193 0.8052 | lr 3.5e-04 | norm 0.1340 | dt 0.018
|
| 514 |
+
type train | step 5140 | loss 0.0246 0.0665 0.2637 0.4199 0.8029 | lr 3.5e-04 | norm 0.1096 | dt 0.018
|
| 515 |
+
type train | step 5150 | loss 0.0246 0.0672 0.2631 0.4211 0.8017 | lr 3.4e-04 | norm 0.1091 | dt 0.018
|
| 516 |
+
type train | step 5160 | loss 0.0248 0.0672 0.2641 0.4266 0.8136 | lr 3.4e-04 | norm 0.1464 | dt 0.018
|
| 517 |
+
type train | step 5170 | loss 0.0245 0.0667 0.2622 0.4174 0.8027 | lr 3.4e-04 | norm 0.1186 | dt 0.018
|
| 518 |
+
type train | step 5180 | loss 0.0246 0.0670 0.2681 0.4237 0.8083 | lr 3.4e-04 | norm 0.0961 | dt 0.018
|
| 519 |
+
type train | step 5190 | loss 0.0249 0.0672 0.2637 0.4236 0.8112 | lr 3.4e-04 | norm 0.0903 | dt 0.019
|
| 520 |
+
type train | step 5200 | loss 0.0247 0.0671 0.2627 0.4211 0.8086 | lr 3.3e-04 | norm 0.1437 | dt 0.018
|
| 521 |
+
type train | step 5210 | loss 0.0246 0.0669 0.2652 0.4254 0.8063 | lr 3.3e-04 | norm 0.1167 | dt 0.018
|
| 522 |
+
type train | step 5220 | loss 0.0244 0.0664 0.2633 0.4227 0.8044 | lr 3.3e-04 | norm 0.1023 | dt 0.018
|
| 523 |
+
type train | step 5230 | loss 0.0246 0.0667 0.2641 0.4206 0.8067 | lr 3.3e-04 | norm 0.1018 | dt 0.018
|
| 524 |
+
type train | step 5240 | loss 0.0246 0.0667 0.2611 0.4161 0.8008 | lr 3.3e-04 | norm 0.0716 | dt 0.018
|
| 525 |
+
type train | step 5250 | loss 0.0245 0.0668 0.2645 0.4223 0.8074 | lr 3.3e-04 | norm 0.1436 | dt 0.018
|
| 526 |
+
type train | step 5260 | loss 0.0249 0.0675 0.2649 0.4301 0.8166 | lr 3.2e-04 | norm 0.0805 | dt 0.027
|
| 527 |
+
type train | step 5270 | loss 0.0245 0.0664 0.2644 0.4223 0.8028 | lr 3.2e-04 | norm 0.1088 | dt 0.018
|
| 528 |
+
type train | step 5280 | loss 0.0245 0.0672 0.2621 0.4194 0.8019 | lr 3.2e-04 | norm 0.0791 | dt 0.019
|
| 529 |
+
type train | step 5290 | loss 0.0246 0.0667 0.2642 0.4228 0.8045 | lr 3.2e-04 | norm 0.0790 | dt 0.018
|
| 530 |
+
type train | step 5300 | loss 0.0246 0.0671 0.2637 0.4222 0.8088 | lr 3.2e-04 | norm 0.0892 | dt 0.018
|
| 531 |
+
type train | step 5310 | loss 0.0246 0.0667 0.2665 0.4258 0.8095 | lr 3.1e-04 | norm 0.0893 | dt 0.018
|
| 532 |
+
type train | step 5320 | loss 0.0245 0.0667 0.2617 0.4182 0.8046 | lr 3.1e-04 | norm 0.1332 | dt 0.018
|
| 533 |
+
type train | step 5330 | loss 0.0246 0.0670 0.2649 0.4238 0.8051 | lr 3.1e-04 | norm 0.1486 | dt 0.018
|
| 534 |
+
type train | step 5340 | loss 0.0245 0.0667 0.2600 0.4155 0.7984 | lr 3.1e-04 | norm 0.1217 | dt 0.018
|
| 535 |
+
type train | step 5350 | loss 0.0248 0.0668 0.2641 0.4248 0.8040 | lr 3.1e-04 | norm 0.0671 | dt 0.018
|
| 536 |
+
type train | step 5360 | loss 0.0246 0.0669 0.2641 0.4223 0.8090 | lr 3.1e-04 | norm 0.0733 | dt 0.018
|
| 537 |
+
type train | step 5370 | loss 0.0245 0.0668 0.2646 0.4252 0.8082 | lr 3.0e-04 | norm 0.1081 | dt 0.018
|
| 538 |
+
type train | step 5380 | loss 0.0246 0.0669 0.2640 0.4233 0.8054 | lr 3.0e-04 | norm 0.0792 | dt 0.018
|
| 539 |
+
type train | step 5390 | loss 0.0246 0.0667 0.2653 0.4260 0.8062 | lr 3.0e-04 | norm 0.0953 | dt 0.018
|
| 540 |
+
type train | step 5400 | loss 0.0245 0.0666 0.2641 0.4271 0.8050 | lr 3.0e-04 | norm 0.0942 | dt 0.018
|
| 541 |
+
type train | step 5410 | loss 0.0249 0.0669 0.2656 0.4282 0.8104 | lr 3.0e-04 | norm 0.0795 | dt 0.018
|
| 542 |
+
type train | step 5420 | loss 0.0244 0.0662 0.2627 0.4188 0.8022 | lr 2.9e-04 | norm 0.0800 | dt 0.018
|
| 543 |
+
type train | step 5430 | loss 0.0244 0.0665 0.2659 0.4254 0.8065 | lr 2.9e-04 | norm 0.1116 | dt 0.019
|
| 544 |
+
type train | step 5440 | loss 0.0248 0.0671 0.2640 0.4240 0.8076 | lr 2.9e-04 | norm 0.1083 | dt 0.018
|
| 545 |
+
type train | step 5450 | loss 0.0246 0.0666 0.2657 0.4295 0.8106 | lr 2.9e-04 | norm 0.1183 | dt 0.018
|
| 546 |
+
type train | step 5460 | loss 0.0244 0.0672 0.2628 0.4206 0.8061 | lr 2.9e-04 | norm 0.1204 | dt 0.018
|
| 547 |
+
type train | step 5470 | loss 0.0247 0.0667 0.2646 0.4253 0.8048 | lr 2.9e-04 | norm 0.0909 | dt 0.018
|
| 548 |
+
type train | step 5480 | loss 0.0243 0.0659 0.2631 0.4176 0.7996 | lr 2.8e-04 | norm 0.0657 | dt 0.018
|
| 549 |
+
type train | step 5490 | loss 0.0245 0.0665 0.2647 0.4251 0.8071 | lr 2.8e-04 | norm 0.0938 | dt 0.021
|
| 550 |
+
type train | step 5500 | loss 0.0245 0.0664 0.2644 0.4210 0.8033 | lr 2.8e-04 | norm 0.1031 | dt 0.018
|
| 551 |
+
type train | step 5510 | loss 0.0245 0.0664 0.2654 0.4254 0.8085 | lr 2.8e-04 | norm 0.0983 | dt 0.018
|
| 552 |
+
type train | step 5520 | loss 0.0244 0.0666 0.2608 0.4193 0.8012 | lr 2.8e-04 | norm 0.0798 | dt 0.018
|
| 553 |
+
type train | step 5530 | loss 0.0249 0.0674 0.2658 0.4332 0.8159 | lr 2.8e-04 | norm 0.0897 | dt 0.018
|
| 554 |
+
type train | step 5540 | loss 0.0244 0.0664 0.2637 0.4163 0.8008 | lr 2.7e-04 | norm 0.0811 | dt 0.018
|
| 555 |
+
type train | step 5550 | loss 0.0243 0.0662 0.2625 0.4185 0.8011 | lr 2.7e-04 | norm 0.1083 | dt 0.018
|
| 556 |
+
type train | step 5560 | loss 0.0244 0.0662 0.2630 0.4196 0.8013 | lr 2.7e-04 | norm 0.0806 | dt 0.018
|
| 557 |
+
type train | step 5570 | loss 0.0247 0.0671 0.2670 0.4332 0.8162 | lr 2.7e-04 | norm 0.0687 | dt 0.018
|
| 558 |
+
type train | step 5580 | loss 0.0244 0.0667 0.2618 0.4208 0.8047 | lr 2.7e-04 | norm 0.0825 | dt 0.018
|
| 559 |
+
type train | step 5590 | loss 0.0244 0.0660 0.2603 0.4158 0.7963 | lr 2.7e-04 | norm 0.0774 | dt 0.018
|
| 560 |
+
type train | step 5600 | loss 0.0245 0.0668 0.2633 0.4192 0.8028 | lr 2.6e-04 | norm 0.0683 | dt 0.018
|
| 561 |
+
type train | step 5610 | loss 0.0247 0.0673 0.2647 0.4209 0.8069 | lr 2.6e-04 | norm 0.0703 | dt 0.018
|
| 562 |
+
type train | step 5620 | loss 0.0246 0.0666 0.2630 0.4216 0.8053 | lr 2.6e-04 | norm 0.1344 | dt 0.018
|
| 563 |
+
type train | step 5630 | loss 0.0250 0.0681 0.2675 0.4303 0.8118 | lr 2.6e-04 | norm 0.1066 | dt 0.018
|
| 564 |
+
type train | step 5640 | loss 0.0246 0.0671 0.2641 0.4236 0.8101 | lr 2.6e-04 | norm 0.0919 | dt 0.018
|
| 565 |
+
type train | step 5650 | loss 0.0246 0.0665 0.2634 0.4221 0.8046 | lr 2.6e-04 | norm 0.1015 | dt 0.018
|
| 566 |
+
type train | step 5660 | loss 0.0242 0.0662 0.2637 0.4166 0.7990 | lr 2.6e-04 | norm 0.0884 | dt 0.018
|
| 567 |
+
type train | step 5670 | loss 0.0247 0.0669 0.2652 0.4234 0.8069 | lr 2.5e-04 | norm 0.0803 | dt 0.018
|
| 568 |
+
type train | step 5680 | loss 0.0244 0.0667 0.2637 0.4210 0.8070 | lr 2.5e-04 | norm 0.0921 | dt 0.018
|
| 569 |
+
type train | step 5690 | loss 0.0248 0.0675 0.2668 0.4311 0.8139 | lr 2.5e-04 | norm 0.0781 | dt 0.018
|
| 570 |
+
type train | step 5700 | loss 0.0247 0.0671 0.2627 0.4236 0.8070 | lr 2.5e-04 | norm 0.0782 | dt 0.018
|
| 571 |
+
type train | step 5710 | loss 0.0245 0.0662 0.2632 0.4220 0.8002 | lr 2.5e-04 | norm 0.0887 | dt 0.018
|
| 572 |
+
type train | step 5720 | loss 0.0243 0.0658 0.2631 0.4177 0.7979 | lr 2.5e-04 | norm 0.0868 | dt 0.018
|
| 573 |
+
type train | step 5730 | loss 0.0245 0.0670 0.2679 0.4260 0.8080 | lr 2.4e-04 | norm 0.0762 | dt 0.018
|
| 574 |
+
type train | step 5740 | loss 0.0246 0.0666 0.2623 0.4187 0.8040 | lr 2.4e-04 | norm 0.0959 | dt 0.018
|
| 575 |
+
type train | step 5750 | loss 0.0246 0.0662 0.2632 0.4191 0.8018 | lr 2.4e-04 | norm 0.0836 | dt 0.018
|
| 576 |
+
type train | step 5760 | loss 0.0246 0.0670 0.2627 0.4204 0.8006 | lr 2.4e-04 | norm 0.0628 | dt 0.018
|
| 577 |
+
type train | step 5770 | loss 0.0248 0.0669 0.2636 0.4259 0.8124 | lr 2.4e-04 | norm 0.0870 | dt 0.018
|
| 578 |
+
type train | step 5780 | loss 0.0244 0.0664 0.2617 0.4165 0.8014 | lr 2.4e-04 | norm 0.0837 | dt 0.018
|
| 579 |
+
type train | step 5790 | loss 0.0245 0.0668 0.2677 0.4230 0.8072 | lr 2.4e-04 | norm 0.0863 | dt 0.018
|
| 580 |
+
type train | step 5800 | loss 0.0248 0.0670 0.2632 0.4228 0.8101 | lr 2.3e-04 | norm 0.0727 | dt 0.018
|
| 581 |
+
type train | step 5810 | loss 0.0246 0.0668 0.2623 0.4203 0.8073 | lr 2.3e-04 | norm 0.0931 | dt 0.018
|
| 582 |
+
type train | step 5820 | loss 0.0245 0.0666 0.2648 0.4247 0.8052 | lr 2.3e-04 | norm 0.0719 | dt 0.018
|
| 583 |
+
type train | step 5830 | loss 0.0244 0.0662 0.2629 0.4220 0.8034 | lr 2.3e-04 | norm 0.0899 | dt 0.018
|
| 584 |
+
type train | step 5840 | loss 0.0245 0.0664 0.2636 0.4197 0.8056 | lr 2.3e-04 | norm 0.1086 | dt 0.018
|
| 585 |
+
type train | step 5850 | loss 0.0245 0.0665 0.2607 0.4155 0.7997 | lr 2.3e-04 | norm 0.0709 | dt 0.018
|
| 586 |
+
type train | step 5860 | loss 0.0245 0.0665 0.2641 0.4215 0.8063 | lr 2.2e-04 | norm 0.0932 | dt 0.018
|
| 587 |
+
type train | step 5870 | loss 0.0248 0.0673 0.2645 0.4293 0.8153 | lr 2.2e-04 | norm 0.0750 | dt 0.018
|
| 588 |
+
type train | step 5880 | loss 0.0244 0.0662 0.2641 0.4217 0.8018 | lr 2.2e-04 | norm 0.0748 | dt 0.018
|
| 589 |
+
type train | step 5890 | loss 0.0244 0.0670 0.2617 0.4188 0.8009 | lr 2.2e-04 | norm 0.1184 | dt 0.018
|
| 590 |
+
type train | step 5900 | loss 0.0246 0.0664 0.2638 0.4221 0.8033 | lr 2.2e-04 | norm 0.0719 | dt 0.018
|
| 591 |
+
type train | step 5910 | loss 0.0246 0.0669 0.2632 0.4216 0.8076 | lr 2.2e-04 | norm 0.0899 | dt 0.018
|
| 592 |
+
type train | step 5920 | loss 0.0245 0.0665 0.2661 0.4251 0.8083 | lr 2.2e-04 | norm 0.0686 | dt 0.018
|
| 593 |
+
type train | step 5930 | loss 0.0245 0.0665 0.2613 0.4176 0.8033 | lr 2.1e-04 | norm 0.0741 | dt 0.018
|
| 594 |
+
type train | step 5940 | loss 0.0246 0.0667 0.2645 0.4231 0.8038 | lr 2.1e-04 | norm 0.0802 | dt 0.018
|
| 595 |
+
type train | step 5950 | loss 0.0244 0.0665 0.2597 0.4149 0.7973 | lr 2.1e-04 | norm 0.0968 | dt 0.018
|
| 596 |
+
type train | step 5960 | loss 0.0247 0.0666 0.2637 0.4243 0.8031 | lr 2.1e-04 | norm 0.0650 | dt 0.018
|
| 597 |
+
type train | step 5970 | loss 0.0246 0.0666 0.2637 0.4216 0.8079 | lr 2.1e-04 | norm 0.0814 | dt 0.018
|
| 598 |
+
type train | step 5980 | loss 0.0245 0.0666 0.2642 0.4246 0.8072 | lr 2.1e-04 | norm 0.1038 | dt 0.018
|
| 599 |
+
type train | step 5990 | loss 0.0246 0.0667 0.2636 0.4226 0.8042 | lr 2.1e-04 | norm 0.0756 | dt 0.018
|
| 600 |
+
type train | step 6000 | loss 0.0245 0.0665 0.2649 0.4253 0.8052 | lr 2.1e-04 | norm 0.0716 | dt 0.018
|
| 601 |
+
type train | step 6010 | loss 0.0245 0.0664 0.2637 0.4265 0.8040 | lr 2.0e-04 | norm 0.0872 | dt 0.018
|
| 602 |
+
type train | step 6020 | loss 0.0248 0.0667 0.2652 0.4277 0.8096 | lr 2.0e-04 | norm 0.0686 | dt 0.018
|
| 603 |
+
type train | step 6030 | loss 0.0244 0.0660 0.2624 0.4182 0.8013 | lr 2.0e-04 | norm 0.0693 | dt 0.018
|
| 604 |
+
type train | step 6040 | loss 0.0244 0.0662 0.2656 0.4247 0.8056 | lr 2.0e-04 | norm 0.0888 | dt 0.018
|
| 605 |
+
type train | step 6050 | loss 0.0247 0.0670 0.2636 0.4232 0.8063 | lr 2.0e-04 | norm 0.0898 | dt 0.018
|
| 606 |
+
type train | step 6060 | loss 0.0245 0.0664 0.2653 0.4287 0.8095 | lr 2.0e-04 | norm 0.0999 | dt 0.018
|
| 607 |
+
type train | step 6070 | loss 0.0244 0.0669 0.2624 0.4199 0.8050 | lr 2.0e-04 | norm 0.1172 | dt 0.018
|
| 608 |
+
type train | step 6080 | loss 0.0247 0.0665 0.2642 0.4248 0.8038 | lr 1.9e-04 | norm 0.0762 | dt 0.018
|
| 609 |
+
type train | step 6090 | loss 0.0243 0.0657 0.2628 0.4171 0.7987 | lr 1.9e-04 | norm 0.0698 | dt 0.018
|
| 610 |
+
type train | step 6100 | loss 0.0244 0.0663 0.2644 0.4245 0.8063 | lr 1.9e-04 | norm 0.1158 | dt 0.019
|
| 611 |
+
type train | step 6110 | loss 0.0245 0.0662 0.2641 0.4204 0.8022 | lr 1.9e-04 | norm 0.0657 | dt 0.018
|
| 612 |
+
type train | step 6120 | loss 0.0244 0.0662 0.2651 0.4248 0.8075 | lr 1.9e-04 | norm 0.0812 | dt 0.018
|
| 613 |
+
type train | step 6130 | loss 0.0243 0.0664 0.2605 0.4186 0.8002 | lr 1.9e-04 | norm 0.0796 | dt 0.018
|
| 614 |
+
type train | step 6140 | loss 0.0248 0.0672 0.2654 0.4326 0.8150 | lr 1.9e-04 | norm 0.0990 | dt 0.018
|
| 615 |
+
type train | step 6150 | loss 0.0244 0.0663 0.2633 0.4157 0.7998 | lr 1.9e-04 | norm 0.0688 | dt 0.018
|
| 616 |
+
type train | step 6160 | loss 0.0242 0.0660 0.2621 0.4179 0.8002 | lr 1.8e-04 | norm 0.0990 | dt 0.018
|
| 617 |
+
type train | step 6170 | loss 0.0244 0.0660 0.2627 0.4192 0.8005 | lr 1.8e-04 | norm 0.0610 | dt 0.018
|
| 618 |
+
type train | step 6180 | loss 0.0246 0.0669 0.2667 0.4326 0.8152 | lr 1.8e-04 | norm 0.0670 | dt 0.018
|
| 619 |
+
type train | step 6190 | loss 0.0244 0.0665 0.2614 0.4202 0.8037 | lr 1.8e-04 | norm 0.0831 | dt 0.018
|
| 620 |
+
type train | step 6200 | loss 0.0243 0.0659 0.2599 0.4154 0.7955 | lr 1.8e-04 | norm 0.0733 | dt 0.018
|
| 621 |
+
type train | step 6210 | loss 0.0245 0.0666 0.2630 0.4186 0.8019 | lr 1.8e-04 | norm 0.0633 | dt 0.018
|
| 622 |
+
type train | step 6220 | loss 0.0247 0.0671 0.2644 0.4204 0.8060 | lr 1.8e-04 | norm 0.0638 | dt 0.018
|
| 623 |
+
type train | step 6230 | loss 0.0245 0.0664 0.2627 0.4211 0.8045 | lr 1.8e-04 | norm 0.0687 | dt 0.018
|
| 624 |
+
type train | step 6240 | loss 0.0250 0.0679 0.2671 0.4295 0.8106 | lr 1.8e-04 | norm 0.1011 | dt 0.018
|
| 625 |
+
type train | step 6250 | loss 0.0245 0.0668 0.2636 0.4230 0.8091 | lr 1.7e-04 | norm 0.0876 | dt 0.018
|
| 626 |
+
type train | step 6260 | loss 0.0245 0.0663 0.2631 0.4216 0.8039 | lr 1.7e-04 | norm 0.0731 | dt 0.018
|
| 627 |
+
type train | step 6270 | loss 0.0242 0.0660 0.2633 0.4160 0.7981 | lr 1.7e-04 | norm 0.0728 | dt 0.018
|
| 628 |
+
type train | step 6280 | loss 0.0246 0.0667 0.2649 0.4229 0.8060 | lr 1.7e-04 | norm 0.0801 | dt 0.018
|
| 629 |
+
type train | step 6290 | loss 0.0244 0.0665 0.2635 0.4206 0.8062 | lr 1.7e-04 | norm 0.0902 | dt 0.018
|
| 630 |
+
type train | step 6300 | loss 0.0247 0.0674 0.2665 0.4304 0.8128 | lr 1.7e-04 | norm 0.0782 | dt 0.018
|
| 631 |
+
type train | step 6310 | loss 0.0247 0.0669 0.2625 0.4232 0.8063 | lr 1.7e-04 | norm 0.0716 | dt 0.018
|
| 632 |
+
type train | step 6320 | loss 0.0244 0.0661 0.2629 0.4216 0.7996 | lr 1.7e-04 | norm 0.0924 | dt 0.018
|
| 633 |
+
type train | step 6330 | loss 0.0242 0.0657 0.2628 0.4171 0.7970 | lr 1.7e-04 | norm 0.0701 | dt 0.018
|
| 634 |
+
type train | step 6340 | loss 0.0245 0.0668 0.2676 0.4255 0.8073 | lr 1.6e-04 | norm 0.0691 | dt 0.018
|
| 635 |
+
type train | step 6350 | loss 0.0245 0.0664 0.2620 0.4182 0.8032 | lr 1.6e-04 | norm 0.0772 | dt 0.018
|
| 636 |
+
type train | step 6360 | loss 0.0245 0.0661 0.2630 0.4186 0.8010 | lr 1.6e-04 | norm 0.0767 | dt 0.018
|
| 637 |
+
type train | step 6370 | loss 0.0246 0.0668 0.2624 0.4200 0.7999 | lr 1.6e-04 | norm 0.0580 | dt 0.018
|
| 638 |
+
type train | step 6380 | loss 0.0247 0.0667 0.2633 0.4253 0.8116 | lr 1.6e-04 | norm 0.1024 | dt 0.018
|
| 639 |
+
type train | step 6390 | loss 0.0244 0.0662 0.2614 0.4158 0.8005 | lr 1.6e-04 | norm 0.0820 | dt 0.019
|
| 640 |
+
type train | step 6400 | loss 0.0245 0.0666 0.2674 0.4224 0.8064 | lr 1.6e-04 | norm 0.0726 | dt 0.018
|
| 641 |
+
type train | step 6410 | loss 0.0248 0.0668 0.2630 0.4223 0.8092 | lr 1.6e-04 | norm 0.0808 | dt 0.018
|
| 642 |
+
type train | step 6420 | loss 0.0246 0.0667 0.2621 0.4198 0.8064 | lr 1.6e-04 | norm 0.0889 | dt 0.018
|
| 643 |
+
type train | step 6430 | loss 0.0245 0.0664 0.2646 0.4242 0.8045 | lr 1.5e-04 | norm 0.0674 | dt 0.018
|
| 644 |
+
type train | step 6440 | loss 0.0243 0.0660 0.2626 0.4216 0.8026 | lr 1.5e-04 | norm 0.0588 | dt 0.018
|
| 645 |
+
type train | step 6450 | loss 0.0245 0.0663 0.2633 0.4190 0.8046 | lr 1.5e-04 | norm 0.0779 | dt 0.018
|
| 646 |
+
type train | step 6460 | loss 0.0245 0.0663 0.2604 0.4150 0.7990 | lr 1.5e-04 | norm 0.0815 | dt 0.018
|
| 647 |
+
type train | step 6470 | loss 0.0244 0.0663 0.2638 0.4211 0.8056 | lr 1.5e-04 | norm 0.0733 | dt 0.018
|
| 648 |
+
type train | step 6480 | loss 0.0247 0.0671 0.2642 0.4287 0.8144 | lr 1.5e-04 | norm 0.0789 | dt 0.018
|
| 649 |
+
type train | step 6490 | loss 0.0244 0.0660 0.2638 0.4212 0.8011 | lr 1.5e-04 | norm 0.0705 | dt 0.018
|
| 650 |
+
type train | step 6500 | loss 0.0244 0.0669 0.2615 0.4184 0.8001 | lr 1.5e-04 | norm 0.1031 | dt 0.018
|
| 651 |
+
type train | step 6510 | loss 0.0245 0.0663 0.2635 0.4216 0.8026 | lr 1.5e-04 | norm 0.0658 | dt 0.018
|
| 652 |
+
type train | step 6520 | loss 0.0245 0.0668 0.2629 0.4211 0.8068 | lr 1.5e-04 | norm 0.0850 | dt 0.018
|
| 653 |
+
type train | step 6530 | loss 0.0245 0.0663 0.2658 0.4246 0.8077 | lr 1.5e-04 | norm 0.0983 | dt 0.018
|
| 654 |
+
type train | step 6540 | loss 0.0244 0.0664 0.2610 0.4171 0.8024 | lr 1.4e-04 | norm 0.0805 | dt 0.018
|
| 655 |
+
type train | step 6550 | loss 0.0245 0.0666 0.2642 0.4226 0.8031 | lr 1.4e-04 | norm 0.0631 | dt 0.018
|
| 656 |
+
type train | step 6560 | loss 0.0244 0.0663 0.2594 0.4145 0.7966 | lr 1.4e-04 | norm 0.0866 | dt 0.018
|
| 657 |
+
type train | step 6570 | loss 0.0247 0.0665 0.2635 0.4241 0.8025 | lr 1.4e-04 | norm 0.0702 | dt 0.018
|
| 658 |
+
type train | step 6580 | loss 0.0245 0.0665 0.2634 0.4211 0.8072 | lr 1.4e-04 | norm 0.0639 | dt 0.018
|
| 659 |
+
type train | step 6590 | loss 0.0244 0.0665 0.2639 0.4241 0.8066 | lr 1.4e-04 | norm 0.0955 | dt 0.018
|
| 660 |
+
type train | step 6600 | loss 0.0245 0.0666 0.2633 0.4221 0.8034 | lr 1.4e-04 | norm 0.0773 | dt 0.018
|
| 661 |
+
type train | step 6610 | loss 0.0245 0.0664 0.2647 0.4248 0.8046 | lr 1.4e-04 | norm 0.0882 | dt 0.018
|
| 662 |
+
type train | step 6620 | loss 0.0244 0.0662 0.2635 0.4260 0.8034 | lr 1.4e-04 | norm 0.0855 | dt 0.018
|
| 663 |
+
type train | step 6630 | loss 0.0248 0.0665 0.2649 0.4274 0.8090 | lr 1.4e-04 | norm 0.0647 | dt 0.018
|
| 664 |
+
type train | step 6640 | loss 0.0243 0.0658 0.2622 0.4179 0.8006 | lr 1.4e-04 | norm 0.0714 | dt 0.018
|
| 665 |
+
type train | step 6650 | loss 0.0244 0.0660 0.2653 0.4242 0.8049 | lr 1.3e-04 | norm 0.1160 | dt 0.018
|
| 666 |
+
type train | step 6660 | loss 0.0247 0.0668 0.2633 0.4226 0.8054 | lr 1.3e-04 | norm 0.0947 | dt 0.018
|
| 667 |
+
type train | step 6670 | loss 0.0245 0.0663 0.2651 0.4282 0.8087 | lr 1.3e-04 | norm 0.0758 | dt 0.018
|
| 668 |
+
type train | step 6680 | loss 0.0243 0.0668 0.2621 0.4195 0.8042 | lr 1.3e-04 | norm 0.0889 | dt 0.018
|
| 669 |
+
type train | step 6690 | loss 0.0246 0.0663 0.2640 0.4244 0.8033 | lr 1.3e-04 | norm 0.0716 | dt 0.018
|
| 670 |
+
type train | step 6700 | loss 0.0242 0.0656 0.2626 0.4167 0.7981 | lr 1.3e-04 | norm 0.0730 | dt 0.018
|
| 671 |
+
type train | step 6710 | loss 0.0244 0.0662 0.2642 0.4241 0.8057 | lr 1.3e-04 | norm 0.1215 | dt 0.020
|
| 672 |
+
type train | step 6720 | loss 0.0244 0.0661 0.2639 0.4200 0.8016 | lr 1.3e-04 | norm 0.0754 | dt 0.018
|
| 673 |
+
type train | step 6730 | loss 0.0244 0.0660 0.2648 0.4243 0.8068 | lr 1.3e-04 | norm 0.0862 | dt 0.018
|
| 674 |
+
type train | step 6740 | loss 0.0243 0.0662 0.2602 0.4181 0.7994 | lr 1.3e-04 | norm 0.0720 | dt 0.018
|
| 675 |
+
type train | step 6750 | loss 0.0248 0.0671 0.2651 0.4322 0.8143 | lr 1.3e-04 | norm 0.0798 | dt 0.018
|
| 676 |
+
type train | step 6760 | loss 0.0243 0.0661 0.2631 0.4153 0.7991 | lr 1.3e-04 | norm 0.0732 | dt 0.018
|
| 677 |
+
type train | step 6770 | loss 0.0242 0.0659 0.2619 0.4176 0.7996 | lr 1.3e-04 | norm 0.0769 | dt 0.018
|
| 678 |
+
type train | step 6780 | loss 0.0243 0.0659 0.2625 0.4189 0.8001 | lr 1.3e-04 | norm 0.0725 | dt 0.018
|
| 679 |
+
type train | step 6790 | loss 0.0246 0.0668 0.2665 0.4321 0.8146 | lr 1.2e-04 | norm 0.0670 | dt 0.018
|
| 680 |
+
type train | step 6800 | loss 0.0244 0.0663 0.2611 0.4197 0.8030 | lr 1.2e-04 | norm 0.0723 | dt 0.018
|
| 681 |
+
type train | step 6810 | loss 0.0243 0.0657 0.2597 0.4150 0.7951 | lr 1.2e-04 | norm 0.0579 | dt 0.018
|
| 682 |
+
type train | step 6820 | loss 0.0244 0.0665 0.2628 0.4182 0.8012 | lr 1.2e-04 | norm 0.0604 | dt 0.020
|
| 683 |
+
type train | step 6830 | loss 0.0247 0.0670 0.2642 0.4200 0.8055 | lr 1.2e-04 | norm 0.0749 | dt 0.021
|
| 684 |
+
type train | step 6840 | loss 0.0245 0.0663 0.2625 0.4208 0.8041 | lr 1.2e-04 | norm 0.0945 | dt 0.019
|
| 685 |
+
type train | step 6850 | loss 0.0249 0.0678 0.2668 0.4290 0.8097 | lr 1.2e-04 | norm 0.1018 | dt 0.018
|
| 686 |
+
type train | step 6860 | loss 0.0245 0.0667 0.2634 0.4225 0.8083 | lr 1.2e-04 | norm 0.0773 | dt 0.018
|
| 687 |
+
type train | step 6870 | loss 0.0245 0.0661 0.2629 0.4213 0.8033 | lr 1.2e-04 | norm 0.0767 | dt 0.019
|
| 688 |
+
type train | step 6880 | loss 0.0242 0.0659 0.2632 0.4156 0.7975 | lr 1.2e-04 | norm 0.0731 | dt 0.018
|
| 689 |
+
type train | step 6890 | loss 0.0246 0.0666 0.2648 0.4225 0.8055 | lr 1.2e-04 | norm 0.0800 | dt 0.018
|
| 690 |
+
type train | step 6900 | loss 0.0244 0.0664 0.2633 0.4203 0.8057 | lr 1.2e-04 | norm 0.0895 | dt 0.018
|
| 691 |
+
type train | step 6910 | loss 0.0247 0.0673 0.2662 0.4299 0.8120 | lr 1.2e-04 | norm 0.0822 | dt 0.018
|
| 692 |
+
type train | step 6920 | loss 0.0247 0.0668 0.2623 0.4229 0.8057 | lr 1.2e-04 | norm 0.0681 | dt 0.018
|
| 693 |
+
type train | step 6930 | loss 0.0244 0.0660 0.2628 0.4213 0.7990 | lr 1.2e-04 | norm 0.0805 | dt 0.018
|
| 694 |
+
type train | step 6940 | loss 0.0242 0.0655 0.2626 0.4167 0.7964 | lr 1.2e-04 | norm 0.0699 | dt 0.018
|
| 695 |
+
type train | step 6950 | loss 0.0245 0.0667 0.2674 0.4251 0.8067 | lr 1.1e-04 | norm 0.0718 | dt 0.018
|
| 696 |
+
type train | step 6960 | loss 0.0245 0.0663 0.2619 0.4179 0.8027 | lr 1.1e-04 | norm 0.0608 | dt 0.018
|
| 697 |
+
type train | step 6970 | loss 0.0245 0.0660 0.2627 0.4183 0.8004 | lr 1.1e-04 | norm 0.0727 | dt 0.018
|
| 698 |
+
type train | step 6980 | loss 0.0245 0.0667 0.2622 0.4197 0.7994 | lr 1.1e-04 | norm 0.0568 | dt 0.018
|
| 699 |
+
type train | step 6990 | loss 0.0247 0.0666 0.2632 0.4250 0.8109 | lr 1.1e-04 | norm 0.0796 | dt 0.018
|
| 700 |
+
type train | step 7000 | loss 0.0244 0.0661 0.2612 0.4154 0.7999 | lr 1.1e-04 | norm 0.1025 | dt 0.018
|
| 701 |
+
type train | step 7010 | loss 0.0244 0.0665 0.2672 0.4221 0.8059 | lr 1.1e-04 | norm 0.0960 | dt 0.018
|
| 702 |
+
type train | step 7020 | loss 0.0248 0.0667 0.2628 0.4220 0.8087 | lr 1.1e-04 | norm 0.0906 | dt 0.019
|
| 703 |
+
type train | step 7030 | loss 0.0245 0.0666 0.2619 0.4194 0.8057 | lr 1.1e-04 | norm 0.0776 | dt 0.019
|
| 704 |
+
type train | step 7040 | loss 0.0245 0.0663 0.2644 0.4239 0.8039 | lr 1.1e-04 | norm 0.0648 | dt 0.018
|
| 705 |
+
type train | step 7050 | loss 0.0243 0.0659 0.2625 0.4213 0.8021 | lr 1.1e-04 | norm 0.0723 | dt 0.019
|
| 706 |
+
type train | step 7060 | loss 0.0244 0.0661 0.2632 0.4187 0.8039 | lr 1.1e-04 | norm 0.0942 | dt 0.018
|
| 707 |
+
type train | step 7070 | loss 0.0244 0.0662 0.2603 0.4147 0.7985 | lr 1.1e-04 | norm 0.0719 | dt 0.018
|
| 708 |
+
type train | step 7080 | loss 0.0244 0.0663 0.2636 0.4208 0.8050 | lr 1.1e-04 | norm 0.0741 | dt 0.024
|
| 709 |
+
type train | step 7090 | loss 0.0247 0.0670 0.2640 0.4284 0.8138 | lr 1.1e-04 | norm 0.0700 | dt 0.018
|
| 710 |
+
type train | step 7100 | loss 0.0244 0.0659 0.2637 0.4209 0.8007 | lr 1.1e-04 | norm 0.0687 | dt 0.018
|
| 711 |
+
type train | step 7110 | loss 0.0243 0.0667 0.2613 0.4181 0.7996 | lr 1.1e-04 | norm 0.1030 | dt 0.018
|
| 712 |
+
type train | step 7120 | loss 0.0245 0.0662 0.2634 0.4213 0.8020 | lr 1.1e-04 | norm 0.0637 | dt 0.018
|
| 713 |
+
type train | step 7130 | loss 0.0245 0.0666 0.2627 0.4208 0.8064 | lr 1.1e-04 | norm 0.0855 | dt 0.018
|
| 714 |
+
type train | step 7140 | loss 0.0244 0.0662 0.2657 0.4243 0.8072 | lr 1.1e-04 | norm 0.0570 | dt 0.018
|
| 715 |
+
type train | step 7150 | loss 0.0244 0.0662 0.2608 0.4168 0.8019 | lr 1.1e-04 | norm 0.0703 | dt 0.018
|
| 716 |
+
type train | step 7160 | loss 0.0245 0.0665 0.2640 0.4223 0.8026 | lr 1.1e-04 | norm 0.0628 | dt 0.018
|
| 717 |
+
type train | step 7170 | loss 0.0244 0.0662 0.2592 0.4143 0.7961 | lr 1.1e-04 | norm 0.0677 | dt 0.018
|
| 718 |
+
type train | step 7180 | loss 0.0246 0.0664 0.2633 0.4239 0.8021 | lr 1.0e-04 | norm 0.0634 | dt 0.019
|
| 719 |
+
type train | step 7190 | loss 0.0245 0.0664 0.2632 0.4208 0.8068 | lr 1.0e-04 | norm 0.0865 | dt 0.019
|
| 720 |
+
type train | step 7200 | loss 0.0244 0.0664 0.2638 0.4238 0.8061 | lr 1.0e-04 | norm 0.0634 | dt 0.018
|
| 721 |
+
type train | step 7210 | loss 0.0245 0.0665 0.2631 0.4218 0.8028 | lr 1.0e-04 | norm 0.0721 | dt 0.018
|
| 722 |
+
type train | step 7220 | loss 0.0244 0.0663 0.2645 0.4246 0.8042 | lr 1.0e-04 | norm 0.0794 | dt 0.018
|
| 723 |
+
type train | step 7230 | loss 0.0244 0.0661 0.2632 0.4257 0.8029 | lr 1.0e-04 | norm 0.0809 | dt 0.019
|
| 724 |
+
type train | step 7240 | loss 0.0248 0.0664 0.2647 0.4272 0.8087 | lr 1.0e-04 | norm 0.1074 | dt 0.018
|
| 725 |
+
type train | step 7250 | loss 0.0243 0.0658 0.2620 0.4176 0.8001 | lr 1.0e-04 | norm 0.0665 | dt 0.018
|
| 726 |
+
type train | step 7260 | loss 0.0243 0.0659 0.2651 0.4239 0.8044 | lr 1.0e-04 | norm 0.0879 | dt 0.018
|
| 727 |
+
type train | step 7270 | loss 0.0247 0.0667 0.2630 0.4222 0.8048 | lr 1.0e-04 | norm 0.0755 | dt 0.018
|
| 728 |
+
type train | step 7280 | loss 0.0245 0.0662 0.2649 0.4278 0.8083 | lr 1.0e-04 | norm 0.0935 | dt 0.018
|
| 729 |
+
type train | step 7290 | loss 0.0243 0.0666 0.2618 0.4192 0.8037 | lr 1.0e-04 | norm 0.0844 | dt 0.018
|
| 730 |
+
type train | step 7300 | loss 0.0246 0.0662 0.2638 0.4242 0.8029 | lr 1.0e-04 | norm 0.0651 | dt 0.018
|
| 731 |
+
type train | step 7310 | loss 0.0242 0.0655 0.2624 0.4164 0.7977 | lr 1.0e-04 | norm 0.0695 | dt 0.018
|
| 732 |
+
type train | step 7320 | loss 0.0244 0.0661 0.2640 0.4238 0.8054 | lr 1.0e-04 | norm 0.1089 | dt 0.020
|
| 733 |
+
type train | step 7330 | loss 0.0244 0.0660 0.2637 0.4198 0.8012 | lr 1.0e-04 | norm 0.0664 | dt 0.018
|
| 734 |
+
type train | step 7340 | loss 0.0244 0.0659 0.2647 0.4241 0.8063 | lr 1.0e-04 | norm 0.0732 | dt 0.018
|
| 735 |
+
type train | step 7350 | loss 0.0243 0.0661 0.2600 0.4179 0.7990 | lr 1.0e-04 | norm 0.0613 | dt 0.018
|
| 736 |
+
type train | step 7360 | loss 0.0248 0.0670 0.2650 0.4320 0.8140 | lr 1.0e-04 | norm 0.0764 | dt 0.018
|
| 737 |
+
type train | step 7370 | loss 0.0243 0.0660 0.2629 0.4151 0.7987 | lr 1.0e-04 | norm 0.0844 | dt 0.018
|
| 738 |
+
type train | step 7380 | loss 0.0242 0.0658 0.2617 0.4173 0.7993 | lr 1.0e-04 | norm 0.0911 | dt 0.020
|
| 739 |
+
type train | step 7390 | loss 0.0243 0.0658 0.2624 0.4188 0.7997 | lr 1.0e-04 | norm 0.0678 | dt 0.018
|
| 740 |
+
type train | step 7400 | loss 0.0246 0.0667 0.2663 0.4319 0.8141 | lr 1.0e-04 | norm 0.0663 | dt 0.018
|
| 741 |
+
type train | step 7410 | loss 0.0243 0.0662 0.2610 0.4195 0.8026 | lr 1.0e-04 | norm 0.0754 | dt 0.018
|
| 742 |
+
type train | step 7420 | loss 0.0243 0.0657 0.2596 0.4148 0.7948 | lr 1.0e-04 | norm 0.0655 | dt 0.018
|
| 743 |
+
type train | step 7430 | loss 0.0244 0.0664 0.2627 0.4180 0.8008 | lr 1.0e-04 | norm 0.0718 | dt 0.018
|
| 744 |
+
type train | step 7440 | loss 0.0246 0.0669 0.2640 0.4198 0.8052 | lr 1.0e-04 | norm 0.0885 | dt 0.018
|
| 745 |
+
type train | step 7450 | loss 0.0245 0.0662 0.2624 0.4206 0.8038 | lr 1.0e-04 | norm 0.1102 | dt 0.018
|
| 746 |
+
type train | step 7460 | loss 0.0249 0.0677 0.2666 0.4287 0.8093 | lr 1.0e-04 | norm 0.1042 | dt 0.018
|
| 747 |
+
type train | step 7470 | loss 0.0245 0.0666 0.2633 0.4223 0.8079 | lr 1.0e-04 | norm 0.0763 | dt 0.018
|
| 748 |
+
type train | step 7480 | loss 0.0245 0.0660 0.2628 0.4211 0.8030 | lr 1.0e-04 | norm 0.0867 | dt 0.018
|
| 749 |
+
type train | step 7490 | loss 0.0241 0.0658 0.2631 0.4154 0.7971 | lr 1.0e-04 | norm 0.0893 | dt 0.019
|
| 750 |
+
type train | step 7500 | loss 0.0246 0.0665 0.2647 0.4223 0.8051 | lr 1.0e-04 | norm 0.0932 | dt 0.018
|