diff --git a/attention_kindselective_n_heads2_seed1341/args.json b/attention_kindselective_n_heads2_seed1341/args.json index 7b7c266e28d627250970e1817493006b1c1e300d..851c1a076be2c0fb3ef71bd181fbff7fa689a696 100644 --- a/attention_kindselective_n_heads2_seed1341/args.json +++ b/attention_kindselective_n_heads2_seed1341/args.json @@ -1 +1 @@ -{"hellaswag": true, "attention_kind": "selective", "log_dir": "wider_is_better_6/attention_kindselective_n_heads2_seed1341", "resume_checkpoint": null, "resume_optimizer": false, "add_a_head": false, "add_head_to_start": true, "new_head_init": "normal", "n_heads": 2, "protect_bos_token": true, "prevent_from_masking_myself": true, "max_steps": 10000, "warmup_steps": 200, "group": "wider_is_better_6", "use_wandb": true, "kill_self_after_run": false, "random_seed": 1341, "memory_penalty_epsilon": 0.1, "selection_head_linear_combo": "none", "selection_head_linear_combo_scale": 1.0, "protection_kind": "none", "leaky_relu_alpha": null, "leaky_relu_bias": null, "use_compile": true, "use_mini_model": false, "upload_to_hf": true, "seq_len": 256, "batch_size": 40, "total_batch_size": 10240, "protection_head_scaling_factor": 1.0, "protection_head_bias": 0.0, "n_sliced_masks": null, "n_latent_masks": null, "mask_layernorm": false, "residual_attention_masks": false, "compute_base_shapes": false, "base_shapes_savefile": null, "mup": true, "disable_selection": false, "mup_enable_coord_check_logging": false, "max_lr": 0.00015, "decay_lr": true, "readout_zero_init": false, "query_zero_init": false, "l1_loss": false, "debugpy": false, "key": "15e-5_10240_2_1341", "n_embd": 128} \ No newline at end of file +{"hellaswag": true, "attention_kind": "selective", "log_dir": "wider_is_better_7/attention_kindselective_n_heads2_seed1341", "resume_checkpoint": null, "resume_optimizer": false, "add_a_head": false, "add_head_to_start": true, "new_head_init": "normal", "n_heads": 2, "protect_bos_token": true, "prevent_from_masking_myself": true, "max_steps": 50000, "warmup_steps": 200, "group": "wider_is_better_7", "use_wandb": true, "kill_self_after_run": false, "random_seed": 1341, "memory_penalty_epsilon": 0.1, "selection_head_linear_combo": "none", "selection_head_linear_combo_scale": 1.0, "protection_kind": "none", "leaky_relu_alpha": null, "leaky_relu_bias": null, "use_compile": true, "use_mini_model": false, "upload_to_hf": true, "seq_len": 256, "batch_size": 40, "total_batch_size": 10240, "protection_head_scaling_factor": 1.0, "protection_head_bias": 0.0, "n_sliced_masks": null, "n_latent_masks": null, "mask_layernorm": false, "residual_attention_masks": false, "compute_base_shapes": false, "base_shapes_savefile": null, "mup": true, "disable_selection": false, "mup_enable_coord_check_logging": false, "max_lr": 0.0001, "decay_lr": true, "readout_zero_init": false, "query_zero_init": false, "l1_loss": false, "debugpy": false, "key": "10e-5_10240_2_1341", "n_embd": 128} \ No newline at end of file diff --git a/attention_kindselective_n_heads2_seed1341/dataloader_10000.pt b/attention_kindselective_n_heads2_seed1341/dataloader_10000.pt new file mode 100644 index 0000000000000000000000000000000000000000..e1452fd777edd0ccb65d6b47710c50208c14b312 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1341/dataloader_10000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3858f6c832feea78a674d8c5c384061cc7d4f22cddbd0a2be6de33bc91e2c72 +size 964 diff --git a/attention_kindselective_n_heads2_seed1341/dataloader_12500.pt b/attention_kindselective_n_heads2_seed1341/dataloader_12500.pt new file mode 100644 index 0000000000000000000000000000000000000000..f63806424a0177a7f2d678c2c63138219ed021f3 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1341/dataloader_12500.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab3779d33c2e0a7873fcd8c39402e44260740665950323ad1445480ec339965a +size 964 diff --git a/attention_kindselective_n_heads2_seed1341/dataloader_15000.pt b/attention_kindselective_n_heads2_seed1341/dataloader_15000.pt new file mode 100644 index 0000000000000000000000000000000000000000..0ea0f5a1bfab75667c4ebb0ca01b358cdc836a54 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1341/dataloader_15000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:450020c7c306c73e5c07c463518ab937102d657515ea5a38da6f2e7291f20324 +size 964 diff --git a/attention_kindselective_n_heads2_seed1341/dataloader_17500.pt b/attention_kindselective_n_heads2_seed1341/dataloader_17500.pt new file mode 100644 index 0000000000000000000000000000000000000000..eb9392348b1209c827e3e376b05eeda80e779aa8 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1341/dataloader_17500.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0696a655d7c8a9d7d275c7489f74a5a948ee029ac3941b045d6abaf12544a5b1 +size 964 diff --git a/attention_kindselective_n_heads2_seed1341/dataloader_20000.pt b/attention_kindselective_n_heads2_seed1341/dataloader_20000.pt new file mode 100644 index 0000000000000000000000000000000000000000..ba32fca059aec40e4f758de96bdac6df23b9d9f5 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1341/dataloader_20000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4eb226bdcee777fd1ca493533704dae226c077ef79c842fc9dc59a534d5381c1 +size 964 diff --git a/attention_kindselective_n_heads2_seed1341/dataloader_22500.pt b/attention_kindselective_n_heads2_seed1341/dataloader_22500.pt new file mode 100644 index 0000000000000000000000000000000000000000..498dc444f528d893090328a5bd1e2f37da46dc12 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1341/dataloader_22500.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88833bfd821adac4edd1dd0772083ae007c7b8d33041f66e53a679e1fa8993e0 +size 964 diff --git a/attention_kindselective_n_heads2_seed1341/dataloader_25000.pt b/attention_kindselective_n_heads2_seed1341/dataloader_25000.pt new file mode 100644 index 0000000000000000000000000000000000000000..b657bc134192d0ea956f984c289d0c682979a1f4 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1341/dataloader_25000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:554555a425bac43d626c36f1c81c2b0aba51eda3281dab27a9cb56b61f413354 +size 964 diff --git a/attention_kindselective_n_heads2_seed1341/dataloader_27500.pt b/attention_kindselective_n_heads2_seed1341/dataloader_27500.pt new file mode 100644 index 0000000000000000000000000000000000000000..d92d43d89390714f43db4f0782e49af0145b4a90 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1341/dataloader_27500.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a52940b3b45414e6cdbec0dbaeba848f52d681c2daf78c269027057332d7fbd +size 964 diff --git a/attention_kindselective_n_heads2_seed1341/dataloader_30000.pt b/attention_kindselective_n_heads2_seed1341/dataloader_30000.pt new file mode 100644 index 0000000000000000000000000000000000000000..06856f53253b6c8cbefd7d595d9b9b7266b22621 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1341/dataloader_30000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:775519ea26122cb70d533c496bcdbbc19f759f3d096e83e98cca1dc10275fe8e +size 964 diff --git a/attention_kindselective_n_heads2_seed1341/dataloader_32500.pt b/attention_kindselective_n_heads2_seed1341/dataloader_32500.pt new file mode 100644 index 0000000000000000000000000000000000000000..dc0b4b40e57f41aa1046b3bd2697256635160c09 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1341/dataloader_32500.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab3422c8205fe45210246fed3dd6c317b75df02228cd8b75fba669574ce3b2d9 +size 964 diff --git a/attention_kindselective_n_heads2_seed1341/dataloader_35000.pt b/attention_kindselective_n_heads2_seed1341/dataloader_35000.pt new file mode 100644 index 0000000000000000000000000000000000000000..62647b540fa7925361626b9f8dfa3959eebb7608 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1341/dataloader_35000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:199ed3be67b88981f686112c97a2261729a37e0be3d4b0f4a289985a95d3cdf1 +size 964 diff --git a/attention_kindselective_n_heads2_seed1341/dataloader_37500.pt b/attention_kindselective_n_heads2_seed1341/dataloader_37500.pt new file mode 100644 index 0000000000000000000000000000000000000000..475d8b538138a8e39b76a4cf04c8eaeac074d295 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1341/dataloader_37500.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f77178b3be9dd3f8cd03c935236251f73fde6da7948ba9feda0c888fb8912dfe +size 964 diff --git a/attention_kindselective_n_heads2_seed1341/dataloader_40000.pt b/attention_kindselective_n_heads2_seed1341/dataloader_40000.pt new file mode 100644 index 0000000000000000000000000000000000000000..50ec00ef6be330bbdb4cdf88e9a1097345da0d4d --- /dev/null +++ b/attention_kindselective_n_heads2_seed1341/dataloader_40000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84f58ba3b4a1f9be7da4e697e06782f0e1ce4d3aca49f1997087fc83aa466dd9 +size 964 diff --git a/attention_kindselective_n_heads2_seed1341/dataloader_42500.pt b/attention_kindselective_n_heads2_seed1341/dataloader_42500.pt new file mode 100644 index 0000000000000000000000000000000000000000..74e133f73a7293d5f4d6407784703c91f705d6e3 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1341/dataloader_42500.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf6d24c78d89100d146bce9f26be940db3d71092473d9b55db97d6b35531eac2 +size 964 diff --git a/attention_kindselective_n_heads2_seed1341/dataloader_45000.pt b/attention_kindselective_n_heads2_seed1341/dataloader_45000.pt new file mode 100644 index 0000000000000000000000000000000000000000..718bc149d695b1e9498bdd0693053d7417207818 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1341/dataloader_45000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22bb90b43d81f3da5454f91a70e1ed29aeb2f470a727ce38390ff8a5c4924889 +size 964 diff --git a/attention_kindselective_n_heads2_seed1341/dataloader_47500.pt b/attention_kindselective_n_heads2_seed1341/dataloader_47500.pt new file mode 100644 index 0000000000000000000000000000000000000000..3eab97c7c6d4b86d90405bf1c4f3435727495da4 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1341/dataloader_47500.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55507725e6988f190e4963078652fafa6b68e8d4f79221387612612babf3e1c1 +size 964 diff --git a/attention_kindselective_n_heads2_seed1341/dataloader_49999.pt b/attention_kindselective_n_heads2_seed1341/dataloader_49999.pt new file mode 100644 index 0000000000000000000000000000000000000000..d87f88b62a343a49411f8a6feee8f527879fcd1f --- /dev/null +++ b/attention_kindselective_n_heads2_seed1341/dataloader_49999.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47776cddb8021172f048a950b83f25b692cb340214b800ce3837c15ceb58907c +size 964 diff --git a/attention_kindselective_n_heads2_seed1341/log2.txt b/attention_kindselective_n_heads2_seed1341/log2.txt index 65dbc62cbef4deca89167d2bcaa829a84f6d86b3..f526ec0f8bbf8e11898172597eadef1a90cf0000 100644 --- a/attention_kindselective_n_heads2_seed1341/log2.txt +++ b/attention_kindselective_n_heads2_seed1341/log2.txt @@ -1,560 +1,4401 @@ -max_steps: 10000 -1500 val loss 7.3014 -1500 val perplexity 1482.3606 -1500 train 7.226598 (lr=9.6149e-05) (hash(x)=49016270) -1600 val loss 7.2945 -1600 val perplexity 1472.1656 -1600 train 7.086260 (lr=9.5544e-05) (hash(x)=46100488) +max_steps: 50000 +max_steps: 50000 0 val loss 11.7712 0 val perplexity 129466.1094 -1700 val loss 7.2754 -1700 val perplexity 1444.2773 -1700 train 7.331821 (lr=9.4897e-05) (hash(x)=49185350) -1800 val loss 7.2540 -1800 val perplexity 1413.7415 -1800 train 7.142362 (lr=9.4209e-05) (hash(x)=48024574) -1900 val loss 7.2813 -1900 val perplexity 1452.8243 -1900 train 7.092393 (lr=9.3481e-05) (hash(x)=45823189) -0 train 11.770625 (lr=7.5000e-07) (hash(x)=47078120) -2000 val loss 7.2275 -2000 val perplexity 1376.7460 -2000 train 7.047328 (lr=9.2714e-05) (hash(x)=45703932) -100 val loss 9.4343 -100 val perplexity 12510.2705 -100 train 9.394177 (lr=7.5750e-05) (hash(x)=43429388) -2100 val loss 7.1969 -2100 val perplexity 1335.3459 -2100 train 7.812517 (lr=9.1908e-05) (hash(x)=58570170) -200 val loss 7.8396 -200 val perplexity 2539.0632 -200 train 8.055985 (lr=1.5000e-04) (hash(x)=52929681) -2200 val loss 7.1753 -2200 val perplexity 1306.7522 -2200 train 7.213409 (lr=9.1064e-05) (hash(x)=55262880) -300 val loss 7.6860 -300 val perplexity 2177.5486 -300 train 7.698088 (lr=1.4997e-04) (hash(x)=49930367) -2300 val loss 7.1819 -2300 val perplexity 1315.4077 -2300 train 6.962734 (lr=9.0182e-05) (hash(x)=46415497) -400 val loss 7.6539 -400 val perplexity 2108.8242 -2400 val loss 7.1252 -2400 val perplexity 1242.9492 -400 train 7.473444 (lr=1.4986e-04) (hash(x)=48542946) -2400 train 7.056499 (lr=8.9265e-05) (hash(x)=49272278) -2500 val loss 7.0784 -2500 val perplexity 1186.0667 -2500 train 6.929870 (lr=8.8313e-05) (hash(x)=48390803) -500 val loss 7.5836 -500 val perplexity 1965.7809 -500 train 7.809551 (lr=1.4969e-04) (hash(x)=55286048) -2600 val loss 7.0476 -2600 val perplexity 1150.1053 -2600 train 6.941715 (lr=8.7326e-05) (hash(x)=47450116) -600 val loss 7.5162 -600 val perplexity 1837.5521 -600 train 7.756805 (lr=1.4945e-04) (hash(x)=51577760) -2700 val loss 7.0309 -2700 val perplexity 1131.0837 -2700 train 7.128078 (lr=8.6306e-05) (hash(x)=52681152) -700 val loss 7.4236 -700 val perplexity 1675.0077 -700 train 7.572020 (lr=1.4913e-04) (hash(x)=57433471) -2800 val loss 6.9949 -2800 val perplexity 1091.0746 -2800 train 7.043819 (lr=8.5254e-05) (hash(x)=50664094) -800 val loss 7.3456 -800 val perplexity 1549.4303 -800 train 7.255013 (lr=1.4876e-04) (hash(x)=49799291) -2900 val loss 6.9631 -2900 val perplexity 1056.9043 -2900 train 6.776724 (lr=8.4170e-05) (hash(x)=47067144) -900 val loss 7.2976 -900 val perplexity 1476.7948 -900 train 7.272717 (lr=1.4831e-04) (hash(x)=49502839) -3000 val loss 6.9730 -3000 val perplexity 1067.3795 -3000 train 6.783483 (lr=8.3057e-05) (hash(x)=45015009) -1000 val loss 7.2546 -1000 val perplexity 1414.6605 -1000 train 7.555295 (lr=1.4779e-04) (hash(x)=51142904) -3100 val loss 6.9279 -3100 val perplexity 1020.3347 -3100 train 6.763106 (lr=8.1915e-05) (hash(x)=45245896) -1100 val loss 7.2519 -1100 val perplexity 1410.7919 -1100 train 7.300293 (lr=1.4721e-04) (hash(x)=52751086) -3200 val loss 6.9114 -3200 val perplexity 1003.6149 -3200 train 6.848857 (lr=8.0745e-05) (hash(x)=49995942) -1200 val loss 7.2147 -1200 val perplexity 1359.3206 -1200 train 7.214197 (lr=1.4656e-04) (hash(x)=51538621) -3300 val loss 6.8949 -3300 val perplexity 987.2419 -3300 train 6.779411 (lr=7.9549e-05) (hash(x)=52311504) -3400 val loss 6.8761 -3400 val perplexity 968.8881 -3400 train 6.842221 (lr=7.8328e-05) (hash(x)=44332917) -1300 val loss 7.1409 -1300 val perplexity 1262.5270 -1300 train 7.198671 (lr=1.4585e-04) (hash(x)=52034040) -3500 val loss 6.8513 -3500 val perplexity 945.1112 -3500 train 6.955316 (lr=7.7082e-05) (hash(x)=56517159) -1400 val loss 7.1240 -1400 val perplexity 1241.3577 -1400 train 7.189587 (lr=1.4507e-04) (hash(x)=50640105) -3600 val loss 6.8735 -3600 val perplexity 966.3168 -3600 train 6.768483 (lr=7.5814e-05) (hash(x)=50720920) -1500 val loss 7.0398 -1500 val perplexity 1141.1345 -1500 train 6.955029 (lr=1.4422e-04) (hash(x)=49016270) -3700 val loss 6.8349 -3700 val perplexity 929.7078 -3700 train 7.343882 (lr=7.4525e-05) (hash(x)=62727701) -1600 val loss 6.9730 -1600 val perplexity 1067.4009 -1600 train 6.752039 (lr=1.4332e-04) (hash(x)=46100488) -3800 val loss 6.8337 -3800 val perplexity 928.6059 -3800 train 6.693852 (lr=7.3215e-05) (hash(x)=54772539) -3900 val loss 6.8087 -3900 val perplexity 905.6780 -3900 train 6.816524 (lr=7.1887e-05) (hash(x)=52274485) -1700 val loss 6.9253 -1700 val perplexity 1017.7491 -1700 train 6.972584 (lr=1.4235e-04) (hash(x)=49185350) -4000 val loss 6.7848 -4000 val perplexity 884.2670 -4000 train 6.490056 (lr=7.0541e-05) (hash(x)=50118307) -1800 val loss 6.9016 -1800 val perplexity 993.8886 -1800 train 6.798162 (lr=1.4131e-04) (hash(x)=48024574) -4100 val loss 6.7726 -4100 val perplexity 873.5496 -4100 train 6.365652 (lr=6.9180e-05) (hash(x)=42771647) -1900 val loss 6.8735 -1900 val perplexity 966.3652 -1900 train 6.644013 (lr=1.4022e-04) (hash(x)=45823189) -4200 val loss 6.7686 -4200 val perplexity 870.0539 -4200 train 6.847145 (lr=6.7804e-05) (hash(x)=51748836) -2000 val loss 6.8751 -2000 val perplexity 967.8617 -2000 train 6.675347 (lr=1.3907e-04) (hash(x)=45703932) -4300 val loss 6.7499 -4300 val perplexity 853.9985 -4300 train 6.751280 (lr=6.6414e-05) (hash(x)=49021280) -2100 val loss 6.8334 -2100 val perplexity 928.3230 -2100 train 7.513886 (lr=1.3786e-04) (hash(x)=58570170) -4400 val loss 6.7124 -4400 val perplexity 822.5333 -4400 train 6.766258 (lr=6.5013e-05) (hash(x)=55200309) -2200 val loss 6.8604 -2200 val perplexity 953.7519 -2200 train 6.891486 (lr=1.3660e-04) (hash(x)=55262880) -4500 val loss 6.7050 -4500 val perplexity 816.5079 -4500 train 6.752545 (lr=6.3602e-05) (hash(x)=52085049) -2300 val loss 6.8071 -2300 val perplexity 904.2579 -2300 train 6.533747 (lr=1.3527e-04) (hash(x)=46415497) -4600 val loss 6.6968 -4600 val perplexity 809.8085 -4600 train 6.673974 (lr=6.2182e-05) (hash(x)=48935595) -2400 val loss 6.8048 -2400 val perplexity 902.1807 -2400 train 6.739892 (lr=1.3390e-04) (hash(x)=49272278) -4700 val loss 6.6875 -4700 val perplexity 802.3204 -4700 train 6.886833 (lr=6.0754e-05) (hash(x)=49182380) -2500 val loss 6.8094 -2500 val perplexity 906.3088 -2500 train 6.669321 (lr=1.3247e-04) (hash(x)=48390803) -4800 val loss 6.6605 -4800 val perplexity 780.9518 -4800 train 6.491550 (lr=5.9321e-05) (hash(x)=43941929) -2600 val loss 6.7963 -2600 val perplexity 894.5168 -2600 train 6.670294 (lr=1.3099e-04) (hash(x)=47450116) -4900 val loss 6.6521 -4900 val perplexity 774.4189 -4900 train 6.824918 (lr=5.7883e-05) (hash(x)=51852773) -2700 val loss 6.7732 -2700 val perplexity 874.0683 -2700 train 6.866146 (lr=1.2946e-04) (hash(x)=52681152) -5000 val loss 6.6418 -5000 val perplexity 766.5018 -5000 train 6.268013 (lr=5.6442e-05) (hash(x)=40509616) -2800 val loss 6.7278 -2800 val perplexity 835.3218 -2800 train 6.812080 (lr=1.2788e-04) (hash(x)=50664094) -5100 val loss 6.6423 -5100 val perplexity 766.8707 -5100 train 6.882291 (lr=5.5000e-05) (hash(x)=57585369) -2900 val loss 6.7396 -2900 val perplexity 845.1917 -2900 train 6.555730 (lr=1.2626e-04) (hash(x)=47067144) -5200 val loss 6.6396 -5200 val perplexity 764.7626 -5200 train 6.626321 (lr=5.3558e-05) (hash(x)=51042313) -3000 val loss 6.7224 -3000 val perplexity 830.8346 -3000 train 6.528443 (lr=1.2459e-04) (hash(x)=45015009) -5300 val loss 6.6180 -5300 val perplexity 748.4346 -5300 train 6.767743 (lr=5.2117e-05) (hash(x)=52001684) -3100 val loss 6.7166 -3100 val perplexity 826.0220 -3100 train 6.519020 (lr=1.2287e-04) (hash(x)=45245896) -5400 val loss 6.6201 -5400 val perplexity 750.0352 -5400 train 6.522018 (lr=5.0679e-05) (hash(x)=48831647) -3200 val loss 6.7114 -3200 val perplexity 821.6827 -3200 train 6.646773 (lr=1.2112e-04) (hash(x)=49995942) -5500 val loss 6.6321 -5500 val perplexity 759.0754 -5500 train 7.037599 (lr=4.9246e-05) (hash(x)=50192069) -3300 val loss 6.6904 -3300 val perplexity 804.6768 -3300 train 6.555431 (lr=1.1932e-04) (hash(x)=52311504) -5600 val loss 6.6034 -5600 val perplexity 737.5775 -5600 train 6.437107 (lr=4.7818e-05) (hash(x)=47208852) -3400 val loss 6.6894 -3400 val perplexity 803.8519 -3400 train 6.636265 (lr=1.1749e-04) (hash(x)=44332917) -5700 val loss 6.6001 -5700 val perplexity 735.1389 -5700 train 6.192906 (lr=4.6398e-05) (hash(x)=44061694) -3500 val loss 6.6821 -3500 val perplexity 798.0287 -3500 train 6.763896 (lr=1.1562e-04) (hash(x)=56517159) -5800 val loss 6.5920 -5800 val perplexity 729.2386 -5800 train 6.948812 (lr=4.4987e-05) (hash(x)=56513279) -5900 val loss 6.5843 -5900 val perplexity 723.6619 -3600 val loss 6.6827 -3600 val perplexity 798.4920 -5900 train 6.729167 (lr=4.3586e-05) (hash(x)=50412818) -3600 train 6.536693 (lr=1.1372e-04) (hash(x)=50720920) -6000 val loss 6.5736 -6000 val perplexity 715.9265 -6000 train 6.400131 (lr=4.2196e-05) (hash(x)=47159634) -3700 val loss 6.6724 -3700 val perplexity 790.3077 -3700 train 7.203786 (lr=1.1179e-04) (hash(x)=62727701) -6100 val loss 6.5569 -6100 val perplexity 704.1142 -6100 train 6.635352 (lr=4.0820e-05) (hash(x)=54312795) -3800 val loss 6.6596 -3800 val perplexity 780.2100 -3800 train 6.530156 (lr=1.0982e-04) (hash(x)=54772539) -6200 val loss 6.5587 -6200 val perplexity 705.3464 -6200 train 6.674115 (lr=3.9459e-05) (hash(x)=54187587) -3900 val loss 6.6374 -3900 val perplexity 763.1033 -3900 train 6.637521 (lr=1.0783e-04) (hash(x)=52274485) -6300 val loss 6.5444 -6300 val perplexity 695.3438 -6300 train 6.631743 (lr=3.8113e-05) (hash(x)=53620387) -4000 val loss 6.6360 -4000 val perplexity 762.0699 -4000 train 6.343906 (lr=1.0581e-04) (hash(x)=50118307) -6400 val loss 6.5333 -6400 val perplexity 687.6434 -6400 train 6.477994 (lr=3.6785e-05) (hash(x)=48761774) -4100 val loss 6.6389 -4100 val perplexity 764.2654 -4100 train 6.220299 (lr=1.0377e-04) (hash(x)=42771647) -6500 val loss 6.5243 -6500 val perplexity 681.5228 -6500 train 6.738882 (lr=3.5475e-05) (hash(x)=56690281) -4200 val loss 6.6299 -4200 val perplexity 757.4057 -4200 train 6.721921 (lr=1.0171e-04) (hash(x)=51748836) -6600 val loss 6.5221 -6600 val perplexity 679.9893 -6600 train 6.334319 (lr=3.4186e-05) (hash(x)=42985269) -4300 val loss 6.6093 -4300 val perplexity 741.9653 -4300 train 6.632129 (lr=9.9622e-05) (hash(x)=49021280) -6700 val loss 6.5310 -6700 val perplexity 686.0870 -6700 train 6.588527 (lr=3.2918e-05) (hash(x)=53315447) -4400 val loss 6.5829 -4400 val perplexity 722.6357 -4400 train 6.619546 (lr=9.7520e-05) (hash(x)=55200309) -6800 val loss 6.5043 -6800 val perplexity 667.9774 -6800 train 6.952129 (lr=3.1672e-05) (hash(x)=61577166) -4500 val loss 6.5829 -4500 val perplexity 722.6595 -4500 train 6.642871 (lr=9.5403e-05) (hash(x)=52085049) -6900 val loss 6.5010 -6900 val perplexity 665.8378 -6900 train 6.649751 (lr=3.0451e-05) (hash(x)=54641005) -4600 val loss 6.5709 -4600 val perplexity 714.0108 -4600 train 6.567044 (lr=9.3273e-05) (hash(x)=48935595) -7000 val loss 6.4974 -7000 val perplexity 663.4460 -7000 train 7.009356 (lr=2.9255e-05) (hash(x)=60579512) -4700 val loss 6.5487 -4700 val perplexity 698.3546 -4700 train 6.793222 (lr=9.1132e-05) (hash(x)=49182380) -7100 val loss 6.4923 -7100 val perplexity 660.0073 -7100 train 6.391061 (lr=2.8085e-05) (hash(x)=53151549) -4800 val loss 6.5477 -4800 val perplexity 697.6464 -4800 train 6.381069 (lr=8.8982e-05) (hash(x)=43941929) -7200 val loss 6.4895 -7200 val perplexity 658.1876 -7200 train 7.420212 (lr=2.6943e-05) (hash(x)=71842455) -4900 val loss 6.5418 -4900 val perplexity 693.5468 -4900 train 6.709642 (lr=8.6825e-05) (hash(x)=51852773) -7300 val loss 6.4850 -7300 val perplexity 655.2300 -7300 train 6.246530 (lr=2.5830e-05) (hash(x)=44516452) -5000 val loss 6.5309 -5000 val perplexity 686.0157 -5000 train 6.152505 (lr=8.4663e-05) (hash(x)=40509616) -7400 val loss 6.4924 -7400 val perplexity 660.0942 -7400 train 6.132852 (lr=2.4746e-05) (hash(x)=42667710) -5100 val loss 6.5262 -5100 val perplexity 682.7995 -5100 train 6.765361 (lr=8.2500e-05) (hash(x)=57585369) -7500 val loss 6.4816 -7500 val perplexity 653.0245 -7500 train 6.229507 (lr=2.3694e-05) (hash(x)=47050797) -5200 val loss 6.5454 -5200 val perplexity 696.0334 -5200 train 6.526903 (lr=8.0337e-05) (hash(x)=51042313) -7600 val loss 6.4780 -7600 val perplexity 650.6663 -7600 train 6.379726 (lr=2.2674e-05) (hash(x)=49785056) -5300 val loss 6.5289 -5300 val perplexity 684.6187 -5300 train 6.668643 (lr=7.8175e-05) (hash(x)=52001684) -7700 val loss 6.4767 -7700 val perplexity 649.8347 -7700 train 6.284160 (lr=2.1687e-05) (hash(x)=53232030) -5400 val loss 6.5403 -5400 val perplexity 692.4857 -5400 train 6.453519 (lr=7.6018e-05) (hash(x)=48831647) -7800 val loss 6.4654 -7800 val perplexity 642.5144 -7800 train 6.285869 (lr=2.0735e-05) (hash(x)=48049749) -5500 val loss 6.5345 -5500 val perplexity 688.4803 -5500 train 6.949864 (lr=7.3868e-05) (hash(x)=50192069) -7900 val loss 6.4628 -7900 val perplexity 640.8224 -7900 train 6.296690 (lr=1.9818e-05) (hash(x)=44768513) -5600 val loss 6.5215 -5600 val perplexity 679.6085 -5600 train 6.368748 (lr=7.1727e-05) (hash(x)=47208852) -8000 val loss 6.4540 -8000 val perplexity 635.2330 -8000 train 6.295214 (lr=1.8936e-05) (hash(x)=46228039) -5700 val loss 6.5209 -5700 val perplexity 679.1575 -5700 train 6.097320 (lr=6.9597e-05) (hash(x)=44061694) -8100 val loss 6.4509 -8100 val perplexity 633.2829 -8100 train 6.780039 (lr=1.8092e-05) (hash(x)=60017091) -5800 val loss 6.5331 -5800 val perplexity 687.5231 -5800 train 6.887940 (lr=6.7480e-05) (hash(x)=56513279) -8200 val loss 6.4417 -8200 val perplexity 627.4558 -8200 train 6.451499 (lr=1.7286e-05) (hash(x)=49910198) -5900 val loss 6.5202 -5900 val perplexity 678.6848 -8300 val loss 6.4402 -8300 val perplexity 626.5251 -5900 train 6.650718 (lr=6.5378e-05) (hash(x)=50412818) -8300 train 6.745976 (lr=1.6519e-05) (hash(x)=57919055) -8400 val loss 6.4405 -8400 val perplexity 626.7328 -8400 train 6.515765 (lr=1.5791e-05) (hash(x)=49694964) -6000 val loss 6.5113 -6000 val perplexity 672.6966 -6000 train 6.349458 (lr=6.3294e-05) (hash(x)=47159634) -8500 val loss 6.4337 -8500 val perplexity 622.4640 -8500 train 6.469025 (lr=1.5103e-05) (hash(x)=53762585) -6100 val loss 6.4893 -6100 val perplexity 658.0756 -6100 train 6.571654 (lr=6.1230e-05) (hash(x)=54312795) -8600 val loss 6.4334 -8600 val perplexity 622.3082 -8600 train 6.465587 (lr=1.4456e-05) (hash(x)=51166973) -6200 val loss 6.4849 -6200 val perplexity 655.2000 -6200 train 6.599206 (lr=5.9188e-05) (hash(x)=54187587) -8700 val loss 6.4311 -8700 val perplexity 620.8644 -8700 train 6.505373 (lr=1.3851e-05) (hash(x)=53968049) -6300 val loss 6.4821 -6300 val perplexity 653.3412 -6300 train 6.581801 (lr=5.7169e-05) (hash(x)=53620387) -8800 val loss 6.4265 -8800 val perplexity 617.9882 -8800 train 6.509236 (lr=1.3289e-05) (hash(x)=59231056) -6400 val loss 6.4599 -6400 val perplexity 638.9672 -6400 train 6.395986 (lr=5.5177e-05) (hash(x)=48761774) -8900 val loss 6.4245 -8900 val perplexity 616.7817 -8900 train 6.301777 (lr=1.2769e-05) (hash(x)=50488048) -6500 val loss 6.4580 -6500 val perplexity 637.7895 -6500 train 6.647865 (lr=5.3213e-05) (hash(x)=56690281) -9000 val loss 6.4237 -9000 val perplexity 616.2996 -9000 train 6.162522 (lr=1.2292e-05) (hash(x)=44492956) -6600 val loss 6.4526 -6600 val perplexity 634.3764 -6600 train 6.277388 (lr=5.1279e-05) (hash(x)=42985269) -9100 val loss 6.4321 -9100 val perplexity 621.4722 -9100 train 6.460136 (lr=1.1860e-05) (hash(x)=51134989) -6700 val loss 6.4537 -6700 val perplexity 635.0168 -6700 train 6.495444 (lr=4.9377e-05) (hash(x)=53315447) -9200 val loss 6.4255 -9200 val perplexity 617.3776 -9200 train 6.232402 (lr=1.1472e-05) (hash(x)=48636056) -6800 val loss 6.4483 -6800 val perplexity 631.6550 -6800 train 6.872110 (lr=4.7509e-05) (hash(x)=61577166) -9300 val loss 6.4248 -9300 val perplexity 616.9818 -9300 train 6.371828 (lr=1.1128e-05) (hash(x)=50200551) -6900 val loss 6.4408 -6900 val perplexity 626.9297 -6900 train 6.579897 (lr=4.5676e-05) (hash(x)=54641005) -9400 val loss 6.4224 -9400 val perplexity 615.4691 -9400 train 6.256755 (lr=1.0830e-05) (hash(x)=48057228) -7000 val loss 6.4378 -7000 val perplexity 625.0102 -7000 train 6.968445 (lr=4.3882e-05) (hash(x)=60579512) -9500 val loss 6.4180 -9500 val perplexity 612.8030 -9500 train 6.189470 (lr=1.0577e-05) (hash(x)=48125171) -7100 val loss 6.4326 -7100 val perplexity 621.8027 -7100 train 6.320305 (lr=4.2128e-05) (hash(x)=53151549) -9600 val loss 6.4210 -9600 val perplexity 614.6295 -9600 train 6.340978 (lr=1.0369e-05) (hash(x)=53375853) -7200 val loss 6.4300 -7200 val perplexity 620.1948 -7200 train 7.380653 (lr=4.0414e-05) (hash(x)=71842455) -9700 val loss 6.4135 -9700 val perplexity 610.0555 -9700 train 7.223229 (lr=1.0208e-05) (hash(x)=53924631) -7300 val loss 6.4297 -7300 val perplexity 620.0109 -7300 train 6.191623 (lr=3.8745e-05) (hash(x)=44516452) -9800 val loss 6.4102 -9800 val perplexity 608.0391 -9800 train 6.454649 (lr=1.0092e-05) (hash(x)=48895047) -7400 val loss 6.4289 -7400 val perplexity 619.4763 -7400 train 6.074793 (lr=3.7120e-05) (hash(x)=42667710) -9900 val loss 6.4071 -9900 val perplexity 606.1632 -9900 train 6.266374 (lr=1.0023e-05) (hash(x)=44269923) -7500 val loss 6.4316 -7500 val perplexity 621.1490 -7500 train 6.169018 (lr=3.5541e-05) (hash(x)=47050797) -9999 val loss 6.4119 -9999 val perplexity 609.0312 -7600 val loss 6.4237 -7600 val perplexity 616.2653 -7600 train 6.329450 (lr=3.4011e-05) (hash(x)=49785056) -7700 val loss 6.4194 -7700 val perplexity 613.6569 -7700 train 6.231349 (lr=3.2531e-05) (hash(x)=53232030) -7800 val loss 6.4186 -7800 val perplexity 613.1287 -7800 train 6.243061 (lr=3.1102e-05) (hash(x)=48049749) -7900 val loss 6.4043 -7900 val perplexity 604.4531 -7900 train 6.246456 (lr=2.9726e-05) (hash(x)=44768513) -8000 val loss 6.4115 -8000 val perplexity 608.7827 -8000 train 6.265230 (lr=2.8405e-05) (hash(x)=46228039) -8100 val loss 6.4030 -8100 val perplexity 603.6253 -8100 train 6.710750 (lr=2.7138e-05) (hash(x)=60017091) -8200 val loss 6.3952 -8200 val perplexity 598.9441 -8200 train 6.407417 (lr=2.5929e-05) (hash(x)=49910198) -8300 val loss 6.3936 -8300 val perplexity 598.0146 -8300 train 6.694312 (lr=2.4778e-05) (hash(x)=57919055) -8400 val loss 6.3967 -8400 val perplexity 599.8673 -8400 train 6.466394 (lr=2.3686e-05) (hash(x)=49694964) -8500 val loss 6.3893 -8500 val perplexity 595.4279 -8500 train 6.434396 (lr=2.2655e-05) (hash(x)=53762585) -8600 val loss 6.3880 -8600 val perplexity 594.6692 -8600 train 6.429276 (lr=2.1685e-05) (hash(x)=51166973) -8700 val loss 6.3826 -8700 val perplexity 591.4665 -8700 train 6.472390 (lr=2.0777e-05) (hash(x)=53968049) -8800 val loss 6.3809 -8800 val perplexity 590.4394 -8800 train 6.468944 (lr=1.9933e-05) (hash(x)=59231056) -8900 val loss 6.3768 -8900 val perplexity 588.0292 -8900 train 6.261299 (lr=1.9153e-05) (hash(x)=50488048) -9000 val loss 6.3776 -9000 val perplexity 588.4850 -9000 train 6.119425 (lr=1.8439e-05) (hash(x)=44492956) -9100 val loss 6.3821 -9100 val perplexity 591.1530 -9100 train 6.412546 (lr=1.7790e-05) (hash(x)=51134989) -9200 val loss 6.3779 -9200 val perplexity 588.6931 -9200 train 6.194124 (lr=1.7208e-05) (hash(x)=48636056) -9300 val loss 6.3750 -9300 val perplexity 587.0090 -9300 train 6.325751 (lr=1.6692e-05) (hash(x)=50200551) -9400 val loss 6.3751 -9400 val perplexity 587.0705 -9400 train 6.198377 (lr=1.6245e-05) (hash(x)=48057228) -9500 val loss 6.3753 -9500 val perplexity 587.1808 -9500 train 6.169206 (lr=1.5865e-05) (hash(x)=48125171) -9600 val loss 6.3750 -9600 val perplexity 587.0090 -9600 train 6.290072 (lr=1.5554e-05) (hash(x)=53375853) -9700 val loss 6.3654 -9700 val perplexity 581.3495 -9700 train 7.198069 (lr=1.5312e-05) (hash(x)=53924631) -9800 val loss 6.3617 -9800 val perplexity 579.2410 -9800 train 6.411182 (lr=1.5139e-05) (hash(x)=48895047) -9900 val loss 6.3613 -9900 val perplexity 579.0135 -9900 train 6.211933 (lr=1.5035e-05) (hash(x)=44269923) -9999 val loss 6.3711 -9999 val perplexity 584.7159 +0 val loss 11.7712 +0 val perplexity 129466.1094 +0 val loss 11.7712 +0 val perplexity 129466.1094 +0 train 11.770625 (lr=2.5000e-07) (hash(x)=47078120) +0 train 11.770625 (lr=3.5000e-07) (hash(x)=47078120) +0 train 11.770625 (lr=5.0000e-07) (hash(x)=47078120) +100 val loss 10.2033 +100 val perplexity 26992.5645 +100 train 10.206360 (lr=2.5250e-05) (hash(x)=43429388) +100 val loss 10.0565 +100 val perplexity 23306.7559 +100 train 10.048087 (lr=3.5350e-05) (hash(x)=43429388) +100 val loss 9.8951 +100 val perplexity 19833.2090 +100 train 9.880125 (lr=5.0500e-05) (hash(x)=43429388) +200 val loss 8.5846 +200 val perplexity 5348.3901 +200 train 8.746173 (lr=5.0000e-05) (hash(x)=52929681) +200 val loss 8.3626 +200 val perplexity 4284.0156 +200 train 8.533326 (lr=7.0000e-05) (hash(x)=52929681) +200 val loss 8.3264 +200 val perplexity 4131.3823 +200 train 8.494742 (lr=1.0000e-04) (hash(x)=52929681) +300 val loss 7.8316 +300 val perplexity 2518.9448 +300 train 7.851068 (lr=5.0000e-05) (hash(x)=49930367) +300 val loss 7.8070 +300 val perplexity 2457.8367 +300 train 7.819790 (lr=6.9999e-05) (hash(x)=49930367) +300 val loss 7.8235 +300 val perplexity 2498.6895 +300 train 7.841080 (lr=9.9999e-05) (hash(x)=49930367) +400 val loss 7.6793 +400 val perplexity 2163.0315 +400 train 7.493921 (lr=4.9998e-05) (hash(x)=48542946) +400 val loss 7.6705 +400 val perplexity 2144.2366 +400 train 7.483693 (lr=6.9997e-05) (hash(x)=48542946) +400 val loss 7.6975 +400 val perplexity 2202.8335 +400 train 7.526471 (lr=9.9996e-05) (hash(x)=48542946) +500 val loss 7.6384 +500 val perplexity 2076.3428 +500 train 7.835117 (lr=4.9996e-05) (hash(x)=55286048) +500 val loss 7.6544 +500 val perplexity 2109.8975 +500 train 7.862077 (lr=6.9994e-05) (hash(x)=55286048) +500 val loss 7.6798 +500 val perplexity 2164.1704 +500 train 7.879671 (lr=9.9992e-05) (hash(x)=55286048) +600 val loss 7.6124 +600 val perplexity 2023.1598 +600 train 7.864288 (lr=4.9993e-05) (hash(x)=51577760) +600 val loss 7.6192 +600 val perplexity 2036.9033 +600 train 7.893260 (lr=6.9990e-05) (hash(x)=51577760) +600 val loss 7.6376 +600 val perplexity 2074.8047 +600 train 7.888408 (lr=9.9986e-05) (hash(x)=51577760) +700 val loss 7.6056 +700 val perplexity 2009.3633 +700 train 7.753090 (lr=4.9989e-05) (hash(x)=57433471) +700 val loss 7.6017 +700 val perplexity 2001.4990 +700 train 7.751354 (lr=6.9984e-05) (hash(x)=57433471) +700 val loss 7.6035 +700 val perplexity 2005.2074 +700 train 7.749254 (lr=9.9978e-05) (hash(x)=57433471) +800 val loss 7.6036 +800 val perplexity 2005.4731 +800 train 7.522329 (lr=4.9984e-05) (hash(x)=49799291) +800 val loss 7.6126 +800 val perplexity 2023.5988 +800 train 7.552643 (lr=6.9977e-05) (hash(x)=49799291) +800 val loss 7.5791 +800 val perplexity 1956.7739 +800 train 7.513973 (lr=9.9968e-05) (hash(x)=49799291) +900 val loss 7.6089 +900 val perplexity 2016.0988 +900 train 7.618979 (lr=4.9978e-05) (hash(x)=49502839) +900 val loss 7.6029 +900 val perplexity 2004.0411 +900 train 7.605560 (lr=6.9969e-05) (hash(x)=49502839) +900 val loss 7.5441 +900 val perplexity 1889.4799 +900 train 7.559354 (lr=9.9956e-05) (hash(x)=49502839) +1000 val loss 7.5819 +1000 val perplexity 1962.3719 +1000 train 7.842342 (lr=4.9971e-05) (hash(x)=51142904) +1000 val loss 7.6253 +1000 val perplexity 2049.4331 +1000 train 7.876435 (lr=6.9960e-05) (hash(x)=51142904) +1100 val loss 7.5698 +1100 val perplexity 1938.7089 +1100 train 7.650688 (lr=4.9964e-05) (hash(x)=52751086) +1000 val loss 7.5053 +1000 val perplexity 1817.6809 +1000 train 7.764091 (lr=9.9943e-05) (hash(x)=51142904) +1100 val loss 7.5860 +1100 val perplexity 1970.4799 +1100 train 7.674089 (lr=6.9949e-05) (hash(x)=52751086) +1200 val loss 7.5671 +1200 val perplexity 1933.4513 +1200 train 7.568090 (lr=4.9955e-05) (hash(x)=51538621) +1200 val loss 7.5782 +1200 val perplexity 1955.1324 +1200 train 7.595935 (lr=6.9937e-05) (hash(x)=51538621) +1100 val loss 7.4754 +1100 val perplexity 1764.1299 +1100 train 7.548927 (lr=9.9927e-05) (hash(x)=52751086) +1300 val loss 7.5720 +1300 val perplexity 1942.9651 +1300 train 7.642082 (lr=4.9946e-05) (hash(x)=52034040) +1300 val loss 7.5800 +1300 val perplexity 1958.6017 +1300 train 7.652497 (lr=6.9924e-05) (hash(x)=52034040) +1200 val loss 7.4633 +1200 val perplexity 1742.9686 +1200 train 7.467877 (lr=9.9910e-05) (hash(x)=51538621) +1400 val loss 7.5476 +1400 val perplexity 1896.1561 +1400 train 7.586929 (lr=4.9936e-05) (hash(x)=50640105) +1400 val loss 7.5772 +1400 val perplexity 1953.2203 +1400 train 7.614067 (lr=6.9910e-05) (hash(x)=50640105) +1300 val loss 7.4409 +1300 val perplexity 1704.1986 +1300 train 7.514560 (lr=9.9892e-05) (hash(x)=52034040) +1500 val loss 7.5311 +1500 val perplexity 1865.0898 +1500 train 7.483514 (lr=4.9924e-05) (hash(x)=49016270) +1500 val loss 7.5687 +1500 val perplexity 1936.6226 +1500 train 7.509407 (lr=6.9894e-05) (hash(x)=49016270) +1400 val loss 7.4407 +1400 val perplexity 1704.0084 +1400 train 7.479903 (lr=9.9871e-05) (hash(x)=50640105) +1600 val loss 7.5091 +1600 val perplexity 1824.6228 +1600 train 7.283084 (lr=4.9912e-05) (hash(x)=46100488) +1600 val loss 7.5822 +1600 val perplexity 1962.8669 +1600 train 7.371550 (lr=6.9877e-05) (hash(x)=46100488) +1500 val loss 7.4013 +1500 val perplexity 1638.1898 +1500 train 7.332983 (lr=9.9849e-05) (hash(x)=49016270) +1700 val loss 7.4860 +1700 val perplexity 1782.9028 +1700 train 7.526432 (lr=4.9899e-05) (hash(x)=49185350) +1700 val loss 7.5687 +1700 val perplexity 1936.6531 +1700 train 7.600106 (lr=6.9859e-05) (hash(x)=49185350) +1600 val loss 7.3796 +1600 val perplexity 1602.8821 +1600 train 7.147182 (lr=9.9825e-05) (hash(x)=46100488) +1800 val loss 7.4800 +1800 val perplexity 1772.1639 +1800 train 7.367154 (lr=4.9885e-05) (hash(x)=48024574) +1800 val loss 7.5553 +1800 val perplexity 1910.8547 +1800 train 7.461845 (lr=6.9840e-05) (hash(x)=48024574) +1700 val loss 7.3547 +1700 val perplexity 1563.4885 +1700 train 7.391090 (lr=9.9799e-05) (hash(x)=49185350) +1900 val loss 7.4720 +1900 val perplexity 1758.0442 +1900 train 7.292343 (lr=4.9871e-05) (hash(x)=45823189) +1900 val loss 7.5419 +1900 val perplexity 1885.3346 +1900 train 7.362976 (lr=6.9819e-05) (hash(x)=45823189) +1800 val loss 7.3457 +1800 val perplexity 1549.4635 +1800 train 7.229240 (lr=9.9771e-05) (hash(x)=48024574) +2000 val loss 7.4559 +2000 val perplexity 1730.0703 +2000 train 7.296420 (lr=4.9855e-05) (hash(x)=45703932) +2000 val loss 7.5337 +2000 val perplexity 1869.9216 +2000 train 7.385001 (lr=6.9797e-05) (hash(x)=45703932) +2100 val loss 7.4495 +2100 val perplexity 1719.0724 +2100 train 8.025798 (lr=4.9839e-05) (hash(x)=58570170) +1900 val loss 7.3341 +1900 val perplexity 1531.6666 +2100 val loss 7.4990 +2100 val perplexity 1806.2311 +1900 train 7.152208 (lr=9.9741e-05) (hash(x)=45823189) +2100 train 8.082744 (lr=6.9774e-05) (hash(x)=58570170) +2200 val loss 7.4700 +2200 val perplexity 1754.6849 +2200 train 7.494737 (lr=4.9821e-05) (hash(x)=55262880) +2200 val loss 7.4779 +2200 val perplexity 1768.5037 +2200 train 7.508498 (lr=6.9750e-05) (hash(x)=55262880) +2000 val loss 7.3298 +2000 val perplexity 1525.1315 +2000 train 7.174179 (lr=9.9710e-05) (hash(x)=45703932) +2300 val loss 7.4234 +2300 val perplexity 1674.7913 +2300 train 7.192656 (lr=4.9803e-05) (hash(x)=46415497) +2300 val loss 7.4373 +2300 val perplexity 1698.2014 +2300 train 7.212271 (lr=6.9724e-05) (hash(x)=46415497) +2100 val loss 7.3045 +2100 val perplexity 1486.9984 +2100 train 7.906702 (lr=9.9677e-05) (hash(x)=58570170) +2400 val loss 7.4013 +2400 val perplexity 1638.1156 +2400 train 7.342383 (lr=4.9784e-05) (hash(x)=49272278) +2400 val loss 7.3972 +2400 val perplexity 1631.4749 +2400 train 7.336201 (lr=6.9697e-05) (hash(x)=49272278) +2200 val loss 7.3089 +2200 val perplexity 1493.5055 +2200 train 7.352629 (lr=9.9642e-05) (hash(x)=55262880) +2500 val loss 7.3858 +2500 val perplexity 1612.8698 +2500 train 7.269034 (lr=4.9764e-05) (hash(x)=48390803) +2500 val loss 7.3657 +2500 val perplexity 1580.8145 +2500 train 7.247119 (lr=6.9669e-05) (hash(x)=48390803) +2300 val loss 7.3411 +2300 val perplexity 1542.3964 +2300 train 7.163018 (lr=9.9606e-05) (hash(x)=46415497) +2600 val loss 7.3594 +2600 val perplexity 1570.8942 +2600 train 7.243450 (lr=4.9743e-05) (hash(x)=47450116) +2600 val loss 7.3575 +2600 val perplexity 1567.9031 +2600 train 7.243428 (lr=6.9640e-05) (hash(x)=47450116) +2400 val loss 7.3191 +2400 val perplexity 1508.7885 +2400 train 7.264038 (lr=9.9567e-05) (hash(x)=49272278) +2700 val loss 7.3460 +2700 val perplexity 1549.9852 +2700 train 7.436283 (lr=4.9721e-05) (hash(x)=52681152) +2700 val loss 7.3453 +2700 val perplexity 1548.8527 +2700 train 7.419794 (lr=6.9609e-05) (hash(x)=52681152) +2500 val loss 7.2346 +2500 val perplexity 1386.6477 +2500 train 7.094385 (lr=9.9527e-05) (hash(x)=48390803) +2800 val loss 7.3179 +2800 val perplexity 1507.0958 +2800 train 7.384782 (lr=4.9698e-05) (hash(x)=50664094) +2800 val loss 7.3382 +2800 val perplexity 1537.8748 +2800 train 7.414604 (lr=6.9577e-05) (hash(x)=50664094) +2900 val loss 7.3156 +2900 val perplexity 1503.5679 +2900 train 7.132506 (lr=4.9674e-05) (hash(x)=47067144) +2600 val loss 7.2024 +2600 val perplexity 1342.6188 +2600 train 7.080667 (lr=9.9485e-05) (hash(x)=47450116) +2900 val loss 7.3248 +2900 val perplexity 1517.5225 +2900 train 7.166916 (lr=6.9544e-05) (hash(x)=47067144) +3000 val loss 7.2966 +3000 val perplexity 1475.3104 +3000 train 7.111231 (lr=4.9650e-05) (hash(x)=45015009) +2700 val loss 7.1974 +2700 val perplexity 1335.9701 +2700 train 7.291355 (lr=9.9442e-05) (hash(x)=52681152) +3000 val loss 7.3050 +3000 val perplexity 1487.7729 +3000 train 7.111160 (lr=6.9510e-05) (hash(x)=45015009) +3100 val loss 7.2757 +3100 val perplexity 1444.8168 +3100 train 7.088307 (lr=4.9625e-05) (hash(x)=45245896) +3100 val loss 7.2994 +3100 val perplexity 1479.3949 +3100 train 7.128348 (lr=6.9474e-05) (hash(x)=45245896) +2800 val loss 7.1475 +2800 val perplexity 1270.9235 +2800 train 7.205905 (lr=9.9396e-05) (hash(x)=50664094) +3200 val loss 7.2659 +3200 val perplexity 1430.6354 +3200 train 7.239969 (lr=4.9598e-05) (hash(x)=49995942) +3200 val loss 7.2771 +3200 val perplexity 1446.7656 +3200 train 7.227925 (lr=6.9438e-05) (hash(x)=49995942) +2900 val loss 7.1312 +2900 val perplexity 1250.3710 +2900 train 6.949361 (lr=9.9349e-05) (hash(x)=47067144) +3300 val loss 7.2506 +3300 val perplexity 1408.8812 +3300 train 7.149277 (lr=4.9571e-05) (hash(x)=52311504) +3300 val loss 7.2734 +3300 val perplexity 1441.4346 +3300 train 7.174790 (lr=6.9400e-05) (hash(x)=52311504) +3000 val loss 7.1105 +3000 val perplexity 1224.7368 +3000 train 6.942707 (lr=9.9300e-05) (hash(x)=45015009) +3400 val loss 7.2187 +3400 val perplexity 1364.7137 +3400 train 7.215257 (lr=4.9543e-05) (hash(x)=44332917) +3400 val loss 7.2563 +3400 val perplexity 1417.0701 +3400 train 7.229104 (lr=6.9360e-05) (hash(x)=44332917) +3100 val loss 7.0858 +3100 val perplexity 1194.9200 +3100 train 6.913786 (lr=9.9249e-05) (hash(x)=45245896) +3500 val loss 7.1893 +3500 val perplexity 1325.1709 +3500 train 7.291782 (lr=4.9514e-05) (hash(x)=56517159) +3500 val loss 7.2113 +3500 val perplexity 1354.6410 +3500 train 7.321904 (lr=6.9320e-05) (hash(x)=56517159) +3200 val loss 7.0656 +3200 val perplexity 1170.9994 +3200 train 7.020722 (lr=9.9197e-05) (hash(x)=49995942) +3600 val loss 7.1847 +3600 val perplexity 1319.0482 +3600 train 7.071852 (lr=4.9484e-05) (hash(x)=50720920) +3600 val loss 7.2092 +3600 val perplexity 1351.8141 +3600 train 7.138223 (lr=6.9278e-05) (hash(x)=50720920) +3300 val loss 7.0669 +3300 val perplexity 1172.4705 +3300 train 6.962692 (lr=9.9142e-05) (hash(x)=52311504) +3700 val loss 7.1715 +3700 val perplexity 1301.7917 +3700 train 7.680056 (lr=4.9454e-05) (hash(x)=62727701) +3700 val loss 7.1758 +3700 val perplexity 1307.4110 +3700 train 7.640621 (lr=6.9235e-05) (hash(x)=62727701) +3400 val loss 7.0259 +3400 val perplexity 1125.4165 +3400 train 7.003755 (lr=9.9086e-05) (hash(x)=44332917) +3800 val loss 7.1476 +3800 val perplexity 1271.0247 +3800 train 6.989200 (lr=4.9422e-05) (hash(x)=54772539) +3800 val loss 7.1592 +3800 val perplexity 1285.8239 +3800 train 7.013257 (lr=6.9191e-05) (hash(x)=54772539) +3500 val loss 7.0122 +3500 val perplexity 1110.0574 +3500 train 7.132928 (lr=9.9028e-05) (hash(x)=56517159) +3900 val loss 7.1487 +3900 val perplexity 1272.4905 +3900 train 7.162163 (lr=4.9390e-05) (hash(x)=52274485) +3900 val loss 7.1598 +3900 val perplexity 1286.6543 +3900 train 7.188324 (lr=6.9146e-05) (hash(x)=52274485) +4000 val loss 7.1395 +4000 val perplexity 1260.7595 +4000 train 6.859540 (lr=4.9357e-05) (hash(x)=50118307) +3600 val loss 6.9864 +3600 val perplexity 1081.8583 +3600 train 6.872036 (lr=9.8969e-05) (hash(x)=50720920) +4000 val loss 7.1493 +4000 val perplexity 1273.2048 +4000 train 6.882396 (lr=6.9099e-05) (hash(x)=50118307) +4100 val loss 7.1080 +4100 val perplexity 1221.7352 +4100 train 6.696227 (lr=4.9322e-05) (hash(x)=42771647) +3700 val loss 6.9570 +3700 val perplexity 1050.4807 +3700 train 7.451654 (lr=9.8908e-05) (hash(x)=62727701) +4100 val loss 7.1660 +4100 val perplexity 1294.6802 +4100 train 6.767435 (lr=6.9051e-05) (hash(x)=42771647) +4200 val loss 7.0914 +4200 val perplexity 1201.6149 +4200 train 7.170551 (lr=4.9287e-05) (hash(x)=51748836) +3800 val loss 6.9337 +3800 val perplexity 1026.3269 +3800 train 6.792619 (lr=9.8845e-05) (hash(x)=54772539) +4200 val loss 7.1143 +4200 val perplexity 1229.4664 +4200 train 7.180784 (lr=6.9002e-05) (hash(x)=51748836) +4300 val loss 7.0601 +4300 val perplexity 1164.5834 +4300 train 7.057050 (lr=4.9252e-05) (hash(x)=49021280) +3900 val loss 6.9030 +3900 val perplexity 995.2720 +3900 train 6.928247 (lr=9.8780e-05) (hash(x)=52274485) +4300 val loss 7.1378 +4300 val perplexity 1258.7052 +4300 train 7.148933 (lr=6.8952e-05) (hash(x)=49021280) +4400 val loss 7.0244 +4400 val perplexity 1123.7247 +4400 train 7.103628 (lr=4.9215e-05) (hash(x)=55200309) +4000 val loss 6.8878 +4000 val perplexity 980.2460 +4000 train 6.603559 (lr=9.8713e-05) (hash(x)=50118307) +4400 val loss 7.0874 +4400 val perplexity 1196.8223 +4400 train 7.168321 (lr=6.8901e-05) (hash(x)=55200309) +4500 val loss 7.0041 +4500 val perplexity 1101.1179 +4500 train 7.044929 (lr=4.9177e-05) (hash(x)=52085049) +4500 val loss 7.0589 +4500 val perplexity 1163.2164 +4500 train 7.096609 (lr=6.8848e-05) (hash(x)=52085049) +4100 val loss 6.8764 +4100 val perplexity 969.1635 +4100 train 6.470846 (lr=9.8645e-05) (hash(x)=42771647) +4600 val loss 7.0027 +4600 val perplexity 1099.6326 +4600 train 6.979061 (lr=4.9139e-05) (hash(x)=48935595) +4600 val loss 7.0364 +4600 val perplexity 1137.3259 +4600 train 6.995081 (lr=6.8794e-05) (hash(x)=48935595) +4200 val loss 6.8608 +4200 val perplexity 954.1253 +4200 train 6.947160 (lr=9.8575e-05) (hash(x)=51748836) +4700 val loss 6.9578 +4700 val perplexity 1051.2834 +4700 train 7.129683 (lr=4.9099e-05) (hash(x)=49182380) +4700 val loss 7.0110 +4700 val perplexity 1108.7338 +4700 train 7.189000 (lr=6.8739e-05) (hash(x)=49182380) +4300 val loss 6.8324 +4300 val perplexity 927.4540 +4300 train 6.841238 (lr=9.8503e-05) (hash(x)=49021280) +4800 val loss 6.9223 +4800 val perplexity 1014.6456 +4800 train 6.725775 (lr=4.9059e-05) (hash(x)=43941929) +4800 val loss 7.0088 +4800 val perplexity 1106.3794 +4800 train 6.818189 (lr=6.8683e-05) (hash(x)=43941929) +4400 val loss 6.8061 +4400 val perplexity 903.3645 +4400 train 6.879786 (lr=9.8430e-05) (hash(x)=55200309) +4900 val loss 6.8891 +4900 val perplexity 981.5084 +4900 train 7.054278 (lr=4.9018e-05) (hash(x)=51852773) +4900 val loss 6.9903 +4900 val perplexity 1086.0919 +4900 train 7.134183 (lr=6.8626e-05) (hash(x)=51852773) +4500 val loss 6.8024 +4500 val perplexity 899.9898 +4500 train 6.836329 (lr=9.8355e-05) (hash(x)=52085049) +5000 val loss 6.8573 +5000 val perplexity 950.8198 +5000 train 6.482135 (lr=4.8976e-05) (hash(x)=40509616) +5000 val loss 6.9693 +5000 val perplexity 1063.5103 +5000 train 6.612258 (lr=6.8567e-05) (hash(x)=40509616) +5100 val loss 6.8343 +5100 val perplexity 929.1343 +5100 train 7.126708 (lr=4.8934e-05) (hash(x)=57585369) +4600 val loss 6.7937 +4600 val perplexity 892.2471 +4600 train 6.779639 (lr=9.8278e-05) (hash(x)=48935595) +5100 val loss 6.9601 +5100 val perplexity 1053.7472 +5100 train 7.232564 (lr=6.8507e-05) (hash(x)=57585369) +5200 val loss 6.8134 +5200 val perplexity 910.0000 +5200 train 6.828725 (lr=4.8890e-05) (hash(x)=51042313) +4700 val loss 6.7569 +4700 val perplexity 859.9806 +4700 train 6.949156 (lr=9.8199e-05) (hash(x)=49182380) +5200 val loss 6.9511 +5200 val perplexity 1044.2822 +5200 train 6.954596 (lr=6.8446e-05) (hash(x)=51042313) +5300 val loss 6.7903 +5300 val perplexity 889.1386 +5300 train 6.950994 (lr=4.8846e-05) (hash(x)=52001684) +4800 val loss 6.7365 +4800 val perplexity 842.6316 +4800 train 6.560075 (lr=9.8119e-05) (hash(x)=43941929) +5300 val loss 6.9428 +5300 val perplexity 1035.6433 +5300 train 7.068288 (lr=6.8384e-05) (hash(x)=52001684) +5400 val loss 6.7679 +5400 val perplexity 869.5019 +5400 train 6.695953 (lr=4.8800e-05) (hash(x)=48831647) +4900 val loss 6.7258 +4900 val perplexity 833.6343 +4900 train 6.891153 (lr=9.8036e-05) (hash(x)=51852773) +5400 val loss 6.9532 +5400 val perplexity 1046.4855 +5400 train 6.901043 (lr=6.8320e-05) (hash(x)=48831647) +5500 val loss 6.7634 +5500 val perplexity 865.5666 +5500 train 7.144577 (lr=4.8754e-05) (hash(x)=50192069) +5000 val loss 6.7134 +5000 val perplexity 823.3696 +5500 val loss 6.9024 +5500 val perplexity 994.6591 +5500 train 7.260624 (lr=6.8256e-05) (hash(x)=50192069) +5000 train 6.339698 (lr=9.7953e-05) (hash(x)=40509616) +5600 val loss 6.7351 +5600 val perplexity 841.4648 +5600 train 6.562131 (lr=4.8707e-05) (hash(x)=47208852) +5600 val loss 6.9064 +5600 val perplexity 998.6816 +5600 train 6.739335 (lr=6.8190e-05) (hash(x)=47208852) +5100 val loss 6.6933 +5100 val perplexity 807.0188 +5100 train 6.913021 (lr=9.7867e-05) (hash(x)=57585369) +5700 val loss 6.7383 +5700 val perplexity 844.1457 +5700 train 6.331465 (lr=4.8659e-05) (hash(x)=44061694) +5700 val loss 6.8881 +5700 val perplexity 980.5087 +5700 train 6.497312 (lr=6.8123e-05) (hash(x)=44061694) +5200 val loss 6.6851 +5200 val perplexity 800.4271 +5200 train 6.668004 (lr=9.7780e-05) (hash(x)=51042313) +5800 val loss 6.7127 +5800 val perplexity 822.7604 +5800 train 7.059242 (lr=4.8611e-05) (hash(x)=56513279) +5800 val loss 6.8917 +5800 val perplexity 984.1093 +5800 train 7.228807 (lr=6.8055e-05) (hash(x)=56513279) +5300 val loss 6.6836 +5300 val perplexity 799.2283 +5300 train 6.820241 (lr=9.7691e-05) (hash(x)=52001684) +5900 val loss 6.6940 +5900 val perplexity 807.5531 +5900 train 6.839691 (lr=4.8561e-05) (hash(x)=50412818) +5900 val loss 6.8624 +5900 val perplexity 955.6297 +5900 train 6.993732 (lr=6.7985e-05) (hash(x)=50412818) +5400 val loss 6.6707 +5400 val perplexity 788.9334 +5400 train 6.575157 (lr=9.7600e-05) (hash(x)=48831647) +6000 val loss 6.6806 +6000 val perplexity 796.7877 +6000 train 6.477222 (lr=4.8511e-05) (hash(x)=47159634) +6000 val loss 6.8573 +6000 val perplexity 950.7881 +6000 train 6.679455 (lr=6.7915e-05) (hash(x)=47159634) +5500 val loss 6.6925 +5500 val perplexity 806.3149 +5500 train 7.105333 (lr=9.7508e-05) (hash(x)=50192069) +6100 val loss 6.6626 +6100 val perplexity 782.5916 +6100 train 6.761533 (lr=4.8459e-05) (hash(x)=54312795) +6100 val loss 6.8378 +6100 val perplexity 932.4249 +6100 train 6.926886 (lr=6.7843e-05) (hash(x)=54312795) +6200 val loss 6.6512 +6200 val perplexity 773.7161 +6200 train 6.779728 (lr=4.8407e-05) (hash(x)=54187587) +5600 val loss 6.6484 +5600 val perplexity 771.5285 +5600 train 6.471294 (lr=9.7414e-05) (hash(x)=47208852) +6200 val loss 6.8263 +6200 val perplexity 921.8000 +6200 train 6.935552 (lr=6.7770e-05) (hash(x)=54187587) +6300 val loss 6.6467 +6300 val perplexity 770.2537 +6300 train 6.743858 (lr=4.8355e-05) (hash(x)=53620387) +5700 val loss 6.6725 +5700 val perplexity 790.3552 +5700 train 6.252309 (lr=9.7318e-05) (hash(x)=44061694) +6300 val loss 6.8196 +6300 val perplexity 915.6463 +6300 train 6.899407 (lr=6.7696e-05) (hash(x)=53620387) +6400 val loss 6.6123 +6400 val perplexity 744.2178 +6400 train 6.562149 (lr=4.8301e-05) (hash(x)=48761774) +5800 val loss 6.6491 +5800 val perplexity 772.0687 +5800 train 6.999820 (lr=9.7221e-05) (hash(x)=56513279) +6400 val loss 6.7858 +6400 val perplexity 885.2179 +6400 train 6.744049 (lr=6.7621e-05) (hash(x)=48761774) +6500 val loss 6.6012 +6500 val perplexity 735.9983 +6500 train 6.817581 (lr=4.8246e-05) (hash(x)=56690281) +6500 val loss 6.7862 +6500 val perplexity 885.5020 +6500 train 7.016493 (lr=6.7545e-05) (hash(x)=56690281) +5900 val loss 6.6350 +5900 val perplexity 761.2691 +5900 train 6.762035 (lr=9.7122e-05) (hash(x)=50412818) +6600 val loss 6.5925 +6600 val perplexity 729.6180 +6600 train 6.397733 (lr=4.8191e-05) (hash(x)=42985269) +6600 val loss 6.7616 +6600 val perplexity 863.9914 +6600 train 6.561972 (lr=6.7467e-05) (hash(x)=42985269) +6000 val loss 6.6299 +6000 val perplexity 757.4346 +6000 train 6.458057 (lr=9.7021e-05) (hash(x)=47159634) +6700 val loss 6.6130 +6700 val perplexity 744.6924 +6700 train 6.649519 (lr=4.8135e-05) (hash(x)=53315447) +6700 val loss 6.8267 +6700 val perplexity 922.1284 +6700 train 6.879135 (lr=6.7389e-05) (hash(x)=53315447) +6100 val loss 6.5997 +6100 val perplexity 734.8572 +6100 train 6.667270 (lr=9.6919e-05) (hash(x)=54312795) +6800 val loss 6.5657 +6800 val perplexity 710.2944 +6800 train 7.060288 (lr=4.8078e-05) (hash(x)=61577166) +6800 val loss 6.7405 +6800 val perplexity 845.9574 +6800 train 7.170722 (lr=6.7309e-05) (hash(x)=61577166) +6200 val loss 6.6275 +6200 val perplexity 755.5559 +6200 train 6.753746 (lr=9.6815e-05) (hash(x)=54187587) +6900 val loss 6.5577 +6900 val perplexity 704.6382 +6900 train 6.695563 (lr=4.8020e-05) (hash(x)=54641005) +6900 val loss 6.7316 +6900 val perplexity 838.4496 +6900 train 6.866562 (lr=6.7228e-05) (hash(x)=54641005) +7000 val loss 6.5502 +7000 val perplexity 699.4157 +7000 train 7.104306 (lr=4.7961e-05) (hash(x)=60579512) +6300 val loss 6.5743 +6300 val perplexity 716.4271 +6300 train 6.656224 (lr=9.6709e-05) (hash(x)=53620387) +7000 val loss 6.7276 +7000 val perplexity 835.1517 +7000 train 7.248949 (lr=6.7146e-05) (hash(x)=60579512) +7100 val loss 6.5454 +7100 val perplexity 696.0660 +7100 train 6.414138 (lr=4.7902e-05) (hash(x)=53151549) +6400 val loss 6.5723 +6400 val perplexity 715.0309 +6400 train 6.517745 (lr=9.6602e-05) (hash(x)=48761774) +7100 val loss 6.7227 +7100 val perplexity 831.0735 +7100 train 6.612114 (lr=6.7063e-05) (hash(x)=53151549) +7200 val loss 6.5428 +7200 val perplexity 694.2211 +7200 train 7.489408 (lr=4.7842e-05) (hash(x)=71842455) +6500 val loss 6.5639 +6500 val perplexity 709.0126 +6500 train 6.793872 (lr=9.6493e-05) (hash(x)=56690281) +7200 val loss 6.7122 +7200 val perplexity 822.3823 +7200 train 7.659578 (lr=6.6978e-05) (hash(x)=71842455) +7300 val loss 6.5334 +7300 val perplexity 687.7208 +7300 train 6.287797 (lr=4.7781e-05) (hash(x)=44516452) +6600 val loss 6.5889 +6600 val perplexity 727.0155 +6600 train 6.406914 (lr=9.6382e-05) (hash(x)=42985269) +7300 val loss 6.7050 +7300 val perplexity 816.5002 +7300 train 6.460290 (lr=6.6893e-05) (hash(x)=44516452) +7400 val loss 6.5283 +7400 val perplexity 684.2463 +7400 train 6.159605 (lr=4.7719e-05) (hash(x)=42667710) +6700 val loss 6.6013 +6700 val perplexity 736.0158 +6700 train 6.640049 (lr=9.6270e-05) (hash(x)=53315447) +7400 val loss 6.7047 +7400 val perplexity 816.1969 +7400 train 6.357360 (lr=6.6806e-05) (hash(x)=42667710) +7500 val loss 6.5114 +7500 val perplexity 672.7684 +7500 train 6.242702 (lr=4.7656e-05) (hash(x)=47050797) +6800 val loss 6.5671 +6800 val perplexity 711.2753 +6800 train 7.001522 (lr=9.6156e-05) (hash(x)=61577166) +7500 val loss 6.6879 +7500 val perplexity 802.6105 +7500 train 6.419368 (lr=6.6718e-05) (hash(x)=47050797) +7600 val loss 6.5051 +7600 val perplexity 668.5150 +7600 train 6.399702 (lr=4.7593e-05) (hash(x)=49785056) +6900 val loss 6.5791 +6900 val perplexity 719.8727 +7600 val loss 6.7069 +7600 val perplexity 818.0633 +6900 train 6.707396 (lr=9.6040e-05) (hash(x)=54641005) +7600 train 6.603954 (lr=6.6630e-05) (hash(x)=49785056) +7700 val loss 6.4963 +7700 val perplexity 662.7036 +7700 train 6.285435 (lr=4.7528e-05) (hash(x)=53232030) +7700 val loss 6.6720 +7700 val perplexity 790.0093 +7700 train 6.461568 (lr=6.6540e-05) (hash(x)=53232030) +7000 val loss 6.5579 +7000 val perplexity 704.8250 +7000 train 7.086350 (lr=9.5923e-05) (hash(x)=60579512) +7800 val loss 6.4855 +7800 val perplexity 655.5969 +7800 train 6.298987 (lr=4.7463e-05) (hash(x)=48049749) +7800 val loss 6.6668 +7800 val perplexity 785.8955 +7800 train 6.494149 (lr=6.6448e-05) (hash(x)=48049749) +7100 val loss 6.5446 +7100 val perplexity 695.4625 +7100 train 6.413547 (lr=9.5804e-05) (hash(x)=53151549) +7900 val loss 6.4717 +7900 val perplexity 646.5621 +7900 train 6.300259 (lr=4.7397e-05) (hash(x)=44768513) +7900 val loss 6.6829 +7900 val perplexity 798.5929 +7900 train 6.505653 (lr=6.6356e-05) (hash(x)=44768513) +7200 val loss 6.5303 +7200 val perplexity 685.6115 +7200 train 7.385260 (lr=9.5683e-05) (hash(x)=71842455) +8000 val loss 6.4533 +8000 val perplexity 634.7982 +8000 train 6.299805 (lr=4.7331e-05) (hash(x)=46228039) +8000 val loss 6.6378 +8000 val perplexity 763.4367 +8000 train 6.497655 (lr=6.6263e-05) (hash(x)=46228039) +8100 val loss 6.4586 +8100 val perplexity 638.1406 +8100 train 6.806199 (lr=4.7263e-05) (hash(x)=60017091) +7300 val loss 6.5274 +7300 val perplexity 683.5856 +7300 train 6.305147 (lr=9.5561e-05) (hash(x)=44516452) +8100 val loss 6.6301 +8100 val perplexity 757.5613 +8100 train 6.976375 (lr=6.6169e-05) (hash(x)=60017091) +8200 val loss 6.4392 +8200 val perplexity 625.8813 +8200 train 6.448041 (lr=4.7195e-05) (hash(x)=49910198) +7400 val loss 6.5183 +7400 val perplexity 677.4284 +7400 train 6.144113 (lr=9.5437e-05) (hash(x)=42667710) +8200 val loss 6.6033 +8200 val perplexity 737.5395 +8200 train 6.593031 (lr=6.6073e-05) (hash(x)=49910198) +8300 val loss 6.4276 +8300 val perplexity 618.6628 +8300 train 6.752931 (lr=4.7126e-05) (hash(x)=57919055) +7500 val loss 6.5126 +7500 val perplexity 673.5985 +7500 train 6.239816 (lr=9.5312e-05) (hash(x)=47050797) +8300 val loss 6.6012 +8300 val perplexity 735.9520 +8300 train 6.932031 (lr=6.5976e-05) (hash(x)=57919055) +8400 val loss 6.4415 +8400 val perplexity 627.3317 +8400 train 6.512486 (lr=4.7056e-05) (hash(x)=49694964) +7600 val loss 6.5044 +7600 val perplexity 668.0494 +7600 train 6.396592 (lr=9.5185e-05) (hash(x)=49785056) +8400 val loss 6.5869 +8400 val perplexity 725.5267 +8400 train 6.661754 (lr=6.5879e-05) (hash(x)=49694964) +8500 val loss 6.4245 +8500 val perplexity 616.7479 +8500 train 6.462635 (lr=4.6986e-05) (hash(x)=53762585) +7700 val loss 6.5081 +7700 val perplexity 670.5755 +7700 train 6.280765 (lr=9.5057e-05) (hash(x)=53232030) +8500 val loss 6.5770 +8500 val perplexity 718.4002 +8500 train 6.605002 (lr=6.5780e-05) (hash(x)=53762585) +8600 val loss 6.4246 +8600 val perplexity 616.8341 +8600 train 6.470189 (lr=4.6914e-05) (hash(x)=51166973) +7800 val loss 6.5016 +7800 val perplexity 666.1758 +7800 train 6.321699 (lr=9.4926e-05) (hash(x)=48049749) +8600 val loss 6.5822 +8600 val perplexity 722.0977 +8600 train 6.605167 (lr=6.5680e-05) (hash(x)=51166973) +8700 val loss 6.4141 +8700 val perplexity 610.3843 +8700 train 6.485646 (lr=4.6842e-05) (hash(x)=53968049) +7900 val loss 6.4816 +7900 val perplexity 653.0329 +7900 train 6.317730 (lr=9.4795e-05) (hash(x)=44768513) +8700 val loss 6.5770 +8700 val perplexity 718.3944 +8700 train 6.651061 (lr=6.5579e-05) (hash(x)=53968049) +8800 val loss 6.4053 +8800 val perplexity 605.0353 +8800 train 6.495617 (lr=4.6769e-05) (hash(x)=59231056) +8000 val loss 6.4775 +8000 val perplexity 650.3123 +8000 train 6.328767 (lr=9.4661e-05) (hash(x)=46228039) +8800 val loss 6.5543 +8800 val perplexity 702.2714 +8800 train 6.639665 (lr=6.5477e-05) (hash(x)=59231056) +8900 val loss 6.4061 +8900 val perplexity 605.5363 +8900 train 6.274081 (lr=4.6696e-05) (hash(x)=50488048) +8100 val loss 6.5112 +8100 val perplexity 672.6090 +8100 train 6.866035 (lr=9.4526e-05) (hash(x)=60017091) +8900 val loss 6.5646 +8900 val perplexity 709.5159 +8900 train 6.441329 (lr=6.5374e-05) (hash(x)=50488048) +9000 val loss 6.4001 +9000 val perplexity 601.9169 +9000 train 6.120277 (lr=4.6621e-05) (hash(x)=44492956) +8200 val loss 6.4541 +8200 val perplexity 635.3290 +9000 val loss 6.5750 +9000 val perplexity 716.9766 +8200 train 6.460249 (lr=9.4390e-05) (hash(x)=49910198) +9000 train 6.295722 (lr=6.5270e-05) (hash(x)=44492956) +9100 val loss 6.4258 +9100 val perplexity 617.5504 +9100 train 6.443644 (lr=4.6546e-05) (hash(x)=51134989) +9100 val loss 6.5854 +9100 val perplexity 724.4301 +9100 train 6.569467 (lr=6.5164e-05) (hash(x)=51134989) +8300 val loss 6.4611 +8300 val perplexity 639.7861 +8300 train 6.775478 (lr=9.4252e-05) (hash(x)=57919055) +9200 val loss 6.3941 +9200 val perplexity 598.3110 +9200 train 6.185190 (lr=4.6470e-05) (hash(x)=48636056) +9200 val loss 6.5649 +9200 val perplexity 709.7270 +9200 train 6.369600 (lr=6.5058e-05) (hash(x)=48636056) +8400 val loss 6.4673 +8400 val perplexity 643.7472 +8400 train 6.531004 (lr=9.4112e-05) (hash(x)=49694964) +9300 val loss 6.3914 +9300 val perplexity 596.6716 +9300 train 6.323339 (lr=4.6393e-05) (hash(x)=50200551) +9300 val loss 6.5857 +9300 val perplexity 724.6741 +9300 train 6.534290 (lr=6.4951e-05) (hash(x)=50200551) +9400 val loss 6.3927 +9400 val perplexity 597.4857 +9400 train 6.197774 (lr=4.6316e-05) (hash(x)=48057228) +8500 val loss 6.4442 +8500 val perplexity 629.0693 +8500 train 6.469104 (lr=9.3971e-05) (hash(x)=53762585) +9400 val loss 6.5753 +9400 val perplexity 717.1948 +9400 train 6.404617 (lr=6.4842e-05) (hash(x)=48057228) +9500 val loss 6.3774 +9500 val perplexity 588.3885 +9500 train 6.152433 (lr=4.6238e-05) (hash(x)=48125171) +8600 val loss 6.4367 +8600 val perplexity 624.3689 +8600 train 6.454939 (lr=9.3828e-05) (hash(x)=51166973) +9500 val loss 6.5685 +9500 val perplexity 712.2860 +9500 train 6.320446 (lr=6.4733e-05) (hash(x)=48125171) +9600 val loss 6.3833 +9600 val perplexity 591.8793 +9600 train 6.289352 (lr=4.6159e-05) (hash(x)=53375853) +8700 val loss 6.4376 +8700 val perplexity 624.8827 +8700 train 6.501090 (lr=9.3684e-05) (hash(x)=53968049) +9600 val loss 6.5832 +9600 val perplexity 722.8222 +9600 train 6.485303 (lr=6.4622e-05) (hash(x)=53375853) +9700 val loss 6.3611 +9700 val perplexity 578.8823 +9700 train 7.147769 (lr=4.6079e-05) (hash(x)=53924631) +8800 val loss 6.4275 +8800 val perplexity 618.6161 +8800 train 6.505811 (lr=9.3538e-05) (hash(x)=59231056) +9700 val loss 6.5385 +9700 val perplexity 691.2393 +9700 train 7.267034 (lr=6.4511e-05) (hash(x)=53924631) +9800 val loss 6.3427 +9800 val perplexity 568.3274 +9800 train 6.385958 (lr=4.5999e-05) (hash(x)=48895047) +8900 val loss 6.4225 +8900 val perplexity 615.5196 +8900 train 6.296882 (lr=9.3391e-05) (hash(x)=50488048) +9800 val loss 6.5275 +9800 val perplexity 683.7016 +9800 train 6.557533 (lr=6.4398e-05) (hash(x)=48895047) +9900 val loss 6.3405 +9900 val perplexity 567.0519 +9900 train 6.189950 (lr=4.5917e-05) (hash(x)=44269923) +9000 val loss 6.4359 +9000 val perplexity 623.8701 +9000 train 6.163263 (lr=9.3242e-05) (hash(x)=44492956) +9900 val loss 6.5423 +9900 val perplexity 693.8755 +9900 train 6.399669 (lr=6.4284e-05) (hash(x)=44269923) +10000 val loss 6.3566 +10000 val perplexity 576.3036 +10000 train 6.248072 (lr=4.5835e-05) (hash(x)=49666623) +9100 val loss 6.4585 +9100 val perplexity 638.0742 +9100 train 6.373821 (lr=9.3092e-05) (hash(x)=51134989) +10000 val loss 6.5586 +10000 val perplexity 705.2722 +10000 train 6.450226 (lr=6.4170e-05) (hash(x)=49666623) +10100 val loss 6.3390 +10100 val perplexity 566.2075 +10100 train 5.839239 (lr=4.5753e-05) (hash(x)=39732485) +9200 val loss 6.4227 +9200 val perplexity 615.6746 +9200 train 6.224191 (lr=9.2940e-05) (hash(x)=48636056) +10100 val loss 6.5345 +10100 val perplexity 688.5092 +10100 train 6.054434 (lr=6.4054e-05) (hash(x)=39732485) +10200 val loss 6.3173 +10200 val perplexity 554.0823 +10200 train 7.417423 (lr=4.5669e-05) (hash(x)=45176813) +9300 val loss 6.4372 +9300 val perplexity 624.6777 +9300 train 6.348765 (lr=9.2786e-05) (hash(x)=50200551) +10200 val loss 6.5042 +10200 val perplexity 667.9411 +10200 train 7.562184 (lr=6.3937e-05) (hash(x)=45176813) +10300 val loss 6.3193 +10300 val perplexity 555.1849 +10300 train 6.179464 (lr=4.5585e-05) (hash(x)=47779031) +10300 val loss 6.4966 +10300 val perplexity 662.9116 +10300 train 6.374511 (lr=6.3820e-05) (hash(x)=47779031) +9400 val loss 6.4242 +9400 val perplexity 616.5568 +9400 train 6.235063 (lr=9.2632e-05) (hash(x)=48057228) +10400 val loss 6.3183 +10400 val perplexity 554.6057 +10400 train 6.190692 (lr=4.5501e-05) (hash(x)=47802708) +10400 val loss 6.4972 +10400 val perplexity 663.3062 +10400 train 6.364641 (lr=6.3701e-05) (hash(x)=47802708) +9500 val loss 6.3993 +9500 val perplexity 601.4312 +9500 train 6.173857 (lr=9.2475e-05) (hash(x)=48125171) +10500 val loss 6.3165 +10500 val perplexity 553.6346 +10500 train 6.168552 (lr=4.5415e-05) (hash(x)=47245542) +10500 val loss 6.4939 +10500 val perplexity 661.0754 +10500 train 6.316882 (lr=6.3581e-05) (hash(x)=47245542) +9600 val loss 6.4629 +9600 val perplexity 640.9107 +9600 train 6.345786 (lr=9.2317e-05) (hash(x)=53375853) +10600 val loss 6.3047 +10600 val perplexity 547.1288 +10600 train 6.354541 (lr=4.5329e-05) (hash(x)=50902224) +10600 val loss 6.4867 +10600 val perplexity 656.3573 +10600 train 6.530337 (lr=6.3460e-05) (hash(x)=50902224) +9700 val loss 6.4034 +9700 val perplexity 603.8904 +9700 train 7.180225 (lr=9.2158e-05) (hash(x)=53924631) +10700 val loss 6.3065 +10700 val perplexity 548.1271 +10700 train 6.179185 (lr=4.5242e-05) (hash(x)=51581765) +10700 val loss 6.5164 +10700 val perplexity 676.1608 +10700 train 6.390903 (lr=6.3339e-05) (hash(x)=51581765) +9800 val loss 6.3846 +9800 val perplexity 592.6229 +9800 train 6.434079 (lr=9.1997e-05) (hash(x)=48895047) +10800 val loss 6.2876 +10800 val perplexity 537.8709 +10800 train 6.323123 (lr=4.5154e-05) (hash(x)=50137616) +10800 val loss 6.4807 +10800 val perplexity 652.4384 +10800 train 6.485484 (lr=6.3216e-05) (hash(x)=50137616) +10900 val loss 6.2884 +10900 val perplexity 538.2848 +10900 train 5.996392 (lr=4.5066e-05) (hash(x)=45446745) +9900 val loss 6.4061 +9900 val perplexity 605.5161 +9900 train 6.268369 (lr=9.1835e-05) (hash(x)=44269923) +10900 val loss 6.4753 +10900 val perplexity 648.9175 +10900 train 6.172770 (lr=6.3092e-05) (hash(x)=45446745) +11000 val loss 6.2933 +11000 val perplexity 540.9515 +11000 train 6.114547 (lr=4.4977e-05) (hash(x)=50403725) +10000 val loss 6.4034 +10000 val perplexity 603.8922 +10000 train 6.308901 (lr=9.1671e-05) (hash(x)=49666623) +11000 val loss 6.4815 +11000 val perplexity 652.9236 +11000 train 6.322019 (lr=6.2968e-05) (hash(x)=50403725) +11100 val loss 6.2851 +11100 val perplexity 536.5330 +11100 train 5.760800 (lr=4.4887e-05) (hash(x)=42015206) +11100 val loss 6.4642 +11100 val perplexity 641.7599 +11100 train 5.946821 (lr=6.2842e-05) (hash(x)=42015206) +10100 val loss 6.3869 +10100 val perplexity 594.0414 +10100 train 5.912936 (lr=9.1506e-05) (hash(x)=39732485) +11200 val loss 6.2854 +11200 val perplexity 536.6940 +11200 train 6.147406 (lr=4.4797e-05) (hash(x)=52280259) +11200 val loss 6.4705 +11200 val perplexity 645.8220 +11200 train 6.342646 (lr=6.2715e-05) (hash(x)=52280259) +10200 val loss 6.3806 +10200 val perplexity 590.3018 +10200 train 7.562238 (lr=9.1339e-05) (hash(x)=45176813) +11300 val loss 6.2873 +11300 val perplexity 537.6758 +11300 train 6.038785 (lr=4.4706e-05) (hash(x)=50264744) +11300 val loss 6.4712 +11300 val perplexity 646.2400 +11300 train 6.238277 (lr=6.2588e-05) (hash(x)=50264744) +10300 val loss 6.3753 +10300 val perplexity 587.1789 +10300 train 6.239228 (lr=9.1171e-05) (hash(x)=47779031) +11400 val loss 6.2907 +11400 val perplexity 539.5180 +11400 train 6.176596 (lr=4.4614e-05) (hash(x)=49817375) +11400 val loss 6.4692 +11400 val perplexity 644.9919 +11400 train 6.366453 (lr=6.2459e-05) (hash(x)=49817375) +10400 val loss 6.3915 +10400 val perplexity 596.7308 +10400 train 6.279093 (lr=9.1001e-05) (hash(x)=47802708) +11500 val loss 6.2874 +11500 val perplexity 537.7419 +11500 train 6.267765 (lr=4.4521e-05) (hash(x)=51662160) +11500 val loss 6.4767 +11500 val perplexity 649.7969 +11500 train 6.471603 (lr=6.2330e-05) (hash(x)=51662160) +10500 val loss 6.3759 +10500 val perplexity 587.5049 +10500 train 6.220656 (lr=9.0830e-05) (hash(x)=47245542) +11600 val loss 6.2850 +11600 val perplexity 536.4857 +11600 train 6.226350 (lr=4.4428e-05) (hash(x)=48891495) +11600 val loss 6.4661 +11600 val perplexity 642.9567 +11600 train 6.398019 (lr=6.2199e-05) (hash(x)=48891495) +10600 val loss 6.3739 +10600 val perplexity 586.3585 +10600 train 6.414763 (lr=9.0658e-05) (hash(x)=50902224) +11700 val loss 6.2578 +11700 val perplexity 522.0562 +11700 train 6.255913 (lr=4.4334e-05) (hash(x)=46306682) +11700 val loss 6.4435 +11700 val perplexity 628.6244 +11700 train 6.432802 (lr=6.2068e-05) (hash(x)=46306682) +10700 val loss 6.3873 +10700 val perplexity 594.2664 +10700 train 6.234374 (lr=9.0484e-05) (hash(x)=51581765) +11800 val loss 6.2535 +11800 val perplexity 519.8424 +11800 train 6.217044 (lr=4.4240e-05) (hash(x)=46605293) +11800 val loss 6.4419 +11800 val perplexity 627.6228 +11800 train 6.387120 (lr=6.1936e-05) (hash(x)=46605293) +10800 val loss 6.3749 +10800 val perplexity 586.9379 +10800 train 6.412324 (lr=9.0308e-05) (hash(x)=50137616) +11900 val loss 6.2447 +11900 val perplexity 515.2994 +11900 train 6.179983 (lr=4.4145e-05) (hash(x)=53746201) +11900 val loss 6.4192 +11900 val perplexity 613.4951 +11900 train 6.365595 (lr=6.1802e-05) (hash(x)=53746201) +12000 val loss 6.2409 +12000 val perplexity 513.2961 +12000 train 6.334722 (lr=4.4049e-05) (hash(x)=48587180) +10900 val loss 6.3581 +10900 val perplexity 577.1487 +10900 train 6.080709 (lr=9.0132e-05) (hash(x)=45446745) +12000 val loss 6.4090 +12000 val perplexity 607.2846 +12000 train 6.480982 (lr=6.1668e-05) (hash(x)=48587180) +12100 val loss 6.2393 +12100 val perplexity 512.4902 +12100 train 6.029146 (lr=4.3952e-05) (hash(x)=45786729) +11000 val loss 6.3604 +11000 val perplexity 578.4651 +11000 train 6.196943 (lr=8.9954e-05) (hash(x)=50403725) +12100 val loss 6.4212 +12100 val perplexity 614.7646 +12100 train 6.189254 (lr=6.1533e-05) (hash(x)=45786729) +12200 val loss 6.2346 +12200 val perplexity 510.0944 +12200 train 6.160137 (lr=4.3855e-05) (hash(x)=42858336) +11100 val loss 6.3505 +11100 val perplexity 572.7651 +11100 train 5.849023 (lr=8.9774e-05) (hash(x)=42015206) +12200 val loss 6.4066 +12200 val perplexity 605.8104 +12200 train 6.332368 (lr=6.1397e-05) (hash(x)=42858336) +12300 val loss 6.2333 +12300 val perplexity 509.4522 +12300 train 6.406464 (lr=4.3757e-05) (hash(x)=51944313) +11200 val loss 6.3685 +11200 val perplexity 583.1981 +11200 train 6.236219 (lr=8.9593e-05) (hash(x)=52280259) +12300 val loss 6.4023 +12300 val perplexity 603.2524 +12300 train 6.598873 (lr=6.1260e-05) (hash(x)=51944313) +12400 val loss 6.2126 +12400 val perplexity 498.9987 +12400 train 6.714368 (lr=4.3659e-05) (hash(x)=59357191) +12400 val loss 6.3986 +12400 val perplexity 601.0114 +12400 train 6.895336 (lr=6.1122e-05) (hash(x)=59357191) +11300 val loss 6.3664 +11300 val perplexity 581.9650 +11300 train 6.100276 (lr=8.9411e-05) (hash(x)=50264744) +12500 val loss 6.2195 +12500 val perplexity 502.4362 +12500 train 6.334024 (lr=4.3560e-05) (hash(x)=49770062) +12500 val loss 6.4069 +12500 val perplexity 606.0037 +12500 train 6.522009 (lr=6.0984e-05) (hash(x)=49770062) +11400 val loss 6.3802 +11400 val perplexity 590.0237 +11400 train 6.277043 (lr=8.9227e-05) (hash(x)=49817375) +12600 val loss 6.2030 +12600 val perplexity 494.2460 +12600 train 5.980834 (lr=4.3460e-05) (hash(x)=47235788) +12600 val loss 6.3836 +12600 val perplexity 592.0518 +12600 train 6.151051 (lr=6.0844e-05) (hash(x)=47235788) +11500 val loss 6.3619 +11500 val perplexity 579.3739 +11500 train 6.346138 (lr=8.9043e-05) (hash(x)=51662160) +12700 val loss 6.2019 +12700 val perplexity 493.7014 +12700 train 6.207354 (lr=4.3360e-05) (hash(x)=51700943) +12700 val loss 6.3822 +12700 val perplexity 591.2164 +12700 train 6.418068 (lr=6.0703e-05) (hash(x)=51700943) +11600 val loss 6.3612 +11600 val perplexity 578.9637 +11600 train 6.309485 (lr=8.8856e-05) (hash(x)=48891495) +12800 val loss 6.2139 +12800 val perplexity 499.6537 +12800 train 6.169513 (lr=4.3259e-05) (hash(x)=50532124) +12800 val loss 6.3777 +12800 val perplexity 588.5892 +12800 train 6.337963 (lr=6.0562e-05) (hash(x)=50532124) +12900 val loss 6.1960 +12900 val perplexity 490.7574 +12900 train 6.306409 (lr=4.3157e-05) (hash(x)=51948273) +11700 val loss 6.3349 +11700 val perplexity 563.9124 +11700 train 6.321611 (lr=8.8668e-05) (hash(x)=46306682) +12900 val loss 6.3772 +12900 val perplexity 588.2719 +12900 train 6.483133 (lr=6.0420e-05) (hash(x)=51948273) +13000 val loss 6.2050 +13000 val perplexity 495.2059 +13000 train 6.372721 (lr=4.3055e-05) (hash(x)=46732738) +11800 val loss 6.3301 +11800 val perplexity 561.2119 +11800 train 6.298628 (lr=8.8479e-05) (hash(x)=46605293) +13000 val loss 6.3865 +13000 val perplexity 593.7670 +13000 train 6.531954 (lr=6.0277e-05) (hash(x)=46732738) +13100 val loss 6.1958 +13100 val perplexity 490.7025 +13100 train 6.221768 (lr=4.2952e-05) (hash(x)=49837250) +11900 val loss 6.3153 +11900 val perplexity 552.9648 +11900 train 6.255790 (lr=8.8289e-05) (hash(x)=53746201) +13100 val loss 6.3670 +13100 val perplexity 582.2797 +13100 train 6.390731 (lr=6.0133e-05) (hash(x)=49837250) +13200 val loss 6.2022 +13200 val perplexity 493.8576 +13200 train 6.488420 (lr=4.2848e-05) (hash(x)=57646893) +12000 val loss 6.3122 +12000 val perplexity 551.2727 +12000 train 6.383875 (lr=8.8097e-05) (hash(x)=48587180) +13200 val loss 6.3708 +13200 val perplexity 584.5456 +13200 train 6.642096 (lr=5.9988e-05) (hash(x)=57646893) +13300 val loss 6.1930 +13300 val perplexity 489.3283 +13300 train 5.744956 (lr=4.2744e-05) (hash(x)=42055305) +12100 val loss 6.3312 +12100 val perplexity 561.8511 +12100 train 6.104850 (lr=8.7904e-05) (hash(x)=45786729) +13300 val loss 6.3628 +13300 val perplexity 579.8581 +13300 train 5.922172 (lr=5.9842e-05) (hash(x)=42055305) +13400 val loss 6.2072 +13400 val perplexity 496.3196 +13400 train 5.900372 (lr=4.2640e-05) (hash(x)=44684844) +12200 val loss 6.3282 +12200 val perplexity 560.1316 +12200 train 6.244182 (lr=8.7710e-05) (hash(x)=42858336) +13400 val loss 6.3918 +13400 val perplexity 596.9084 +13400 train 6.109066 (lr=5.9695e-05) (hash(x)=44684844) +13500 val loss 6.1840 +13500 val perplexity 484.9107 +13500 train 5.862307 (lr=4.2534e-05) (hash(x)=46971715) +12300 val loss 6.3317 +12300 val perplexity 562.0834 +12300 train 6.538304 (lr=8.7515e-05) (hash(x)=51944313) +13500 val loss 6.3781 +13500 val perplexity 588.7840 +13500 train 6.054822 (lr=5.9548e-05) (hash(x)=46971715) +13600 val loss 6.1918 +13600 val perplexity 488.7330 +13600 train 6.017723 (lr=4.2428e-05) (hash(x)=48728506) +12400 val loss 6.3200 +12400 val perplexity 555.5686 +12400 train 6.784978 (lr=8.7318e-05) (hash(x)=59357191) +13600 val loss 6.3750 +13600 val perplexity 586.9799 +13600 train 6.203294 (lr=5.9400e-05) (hash(x)=48728506) +13700 val loss 6.1925 +13700 val perplexity 489.0685 +13700 train 5.980932 (lr=4.2322e-05) (hash(x)=50739726) +12500 val loss 6.3089 +12500 val perplexity 549.4297 +13700 val loss 6.3813 +13700 val perplexity 590.6945 +13700 train 6.179378 (lr=5.9251e-05) (hash(x)=50739726) +12500 train 6.419203 (lr=8.7119e-05) (hash(x)=49770062) +13800 val loss 6.1862 +13800 val perplexity 486.0093 +13800 train 6.287631 (lr=4.2215e-05) (hash(x)=52174318) +13800 val loss 6.3732 +13800 val perplexity 585.9345 +13800 train 6.482311 (lr=5.9101e-05) (hash(x)=52174318) +12600 val loss 6.3111 +12600 val perplexity 550.6550 +12600 train 6.092916 (lr=8.6920e-05) (hash(x)=47235788) +13900 val loss 6.1776 +13900 val perplexity 481.8454 +13900 train 5.557216 (lr=4.2107e-05) (hash(x)=44141266) +13900 val loss 6.3569 +13900 val perplexity 576.4279 +13900 train 5.761450 (lr=5.8950e-05) (hash(x)=44141266) +12700 val loss 6.3178 +12700 val perplexity 554.3524 +12700 train 6.334719 (lr=8.6719e-05) (hash(x)=51700943) +14000 val loss 6.1711 +14000 val perplexity 478.7227 +14000 train 6.133667 (lr=4.1999e-05) (hash(x)=50880921) +14000 val loss 6.3533 +14000 val perplexity 574.3917 +14000 train 6.342307 (lr=5.8799e-05) (hash(x)=50880921) +12800 val loss 6.3147 +12800 val perplexity 552.6302 +12800 train 6.270829 (lr=8.6517e-05) (hash(x)=50532124) +14100 val loss 6.1645 +14100 val perplexity 475.5845 +14100 train 6.102219 (lr=4.1890e-05) (hash(x)=44179328) +14100 val loss 6.3473 +14100 val perplexity 570.9208 +14100 train 6.280088 (lr=5.8646e-05) (hash(x)=44179328) +12900 val loss 6.3020 +12900 val perplexity 545.6871 +12900 train 6.406723 (lr=8.6314e-05) (hash(x)=51948273) +14200 val loss 6.1537 +14200 val perplexity 470.4647 +14200 train 6.252338 (lr=4.1781e-05) (hash(x)=52989772) +14200 val loss 6.3343 +14200 val perplexity 563.5675 +14200 train 6.417709 (lr=5.8493e-05) (hash(x)=52989772) +14300 val loss 6.1503 +14300 val perplexity 468.8498 +14300 train 6.385016 (lr=4.1671e-05) (hash(x)=55406567) +13000 val loss 6.3186 +13000 val perplexity 554.7871 +13000 train 6.466562 (lr=8.6110e-05) (hash(x)=46732738) +14300 val loss 6.3292 +14300 val perplexity 560.6941 +14300 train 6.554401 (lr=5.8339e-05) (hash(x)=55406567) +14400 val loss 6.1478 +14400 val perplexity 467.6949 +14400 train 6.203402 (lr=4.1560e-05) (hash(x)=47274361) +13100 val loss 6.3065 +13100 val perplexity 548.1247 +13100 train 6.335435 (lr=8.5904e-05) (hash(x)=49837250) +14400 val loss 6.3307 +14400 val perplexity 561.5286 +14400 train 6.420453 (lr=5.8184e-05) (hash(x)=47274361) +14500 val loss 6.1361 +14500 val perplexity 462.2333 +14500 train 6.565048 (lr=4.1449e-05) (hash(x)=53333322) +13200 val loss 6.3183 +13200 val perplexity 554.6125 +13200 train 6.586292 (lr=8.5697e-05) (hash(x)=57646893) +14500 val loss 6.3286 +14500 val perplexity 560.3875 +14500 train 6.732544 (lr=5.8029e-05) (hash(x)=53333322) +14600 val loss 6.1396 +14600 val perplexity 463.8659 +14600 train 6.265152 (lr=4.1337e-05) (hash(x)=48578234) +13300 val loss 6.3036 +13300 val perplexity 546.5611 +13300 train 5.868500 (lr=8.5489e-05) (hash(x)=42055305) +14600 val loss 6.3275 +14600 val perplexity 559.7396 +14600 train 6.437446 (lr=5.7872e-05) (hash(x)=48578234) +14700 val loss 6.1651 +14700 val perplexity 475.8719 +14700 train 5.846576 (lr=4.1225e-05) (hash(x)=45694052) +13400 val loss 6.3046 +13400 val perplexity 547.0742 +13400 train 6.021350 (lr=8.5279e-05) (hash(x)=44684844) +14700 val loss 6.3525 +14700 val perplexity 573.9468 +14700 train 6.048125 (lr=5.7715e-05) (hash(x)=45694052) +14800 val loss 6.1441 +14800 val perplexity 465.9411 +14800 train 5.925498 (lr=4.1113e-05) (hash(x)=46245798) +14800 val loss 6.3277 +14800 val perplexity 559.8675 +14800 train 6.138581 (lr=5.7558e-05) (hash(x)=46245798) +13500 val loss 6.3102 +13500 val perplexity 550.1745 +13500 train 5.992841 (lr=8.5069e-05) (hash(x)=46971715) +14900 val loss 6.1411 +14900 val perplexity 464.5530 +14900 train 6.283280 (lr=4.0999e-05) (hash(x)=55970820) +14900 val loss 6.3363 +14900 val perplexity 564.6882 +14900 train 6.498343 (lr=5.7399e-05) (hash(x)=55970820) +13600 val loss 6.2936 +13600 val perplexity 541.0870 +13600 train 6.112821 (lr=8.4857e-05) (hash(x)=48728506) +15000 val loss 6.1492 +15000 val perplexity 468.3618 +15000 train 6.255603 (lr=4.0885e-05) (hash(x)=55635805) +15000 val loss 6.3258 +15000 val perplexity 558.8156 +15000 train 6.419089 (lr=5.7240e-05) (hash(x)=55635805) +13700 val loss 6.2896 +13700 val perplexity 538.9217 +13700 train 6.094644 (lr=8.4644e-05) (hash(x)=50739726) +15100 val loss 6.1270 +15100 val perplexity 458.0396 +15100 train 6.044188 (lr=4.0771e-05) (hash(x)=48884573) +15100 val loss 6.3032 +15100 val perplexity 546.3135 +15100 train 6.197279 (lr=5.7079e-05) (hash(x)=48884573) +13800 val loss 6.3031 +13800 val perplexity 546.2784 +13800 train 6.412201 (lr=8.4430e-05) (hash(x)=52174318) +15200 val loss 6.1233 +15200 val perplexity 456.3787 +15200 train 6.300706 (lr=4.0656e-05) (hash(x)=59283076) +15200 val loss 6.3089 +15200 val perplexity 549.4617 +15200 train 6.510170 (lr=5.6919e-05) (hash(x)=59283076) +13900 val loss 6.2823 +13900 val perplexity 535.0106 +13900 train 5.696769 (lr=8.4214e-05) (hash(x)=44141266) +15300 val loss 6.1188 +15300 val perplexity 454.2971 +15300 train 6.042745 (lr=4.0541e-05) (hash(x)=48258741) +15300 val loss 6.3095 +15300 val perplexity 549.7542 +15300 train 6.225156 (lr=5.6757e-05) (hash(x)=48258741) +14000 val loss 6.2851 +14000 val perplexity 536.5412 +14000 train 6.278280 (lr=8.3998e-05) (hash(x)=50880921) +15400 val loss 6.1319 +15400 val perplexity 460.3272 +15400 train 6.158960 (lr=4.0425e-05) (hash(x)=49433427) +15400 val loss 6.3035 +15400 val perplexity 546.4634 +15400 train 6.343267 (lr=5.6595e-05) (hash(x)=49433427) +14100 val loss 6.2732 +14100 val perplexity 530.1518 +14100 train 6.208812 (lr=8.3780e-05) (hash(x)=44179328) +15500 val loss 6.1150 +15500 val perplexity 452.6169 +15500 val loss 6.3211 +15500 val perplexity 556.1944 +15500 train 5.899845 (lr=4.0308e-05) (hash(x)=42504793) +15500 train 6.069526 (lr=5.6432e-05) (hash(x)=42504793) +14200 val loss 6.2535 +14200 val perplexity 519.8256 +14200 train 6.344846 (lr=8.3561e-05) (hash(x)=52989772) +15600 val loss 6.1173 +15600 val perplexity 453.6441 +15600 train 6.015007 (lr=4.0191e-05) (hash(x)=48860388) +15600 val loss 6.2998 +15600 val perplexity 544.4466 +15600 train 6.159374 (lr=5.6268e-05) (hash(x)=48860388) +14300 val loss 6.2454 +14300 val perplexity 515.6539 +14300 train 6.448986 (lr=8.3341e-05) (hash(x)=55406567) +15700 val loss 6.2799 +15700 val perplexity 533.7578 +15700 val loss 6.0992 +15700 val perplexity 445.5022 +15700 train 6.086686 (lr=5.6104e-05) (hash(x)=49229381) +15700 train 5.920973 (lr=4.0074e-05) (hash(x)=49229381) +14400 val loss 6.2421 +14400 val perplexity 513.9424 +14400 train 6.370075 (lr=8.3120e-05) (hash(x)=47274361) +15800 val loss 6.1038 +15800 val perplexity 447.5447 +15800 train 5.934508 (lr=3.9956e-05) (hash(x)=48471211) +15800 val loss 6.2983 +15800 val perplexity 543.6398 +15800 train 6.073653 (lr=5.5938e-05) (hash(x)=48471211) +14500 val loss 6.2622 +14500 val perplexity 524.3958 +14500 train 6.735986 (lr=8.2898e-05) (hash(x)=53333322) +15900 val loss 6.1048 +15900 val perplexity 447.9807 +15900 train 5.814809 (lr=3.9838e-05) (hash(x)=47555877) +15900 val loss 6.2992 +15900 val perplexity 544.1287 +15900 train 6.006874 (lr=5.5773e-05) (hash(x)=47555877) +16000 val loss 6.1117 +16000 val perplexity 451.1030 +16000 train 5.804303 (lr=3.9719e-05) (hash(x)=48397986) +14600 val loss 6.2519 +14600 val perplexity 519.0102 +14600 train 6.356515 (lr=8.2675e-05) (hash(x)=48578234) +16000 val loss 6.2971 +16000 val perplexity 542.9921 +16000 train 5.997636 (lr=5.5606e-05) (hash(x)=48397986) +16100 val loss 6.1077 +16100 val perplexity 449.2983 +16100 train 6.201589 (lr=3.9599e-05) (hash(x)=53299004) +14700 val loss 6.2862 +14700 val perplexity 537.1274 +14700 train 5.999903 (lr=8.2451e-05) (hash(x)=45694052) +16100 val loss 6.2833 +16100 val perplexity 535.5461 +16100 train 6.334311 (lr=5.5439e-05) (hash(x)=53299004) +16200 val loss 6.1119 +16200 val perplexity 451.2162 +16200 train 5.900352 (lr=3.9479e-05) (hash(x)=45763277) +16200 val loss 6.2982 +16200 val perplexity 543.6183 +16200 train 6.074014 (lr=5.5271e-05) (hash(x)=45763277) +14800 val loss 6.2675 +14800 val perplexity 527.1354 +14800 train 6.069706 (lr=8.2225e-05) (hash(x)=46245798) +16300 val loss 6.1061 +16300 val perplexity 448.5692 +16300 train 5.591532 (lr=3.9359e-05) (hash(x)=39675609) +16300 val loss 6.2827 +16300 val perplexity 535.2198 +16300 train 5.740005 (lr=5.5102e-05) (hash(x)=39675609) +14900 val loss 6.2519 +14900 val perplexity 519.0052 +14900 train 6.398389 (lr=8.1998e-05) (hash(x)=55970820) +16400 val loss 6.0969 +16400 val perplexity 444.4576 +16400 train 6.162115 (lr=3.9238e-05) (hash(x)=48973557) +16400 val loss 6.2844 +16400 val perplexity 536.1591 +16400 train 6.375087 (lr=5.4933e-05) (hash(x)=48973557) +15000 val loss 6.2376 +15000 val perplexity 511.6510 +15000 train 6.338455 (lr=8.1771e-05) (hash(x)=55635805) +16500 val loss 6.0883 +16500 val perplexity 440.6679 +16500 train 6.090487 (lr=3.9117e-05) (hash(x)=46121211) +16500 val loss 6.2651 +16500 val perplexity 525.9087 +16500 train 6.226743 (lr=5.4763e-05) (hash(x)=46121211) +15100 val loss 6.2410 +15100 val perplexity 513.3783 +15100 train 6.131569 (lr=8.1542e-05) (hash(x)=48884573) +16600 val loss 6.0918 +16600 val perplexity 442.2100 +16600 train 6.352380 (lr=3.8995e-05) (hash(x)=49181055) +16600 val loss 6.2623 +16600 val perplexity 524.4390 +16600 train 6.502903 (lr=5.4593e-05) (hash(x)=49181055) +15200 val loss 6.2412 +15200 val perplexity 513.4949 +15200 train 6.447044 (lr=8.1312e-05) (hash(x)=59283076) +16700 val loss 6.0925 +16700 val perplexity 442.5309 +16700 train 6.040877 (lr=3.8873e-05) (hash(x)=49361213) +16700 val loss 6.2583 +16700 val perplexity 522.3468 +16700 train 6.198591 (lr=5.4422e-05) (hash(x)=49361213) +15300 val loss 6.2669 +15300 val perplexity 526.8291 +15300 train 6.188086 (lr=8.1082e-05) (hash(x)=48258741) +16800 val loss 6.0950 +16800 val perplexity 443.6194 +16800 train 6.086593 (lr=3.8750e-05) (hash(x)=52739987) +16800 val loss 6.2646 +16800 val perplexity 525.6302 +16800 train 6.249082 (lr=5.4250e-05) (hash(x)=52739987) +15400 val loss 6.2680 +15400 val perplexity 527.4374 +15400 train 6.287550 (lr=8.0850e-05) (hash(x)=49433427) +16900 val loss 6.2472 +16900 val perplexity 516.5728 +16900 train 6.389314 (lr=5.4078e-05) (hash(x)=55994797) +16900 val loss 6.0762 +16900 val perplexity 435.3646 +16900 train 6.241480 (lr=3.8627e-05) (hash(x)=55994797) +15500 val loss 6.2683 +15500 val perplexity 527.6044 +15500 train 6.028718 (lr=8.0617e-05) (hash(x)=42504793) +17000 val loss 6.0823 +17000 val perplexity 438.0379 +17000 train 6.104437 (lr=3.8503e-05) (hash(x)=50109331) +17000 val loss 6.2478 +17000 val perplexity 516.8850 +17000 train 6.280750 (lr=5.3905e-05) (hash(x)=50109331) +17100 val loss 6.2370 +17100 val perplexity 511.3335 +17100 train 6.350257 (lr=5.3731e-05) (hash(x)=50292169) +17100 val loss 6.0777 +17100 val perplexity 436.0165 +17100 train 6.172071 (lr=3.8379e-05) (hash(x)=50292169) +15600 val loss 6.2485 +15600 val perplexity 517.2164 +15600 train 6.143048 (lr=8.0383e-05) (hash(x)=48860388) +17200 val loss 6.0749 +17200 val perplexity 434.8055 +17200 train 6.132131 (lr=3.8255e-05) (hash(x)=50722386) +17200 val loss 6.2532 +17200 val perplexity 519.6669 +17200 train 6.294482 (lr=5.3557e-05) (hash(x)=50722386) +15700 val loss 6.2353 +15700 val perplexity 510.4428 +15700 train 6.055554 (lr=8.0148e-05) (hash(x)=49229381) +17300 val loss 6.0610 +17300 val perplexity 428.7892 +17300 train 6.203306 (lr=3.8130e-05) (hash(x)=54010042) +17300 val loss 6.2296 +17300 val perplexity 507.5507 +17300 train 6.329121 (lr=5.3382e-05) (hash(x)=54010042) +15800 val loss 6.2615 +15800 val perplexity 524.0236 +15800 train 6.045529 (lr=7.9912e-05) (hash(x)=48471211) +17400 val loss 6.0610 +17400 val perplexity 428.7897 +17400 train 6.018238 (lr=3.8005e-05) (hash(x)=53970990) +17400 val loss 6.2369 +17400 val perplexity 511.2510 +17400 train 6.211182 (lr=5.3206e-05) (hash(x)=53970990) +15900 val loss 6.2596 +15900 val perplexity 523.0138 +15900 train 5.964798 (lr=7.9675e-05) (hash(x)=47555877) +17500 val loss 6.0548 +17500 val perplexity 426.1338 +17500 val loss 6.2278 +17500 val perplexity 506.6382 +17500 train 6.134178 (lr=3.7879e-05) (hash(x)=52544667) +17500 train 6.304132 (lr=5.3030e-05) (hash(x)=52544667) +16000 val loss 6.2763 +16000 val perplexity 531.7962 +16000 train 5.966957 (lr=7.9437e-05) (hash(x)=48397986) +17600 val loss 6.0604 +17600 val perplexity 428.5526 +17600 train 6.025463 (lr=3.7753e-05) (hash(x)=50735496) +17600 val loss 6.2313 +17600 val perplexity 508.4021 +17600 train 6.178463 (lr=5.2854e-05) (hash(x)=50735496) +16100 val loss 6.2599 +16100 val perplexity 523.1580 +16100 train 6.310643 (lr=7.9198e-05) (hash(x)=53299004) +17700 val loss 6.0583 +17700 val perplexity 427.6525 +17700 train 6.439931 (lr=3.7626e-05) (hash(x)=55662551) +17700 val loss 6.2334 +17700 val perplexity 509.4889 +17700 train 6.560091 (lr=5.2677e-05) (hash(x)=55662551) +16200 val loss 6.2676 +16200 val perplexity 527.2297 +16200 train 6.036007 (lr=7.8959e-05) (hash(x)=45763277) +17800 val loss 6.0554 +17800 val perplexity 426.3960 +17800 train 5.926167 (lr=3.7499e-05) (hash(x)=51137009) +17800 val loss 6.2278 +17800 val perplexity 506.6174 +17800 train 6.111446 (lr=5.2499e-05) (hash(x)=51137009) +16300 val loss 6.2505 +16300 val perplexity 518.2463 +16300 train 5.719383 (lr=7.8718e-05) (hash(x)=39675609) +17900 val loss 6.0569 +17900 val perplexity 427.0648 +17900 train 6.852966 (lr=3.7372e-05) (hash(x)=73004834) +17900 val loss 6.2341 +17900 val perplexity 509.8237 +17900 train 7.072075 (lr=5.2321e-05) (hash(x)=73004834) +16400 val loss 6.2352 +16400 val perplexity 510.4156 +16400 train 6.316364 (lr=7.8476e-05) (hash(x)=48973557) +18000 val loss 6.0514 +18000 val perplexity 424.7275 +18000 train 6.259719 (lr=3.7244e-05) (hash(x)=51900245) +18000 val loss 6.2339 +18000 val perplexity 509.7630 +18000 train 6.430896 (lr=5.2142e-05) (hash(x)=51900245) +18100 val loss 6.0417 +18100 val perplexity 420.6046 +18100 train 6.320068 (lr=3.7116e-05) (hash(x)=56278625) +18100 val loss 6.2255 +18100 val perplexity 505.4630 +18100 train 6.494106 (lr=5.1962e-05) (hash(x)=56278625) +16500 val loss 6.2256 +16500 val perplexity 505.5129 +16500 train 6.207320 (lr=7.8233e-05) (hash(x)=46121211) +18200 val loss 6.0523 +18200 val perplexity 425.0752 +18200 train 5.916048 (lr=3.6987e-05) (hash(x)=50478164) +18200 val loss 6.2215 +18200 val perplexity 503.4511 +18200 train 6.062120 (lr=5.1782e-05) (hash(x)=50478164) +16600 val loss 6.2345 +16600 val perplexity 510.0611 +16600 train 6.504384 (lr=7.7990e-05) (hash(x)=49181055) +18300 val loss 6.0451 +18300 val perplexity 422.0386 +18300 train 6.029937 (lr=3.6859e-05) (hash(x)=47837565) +18300 val loss 6.2258 +18300 val perplexity 505.6358 +18300 train 6.176018 (lr=5.1602e-05) (hash(x)=47837565) +16700 val loss 6.2452 +16700 val perplexity 515.5415 +16700 train 6.206596 (lr=7.7745e-05) (hash(x)=49361213) +18400 val loss 6.0513 +18400 val perplexity 424.6834 +18400 train 5.898037 (lr=3.6729e-05) (hash(x)=49622704) +18400 val loss 6.2292 +18400 val perplexity 507.3617 +18400 train 6.062944 (lr=5.1421e-05) (hash(x)=49622704) +16800 val loss 6.2345 +16800 val perplexity 510.0693 +16800 train 6.248514 (lr=7.7500e-05) (hash(x)=52739987) +18500 val loss 6.0398 +18500 val perplexity 419.7979 +18500 train 5.899223 (lr=3.6600e-05) (hash(x)=49741203) +18500 val loss 6.2203 +18500 val perplexity 502.8362 +18500 train 6.031091 (lr=5.1240e-05) (hash(x)=49741203) +16900 val loss 6.2274 +16900 val perplexity 506.4341 +16900 train 6.382916 (lr=7.7254e-05) (hash(x)=55994797) +18600 val loss 6.0501 +18600 val perplexity 424.1730 +18600 train 6.520254 (lr=3.6470e-05) (hash(x)=54690634) +18600 val loss 6.2280 +18600 val perplexity 506.7186 +18600 train 6.650096 (lr=5.1058e-05) (hash(x)=54690634) +17000 val loss 6.2225 +17000 val perplexity 503.9757 +17000 train 6.270326 (lr=7.7007e-05) (hash(x)=50109331) +18700 val loss 6.2301 +18700 val perplexity 507.7843 +18700 train 6.106448 (lr=5.0875e-05) (hash(x)=53778909) +18700 val loss 6.0491 +18700 val perplexity 423.7169 +18700 train 5.904674 (lr=3.6339e-05) (hash(x)=53778909) +17100 val loss 6.2159 +17100 val perplexity 500.6514 +17100 train 6.332056 (lr=7.6758e-05) (hash(x)=50292169) +18800 val loss 6.2064 +18800 val perplexity 495.9312 +18800 train 6.366983 (lr=5.0692e-05) (hash(x)=52196791) +18800 val loss 6.0363 +18800 val perplexity 418.3224 +18800 train 6.203441 (lr=3.6209e-05) (hash(x)=52196791) +17200 val loss 6.2173 +17200 val perplexity 501.3246 +17200 train 6.273157 (lr=7.6510e-05) (hash(x)=50722386) +18900 val loss 6.1859 +18900 val perplexity 485.8321 +18900 train 6.120074 (lr=5.0509e-05) (hash(x)=49452227) +18900 val loss 6.0208 +18900 val perplexity 411.8900 +18900 train 5.965091 (lr=3.6078e-05) (hash(x)=49452227) +17300 val loss 6.2028 +17300 val perplexity 494.1235 +17300 train 6.313985 (lr=7.6260e-05) (hash(x)=54010042) +19000 val loss 6.2000 +19000 val perplexity 492.7433 +19000 train 6.192492 (lr=5.0325e-05) (hash(x)=54220718) +19000 val loss 6.0268 +19000 val perplexity 414.3761 +19000 train 6.013001 (lr=3.5946e-05) (hash(x)=54220718) +17400 val loss 6.2195 +17400 val perplexity 502.4561 +17400 train 6.203558 (lr=7.6009e-05) (hash(x)=53970990) +19100 val loss 6.2064 +19100 val perplexity 495.8910 +19100 train 6.357291 (lr=5.0140e-05) (hash(x)=54899289) +19100 val loss 6.0357 +19100 val perplexity 418.0925 +19100 train 6.170717 (lr=3.5814e-05) (hash(x)=54899289) +19200 val loss 6.1756 +19200 val perplexity 480.8738 +19200 train 6.314922 (lr=4.9955e-05) (hash(x)=52676298) +17500 val loss 6.1983 +17500 val perplexity 491.9195 +17500 train 6.253818 (lr=7.5758e-05) (hash(x)=52544667) +19200 val loss 6.0158 +19200 val perplexity 409.8688 +19200 train 6.156129 (lr=3.5682e-05) (hash(x)=52676298) +19300 val loss 6.1720 +19300 val perplexity 479.1528 +19300 train 6.221705 (lr=4.9770e-05) (hash(x)=54877992) +19300 val loss 6.0196 +19300 val perplexity 411.4324 +19300 train 6.096160 (lr=3.5550e-05) (hash(x)=54877992) +17600 val loss 6.2033 +17600 val perplexity 494.3816 +17600 train 6.163414 (lr=7.5505e-05) (hash(x)=50735496) +19400 val loss 6.2017 +19400 val perplexity 493.5873 +19400 train 6.025438 (lr=4.9584e-05) (hash(x)=46432347) +19400 val loss 6.0182 +19400 val perplexity 410.8215 +19400 train 5.834083 (lr=3.5417e-05) (hash(x)=46432347) +17700 val loss 6.2076 +17700 val perplexity 496.4959 +17700 train 6.534688 (lr=7.5252e-05) (hash(x)=55662551) +19500 val loss 6.1630 +19500 val perplexity 474.8275 +19500 train 6.253082 (lr=4.9398e-05) (hash(x)=50677944) +19500 val loss 6.0098 +19500 val perplexity 407.4086 +19500 train 6.096924 (lr=3.5284e-05) (hash(x)=50677944) +17800 val loss 6.2070 +17800 val perplexity 496.2302 +17800 train 6.056177 (lr=7.4998e-05) (hash(x)=51137009) +19600 val loss 6.1729 +19600 val perplexity 479.5898 +19600 train 6.003708 (lr=4.9211e-05) (hash(x)=48855410) +19600 val loss 6.0119 +19600 val perplexity 408.2649 +19600 train 5.852933 (lr=3.5151e-05) (hash(x)=48855410) +17900 val loss 6.2096 +17900 val perplexity 497.5264 +17900 train 7.001251 (lr=7.4744e-05) (hash(x)=73004834) +19700 val loss 6.1954 +19700 val perplexity 490.4861 +19700 train 6.463104 (lr=4.9024e-05) (hash(x)=57942870) +19700 val loss 6.0211 +19700 val perplexity 412.0120 +19700 train 6.308620 (lr=3.5017e-05) (hash(x)=57942870) +18000 val loss 6.1937 +18000 val perplexity 489.6782 +18000 train 6.391912 (lr=7.4488e-05) (hash(x)=51900245) +19800 val loss 6.1841 +19800 val perplexity 484.9574 +19800 train 6.157583 (lr=4.8837e-05) (hash(x)=56663417) +19800 val loss 6.0072 +19800 val perplexity 406.3322 +19800 train 5.984298 (lr=3.4883e-05) (hash(x)=56663417) +18100 val loss 6.2003 +18100 val perplexity 492.9055 +18100 train 6.469229 (lr=7.4232e-05) (hash(x)=56278625) +19900 val loss 6.1664 +19900 val perplexity 476.4777 +19900 train 6.036130 (lr=4.8649e-05) (hash(x)=41160884) +19900 val loss 6.0034 +19900 val perplexity 404.8085 +19900 train 5.876379 (lr=3.4749e-05) (hash(x)=41160884) +18200 val loss 6.1977 +18200 val perplexity 491.6086 +18200 train 6.040568 (lr=7.3975e-05) (hash(x)=50478164) +20000 val loss 6.1748 +20000 val perplexity 480.4738 +20000 train 6.101958 (lr=4.8461e-05) (hash(x)=53380308) +20000 val loss 6.0028 +20000 val perplexity 404.5619 +20000 train 5.943692 (lr=3.4615e-05) (hash(x)=53380308) +18300 val loss 6.2111 +18300 val perplexity 498.2598 +18300 train 6.170772 (lr=7.3717e-05) (hash(x)=47837565) +20100 val loss 6.1677 +20100 val perplexity 477.1066 +20100 train 6.002728 (lr=4.8272e-05) (hash(x)=46345322) +20100 val loss 5.9981 +20100 val perplexity 402.6804 +20100 train 5.834086 (lr=3.4480e-05) (hash(x)=46345322) +18400 val loss 6.2019 +18400 val perplexity 493.6668 +18400 train 6.035745 (lr=7.3459e-05) (hash(x)=49622704) +20200 val loss 6.1732 +20200 val perplexity 479.7181 +20200 train 5.977310 (lr=4.8083e-05) (hash(x)=45030198) +20200 val loss 6.0033 +20200 val perplexity 404.7531 +20200 train 5.833225 (lr=3.4345e-05) (hash(x)=45030198) +18500 val loss 6.2105 +18500 val perplexity 497.9562 +18500 train 6.022415 (lr=7.3199e-05) (hash(x)=49741203) +20300 val loss 6.1696 +20300 val perplexity 477.9764 +20300 train 5.636063 (lr=4.7893e-05) (hash(x)=36613892) +20300 val loss 6.0073 +20300 val perplexity 406.3669 +20300 train 5.474130 (lr=3.4209e-05) (hash(x)=36613892) +18600 val loss 6.2128 +18600 val perplexity 499.1172 +18600 train 6.630300 (lr=7.2939e-05) (hash(x)=54690634) +20400 val loss 6.1657 +20400 val perplexity 476.1436 +20400 train 5.811815 (lr=4.7703e-05) (hash(x)=52987869) +20400 val loss 6.0032 +20400 val perplexity 404.7357 +20400 train 5.619619 (lr=3.4074e-05) (hash(x)=52987869) +18700 val loss 6.2068 +18700 val perplexity 496.0882 +18700 train 6.075917 (lr=7.2679e-05) (hash(x)=53778909) +20500 val loss 6.1605 +20500 val perplexity 473.6513 +20500 train 6.055586 (lr=4.7513e-05) (hash(x)=51868861) +20500 val loss 5.9988 +20500 val perplexity 402.9633 +20500 train 5.896246 (lr=3.3938e-05) (hash(x)=51868861) +20600 val loss 6.1605 +20600 val perplexity 473.6872 +20600 train 6.005601 (lr=4.7323e-05) (hash(x)=48914729) +18800 val loss 6.1777 +18800 val perplexity 481.8716 +18800 train 6.328458 (lr=7.2417e-05) (hash(x)=52196791) +20600 val loss 6.0050 +20600 val perplexity 405.4350 +20600 train 5.827343 (lr=3.3802e-05) (hash(x)=48914729) +20700 val loss 6.1518 +20700 val perplexity 469.5664 +20700 train 6.068443 (lr=4.7132e-05) (hash(x)=52260642) +18900 val loss 6.1715 +18900 val perplexity 478.9191 +18900 train 6.102306 (lr=7.2155e-05) (hash(x)=49452227) +20700 val loss 5.9881 +20700 val perplexity 398.6614 +20700 train 5.907519 (lr=3.3665e-05) (hash(x)=52260642) +20800 val loss 6.1540 +20800 val perplexity 470.5742 +20800 train 6.611380 (lr=4.6940e-05) (hash(x)=67146535) +19000 val loss 6.1746 +19000 val perplexity 480.4142 +19000 train 6.141204 (lr=7.1892e-05) (hash(x)=54220718) +20800 val loss 5.9822 +20800 val perplexity 396.2974 +20800 train 6.462006 (lr=3.3529e-05) (hash(x)=67146535) +20900 val loss 6.1529 +20900 val perplexity 470.0727 +20900 train 6.217502 (lr=4.6749e-05) (hash(x)=54553323) +20900 val loss 5.9839 +20900 val perplexity 396.9801 +20900 train 6.038366 (lr=3.3392e-05) (hash(x)=54553323) +19100 val loss 6.1787 +19100 val perplexity 482.3727 +19100 train 6.311283 (lr=7.1629e-05) (hash(x)=54899289) +21000 val loss 6.1387 +21000 val perplexity 463.4713 +21000 train 6.117680 (lr=4.6557e-05) (hash(x)=48875330) +21000 val loss 5.9773 +21000 val perplexity 394.3770 +21000 train 5.963885 (lr=3.3255e-05) (hash(x)=48875330) +19200 val loss 6.1619 +19200 val perplexity 474.3357 +19200 train 6.279456 (lr=7.1365e-05) (hash(x)=52676298) +21100 val loss 6.1434 +21100 val perplexity 465.6494 +21100 train 6.347168 (lr=4.6365e-05) (hash(x)=51341973) +21100 val loss 5.9764 +21100 val perplexity 394.0028 +21100 train 6.203042 (lr=3.3118e-05) (hash(x)=51341973) +19300 val loss 6.1691 +19300 val perplexity 477.7597 +19300 train 6.234082 (lr=7.1100e-05) (hash(x)=54877992) +21200 val loss 6.1406 +21200 val perplexity 464.3428 +21200 train 6.099993 (lr=4.6172e-05) (hash(x)=49271158) +21200 val loss 5.9711 +21200 val perplexity 391.9223 +21200 train 5.963935 (lr=3.2980e-05) (hash(x)=49271158) +19400 val loss 6.1688 +19400 val perplexity 477.6094 +19400 train 6.003720 (lr=7.0835e-05) (hash(x)=46432347) +21300 val loss 6.1358 +21300 val perplexity 462.1079 +21300 train 5.959718 (lr=4.5979e-05) (hash(x)=45591170) +21300 val loss 5.9673 +21300 val perplexity 390.4627 +21300 train 5.794493 (lr=3.2842e-05) (hash(x)=45591170) +19500 val loss 6.1838 +19500 val perplexity 484.8080 +19500 train 6.281179 (lr=7.0569e-05) (hash(x)=50677944) +21400 val loss 6.1415 +21400 val perplexity 464.7537 +21400 train 6.203477 (lr=4.5786e-05) (hash(x)=53118930) +21400 val loss 5.9793 +21400 val perplexity 395.1776 +21400 train 6.052376 (lr=3.2704e-05) (hash(x)=53118930) +19600 val loss 6.1680 +19600 val perplexity 477.2160 +19600 train 6.011826 (lr=7.0302e-05) (hash(x)=48855410) +21500 val loss 6.1434 +21500 val perplexity 465.6130 +21500 train 5.880425 (lr=4.5592e-05) (hash(x)=52415119) +21500 val loss 5.9685 +21500 val perplexity 390.9249 +21500 train 5.735579 (lr=3.2566e-05) (hash(x)=52415119) +21600 val loss 6.1402 +21600 val perplexity 464.1633 +21600 train 5.977185 (lr=4.5399e-05) (hash(x)=48585730) +19700 val loss 6.1685 +19700 val perplexity 477.4916 +19700 train 6.435672 (lr=7.0035e-05) (hash(x)=57942870) +21600 val loss 5.9629 +21600 val perplexity 388.7413 +21600 train 5.808125 (lr=3.2428e-05) (hash(x)=48585730) +21700 val loss 6.1385 +21700 val perplexity 463.3798 +21700 train 6.320489 (lr=4.5205e-05) (hash(x)=61249800) +19800 val loss 6.1834 +19800 val perplexity 484.6146 +19800 train 6.154061 (lr=6.9767e-05) (hash(x)=56663417) +21700 val loss 5.9661 +21700 val perplexity 389.9913 +21700 train 6.156963 (lr=3.2289e-05) (hash(x)=61249800) +21800 val loss 6.1459 +21800 val perplexity 466.7917 +21800 train 5.897917 (lr=4.5010e-05) (hash(x)=49264022) +19900 val loss 6.1820 +19900 val perplexity 483.9657 +19900 train 6.063060 (lr=6.9498e-05) (hash(x)=41160884) +21800 val loss 5.9556 +21800 val perplexity 385.9250 +21800 train 5.720652 (lr=3.2150e-05) (hash(x)=49264022) +21900 val loss 6.1392 +21900 val perplexity 463.6750 +21900 train 6.063102 (lr=4.4816e-05) (hash(x)=47761044) +20000 val loss 6.1799 +20000 val perplexity 482.9412 +20000 train 6.098162 (lr=6.9229e-05) (hash(x)=53380308) +21900 val loss 5.9596 +21900 val perplexity 387.4464 +21900 train 5.900072 (lr=3.2011e-05) (hash(x)=47761044) +22000 val loss 6.1364 +22000 val perplexity 462.3898 +22000 train 6.196565 (lr=4.4621e-05) (hash(x)=49243894) +20100 val loss 6.1720 +20100 val perplexity 479.1443 +20100 train 6.015300 (lr=6.8960e-05) (hash(x)=46345322) +22000 val loss 5.9533 +22000 val perplexity 385.0378 +22000 train 6.052825 (lr=3.1872e-05) (hash(x)=49243894) +22100 val loss 6.1385 +22100 val perplexity 463.3583 +22100 train 5.857987 (lr=4.4426e-05) (hash(x)=45613618) +22100 val loss 5.9643 +22100 val perplexity 389.2872 +22100 train 5.693257 (lr=3.1733e-05) (hash(x)=45613618) +20200 val loss 6.1937 +20200 val perplexity 489.6474 +20200 train 5.996865 (lr=6.8690e-05) (hash(x)=45030198) +22200 val loss 6.1277 +22200 val perplexity 458.3710 +22200 train 6.044831 (lr=4.4231e-05) (hash(x)=47671047) +22200 val loss 5.9771 +22200 val perplexity 394.2805 +22200 train 5.903796 (lr=3.1593e-05) (hash(x)=47671047) +20300 val loss 6.1750 +20300 val perplexity 480.5879 +20300 train 5.663015 (lr=6.8419e-05) (hash(x)=36613892) +22300 val loss 6.1418 +22300 val perplexity 464.9053 +22300 train 6.115434 (lr=4.4035e-05) (hash(x)=55327350) +22300 val loss 5.9599 +22300 val perplexity 387.5772 +22300 train 5.940328 (lr=3.1454e-05) (hash(x)=55327350) +20400 val loss 6.1719 +20400 val perplexity 479.1105 +20400 train 5.803234 (lr=6.8148e-05) (hash(x)=52987869) +22400 val loss 6.1288 +22400 val perplexity 458.8643 +22400 train 6.322518 (lr=4.3839e-05) (hash(x)=49864138) +22400 val loss 5.9531 +22400 val perplexity 384.9440 +22400 train 6.157360 (lr=3.1314e-05) (hash(x)=49864138) +20500 val loss 6.1687 +20500 val perplexity 477.5483 +20500 train 6.077594 (lr=6.7876e-05) (hash(x)=51868861) +22500 val loss 6.1233 +22500 val perplexity 456.3487 +22500 train 6.059837 (lr=4.3643e-05) (hash(x)=48102324) +22500 val loss 5.9441 +22500 val perplexity 381.5111 +22500 train 5.894808 (lr=3.1174e-05) (hash(x)=48102324) +22600 val loss 6.1267 +22600 val perplexity 457.9022 +22600 train 6.135751 (lr=4.3447e-05) (hash(x)=49662723) +20600 val loss 6.1625 +20600 val perplexity 474.6221 +20600 train 6.011073 (lr=6.7604e-05) (hash(x)=48914729) +22600 val loss 5.9440 +22600 val perplexity 381.4529 +22600 train 6.013496 (lr=3.1034e-05) (hash(x)=49662723) +22700 val loss 6.1144 +22700 val perplexity 452.3062 +22700 train 6.291257 (lr=4.3251e-05) (hash(x)=51372261) +20700 val loss 6.1529 +20700 val perplexity 470.0969 +20700 train 6.074298 (lr=6.7331e-05) (hash(x)=52260642) +22700 val loss 5.9382 +22700 val perplexity 379.2613 +22700 train 6.130818 (lr=3.0894e-05) (hash(x)=51372261) +22800 val loss 6.1108 +22800 val perplexity 450.6853 +22800 train 6.071638 (lr=4.3054e-05) (hash(x)=48811743) +20800 val loss 6.1672 +20800 val perplexity 476.8450 +20800 train 6.585125 (lr=6.7058e-05) (hash(x)=67146535) +22800 val loss 5.9331 +22800 val perplexity 377.3160 +22800 train 5.895883 (lr=3.0753e-05) (hash(x)=48811743) +22900 val loss 6.1070 +22900 val perplexity 448.9683 +22900 train 5.995105 (lr=4.2858e-05) (hash(x)=51623299) +20900 val loss 6.1598 +20900 val perplexity 473.3420 +20900 train 6.234706 (lr=6.6784e-05) (hash(x)=54553323) +22900 val loss 5.9320 +22900 val perplexity 376.9069 +22900 train 5.835777 (lr=3.0613e-05) (hash(x)=51623299) +23000 val loss 6.1117 +23000 val perplexity 451.1217 +23000 train 6.000389 (lr=4.2661e-05) (hash(x)=47452756) +21000 val loss 6.1511 +21000 val perplexity 469.2110 +21000 train 6.108371 (lr=6.6510e-05) (hash(x)=48875330) +23000 val loss 5.9511 +23000 val perplexity 384.1771 +23000 train 5.855799 (lr=3.0472e-05) (hash(x)=47452756) +23100 val loss 6.1143 +23100 val perplexity 452.2760 +23100 train 5.827789 (lr=4.2464e-05) (hash(x)=43735997) +21100 val loss 6.1438 +21100 val perplexity 465.8320 +21100 train 6.351503 (lr=6.6235e-05) (hash(x)=51341973) +23100 val loss 5.9353 +23100 val perplexity 378.1685 +23100 train 5.701593 (lr=3.0331e-05) (hash(x)=43735997) +23200 val loss 6.1201 +23200 val perplexity 454.9291 +23200 train 5.997576 (lr=4.2267e-05) (hash(x)=49451282) +21200 val loss 6.1496 +21200 val perplexity 468.5362 +21200 train 6.088956 (lr=6.5960e-05) (hash(x)=49271158) +23200 val loss 5.9328 +23200 val perplexity 377.2018 +23200 train 5.856523 (lr=3.0190e-05) (hash(x)=49451282) +23300 val loss 6.1108 +23300 val perplexity 450.7143 +23300 train 6.146278 (lr=4.2069e-05) (hash(x)=56682096) +23300 val loss 5.9376 +23300 val perplexity 379.0405 +23300 train 5.990792 (lr=3.0049e-05) (hash(x)=56682096) +21300 val loss 6.1550 +21300 val perplexity 471.0694 +21300 train 5.982675 (lr=6.5684e-05) (hash(x)=45591170) +23400 val loss 6.1011 +23400 val perplexity 446.3608 +23400 train 5.769182 (lr=4.1872e-05) (hash(x)=44941532) +23400 val loss 5.9304 +23400 val perplexity 376.3157 +23400 train 5.619764 (lr=2.9908e-05) (hash(x)=44941532) +21400 val loss 6.1613 +21400 val perplexity 474.0571 +21400 train 6.212630 (lr=6.5408e-05) (hash(x)=53118930) +23500 val loss 6.1009 +23500 val perplexity 446.2529 +23500 train 6.131494 (lr=4.1674e-05) (hash(x)=49488756) +23500 val loss 5.9268 +23500 val perplexity 374.9562 +23500 train 5.927511 (lr=2.9767e-05) (hash(x)=49488756) +23600 val loss 6.1050 +23600 val perplexity 448.1021 +23600 train 5.975768 (lr=4.1476e-05) (hash(x)=49425437) +21500 val loss 6.1392 +21500 val perplexity 463.6640 +21500 train 5.844633 (lr=6.5132e-05) (hash(x)=52415119) +23600 val loss 5.9334 +23600 val perplexity 377.4465 +23600 train 5.798845 (lr=2.9626e-05) (hash(x)=49425437) +23700 val loss 6.1099 +23700 val perplexity 450.2922 +23700 train 6.504116 (lr=4.1278e-05) (hash(x)=56257944) +21600 val loss 6.1489 +21600 val perplexity 468.1996 +21600 train 6.001342 (lr=6.4855e-05) (hash(x)=48585730) +23700 val loss 5.9325 +23700 val perplexity 377.0872 +23700 train 6.344593 (lr=2.9485e-05) (hash(x)=56257944) +23800 val loss 6.1124 +23800 val perplexity 451.4183 +23800 train 6.195869 (lr=4.1080e-05) (hash(x)=51424843) +21700 val loss 6.1481 +21700 val perplexity 467.8303 +21700 train 6.325537 (lr=6.4578e-05) (hash(x)=61249800) +23800 val loss 5.9284 +23800 val perplexity 375.5578 +23800 train 6.008825 (lr=2.9343e-05) (hash(x)=51424843) +23900 val loss 6.0977 +23900 val perplexity 444.8233 +23900 train 6.149929 (lr=4.0882e-05) (hash(x)=51833590) +21800 val loss 6.1408 +21800 val perplexity 464.4139 +21800 train 5.924423 (lr=6.4300e-05) (hash(x)=49264022) +23900 val loss 5.9337 +23900 val perplexity 377.5376 +23900 train 6.000364 (lr=2.9202e-05) (hash(x)=51833590) +24000 val loss 6.0954 +24000 val perplexity 443.8301 +24000 train 5.901597 (lr=4.0684e-05) (hash(x)=48412152) +21900 val loss 6.1541 +21900 val perplexity 470.6493 +21900 train 6.081319 (lr=6.4023e-05) (hash(x)=47761044) +24000 val loss 5.9247 +24000 val perplexity 374.1620 +24000 train 5.726462 (lr=2.9060e-05) (hash(x)=48412152) +24100 val loss 6.1171 +24100 val perplexity 453.5444 +24100 train 6.002603 (lr=4.0486e-05) (hash(x)=48804835) +22000 val loss 6.1653 +22000 val perplexity 475.9423 +22000 train 6.240574 (lr=6.3744e-05) (hash(x)=49243894) +24100 val loss 5.9334 +24100 val perplexity 377.4503 +24100 train 5.848604 (lr=2.8918e-05) (hash(x)=48804835) +24200 val loss 6.0871 +24200 val perplexity 440.1234 +24200 train 6.455055 (lr=4.0287e-05) (hash(x)=56459963) +22100 val loss 6.1504 +22100 val perplexity 468.8929 +22100 train 5.862613 (lr=6.3466e-05) (hash(x)=45613618) +24200 val loss 5.9242 +24200 val perplexity 373.9656 +24200 train 6.344940 (lr=2.8777e-05) (hash(x)=56459963) +24300 val loss 6.0823 +24300 val perplexity 438.0208 +24300 train 5.985527 (lr=4.0089e-05) (hash(x)=46716240) +24300 val loss 5.9214 +24300 val perplexity 372.9502 +24300 train 5.852433 (lr=2.8635e-05) (hash(x)=46716240) +22200 val loss 6.1602 +22200 val perplexity 473.5102 +22200 train 6.061065 (lr=6.3187e-05) (hash(x)=47671047) +24400 val loss 6.0867 +24400 val perplexity 439.9503 +24400 train 6.243721 (lr=3.9891e-05) (hash(x)=53022938) +24400 val loss 5.9234 +24400 val perplexity 373.6973 +24400 train 6.087222 (lr=2.8493e-05) (hash(x)=53022938) +24500 val loss 6.0981 +24500 val perplexity 445.0075 +24500 train 6.050263 (lr=3.9692e-05) (hash(x)=49422455) +22300 val loss 6.1683 +22300 val perplexity 477.3819 +22300 train 6.139494 (lr=6.2907e-05) (hash(x)=55327350) +24500 val loss 5.9191 +24500 val perplexity 372.0645 +24500 train 5.885635 (lr=2.8351e-05) (hash(x)=49422455) +24600 val loss 6.0834 +24600 val perplexity 438.4985 +24600 train 6.347855 (lr=3.9493e-05) (hash(x)=46095419) +22400 val loss 6.1460 +22400 val perplexity 466.8687 +22400 train 6.343654 (lr=6.2628e-05) (hash(x)=49864138) +24600 val loss 5.9201 +24600 val perplexity 372.4554 +24600 train 6.198368 (lr=2.8210e-05) (hash(x)=46095419) +24700 val loss 6.0889 +24700 val perplexity 440.9454 +24700 train 6.189737 (lr=3.9295e-05) (hash(x)=52504918) +22500 val loss 6.1448 +22500 val perplexity 466.2971 +22500 train 6.051379 (lr=6.2348e-05) (hash(x)=48102324) +24700 val loss 5.9203 +24700 val perplexity 372.5203 +24700 train 6.023808 (lr=2.8068e-05) (hash(x)=52504918) +24800 val loss 6.0735 +24800 val perplexity 434.2162 +24800 train 6.016776 (lr=3.9096e-05) (hash(x)=53353568) +22600 val loss 6.1366 +22600 val perplexity 462.4595 +22600 train 6.158983 (lr=6.2068e-05) (hash(x)=49662723) +24800 val loss 5.9217 +24800 val perplexity 373.0393 +24800 train 5.857777 (lr=2.7926e-05) (hash(x)=53353568) +24900 val loss 6.0814 +24900 val perplexity 437.6431 +24900 train 5.991043 (lr=3.8897e-05) (hash(x)=47247169) +22700 val loss 6.1229 +22700 val perplexity 456.1812 +22700 train 6.288065 (lr=6.1787e-05) (hash(x)=51372261) +24900 val loss 5.9119 +24900 val perplexity 369.3970 +24900 train 5.811796 (lr=2.7784e-05) (hash(x)=47247169) +25000 val loss 6.0838 +25000 val perplexity 438.7051 +25000 train 5.958038 (lr=3.8699e-05) (hash(x)=50655375) +22800 val loss 6.1326 +22800 val perplexity 460.6456 +22800 train 6.076201 (lr=6.1506e-05) (hash(x)=48811743) +25000 val loss 5.9158 +25000 val perplexity 370.8477 +25000 train 5.768218 (lr=2.7642e-05) (hash(x)=50655375) +25100 val loss 6.0756 +25100 val perplexity 435.0909 +25100 train 5.877374 (lr=3.8500e-05) (hash(x)=50341890) +22900 val loss 6.1193 +22900 val perplexity 454.5405 +22900 train 6.036701 (lr=6.1225e-05) (hash(x)=51623299) +25100 val loss 5.9135 +25100 val perplexity 370.0094 +25100 train 5.735393 (lr=2.7500e-05) (hash(x)=50341890) +25200 val loss 6.0753 +25200 val perplexity 434.9726 +25200 train 6.088803 (lr=3.8301e-05) (hash(x)=52368607) +23000 val loss 6.1442 +23000 val perplexity 466.0197 +23000 train 6.038305 (lr=6.0944e-05) (hash(x)=47452756) +25200 val loss 5.9162 +25200 val perplexity 371.0170 +25200 train 5.956788 (lr=2.7358e-05) (hash(x)=52368607) +25300 val loss 6.0728 +25300 val perplexity 433.8882 +25300 train 5.722450 (lr=3.8103e-05) (hash(x)=46898454) +23100 val loss 6.1481 +23100 val perplexity 467.8258 +23100 train 5.882354 (lr=6.0663e-05) (hash(x)=43735997) +25300 val loss 5.9056 +25300 val perplexity 367.0740 +25300 train 5.541602 (lr=2.7216e-05) (hash(x)=46898454) +25400 val loss 6.0704 +25400 val perplexity 432.8719 +25400 train 5.998699 (lr=3.7904e-05) (hash(x)=50423971) +25400 val loss 5.9091 +25400 val perplexity 368.3924 +25400 train 5.848545 (lr=2.7074e-05) (hash(x)=50423971) +23200 val loss 6.1344 +23200 val perplexity 461.4429 +23200 train 6.015235 (lr=6.0381e-05) (hash(x)=49451282) +25500 val loss 6.0667 +25500 val perplexity 431.2422 +25500 train 5.917558 (lr=3.7705e-05) (hash(x)=46878539) +25500 val loss 5.9059 +25500 val perplexity 367.2151 +25500 train 5.774507 (lr=2.6932e-05) (hash(x)=46878539) +25600 val loss 6.0724 +25600 val perplexity 433.7289 +25600 train 6.247941 (lr=3.7507e-05) (hash(x)=54616245) +23300 val loss 6.1230 +23300 val perplexity 456.2203 +23300 train 6.182223 (lr=6.0099e-05) (hash(x)=56682096) +25600 val loss 5.9152 +25600 val perplexity 370.6167 +25600 train 6.090686 (lr=2.6790e-05) (hash(x)=54616245) +25700 val loss 6.0679 +25700 val perplexity 431.7663 +25700 train 5.807947 (lr=3.7308e-05) (hash(x)=47739146) +23400 val loss 6.1256 +23400 val perplexity 457.4387 +23400 train 5.784054 (lr=5.9817e-05) (hash(x)=44941532) +25700 val loss 5.9028 +25700 val perplexity 366.0637 +25700 train 5.666933 (lr=2.6649e-05) (hash(x)=47739146) +25800 val loss 6.0653 +25800 val perplexity 430.6711 +25800 train 5.972924 (lr=3.7109e-05) (hash(x)=49016232) +23500 val loss 6.1192 +23500 val perplexity 454.4848 +23500 train 6.172567 (lr=5.9534e-05) (hash(x)=49488756) +25800 val loss 5.9072 +25800 val perplexity 367.6897 +25800 train 5.809902 (lr=2.6507e-05) (hash(x)=49016232) +25900 val loss 6.0674 +25900 val perplexity 431.5759 +25900 train 5.965425 (lr=3.6911e-05) (hash(x)=49512666) +23600 val loss 6.1288 +23600 val perplexity 458.8991 +23600 train 5.990823 (lr=5.9252e-05) (hash(x)=49425437) +25900 val loss 5.9046 +25900 val perplexity 366.7052 +25900 train 5.809525 (lr=2.6365e-05) (hash(x)=49512666) +26000 val loss 6.0660 +26000 val perplexity 430.9323 +26000 train 5.913073 (lr=3.6713e-05) (hash(x)=44586031) +23700 val loss 6.1433 +23700 val perplexity 465.5749 +23700 train 6.546266 (lr=5.8969e-05) (hash(x)=56257944) +26000 val loss 5.8986 +26000 val perplexity 364.5334 +26000 train 5.745259 (lr=2.6223e-05) (hash(x)=44586031) +26100 val loss 6.0575 +26100 val perplexity 427.3129 +26100 train 6.091120 (lr=3.6514e-05) (hash(x)=47151445) +23800 val loss 6.1362 +23800 val perplexity 462.3072 +23800 train 6.196434 (lr=5.8686e-05) (hash(x)=51424843) +26100 val loss 5.8917 +26100 val perplexity 362.0284 +26100 train 5.956306 (lr=2.6082e-05) (hash(x)=47151445) +26200 val loss 6.0507 +26200 val perplexity 424.4200 +26200 train 6.266461 (lr=3.6316e-05) (hash(x)=54800573) +23900 val loss 6.1293 +23900 val perplexity 459.1035 +23900 train 6.186757 (lr=5.8403e-05) (hash(x)=51833590) +26200 val loss 5.8865 +26200 val perplexity 360.1413 +26200 train 6.149418 (lr=2.5940e-05) (hash(x)=54800573) +26300 val loss 6.0518 +26300 val perplexity 424.8778 +26300 train 5.914206 (lr=3.6118e-05) (hash(x)=51810733) +24000 val loss 6.1270 +24000 val perplexity 458.0797 +24000 train 5.919013 (lr=5.8120e-05) (hash(x)=48412152) +26300 val loss 5.8888 +26300 val perplexity 360.9731 +26300 train 5.753789 (lr=2.5798e-05) (hash(x)=51810733) +26400 val loss 6.0481 +26400 val perplexity 423.3150 +26400 train 6.119697 (lr=3.5920e-05) (hash(x)=51482762) +24100 val loss 6.1334 +24100 val perplexity 461.0130 +24100 train 6.042610 (lr=5.7837e-05) (hash(x)=48804835) +26400 val loss 5.8869 +26400 val perplexity 360.2805 +26400 train 5.980966 (lr=2.5657e-05) (hash(x)=51482762) +26500 val loss 6.0487 +26500 val perplexity 423.5792 +26500 train 5.941554 (lr=3.5722e-05) (hash(x)=50129702) +26500 val loss 5.8890 +26500 val perplexity 361.0482 +26500 train 5.805998 (lr=2.5515e-05) (hash(x)=50129702) +24200 val loss 6.1302 +24200 val perplexity 459.5219 +24200 train 6.511795 (lr=5.7554e-05) (hash(x)=56459963) +26600 val loss 6.0358 +26600 val perplexity 418.1230 +26600 train 6.005397 (lr=3.5524e-05) (hash(x)=51104302) +26600 val loss 5.8765 +26600 val perplexity 356.5668 +26600 train 5.861450 (lr=2.5374e-05) (hash(x)=51104302) +26700 val loss 6.0383 +26700 val perplexity 419.1601 +26700 train 5.877936 (lr=3.5326e-05) (hash(x)=48490782) +24300 val loss 6.1126 +24300 val perplexity 451.5018 +24300 train 6.014219 (lr=5.7270e-05) (hash(x)=46716240) +26700 val loss 5.8900 +26700 val perplexity 361.4176 +26700 train 5.728050 (lr=2.5233e-05) (hash(x)=48490782) +26800 val loss 6.0646 +26800 val perplexity 430.3326 +26800 train 6.085222 (lr=3.5128e-05) (hash(x)=50602590) +24400 val loss 6.1224 +24400 val perplexity 455.9637 +24400 train 6.260394 (lr=5.6987e-05) (hash(x)=53022938) +26800 val loss 5.8807 +26800 val perplexity 358.0682 +26800 train 5.918839 (lr=2.5092e-05) (hash(x)=50602590) +26900 val loss 6.0447 +26900 val perplexity 421.8910 +26900 train 5.809762 (lr=3.4931e-05) (hash(x)=45624407) +24500 val loss 6.1045 +24500 val perplexity 447.8726 +24500 train 6.065207 (lr=5.6703e-05) (hash(x)=49422455) +26900 val loss 5.8860 +26900 val perplexity 359.9544 +26900 train 5.676947 (lr=2.4951e-05) (hash(x)=45624407) +27000 val loss 6.0349 +27000 val perplexity 417.7579 +27000 train 5.814979 (lr=3.4733e-05) (hash(x)=46448811) +24600 val loss 6.1070 +24600 val perplexity 448.9721 +24600 train 6.393726 (lr=5.6419e-05) (hash(x)=46095419) +27000 val loss 5.8797 +27000 val perplexity 357.6958 +27000 train 5.645614 (lr=2.4810e-05) (hash(x)=46448811) +27100 val loss 6.0353 +27100 val perplexity 417.9307 +27100 train 5.723890 (lr=3.4536e-05) (hash(x)=42263321) +24700 val loss 6.1352 +24700 val perplexity 461.8288 +24700 train 6.243340 (lr=5.6135e-05) (hash(x)=52504918) +27100 val loss 5.8818 +27100 val perplexity 358.4509 +27100 train 5.567906 (lr=2.4669e-05) (hash(x)=42263321) +27200 val loss 6.0267 +27200 val perplexity 414.3543 +27200 train 5.776744 (lr=3.4339e-05) (hash(x)=45998703) +24800 val loss 6.0932 +24800 val perplexity 442.8545 +24800 train 6.026612 (lr=5.5852e-05) (hash(x)=53353568) +27200 val loss 5.8721 +27200 val perplexity 354.9858 +27200 train 5.627224 (lr=2.4528e-05) (hash(x)=45998703) +27300 val loss 6.0532 +27300 val perplexity 425.4879 +27300 train 6.213803 (lr=3.4142e-05) (hash(x)=58008207) +24900 val loss 6.0928 +24900 val perplexity 442.6459 +24900 train 6.006665 (lr=5.5568e-05) (hash(x)=47247169) +27300 val loss 5.8762 +27300 val perplexity 356.4554 +27300 train 5.960763 (lr=2.4387e-05) (hash(x)=58008207) +27400 val loss 6.0430 +27400 val perplexity 421.1720 +27400 train 6.103308 (lr=3.3946e-05) (hash(x)=58914934) +25000 val loss 6.1069 +25000 val perplexity 448.9342 +25000 train 5.982193 (lr=5.5284e-05) (hash(x)=50655375) +27400 val loss 5.8746 +27400 val perplexity 355.8654 +27400 train 5.926643 (lr=2.4247e-05) (hash(x)=58914934) +27500 val loss 6.0363 +27500 val perplexity 418.3502 +27500 train 5.996116 (lr=3.3749e-05) (hash(x)=54961792) +25100 val loss 6.0932 +25100 val perplexity 442.8452 +27500 val loss 5.8769 +27500 val perplexity 356.7050 +25100 train 5.905435 (lr=5.5000e-05) (hash(x)=50341890) +27600 val loss 6.0305 +27600 val perplexity 415.9225 +27600 train 5.722549 (lr=3.3553e-05) (hash(x)=43436743) +27500 train 5.835189 (lr=2.4106e-05) (hash(x)=54961792) +27700 val loss 6.0270 +27700 val perplexity 414.4666 +27700 train 5.629759 (lr=3.3357e-05) (hash(x)=43954091) +27600 val loss 5.8695 +27600 val perplexity 354.0824 +27600 train 5.544859 (lr=2.3966e-05) (hash(x)=43436743) +25200 val loss 6.0907 +25200 val perplexity 441.7247 +25200 train 6.110599 (lr=5.4716e-05) (hash(x)=52368607) +27700 val loss 5.8784 +27700 val perplexity 357.2492 +27700 train 5.473822 (lr=2.3826e-05) (hash(x)=43954091) +27800 val loss 6.0305 +27800 val perplexity 415.9158 +27800 train 5.947229 (lr=3.3161e-05) (hash(x)=49556041) +25300 val loss 6.0847 +25300 val perplexity 439.0726 +25300 train 5.709676 (lr=5.4432e-05) (hash(x)=46898454) +27800 val loss 5.8703 +27800 val perplexity 354.3638 +27800 train 5.806100 (lr=2.3686e-05) (hash(x)=49556041) +27900 val loss 6.0227 +27900 val perplexity 412.6724 +27900 train 5.838119 (lr=3.2965e-05) (hash(x)=44542698) +25400 val loss 6.0930 +25400 val perplexity 442.7607 +25400 train 6.024213 (lr=5.4148e-05) (hash(x)=50423971) +27900 val loss 5.8620 +27900 val perplexity 351.4414 +27900 train 5.659078 (lr=2.3546e-05) (hash(x)=44542698) +28000 val loss 6.0297 +28000 val perplexity 415.5857 +28000 train 5.976749 (lr=3.2769e-05) (hash(x)=51737822) +25500 val loss 6.0894 +25500 val perplexity 441.1532 +25500 train 5.953987 (lr=5.3865e-05) (hash(x)=46878539) +28000 val loss 5.8627 +28000 val perplexity 351.6655 +28000 train 5.821701 (lr=2.3407e-05) (hash(x)=51737822) +28100 val loss 6.0081 +28100 val perplexity 406.7294 +28100 train 6.438169 (lr=3.2574e-05) (hash(x)=69830597) +25600 val loss 6.0876 +25600 val perplexity 440.3661 +25600 train 6.259648 (lr=5.3581e-05) (hash(x)=54616245) +28100 val loss 5.8625 +28100 val perplexity 351.5973 +28100 train 6.329861 (lr=2.3267e-05) (hash(x)=69830597) +28200 val loss 6.0045 +28200 val perplexity 405.2684 +28200 train 6.060979 (lr=3.2379e-05) (hash(x)=51670601) +25700 val loss 6.0855 +25700 val perplexity 439.4538 +25700 train 5.831913 (lr=5.3297e-05) (hash(x)=47739146) +28200 val loss 5.8490 +28200 val perplexity 346.9010 +28200 train 5.932371 (lr=2.3128e-05) (hash(x)=51670601) +28300 val loss 6.0017 +28300 val perplexity 404.0967 +28300 train 6.050955 (lr=3.2184e-05) (hash(x)=51628878) +25800 val loss 6.0868 +25800 val perplexity 439.9898 +25800 train 5.981336 (lr=5.3013e-05) (hash(x)=49016232) +28300 val loss 5.8536 +28300 val perplexity 348.4960 +28300 train 5.921436 (lr=2.2989e-05) (hash(x)=51628878) +28400 val loss 6.0027 +28400 val perplexity 404.5355 +28400 train 6.015206 (lr=3.1990e-05) (hash(x)=50973565) +28400 val loss 5.8501 +28400 val perplexity 347.2567 +28400 train 5.864284 (lr=2.2850e-05) (hash(x)=50973565) +25900 val loss 6.0921 +25900 val perplexity 442.3286 +25900 train 5.988946 (lr=5.2730e-05) (hash(x)=49512666) +28500 val loss 6.0014 +28500 val perplexity 403.9896 +28500 train 6.132781 (lr=3.1795e-05) (hash(x)=63914328) +28500 val loss 5.8495 +28500 val perplexity 347.0701 +28500 train 5.978743 (lr=2.2711e-05) (hash(x)=63914328) +28600 val loss 6.0003 +28600 val perplexity 403.5666 +28600 train 6.013580 (lr=3.1601e-05) (hash(x)=56517839) +26000 val loss 6.0844 +26000 val perplexity 438.9579 +26000 train 5.939546 (lr=5.2446e-05) (hash(x)=44586031) +28600 val loss 5.8411 +28600 val perplexity 344.1504 +28600 train 5.840915 (lr=2.2572e-05) (hash(x)=56517839) +28700 val loss 6.0003 +28700 val perplexity 403.5629 +28700 train 6.015846 (lr=3.1408e-05) (hash(x)=47631048) +26100 val loss 6.0682 +26100 val perplexity 431.9187 +26100 train 6.100973 (lr=5.2163e-05) (hash(x)=47151445) +28700 val loss 5.8446 +28700 val perplexity 345.3616 +28700 train 5.873164 (lr=2.2434e-05) (hash(x)=47631048) +28800 val loss 6.0039 +28800 val perplexity 404.9857 +28800 train 5.804829 (lr=3.1214e-05) (hash(x)=44376567) +26200 val loss 6.0777 +26200 val perplexity 436.0070 +26200 train 6.290683 (lr=5.1880e-05) (hash(x)=54800573) +28800 val loss 5.8432 +28800 val perplexity 344.8926 +28800 train 5.650768 (lr=2.2296e-05) (hash(x)=44376567) +28900 val loss 6.0049 +28900 val perplexity 405.3988 +28900 train 5.946053 (lr=3.1021e-05) (hash(x)=50934351) +26300 val loss 6.0655 +26300 val perplexity 430.7242 +26300 train 5.922496 (lr=5.1597e-05) (hash(x)=51810733) +28900 val loss 5.8443 +28900 val perplexity 345.2609 +28900 train 5.810985 (lr=2.2158e-05) (hash(x)=50934351) +29000 val loss 6.0022 +29000 val perplexity 404.3154 +29000 train 6.185178 (lr=3.0828e-05) (hash(x)=51712329) +26400 val loss 6.0617 +26400 val perplexity 429.0950 +26400 train 6.139353 (lr=5.1314e-05) (hash(x)=51482762) +29000 val loss 5.8386 +29000 val perplexity 343.2891 +29000 train 6.037989 (lr=2.2020e-05) (hash(x)=51712329) +29100 val loss 5.9990 +29100 val perplexity 403.0212 +29100 train 5.958769 (lr=3.0635e-05) (hash(x)=51282586) +26500 val loss 6.0659 +26500 val perplexity 430.8895 +26500 train 5.955387 (lr=5.1031e-05) (hash(x)=50129702) +29100 val loss 5.8463 +29100 val perplexity 345.9400 +29100 train 5.830448 (lr=2.1882e-05) (hash(x)=51282586) +29200 val loss 5.9984 +29200 val perplexity 402.7780 +29200 train 5.718521 (lr=3.0443e-05) (hash(x)=44457224) +26600 val loss 6.0599 +26600 val perplexity 428.3233 +26600 train 6.015794 (lr=5.0748e-05) (hash(x)=51104302) +29200 val loss 5.8406 +29200 val perplexity 344.0006 +29200 train 5.555852 (lr=2.1745e-05) (hash(x)=44457224) +29300 val loss 5.9981 +29300 val perplexity 402.6825 +29300 train 5.917011 (lr=3.0251e-05) (hash(x)=47669361) +26700 val loss 6.0709 +26700 val perplexity 433.0734 +26700 train 5.911325 (lr=5.0466e-05) (hash(x)=48490782) +29300 val loss 5.8387 +29300 val perplexity 343.3423 +29300 train 5.772800 (lr=2.1608e-05) (hash(x)=47669361) +29400 val loss 5.9899 +29400 val perplexity 399.3928 +29400 train 6.062664 (lr=3.0060e-05) (hash(x)=49442135) +26800 val loss 6.0654 +26800 val perplexity 430.7034 +26800 train 6.097916 (lr=5.0183e-05) (hash(x)=50602590) +29400 val loss 5.8396 +29400 val perplexity 343.6570 +29400 train 5.912821 (lr=2.1471e-05) (hash(x)=49442135) +29500 val loss 5.9921 +29500 val perplexity 400.2511 +29500 train 5.490143 (lr=2.9868e-05) (hash(x)=39350955) +26900 val loss 6.0564 +26900 val perplexity 426.8437 +26900 train 5.828437 (lr=4.9901e-05) (hash(x)=45624407) +29500 val loss 5.8454 +29500 val perplexity 345.6369 +29500 train 5.381187 (lr=2.1335e-05) (hash(x)=39350955) +29600 val loss 5.9898 +29600 val perplexity 399.3499 +29600 train 6.467952 (lr=2.9677e-05) (hash(x)=53114560) +27000 val loss 6.0637 +27000 val perplexity 429.9518 +27000 train 5.826560 (lr=4.9619e-05) (hash(x)=46448811) +29600 val loss 5.8388 +29600 val perplexity 343.3613 +29700 val loss 5.9815 +29700 val perplexity 396.0278 +29600 train 6.333019 (lr=2.1198e-05) (hash(x)=53114560) +29700 train 5.881025 (lr=2.9487e-05) (hash(x)=45302862) +27100 val loss 6.0680 +27100 val perplexity 431.8003 +27100 train 5.762912 (lr=4.9337e-05) (hash(x)=42263321) +29700 val loss 5.8300 +29700 val perplexity 340.3463 +29700 train 5.736992 (lr=2.1062e-05) (hash(x)=45302862) +29800 val loss 5.9815 +29800 val perplexity 396.0533 +29800 train 5.968771 (lr=2.9297e-05) (hash(x)=51679878) +27200 val loss 6.0582 +27200 val perplexity 427.5987 +27200 train 5.819321 (lr=4.9056e-05) (hash(x)=45998703) +29800 val loss 5.8299 +29800 val perplexity 340.3369 +29800 train 5.763031 (lr=2.0926e-05) (hash(x)=51679878) +29900 val loss 5.9843 +29900 val perplexity 397.1570 +29900 train 5.798481 (lr=2.9107e-05) (hash(x)=47344663) +27300 val loss 6.0673 +27300 val perplexity 431.5078 +27300 train 6.224754 (lr=4.8775e-05) (hash(x)=58008207) +29900 val loss 5.8316 +29900 val perplexity 340.9067 +29900 train 5.655300 (lr=2.0791e-05) (hash(x)=47344663) +30000 val loss 5.9842 +30000 val perplexity 397.1127 +30000 train 5.728186 (lr=2.8917e-05) (hash(x)=46530414) +27400 val loss 6.0720 +27400 val perplexity 433.5327 +27400 train 6.118867 (lr=4.8494e-05) (hash(x)=58914934) +30000 val loss 5.8346 +30000 val perplexity 341.9292 +30000 train 5.589412 (lr=2.0655e-05) (hash(x)=46530414) +30100 val loss 5.9845 +30100 val perplexity 397.2377 +30100 train 5.554594 (lr=2.8728e-05) (hash(x)=43262362) +27500 val loss 6.0554 +27500 val perplexity 426.4100 +27500 train 5.994745 (lr=4.8213e-05) (hash(x)=54961792) +30100 val loss 5.8322 +30100 val perplexity 341.1007 +30100 train 5.392095 (lr=2.0520e-05) (hash(x)=43262362) +30200 val loss 5.9802 +30200 val perplexity 395.5237 +30200 train 5.816063 (lr=2.8539e-05) (hash(x)=47879732) +30200 val loss 5.8273 +30200 val perplexity 339.4288 +30200 train 5.661341 (lr=2.0385e-05) (hash(x)=47879732) +27600 val loss 6.0559 +27600 val perplexity 426.6425 +27600 train 5.739508 (lr=4.7932e-05) (hash(x)=43436743) +30300 val loss 5.9771 +30300 val perplexity 394.2851 +30300 train 5.769890 (lr=2.8351e-05) (hash(x)=47335687) +30300 val loss 5.8292 +30300 val perplexity 340.0746 +30300 train 5.628904 (lr=2.0251e-05) (hash(x)=47335687) +27700 val loss 6.0499 +27700 val perplexity 424.0787 +27700 train 5.665303 (lr=4.7652e-05) (hash(x)=43954091) +30400 val loss 5.9810 +30400 val perplexity 395.8262 +30400 train 5.997238 (lr=2.8163e-05) (hash(x)=49940701) +30400 val loss 5.8293 +30400 val perplexity 340.1254 +30400 train 5.867912 (lr=2.0117e-05) (hash(x)=49940701) +30500 val loss 5.9804 +30500 val perplexity 395.6046 +30500 train 5.857673 (lr=2.7976e-05) (hash(x)=49860489) +27800 val loss 6.0467 +27800 val perplexity 422.7273 +27800 train 5.969273 (lr=4.7372e-05) (hash(x)=49556041) +30500 val loss 5.8309 +30500 val perplexity 340.6524 +30500 train 5.724407 (lr=1.9983e-05) (hash(x)=49860489) +30600 val loss 5.9994 +30600 val perplexity 403.2059 +30600 train 5.748838 (lr=2.7789e-05) (hash(x)=44643399) +27900 val loss 6.0436 +27900 val perplexity 421.3867 +27900 train 5.862562 (lr=4.7093e-05) (hash(x)=44542698) +30600 val loss 5.8379 +30600 val perplexity 343.0716 +30600 train 5.591486 (lr=1.9849e-05) (hash(x)=44643399) +30700 val loss 5.9809 +30700 val perplexity 395.7924 +30700 train 5.793119 (lr=2.7602e-05) (hash(x)=49388442) +28000 val loss 6.0452 +28000 val perplexity 422.0614 +28000 train 5.988935 (lr=4.6813e-05) (hash(x)=51737822) +30800 val loss 5.9740 +30800 val perplexity 393.0896 +30800 train 5.937857 (lr=2.7416e-05) (hash(x)=56160596) +30700 val loss 5.8319 +30700 val perplexity 341.0164 +30700 train 5.644805 (lr=1.9716e-05) (hash(x)=49388442) +28100 val loss 6.0408 +28100 val perplexity 420.2293 +28100 train 6.431351 (lr=4.6534e-05) (hash(x)=69830597) +30800 val loss 5.8289 +30800 val perplexity 339.9865 +30800 train 5.815920 (lr=1.9583e-05) (hash(x)=56160596) +30900 val loss 5.9717 +30900 val perplexity 392.1562 +30900 train 5.930646 (lr=2.7230e-05) (hash(x)=49005636) +28200 val loss 6.0367 +28200 val perplexity 418.5054 +28200 train 6.090349 (lr=4.6256e-05) (hash(x)=51670601) +30900 val loss 5.8255 +30900 val perplexity 338.8249 +30900 train 5.797294 (lr=1.9450e-05) (hash(x)=49005636) +31000 val loss 5.9754 +31000 val perplexity 393.6448 +31000 train 6.203425 (lr=2.7045e-05) (hash(x)=54646552) +28300 val loss 6.0271 +28300 val perplexity 414.5247 +28300 train 6.049088 (lr=4.5977e-05) (hash(x)=51628878) +31000 val loss 5.8215 +31000 val perplexity 337.4719 +31000 train 6.032523 (lr=1.9318e-05) (hash(x)=54646552) +31100 val loss 5.9654 +31100 val perplexity 389.6898 +31100 train 5.804157 (lr=2.6860e-05) (hash(x)=44816272) +28400 val loss 6.0472 +28400 val perplexity 422.9353 +28400 train 6.074213 (lr=4.5700e-05) (hash(x)=50973565) +31100 val loss 5.8205 +31100 val perplexity 337.1508 +31100 train 5.646246 (lr=1.9186e-05) (hash(x)=44816272) +31200 val loss 5.9641 +31200 val perplexity 389.2166 +31200 train 6.128108 (lr=2.6675e-05) (hash(x)=53040860) +28500 val loss 6.0329 +28500 val perplexity 416.9289 +28500 train 6.163313 (lr=4.5422e-05) (hash(x)=63914328) +31200 val loss 5.8220 +31200 val perplexity 337.6446 +31200 train 6.022907 (lr=1.9054e-05) (hash(x)=53040860) +31300 val loss 5.9687 +31300 val perplexity 390.9788 +31300 train 6.174263 (lr=2.6491e-05) (hash(x)=52051764) +28600 val loss 6.0286 +28600 val perplexity 415.1244 +28600 train 6.037053 (lr=4.5145e-05) (hash(x)=56517839) +31300 val loss 5.8178 +31300 val perplexity 336.2187 +31300 train 6.046365 (lr=1.8922e-05) (hash(x)=52051764) +31400 val loss 5.9691 +31400 val perplexity 391.1619 +31400 train 5.940002 (lr=2.6308e-05) (hash(x)=51564589) +28700 val loss 6.0258 +28700 val perplexity 413.9781 +28700 train 6.036065 (lr=4.4868e-05) (hash(x)=47631048) +31400 val loss 5.8139 +31400 val perplexity 334.9343 +31400 train 5.756424 (lr=1.8791e-05) (hash(x)=51564589) +31500 val loss 5.9683 +31500 val perplexity 390.8475 +31500 train 5.699306 (lr=2.6125e-05) (hash(x)=49676250) +28800 val loss 6.0292 +28800 val perplexity 415.3858 +28800 train 5.843014 (lr=4.4592e-05) (hash(x)=44376567) +31500 val loss 5.8101 +31500 val perplexity 333.6571 +31500 train 5.550847 (lr=1.8661e-05) (hash(x)=49676250) +31600 val loss 5.9691 +31600 val perplexity 391.1578 +31600 train 5.896336 (lr=2.5942e-05) (hash(x)=49244964) +31600 val loss 5.8081 +31600 val perplexity 332.9929 +31600 train 5.752309 (lr=1.8530e-05) (hash(x)=49244964) +28900 val loss 6.0276 +28900 val perplexity 414.7323 +28900 train 5.979816 (lr=4.4316e-05) (hash(x)=50934351) +31700 val loss 5.9622 +31700 val perplexity 388.4520 +31700 train 6.168909 (lr=2.5760e-05) (hash(x)=49527873) +31700 val loss 5.8098 +31700 val perplexity 333.5623 +31700 train 6.045442 (lr=1.8400e-05) (hash(x)=49527873) +31800 val loss 5.9669 +31800 val perplexity 390.3124 +31800 train 5.923289 (lr=2.5579e-05) (hash(x)=49566755) +29000 val loss 6.0243 +29000 val perplexity 413.3477 +29000 train 6.204475 (lr=4.4040e-05) (hash(x)=51712329) +31800 val loss 5.8218 +31800 val perplexity 337.5768 +31800 train 5.783108 (lr=1.8271e-05) (hash(x)=49566755) +31900 val loss 5.9640 +31900 val perplexity 389.1502 +31900 train 5.772141 (lr=2.5398e-05) (hash(x)=45456170) +29100 val loss 6.0291 +29100 val perplexity 415.3400 +29100 train 5.993180 (lr=4.3765e-05) (hash(x)=51282586) +31900 val loss 5.8053 +31900 val perplexity 332.0555 +31900 train 5.614435 (lr=1.8141e-05) (hash(x)=45456170) +32000 val loss 5.9573 +32000 val perplexity 386.5804 +32000 train 6.021672 (lr=2.5218e-05) (hash(x)=49986978) +29200 val loss 6.0389 +29200 val perplexity 419.4180 +29200 train 5.761024 (lr=4.3490e-05) (hash(x)=44457224) +32000 val loss 5.8044 +32000 val perplexity 331.7572 +32100 val loss 5.9577 +32100 val perplexity 386.7297 +32000 train 5.883710 (lr=1.8013e-05) (hash(x)=49986978) +32100 train 5.598077 (lr=2.5038e-05) (hash(x)=50539149) +29300 val loss 6.0307 +29300 val perplexity 415.9908 +29300 train 5.962490 (lr=4.3216e-05) (hash(x)=47669361) +32200 val loss 5.9584 +32200 val perplexity 386.9891 +32200 train 5.904149 (lr=2.4858e-05) (hash(x)=50998615) +32100 val loss 5.8139 +32100 val perplexity 334.9378 +32100 train 5.460774 (lr=1.7884e-05) (hash(x)=50539149) +29400 val loss 6.0301 +29400 val perplexity 415.7476 +29400 train 6.108774 (lr=4.2942e-05) (hash(x)=49442135) +32300 val loss 5.9550 +32300 val perplexity 385.6723 +32300 train 5.784231 (lr=2.4679e-05) (hash(x)=44030191) +32200 val loss 5.8095 +32200 val perplexity 333.4435 +32200 train 5.759093 (lr=1.7756e-05) (hash(x)=50998615) +29500 val loss 6.0441 +29500 val perplexity 421.6283 +29500 train 5.537832 (lr=4.2669e-05) (hash(x)=39350955) +32400 val loss 5.9621 +32400 val perplexity 388.4196 +32400 train 5.888499 (lr=2.4501e-05) (hash(x)=47269028) +32300 val loss 5.8086 +32300 val perplexity 333.1536 +32300 train 5.670200 (lr=1.7628e-05) (hash(x)=44030191) +29600 val loss 6.0214 +29600 val perplexity 412.1352 +29600 train 6.494508 (lr=4.2396e-05) (hash(x)=53114560) +32500 val loss 5.9565 +32500 val perplexity 386.2638 +32500 train 5.777795 (lr=2.4323e-05) (hash(x)=50128289) +32400 val loss 5.8033 +32400 val perplexity 331.4057 +32400 train 5.730560 (lr=1.7501e-05) (hash(x)=47269028) +29700 val loss 6.0202 +29700 val perplexity 411.6758 +29700 train 5.940580 (lr=4.2124e-05) (hash(x)=45302862) +32600 val loss 5.9642 +32600 val perplexity 389.2467 +32600 train 5.801872 (lr=2.4146e-05) (hash(x)=42781351) +32500 val loss 5.8047 +32500 val perplexity 331.8452 +32500 train 5.641759 (lr=1.7374e-05) (hash(x)=50128289) +29800 val loss 6.0158 +29800 val perplexity 409.8520 +29800 train 6.001534 (lr=4.1852e-05) (hash(x)=51679878) +32700 val loss 5.9590 +32700 val perplexity 387.2246 +32700 train 6.055271 (lr=2.3970e-05) (hash(x)=51712117) +32600 val loss 5.8048 +32600 val perplexity 331.8872 +32600 train 5.621361 (lr=1.7247e-05) (hash(x)=42781351) +29900 val loss 6.0177 +29900 val perplexity 410.6227 +29900 train 5.824810 (lr=4.1581e-05) (hash(x)=47344663) +32800 val loss 5.9455 +32800 val perplexity 382.0401 +32800 train 6.030862 (lr=2.3794e-05) (hash(x)=52478104) +32700 val loss 5.8127 +32700 val perplexity 334.5206 +32700 train 5.901848 (lr=1.7121e-05) (hash(x)=51712117) +30000 val loss 6.0296 +30000 val perplexity 415.5389 +30000 train 5.787665 (lr=4.1310e-05) (hash(x)=46530414) +32800 val loss 5.7982 +32800 val perplexity 329.7212 +32800 train 5.903448 (lr=1.6995e-05) (hash(x)=52478104) +32900 val loss 5.9386 +32900 val perplexity 379.4034 +32900 train 5.884224 (lr=2.3618e-05) (hash(x)=53588694) +32900 val loss 5.7975 +32900 val perplexity 329.4850 +32900 train 5.737524 (lr=1.6870e-05) (hash(x)=53588694) +33000 val loss 5.9344 +33000 val perplexity 377.8080 +33000 train 5.818412 (lr=2.3443e-05) (hash(x)=50821436) +30100 val loss 6.0201 +30100 val perplexity 411.6204 +30100 train 5.600601 (lr=4.1040e-05) (hash(x)=43262362) +33000 val loss 5.7923 +33000 val perplexity 327.7762 +33000 train 5.669199 (lr=1.6745e-05) (hash(x)=50821436) +33100 val loss 5.9394 +33100 val perplexity 379.7194 +33100 train 5.892412 (lr=2.3269e-05) (hash(x)=50371199) +30200 val loss 6.0249 +30200 val perplexity 413.5985 +30200 train 5.841463 (lr=4.0771e-05) (hash(x)=47879732) +33100 val loss 5.7909 +33100 val perplexity 327.3000 +33100 train 5.743008 (lr=1.6621e-05) (hash(x)=50371199) +33200 val loss 5.9406 +33200 val perplexity 380.1528 +33200 train 6.119138 (lr=2.3095e-05) (hash(x)=54544623) +30300 val loss 6.0242 +30300 val perplexity 413.3002 +30300 train 5.807525 (lr=4.0502e-05) (hash(x)=47335687) +33200 val loss 5.7917 +33200 val perplexity 327.5760 +33300 val loss 5.9384 +33300 val perplexity 379.3323 +33200 train 6.008244 (lr=1.6497e-05) (hash(x)=54544623) +33300 train 5.717372 (lr=2.2922e-05) (hash(x)=55456921) +30400 val loss 6.0205 +30400 val perplexity 411.7994 +30400 train 6.041892 (lr=4.0233e-05) (hash(x)=49940701) +33300 val loss 5.7894 +33300 val perplexity 326.8158 +33300 train 5.569286 (lr=1.6373e-05) (hash(x)=55456921) +33400 val loss 5.9448 +33400 val perplexity 381.7524 +33400 train 5.583863 (lr=2.2750e-05) (hash(x)=37240412) +30500 val loss 6.0229 +30500 val perplexity 412.7765 +30500 train 5.896065 (lr=3.9965e-05) (hash(x)=49860489) +33500 val loss 5.9436 +33500 val perplexity 381.3225 +33500 train 6.196152 (lr=2.2578e-05) (hash(x)=56551894) +33400 val loss 5.7941 +33400 val perplexity 328.3684 +33400 train 5.386881 (lr=1.6250e-05) (hash(x)=37240412) +30600 val loss 6.0311 +30600 val perplexity 416.1628 +30600 train 5.800098 (lr=3.9698e-05) (hash(x)=44643399) +33600 val loss 5.9390 +33600 val perplexity 379.5388 +33600 train 5.841659 (lr=2.2407e-05) (hash(x)=47514225) +33500 val loss 5.7895 +33500 val perplexity 326.8537 +33500 train 6.019800 (lr=1.6127e-05) (hash(x)=56551894) +30700 val loss 6.0243 +30700 val perplexity 413.3524 +30700 train 5.884185 (lr=3.9431e-05) (hash(x)=49388442) +33700 val loss 5.9422 +33700 val perplexity 380.7785 +33700 train 5.784485 (lr=2.2237e-05) (hash(x)=51164416) +33600 val loss 5.7885 +33600 val perplexity 326.5360 +33600 train 5.714428 (lr=1.6005e-05) (hash(x)=47514225) +30800 val loss 6.0215 +30800 val perplexity 412.1853 +30800 train 5.995923 (lr=3.9165e-05) (hash(x)=56160596) +33800 val loss 5.9406 +33800 val perplexity 380.1490 +33800 train 5.760533 (lr=2.2067e-05) (hash(x)=50660204) +33700 val loss 5.7849 +33700 val perplexity 325.3629 +33700 train 5.629024 (lr=1.5883e-05) (hash(x)=51164416) +30900 val loss 6.0056 +30900 val perplexity 405.6912 +30900 train 5.963522 (lr=3.8900e-05) (hash(x)=49005636) +33900 val loss 5.9383 +33900 val perplexity 379.2924 +33900 train 5.924343 (lr=2.1898e-05) (hash(x)=48511113) +33800 val loss 5.7858 +33800 val perplexity 325.6466 +33800 train 5.623178 (lr=1.5762e-05) (hash(x)=50660204) +31000 val loss 6.0090 +31000 val perplexity 407.0585 +31000 train 6.238300 (lr=3.8635e-05) (hash(x)=54646552) +34000 val loss 5.9425 +34000 val perplexity 380.8846 +34000 train 5.565374 (lr=2.1729e-05) (hash(x)=41560072) +33900 val loss 5.7916 +33900 val perplexity 327.5377 +33900 train 5.771410 (lr=1.5641e-05) (hash(x)=48511113) +31100 val loss 6.0041 +31100 val perplexity 405.0728 +31100 train 5.864131 (lr=3.8371e-05) (hash(x)=44816272) +34100 val loss 5.9380 +34100 val perplexity 379.1898 +34100 train 5.551881 (lr=2.1561e-05) (hash(x)=50354364) +34000 val loss 5.7882 +34000 val perplexity 326.4260 +34000 train 5.413061 (lr=1.5521e-05) (hash(x)=41560072) +31200 val loss 5.9949 +31200 val perplexity 401.3797 +31200 train 6.178637 (lr=3.8108e-05) (hash(x)=53040860) +34200 val loss 5.9364 +34200 val perplexity 378.5548 +34200 train 6.116266 (lr=2.1394e-05) (hash(x)=54291562) +34100 val loss 5.7894 +34100 val perplexity 326.8008 +34100 train 5.403695 (lr=1.5401e-05) (hash(x)=50354364) +31300 val loss 5.9972 +31300 val perplexity 402.2843 +31300 train 6.192145 (lr=3.7845e-05) (hash(x)=52051764) +34300 val loss 5.9460 +34300 val perplexity 382.2118 +34300 train 6.337032 (lr=2.1227e-05) (hash(x)=53340739) +34200 val loss 5.7852 +34200 val perplexity 325.4492 +34200 train 5.969238 (lr=1.5281e-05) (hash(x)=54291562) +34400 val loss 5.9334 +34400 val perplexity 377.4218 +34400 train 5.564046 (lr=2.1062e-05) (hash(x)=44872469) +31400 val loss 5.9973 +31400 val perplexity 402.3336 +31400 train 5.958656 (lr=3.7583e-05) (hash(x)=51564589) +34300 val loss 5.7894 +34300 val perplexity 326.8222 +34300 train 6.197208 (lr=1.5162e-05) (hash(x)=53340739) +34500 val loss 5.9341 +34500 val perplexity 377.7030 +34500 train 5.025874 (lr=2.0896e-05) (hash(x)=23649625) +31500 val loss 5.9998 +31500 val perplexity 403.3503 +31500 train 5.760409 (lr=3.7321e-05) (hash(x)=49676250) +34400 val loss 5.7846 +34400 val perplexity 325.2628 +34400 train 5.422194 (lr=1.5044e-05) (hash(x)=44872469) +34600 val loss 5.9337 +34600 val perplexity 377.5572 +34600 train 5.865894 (lr=2.0732e-05) (hash(x)=46230721) +31600 val loss 5.9927 +31600 val perplexity 400.5085 +31600 train 5.933610 (lr=3.7061e-05) (hash(x)=49244964) +34500 val loss 5.7804 +34500 val perplexity 323.8937 +34500 train 4.878257 (lr=1.4926e-05) (hash(x)=23649625) +34700 val loss 5.9235 +34700 val perplexity 373.7112 +34700 train 5.711586 (lr=2.0568e-05) (hash(x)=44277516) +31700 val loss 5.9937 +31700 val perplexity 400.9005 +31700 train 6.233249 (lr=3.6801e-05) (hash(x)=49527873) +34600 val loss 5.7907 +34600 val perplexity 327.2462 +34600 train 5.752843 (lr=1.4809e-05) (hash(x)=46230721) +34800 val loss 5.9240 +34800 val perplexity 373.9077 +34800 train 6.004957 (lr=2.0405e-05) (hash(x)=50346875) +31800 val loss 6.0204 +31800 val perplexity 411.7344 +31800 train 6.004154 (lr=3.6541e-05) (hash(x)=49566755) +34700 val loss 5.7750 +34700 val perplexity 322.1385 +34700 train 5.557886 (lr=1.4692e-05) (hash(x)=44277516) +34900 val loss 5.9278 +34900 val perplexity 375.3319 +34900 train 5.866704 (lr=2.0243e-05) (hash(x)=45110550) +31900 val loss 5.9966 +31900 val perplexity 402.0706 +31900 train 5.825653 (lr=3.6283e-05) (hash(x)=45456170) +34800 val loss 5.7748 +34800 val perplexity 322.0846 +34800 train 5.829043 (lr=1.4575e-05) (hash(x)=50346875) +35000 val loss 5.9182 +35000 val perplexity 371.7319 +35000 train 5.920014 (lr=2.0081e-05) (hash(x)=46812915) +34900 val loss 5.7720 +34900 val perplexity 321.1802 +32000 val loss 5.9997 +32000 val perplexity 403.3213 +34900 train 5.727943 (lr=1.4459e-05) (hash(x)=45110550) +32000 train 6.059169 (lr=3.6025e-05) (hash(x)=49986978) +35100 val loss 5.9208 +35100 val perplexity 372.7207 +35100 train 6.078620 (lr=1.9921e-05) (hash(x)=54729285) +35000 val loss 5.7717 +35000 val perplexity 321.0736 +35000 train 5.777222 (lr=1.4344e-05) (hash(x)=46812915) +32100 val loss 5.9950 +32100 val perplexity 401.3996 +32100 train 5.665666 (lr=3.5768e-05) (hash(x)=50539149) +35200 val loss 5.9262 +35200 val perplexity 374.7274 +35200 train 5.658880 (lr=1.9760e-05) (hash(x)=49651453) +35100 val loss 5.7708 +35100 val perplexity 320.7886 +35100 train 5.920694 (lr=1.4229e-05) (hash(x)=54729285) +32200 val loss 5.9912 +32200 val perplexity 399.8976 +32200 train 5.936154 (lr=3.5512e-05) (hash(x)=50998615) +35300 val loss 5.9223 +35300 val perplexity 373.2706 +35300 train 5.794049 (lr=1.9601e-05) (hash(x)=51398605) +35200 val loss 5.7704 +35200 val perplexity 320.6660 +35200 train 5.494881 (lr=1.4115e-05) (hash(x)=49651453) +32300 val loss 5.9954 +32300 val perplexity 401.5585 +32300 train 5.825207 (lr=3.5256e-05) (hash(x)=44030191) +35400 val loss 5.9208 +35400 val perplexity 372.6965 +35400 train 5.703063 (lr=1.9442e-05) (hash(x)=52214092) +35300 val loss 5.7716 +35300 val perplexity 321.0661 +35300 train 5.669948 (lr=1.4001e-05) (hash(x)=51398605) +32400 val loss 5.9985 +32400 val perplexity 402.8164 +32400 train 5.953857 (lr=3.5002e-05) (hash(x)=47269028) +35500 val loss 5.9153 +35500 val perplexity 370.6734 +35500 train 6.209591 (lr=1.9285e-05) (hash(x)=60115839) +35400 val loss 5.7703 +35400 val perplexity 320.6328 +35400 train 5.548427 (lr=1.3887e-05) (hash(x)=52214092) +35600 val loss 5.9498 +35600 val perplexity 383.6671 +35600 train 6.562247 (lr=1.9128e-05) (hash(x)=53051302) +32500 val loss 5.9967 +32500 val perplexity 402.1194 +32500 train 5.813130 (lr=3.4748e-05) (hash(x)=50128289) +35500 val loss 5.7697 +35500 val perplexity 320.4387 +35500 train 6.081394 (lr=1.3775e-05) (hash(x)=60115839) +35700 val loss 5.9232 +35700 val perplexity 373.5927 +35700 train 5.626495 (lr=1.8971e-05) (hash(x)=47769916) +32600 val loss 6.0076 +32600 val perplexity 406.5134 +32600 train 5.874361 (lr=3.4495e-05) (hash(x)=42781351) +35600 val loss 5.7860 +35600 val perplexity 325.7147 +35600 train 6.413506 (lr=1.3663e-05) (hash(x)=53051302) +35800 val loss 5.9231 +35800 val perplexity 373.5498 +35800 train 5.692234 (lr=1.8816e-05) (hash(x)=48652911) +32700 val loss 5.9894 +32700 val perplexity 399.1628 +32700 train 6.090833 (lr=3.4242e-05) (hash(x)=51712117) +35700 val loss 5.7690 +35700 val perplexity 320.2285 +35700 train 5.468635 (lr=1.3551e-05) (hash(x)=47769916) +35900 val loss 5.9212 +35900 val perplexity 372.8640 +35900 train 5.836629 (lr=1.8661e-05) (hash(x)=50452504) +32800 val loss 5.9886 +32800 val perplexity 398.8493 +32800 train 6.088406 (lr=3.3991e-05) (hash(x)=52478104) +35800 val loss 5.7692 +35800 val perplexity 320.2763 +35800 train 5.534987 (lr=1.3440e-05) (hash(x)=48652911) +36000 val loss 5.9277 +36000 val perplexity 375.2843 +36000 train 5.916679 (lr=1.8507e-05) (hash(x)=44888966) +32900 val loss 5.9836 +32900 val perplexity 396.8552 +32900 train 5.927821 (lr=3.3740e-05) (hash(x)=53588694) +35900 val loss 5.7676 +35900 val perplexity 319.7616 +35900 train 5.702505 (lr=1.3329e-05) (hash(x)=50452504) +36100 val loss 5.9185 +36100 val perplexity 371.8711 +36100 train 5.780041 (lr=1.8354e-05) (hash(x)=48245730) +33000 val loss 5.9857 +33000 val perplexity 397.7124 +36000 val loss 5.7703 +36000 val perplexity 320.6316 +33000 train 5.878014 (lr=3.3490e-05) (hash(x)=50821436) +36000 train 5.782542 (lr=1.3219e-05) (hash(x)=44888966) +36200 val loss 5.9188 +36200 val perplexity 371.9759 +36200 train 5.890483 (lr=1.8201e-05) (hash(x)=51614607) +36100 val loss 5.7698 +36100 val perplexity 320.4659 +36100 train 5.641719 (lr=1.3110e-05) (hash(x)=48245730) +33100 val loss 5.9762 +33100 val perplexity 393.9346 +33100 train 5.923144 (lr=3.3242e-05) (hash(x)=50371199) +36300 val loss 5.9136 +36300 val perplexity 370.0232 +36300 train 5.959064 (lr=1.8050e-05) (hash(x)=43069600) +36200 val loss 5.7681 +36200 val perplexity 319.9161 +36200 train 5.747694 (lr=1.3001e-05) (hash(x)=51614607) +36400 val loss 5.9079 +36400 val perplexity 367.9286 +36400 train 5.954860 (lr=1.7899e-05) (hash(x)=52756265) +33200 val loss 5.9800 +33200 val perplexity 395.4338 +33200 train 6.167976 (lr=3.2993e-05) (hash(x)=54544623) +36300 val loss 5.7611 +36300 val perplexity 317.7116 +36300 train 5.831945 (lr=1.2893e-05) (hash(x)=43069600) +36500 val loss 5.9057 +36500 val perplexity 367.1093 +36500 train 5.705001 (lr=1.7749e-05) (hash(x)=47894210) +33300 val loss 5.9807 +33300 val perplexity 395.7107 +33300 train 5.806248 (lr=3.2746e-05) (hash(x)=55456921) +36600 val loss 5.9065 +36600 val perplexity 367.4315 +36600 train 5.917275 (lr=1.7600e-05) (hash(x)=46906979) +36400 val loss 5.7586 +36400 val perplexity 316.9061 +36400 train 5.805235 (lr=1.2785e-05) (hash(x)=52756265) +33400 val loss 5.9812 +33400 val perplexity 395.9162 +33400 train 5.589169 (lr=3.2500e-05) (hash(x)=37240412) +36700 val loss 5.9063 +36700 val perplexity 367.3376 +36700 train 6.596786 (lr=1.7452e-05) (hash(x)=45349010) +36500 val loss 5.7568 +36500 val perplexity 316.3482 +36500 train 5.554217 (lr=1.2678e-05) (hash(x)=47894210) +33500 val loss 5.9829 +33500 val perplexity 396.5762 +33500 train 6.189414 (lr=3.2255e-05) (hash(x)=56551894) +36800 val loss 5.9032 +36800 val perplexity 366.2145 +36800 train 5.672704 (lr=1.7305e-05) (hash(x)=40761454) +36600 val loss 5.7610 +36600 val perplexity 317.6748 +36600 train 5.753194 (lr=1.2572e-05) (hash(x)=46906979) +33600 val loss 5.9811 +33600 val perplexity 395.8841 +33600 train 5.884028 (lr=3.2010e-05) (hash(x)=47514225) +36900 val loss 5.9045 +36900 val perplexity 366.6984 +36900 train 6.013577 (lr=1.7158e-05) (hash(x)=52150265) +36700 val loss 5.7576 +36700 val perplexity 316.6031 +36700 train 6.510787 (lr=1.2466e-05) (hash(x)=45349010) +33700 val loss 5.9750 +33700 val perplexity 393.4758 +33700 train 5.820536 (lr=3.1767e-05) (hash(x)=51164416) +37000 val loss 5.9050 +37000 val perplexity 366.8657 +37000 train 5.954855 (lr=1.7012e-05) (hash(x)=48982242) +36800 val loss 5.7534 +36800 val perplexity 315.2697 +36800 train 5.528115 (lr=1.2360e-05) (hash(x)=40761454) +33800 val loss 5.9833 +33800 val perplexity 396.7519 +33800 train 5.811645 (lr=3.1524e-05) (hash(x)=50660204) +37100 val loss 5.9046 +37100 val perplexity 366.7078 +37100 train 5.844863 (lr=1.6867e-05) (hash(x)=52100108) +36900 val loss 5.7535 +36900 val perplexity 315.2880 +36900 train 5.866702 (lr=1.2256e-05) (hash(x)=52150265) +37200 val loss 5.9152 +37200 val perplexity 370.6419 +37200 train 5.721406 (lr=1.6723e-05) (hash(x)=47721621) +33900 val loss 5.9835 +33900 val perplexity 396.8295 +33900 train 5.958007 (lr=3.1282e-05) (hash(x)=48511113) +37000 val loss 5.7543 +37000 val perplexity 315.5526 +37000 train 5.836509 (lr=1.2152e-05) (hash(x)=48982242) +37300 val loss 5.9038 +37300 val perplexity 366.4096 +37300 train 5.695620 (lr=1.6580e-05) (hash(x)=49312812) +34000 val loss 5.9786 +34000 val perplexity 394.8889 +34000 train 5.608625 (lr=3.1041e-05) (hash(x)=41560072) +37100 val loss 5.7524 +37100 val perplexity 314.9599 +37100 train 5.706089 (lr=1.2048e-05) (hash(x)=52100108) +37400 val loss 5.9026 +37400 val perplexity 365.9883 +37400 train 5.736526 (lr=1.6438e-05) (hash(x)=48536877) +37200 val loss 5.7633 +37200 val perplexity 318.3847 +37200 train 5.581213 (lr=1.1945e-05) (hash(x)=47721621) +34100 val loss 5.9784 +34100 val perplexity 394.8224 +34100 train 5.570742 (lr=3.0802e-05) (hash(x)=50354364) +37500 val loss 5.9107 +37500 val perplexity 368.9655 +37500 train 5.860770 (lr=1.6297e-05) (hash(x)=50612814) +37300 val loss 5.7523 +37300 val perplexity 314.9262 +37300 train 5.567525 (lr=1.1843e-05) (hash(x)=49312812) +34200 val loss 5.9756 +34200 val perplexity 393.7104 +34200 train 6.144897 (lr=3.0563e-05) (hash(x)=54291562) +37600 val loss 5.9110 +37600 val perplexity 369.0783 +37600 train 6.408382 (lr=1.6156e-05) (hash(x)=60685704) +37400 val loss 5.7550 +37400 val perplexity 315.7796 +37400 train 5.594305 (lr=1.1741e-05) (hash(x)=48536877) +34300 val loss 5.9963 +34300 val perplexity 401.9445 +34300 train 6.332589 (lr=3.0325e-05) (hash(x)=53340739) +37700 val loss 5.9094 +37700 val perplexity 368.5012 +37700 train 5.984560 (lr=1.6016e-05) (hash(x)=56543272) +37500 val loss 5.7565 +37500 val perplexity 316.2277 +37500 train 5.719148 (lr=1.1640e-05) (hash(x)=50612814) +34400 val loss 5.9785 +34400 val perplexity 394.8461 +34400 train 5.606893 (lr=3.0088e-05) (hash(x)=44872469) +37800 val loss 5.9097 +37800 val perplexity 368.5869 +37800 train 6.018356 (lr=1.5878e-05) (hash(x)=56659623) +37600 val loss 5.7526 +37600 val perplexity 315.0231 +37600 train 6.202497 (lr=1.1540e-05) (hash(x)=60685704) +34500 val loss 5.9714 +34500 val perplexity 392.0702 +34500 train 5.070857 (lr=2.9852e-05) (hash(x)=23649625) +37900 val loss 5.9059 +37900 val perplexity 367.2103 +37900 train 5.579789 (lr=1.5740e-05) (hash(x)=48577952) +37700 val loss 5.7533 +37700 val perplexity 315.2282 +37700 train 5.840503 (lr=1.1440e-05) (hash(x)=56543272) +34600 val loss 5.9700 +34600 val perplexity 391.4864 +34600 train 5.886979 (lr=2.9617e-05) (hash(x)=46230721) +38000 val loss 5.9061 +38000 val perplexity 367.2748 +38000 train 5.531864 (lr=1.5603e-05) (hash(x)=46719420) +37800 val loss 5.7520 +37800 val perplexity 314.8298 +37800 train 5.849054 (lr=1.1341e-05) (hash(x)=56659623) +34700 val loss 5.9640 +34700 val perplexity 389.1593 +34700 train 5.768269 (lr=2.9383e-05) (hash(x)=44277516) +38100 val loss 5.9013 +38100 val perplexity 365.5296 +38100 train 5.842225 (lr=1.5467e-05) (hash(x)=56290069) +37900 val loss 5.7519 +37900 val perplexity 314.7726 +37900 train 5.430374 (lr=1.1243e-05) (hash(x)=48577952) +34800 val loss 5.9643 +34800 val perplexity 389.2964 +34800 train 6.035602 (lr=2.9150e-05) (hash(x)=50346875) +38200 val loss 5.8980 +38200 val perplexity 364.3053 +38200 train 5.870751 (lr=1.5332e-05) (hash(x)=50056974) +38000 val loss 5.7502 +38000 val perplexity 314.2440 +38000 train 5.378278 (lr=1.1145e-05) (hash(x)=46719420) +38300 val loss 5.8994 +38300 val perplexity 364.8189 +38300 train 5.906262 (lr=1.5198e-05) (hash(x)=48797079) +34900 val loss 5.9627 +34900 val perplexity 388.6727 +34900 train 5.890477 (lr=2.8918e-05) (hash(x)=45110550) +38100 val loss 5.7475 +38100 val perplexity 313.4057 +38100 train 5.671755 (lr=1.1048e-05) (hash(x)=56290069) +38400 val loss 5.8955 +38400 val perplexity 363.4131 +38400 train 5.766184 (lr=1.5064e-05) (hash(x)=49702043) +35000 val loss 5.9614 +35000 val perplexity 388.1683 +35000 train 5.954320 (lr=2.8688e-05) (hash(x)=46812915) +38200 val loss 5.7460 +38200 val perplexity 312.9408 +38200 train 5.734985 (lr=1.0951e-05) (hash(x)=50056974) +38500 val loss 5.8951 +38500 val perplexity 363.2463 +38500 train 5.896713 (lr=1.4932e-05) (hash(x)=53375877) +35100 val loss 5.9603 +35100 val perplexity 387.7407 +35100 train 6.104949 (lr=2.8458e-05) (hash(x)=54729285) +38300 val loss 5.7506 +38300 val perplexity 314.3904 +38300 train 5.793857 (lr=1.0855e-05) (hash(x)=48797079) +38600 val loss 5.8907 +38600 val perplexity 361.6438 +38600 train 5.924218 (lr=1.4801e-05) (hash(x)=52757407) +35200 val loss 5.9641 +35200 val perplexity 389.2074 +35200 train 5.708729 (lr=2.8229e-05) (hash(x)=49651453) +38400 val loss 5.7444 +38400 val perplexity 312.4273 +38400 train 5.612573 (lr=1.0760e-05) (hash(x)=49702043) +38700 val loss 5.8897 +38700 val perplexity 361.3105 +38700 train 5.980316 (lr=1.4670e-05) (hash(x)=51335882) +38500 val loss 5.7501 +38500 val perplexity 314.2229 +38500 train 5.758672 (lr=1.0666e-05) (hash(x)=53375877) +35300 val loss 5.9593 +35300 val perplexity 387.3341 +35300 train 5.837429 (lr=2.8002e-05) (hash(x)=51398605) +38800 val loss 5.8886 +38800 val perplexity 360.8871 +38800 train 5.843975 (lr=1.4541e-05) (hash(x)=46048181) +38600 val loss 5.7405 +38600 val perplexity 311.2345 +38600 train 5.766521 (lr=1.0572e-05) (hash(x)=52757407) +35400 val loss 5.9641 +35400 val perplexity 389.2109 +35400 train 5.720802 (lr=2.7775e-05) (hash(x)=52214092) +38900 val loss 5.8925 +38900 val perplexity 362.2949 +38900 train 5.894951 (lr=1.4412e-05) (hash(x)=50536450) +38700 val loss 5.7394 +38700 val perplexity 310.8754 +38700 train 5.840066 (lr=1.0479e-05) (hash(x)=51335882) +35500 val loss 5.9579 +35500 val perplexity 386.8046 +35500 train 6.262779 (lr=2.7549e-05) (hash(x)=60115839) +39000 val loss 5.8909 +39000 val perplexity 361.7365 +39000 train 5.733507 (lr=1.4285e-05) (hash(x)=49201278) +38800 val loss 5.7400 +38800 val perplexity 311.0584 +38800 train 5.707039 (lr=1.0386e-05) (hash(x)=46048181) +35600 val loss 5.9726 +35600 val perplexity 392.5206 +35600 train 6.608598 (lr=2.7325e-05) (hash(x)=53051302) +39100 val loss 5.8942 +39100 val perplexity 362.9412 +39100 train 5.979485 (lr=1.4158e-05) (hash(x)=52669740) +38900 val loss 5.7431 +38900 val perplexity 312.0286 +38900 train 5.794366 (lr=1.0294e-05) (hash(x)=50536450) +35700 val loss 5.9619 +35700 val perplexity 388.3659 +35700 train 5.651849 (lr=2.7102e-05) (hash(x)=47769916) +39200 val loss 5.8856 +39200 val perplexity 359.8025 +39200 train 5.918985 (lr=1.4032e-05) (hash(x)=45566341) +39000 val loss 5.7360 +39000 val perplexity 309.8333 +39000 train 5.598691 (lr=1.0203e-05) (hash(x)=49201278) +35800 val loss 5.9652 +35800 val perplexity 389.6476 +35800 train 5.729043 (lr=2.6880e-05) (hash(x)=48652911) +39300 val loss 5.8898 +39300 val perplexity 361.3280 +39300 train 6.474769 (lr=1.3908e-05) (hash(x)=50582342) +39100 val loss 5.7421 +39100 val perplexity 311.7166 +39100 train 5.855915 (lr=1.0113e-05) (hash(x)=52669740) +39400 val loss 5.8845 +39400 val perplexity 359.4338 +39400 train 5.644921 (lr=1.3784e-05) (hash(x)=43591296) +35900 val loss 5.9642 +35900 val perplexity 389.2486 +35900 train 5.887765 (lr=2.6659e-05) (hash(x)=50452504) +39200 val loss 5.7334 +39200 val perplexity 309.0333 +39200 train 5.745511 (lr=1.0023e-05) (hash(x)=45566341) +39500 val loss 5.8870 +39500 val perplexity 360.3081 +39500 train 5.672634 (lr=1.3661e-05) (hash(x)=49119520) +36000 val loss 5.9668 +36000 val perplexity 390.2736 +36000 train 5.965039 (lr=2.6439e-05) (hash(x)=44888966) +39300 val loss 5.7350 +39300 val perplexity 309.5198 +39300 train 6.364397 (lr=9.9341e-06) (hash(x)=50582342) +39600 val loss 5.8894 +39600 val perplexity 361.2062 +39600 train 6.152529 (lr=1.3540e-05) (hash(x)=50278799) +36100 val loss 5.9600 +36100 val perplexity 387.6286 +36100 train 5.837125 (lr=2.6220e-05) (hash(x)=48245730) +39400 val loss 5.7312 +39400 val perplexity 308.3344 +39400 train 5.493511 (lr=9.8458e-06) (hash(x)=43591296) +39700 val loss 5.8907 +39700 val perplexity 361.6723 +39700 train 5.793178 (lr=1.3419e-05) (hash(x)=54799713) +36200 val loss 5.9556 +36200 val perplexity 385.8949 +36200 train 5.919549 (lr=2.6002e-05) (hash(x)=51614607) +39500 val loss 5.7377 +39500 val perplexity 310.3366 +39500 train 5.531897 (lr=9.7581e-06) (hash(x)=49119520) +39800 val loss 5.8902 +39800 val perplexity 361.4893 +39800 train 5.968296 (lr=1.3299e-05) (hash(x)=46290010) +36300 val loss 5.9589 +36300 val perplexity 387.2019 +39600 val loss 5.7333 +39600 val perplexity 308.9752 +36300 train 6.025589 (lr=2.5786e-05) (hash(x)=43069600) +39600 train 5.973525 (lr=9.6712e-06) (hash(x)=50278799) +39900 val loss 5.8848 +39900 val perplexity 359.5159 +39900 train 5.575523 (lr=1.3180e-05) (hash(x)=41503413) +39700 val loss 5.7371 +39700 val perplexity 310.1647 +39700 train 5.645143 (lr=9.5849e-06) (hash(x)=54799713) +36400 val loss 5.9505 +36400 val perplexity 383.9391 +36400 train 5.992536 (lr=2.5570e-05) (hash(x)=52756265) +40000 val loss 5.8812 +40000 val perplexity 358.2535 +40000 train 5.890353 (lr=1.3063e-05) (hash(x)=44949087) +39800 val loss 5.7374 +39800 val perplexity 310.2665 +39800 train 5.844509 (lr=9.4994e-06) (hash(x)=46290010) +36500 val loss 5.9535 +36500 val perplexity 385.0833 +36500 train 5.743821 (lr=2.5356e-05) (hash(x)=47894210) +40100 val loss 5.8842 +40100 val perplexity 359.3134 +40100 train 5.445794 (lr=1.2946e-05) (hash(x)=45005676) +39900 val loss 5.7342 +39900 val perplexity 309.2613 +39900 train 5.424328 (lr=9.4146e-06) (hash(x)=41503413) +36600 val loss 5.9613 +36600 val perplexity 388.1315 +36600 train 5.988231 (lr=2.5143e-05) (hash(x)=46906979) +40200 val loss 5.8873 +40200 val perplexity 360.4409 +40200 train 6.014713 (lr=1.2830e-05) (hash(x)=51341494) +40000 val loss 5.7298 +40000 val perplexity 307.9146 +40000 train 5.780132 (lr=9.3305e-06) (hash(x)=44949087) +36700 val loss 5.9497 +36700 val perplexity 383.6526 +36700 train 6.647394 (lr=2.4931e-05) (hash(x)=45349010) +40300 val loss 5.8895 +40300 val perplexity 361.2133 +40300 train 5.618561 (lr=1.2716e-05) (hash(x)=47686820) +40100 val loss 5.7352 +40100 val perplexity 309.5796 +40100 train 5.289546 (lr=9.2472e-06) (hash(x)=45005676) +36800 val loss 5.9443 +36800 val perplexity 381.5670 +40400 val loss 5.8878 +40400 val perplexity 360.6049 +36800 train 5.698735 (lr=2.4721e-05) (hash(x)=40761454) +40400 train 5.754405 (lr=1.2602e-05) (hash(x)=47142962) +40200 val loss 5.7344 +40200 val perplexity 309.3215 +40200 train 5.891187 (lr=9.1646e-06) (hash(x)=51341494) +40500 val loss 5.8892 +40500 val perplexity 361.1151 +40500 train 5.630478 (lr=1.2489e-05) (hash(x)=47585616) +36900 val loss 5.9474 +36900 val perplexity 382.7688 +36900 train 6.036154 (lr=2.4511e-05) (hash(x)=52150265) +40300 val loss 5.7342 +40300 val perplexity 309.2594 +40300 train 5.450536 (lr=9.0827e-06) (hash(x)=47686820) +40600 val loss 5.8905 +40600 val perplexity 361.5762 +40600 train 5.703403 (lr=1.2378e-05) (hash(x)=50075832) +37000 val loss 5.9514 +37000 val perplexity 384.2935 +37000 train 6.014409 (lr=2.4303e-05) (hash(x)=48982242) +40400 val loss 5.7329 +40400 val perplexity 308.8696 +40400 train 5.605383 (lr=9.0015e-06) (hash(x)=47142962) +40700 val loss 5.8813 +40700 val perplexity 358.2859 +40700 train 5.902246 (lr=1.2267e-05) (hash(x)=52764040) +37100 val loss 5.9486 +37100 val perplexity 383.2138 +37100 train 5.875566 (lr=2.4096e-05) (hash(x)=52100108) +40500 val loss 5.7341 +40500 val perplexity 309.2464 +40500 train 5.492334 (lr=8.9211e-06) (hash(x)=47585616) +40800 val loss 5.8754 +40800 val perplexity 356.1506 +40800 train 6.216906 (lr=1.2158e-05) (hash(x)=56727171) +37200 val loss 5.9553 +37200 val perplexity 385.7828 +37200 train 5.766670 (lr=2.3890e-05) (hash(x)=47721621) +40600 val loss 5.7348 +40600 val perplexity 309.4454 +40600 train 5.560070 (lr=8.8414e-06) (hash(x)=50075832) +40900 val loss 5.8740 +40900 val perplexity 355.6850 +40900 train 5.906456 (lr=1.2049e-05) (hash(x)=53111899) +37300 val loss 5.9476 +37300 val perplexity 382.8522 +37300 train 5.734115 (lr=2.3686e-05) (hash(x)=49312812) +40700 val loss 5.7274 +40700 val perplexity 307.1826 +40700 train 5.776053 (lr=8.7624e-06) (hash(x)=52764040) +41000 val loss 5.8728 +41000 val perplexity 355.2520 +41000 train 5.748232 (lr=1.1942e-05) (hash(x)=45951630) +37400 val loss 5.9480 +37400 val perplexity 382.9915 +37400 train 5.784721 (lr=2.3483e-05) (hash(x)=48536877) +40800 val loss 5.7236 +40800 val perplexity 305.9948 +40800 train 6.051008 (lr=8.6842e-06) (hash(x)=56727171) +41100 val loss 5.8737 +41100 val perplexity 355.5605 +41100 train 6.203441 (lr=1.1836e-05) (hash(x)=59664516) +37500 val loss 5.9521 +37500 val perplexity 384.5589 +40900 val loss 5.7222 +40900 val perplexity 305.5659 +40900 train 5.765467 (lr=8.6068e-06) (hash(x)=53111899) +37500 train 5.895791 (lr=2.3281e-05) (hash(x)=50612814) +41200 val loss 5.8818 +41200 val perplexity 358.4606 +41200 train 5.769986 (lr=1.1730e-05) (hash(x)=50414471) +41000 val loss 5.7185 +41000 val perplexity 304.4630 +41000 train 5.610645 (lr=8.5301e-06) (hash(x)=45951630) +37600 val loss 5.9558 +37600 val perplexity 385.9926 +37600 train 6.439344 (lr=2.3080e-05) (hash(x)=60685704) +41300 val loss 5.8697 +41300 val perplexity 354.1432 +41300 train 5.165312 (lr=1.1626e-05) (hash(x)=33823056) +41100 val loss 5.7223 +41100 val perplexity 305.5957 +41100 train 6.084666 (lr=8.4541e-06) (hash(x)=59664516) +37700 val loss 5.9558 +37700 val perplexity 385.9683 +37700 train 6.046243 (lr=2.2881e-05) (hash(x)=56543272) +41400 val loss 5.8664 +41400 val perplexity 352.9914 +41400 train 5.976327 (lr=1.1523e-05) (hash(x)=52911940) +41200 val loss 5.7251 +41200 val perplexity 306.4495 +41200 train 5.620420 (lr=8.3789e-06) (hash(x)=50414471) +37800 val loss 5.9506 +37800 val perplexity 383.9885 +37800 train 6.052326 (lr=2.2682e-05) (hash(x)=56659623) +41500 val loss 5.8748 +41500 val perplexity 355.9481 +41500 train 5.430792 (lr=1.1421e-05) (hash(x)=40005218) +41300 val loss 5.7186 +41300 val perplexity 304.4719 +41300 train 5.012256 (lr=8.3045e-06) (hash(x)=33823056) +37900 val loss 5.9526 +37900 val perplexity 384.7452 +37900 train 5.634081 (lr=2.2485e-05) (hash(x)=48577952) +41600 val loss 5.8644 +41600 val perplexity 352.2685 +41600 train 6.291268 (lr=1.1320e-05) (hash(x)=55223673) +41400 val loss 5.7181 +41400 val perplexity 304.3268 +41400 train 5.824917 (lr=8.2308e-06) (hash(x)=52911940) +41700 val loss 5.8624 +41700 val perplexity 351.5634 +41700 train 5.768177 (lr=1.1220e-05) (hash(x)=49603559) +38000 val loss 5.9556 +38000 val perplexity 385.9087 +38000 train 5.585528 (lr=2.2290e-05) (hash(x)=46719420) +41500 val loss 5.7258 +41500 val perplexity 306.6671 +41500 train 5.284519 (lr=8.1579e-06) (hash(x)=40005218) +41800 val loss 5.8629 +41800 val perplexity 351.7599 +41800 train 5.875134 (lr=1.1121e-05) (hash(x)=51692850) +38100 val loss 5.9439 +38100 val perplexity 381.4094 +38100 train 5.886159 (lr=2.2096e-05) (hash(x)=56290069) +41600 val loss 5.7169 +41600 val perplexity 303.9648 +41600 train 6.142447 (lr=8.0858e-06) (hash(x)=55223673) +41900 val loss 5.8626 +41900 val perplexity 351.6398 +41900 train 5.640895 (lr=1.1024e-05) (hash(x)=51210799) +38200 val loss 5.9461 +38200 val perplexity 382.2668 +38200 train 5.929902 (lr=2.1903e-05) (hash(x)=50056974) +41700 val loss 5.7138 +41700 val perplexity 303.0121 +41700 train 5.615955 (lr=8.0144e-06) (hash(x)=49603559) +42000 val loss 5.8647 +42000 val perplexity 352.3722 +42000 train 6.372183 (lr=1.0927e-05) (hash(x)=60592263) +38300 val loss 5.9449 +38300 val perplexity 381.7824 +38300 train 5.941628 (lr=2.1711e-05) (hash(x)=48797079) +41800 val loss 5.7159 +41800 val perplexity 303.6550 +41800 train 5.753294 (lr=7.9438e-06) (hash(x)=51692850) +42100 val loss 5.8651 +42100 val perplexity 352.5169 +42100 train 5.739551 (lr=1.0831e-05) (hash(x)=48159119) +38400 val loss 5.9387 +38400 val perplexity 379.4237 +38400 train 5.805596 (lr=2.1521e-05) (hash(x)=49702043) +41900 val loss 5.7153 +41900 val perplexity 303.4893 +41900 train 5.475625 (lr=7.8740e-06) (hash(x)=51210799) +42200 val loss 5.8667 +42200 val perplexity 353.0951 +42200 train 5.752390 (lr=1.0737e-05) (hash(x)=48619712) +38500 val loss 5.9378 +38500 val perplexity 379.0855 +38500 train 5.958379 (lr=2.1332e-05) (hash(x)=53375877) +42000 val loss 5.7153 +42000 val perplexity 303.4699 +42000 train 6.237113 (lr=7.8050e-06) (hash(x)=60592263) +42300 val loss 5.8667 +42300 val perplexity 353.0851 +42300 train 5.594993 (lr=1.0644e-05) (hash(x)=56574312) +38600 val loss 5.9333 +38600 val perplexity 377.3937 +42100 val loss 5.7131 +42100 val perplexity 302.8004 +42100 train 5.584966 (lr=7.7368e-06) (hash(x)=48159119) +38600 train 5.949863 (lr=2.1144e-05) (hash(x)=52757407) +42400 val loss 5.8681 +42400 val perplexity 353.5847 +42400 train 5.969612 (lr=1.0552e-05) (hash(x)=47020520) +42200 val loss 5.7152 +42200 val perplexity 303.4401 +42200 train 5.610136 (lr=7.6693e-06) (hash(x)=48619712) +38700 val loss 5.9322 +38700 val perplexity 377.0004 +38700 train 6.038456 (lr=2.0957e-05) (hash(x)=51335882) +42500 val loss 5.8668 +42500 val perplexity 353.1131 +42500 train 5.754909 (lr=1.0460e-05) (hash(x)=48787571) +42300 val loss 5.7129 +42300 val perplexity 302.7350 +42300 train 5.438562 (lr=7.6027e-06) (hash(x)=56574312) +38800 val loss 5.9379 +38800 val perplexity 379.1240 +38800 train 5.899038 (lr=2.0773e-05) (hash(x)=46048181) +42600 val loss 5.8668 +42600 val perplexity 353.1163 +42600 train 5.903190 (lr=1.0370e-05) (hash(x)=53734398) +42400 val loss 5.7153 +42400 val perplexity 303.4768 +42400 train 5.836233 (lr=7.5368e-06) (hash(x)=47020520) +38900 val loss 5.9450 +38900 val perplexity 381.8209 +38900 train 5.936474 (lr=2.0589e-05) (hash(x)=50536450) +42700 val loss 5.8699 +42700 val perplexity 354.2037 +42700 train 5.664432 (lr=1.0282e-05) (hash(x)=52179072) +42500 val loss 5.7147 +42500 val perplexity 303.2787 +42500 train 5.601105 (lr=7.4717e-06) (hash(x)=48787571) +39000 val loss 5.9299 +39000 val perplexity 376.1078 +39000 train 5.762484 (lr=2.0407e-05) (hash(x)=49201278) +42800 val loss 5.8727 +42800 val perplexity 355.1973 +42800 train 5.587573 (lr=1.0194e-05) (hash(x)=45303025) +42600 val loss 5.7174 +42600 val perplexity 304.1232 +42600 train 5.799463 (lr=7.4074e-06) (hash(x)=53734398) +42900 val loss 5.8688 +42900 val perplexity 353.8340 +42900 train 6.018352 (lr=1.0107e-05) (hash(x)=53649191) +39100 val loss 5.9353 +39100 val perplexity 378.1472 +39100 train 6.017318 (lr=2.0226e-05) (hash(x)=52669740) +42700 val loss 5.7176 +42700 val perplexity 304.1840 +42700 train 5.492084 (lr=7.3440e-06) (hash(x)=52179072) +43000 val loss 5.8701 +43000 val perplexity 354.2768 +43000 train 6.291034 (lr=1.0022e-05) (hash(x)=61985066) +39200 val loss 5.9277 +39200 val perplexity 375.3090 +39200 train 5.960580 (lr=2.0046e-05) (hash(x)=45566341) +42800 val loss 5.7190 +42800 val perplexity 304.5946 +42800 train 5.429344 (lr=7.2813e-06) (hash(x)=45303025) +43100 val loss 5.8730 +43100 val perplexity 355.3002 +43100 train 5.842031 (lr=9.9373e-06) (hash(x)=48533037) +39300 val loss 5.9307 +39300 val perplexity 376.3995 +39300 train 6.510407 (lr=1.9868e-05) (hash(x)=50582342) +42900 val loss 5.7162 +42900 val perplexity 303.7518 +42900 train 5.863437 (lr=7.2194e-06) (hash(x)=53649191) +43200 val loss 5.8654 +43200 val perplexity 352.6376 +43200 train 5.989261 (lr=9.8541e-06) (hash(x)=50630327) +39400 val loss 5.9261 +39400 val perplexity 374.6799 +39400 train 5.687747 (lr=1.9692e-05) (hash(x)=43591296) +43000 val loss 5.7161 +43000 val perplexity 303.7293 +43000 train 6.172473 (lr=7.1583e-06) (hash(x)=61985066) +43300 val loss 5.8752 +43300 val perplexity 356.0905 +43300 train 5.952151 (lr=9.7720e-06) (hash(x)=54845917) +39500 val loss 5.9277 +39500 val perplexity 375.2769 +39500 train 5.709194 (lr=1.9516e-05) (hash(x)=49119520) +43100 val loss 5.7156 +43100 val perplexity 303.5709 +43100 train 5.700378 (lr=7.0981e-06) (hash(x)=48533037) +43400 val loss 5.8592 +43400 val perplexity 350.4557 +43400 train 5.792507 (lr=9.6911e-06) (hash(x)=52858739) +39600 val loss 5.9286 +39600 val perplexity 375.6375 +39600 train 6.205977 (lr=1.9342e-05) (hash(x)=50278799) +43200 val loss 5.7116 +43200 val perplexity 302.3573 +43200 train 5.828331 (lr=7.0386e-06) (hash(x)=50630327) +43500 val loss 5.8588 +43500 val perplexity 350.3079 +43500 train 5.923429 (lr=9.6113e-06) (hash(x)=49761319) +39700 val loss 5.9306 +39700 val perplexity 376.3667 +39700 train 5.834247 (lr=1.9170e-05) (hash(x)=54799713) +43600 val loss 5.8551 +43600 val perplexity 349.0164 +43600 train 6.121321 (lr=9.5326e-06) (hash(x)=64879186) +43300 val loss 5.7172 +43300 val perplexity 304.0533 +43300 train 5.803127 (lr=6.9800e-06) (hash(x)=54845917) +39800 val loss 5.9308 +39800 val perplexity 376.4626 +39800 train 6.007029 (lr=1.8999e-05) (hash(x)=46290010) +43700 val loss 5.8557 +43700 val perplexity 349.2031 +43700 train 6.082420 (lr=9.4552e-06) (hash(x)=55246427) +43400 val loss 5.7106 +43400 val perplexity 302.0658 +43400 train 5.642049 (lr=6.9222e-06) (hash(x)=52858739) +43800 val loss 5.8551 +43800 val perplexity 348.9951 +43800 train 5.781734 (lr=9.3788e-06) (hash(x)=51238313) +39900 val loss 5.9276 +39900 val perplexity 375.2378 +39900 train 5.615714 (lr=1.8829e-05) (hash(x)=41503413) +43500 val loss 5.7057 +43500 val perplexity 300.5862 +43500 train 5.784788 (lr=6.8652e-06) (hash(x)=49761319) +43900 val loss 5.8544 +43900 val perplexity 348.7738 +43900 train 5.884986 (lr=9.3036e-06) (hash(x)=51664512) +40000 val loss 5.9289 +40000 val perplexity 375.7552 +40000 train 5.930650 (lr=1.8661e-05) (hash(x)=44949087) +43600 val loss 5.7053 +43600 val perplexity 300.4639 +43600 train 5.983207 (lr=6.8090e-06) (hash(x)=64879186) +44000 val loss 5.8898 +44000 val perplexity 361.3231 +44000 train 6.113993 (lr=9.2296e-06) (hash(x)=64172296) +43700 val loss 5.7048 +43700 val perplexity 300.3196 +43700 train 5.929050 (lr=6.7537e-06) (hash(x)=55246427) +40100 val loss 5.9307 +40100 val perplexity 376.4187 +40100 train 5.513899 (lr=1.8494e-05) (hash(x)=45005676) +44100 val loss 5.8512 +44100 val perplexity 347.6626 +44100 train 5.742202 (lr=9.1568e-06) (hash(x)=50205960) +43800 val loss 5.7055 +43800 val perplexity 300.5147 +43800 train 5.646482 (lr=6.6992e-06) (hash(x)=51238313) +40200 val loss 5.9346 +40200 val perplexity 377.8765 +40200 train 6.103844 (lr=1.8329e-05) (hash(x)=51341494) +44200 val loss 5.8520 +44200 val perplexity 347.9336 +44200 train 5.883790 (lr=9.0851e-06) (hash(x)=51065522) +43900 val loss 5.7033 +43900 val perplexity 299.8510 +43900 train 5.757020 (lr=6.6455e-06) (hash(x)=51664512) +40300 val loss 5.9347 +40300 val perplexity 377.9381 +40300 train 5.685302 (lr=1.8165e-05) (hash(x)=47686820) +44300 val loss 5.8538 +44300 val perplexity 348.5608 +44300 train 5.986463 (lr=9.0146e-06) (hash(x)=56163183) +44000 val loss 5.7167 +44000 val perplexity 303.9045 +44000 train 6.008539 (lr=6.5926e-06) (hash(x)=64172296) +40400 val loss 5.9359 +40400 val perplexity 378.3888 +40400 train 5.806218 (lr=1.8003e-05) (hash(x)=47142962) +44400 val loss 5.8563 +44400 val perplexity 349.4405 +44400 train 5.900486 (lr=8.9453e-06) (hash(x)=50818020) +44100 val loss 5.7018 +44100 val perplexity 299.4078 +44100 train 5.627808 (lr=6.5406e-06) (hash(x)=50205960) +40500 val loss 5.9411 +40500 val perplexity 380.3375 +40500 train 5.667961 (lr=1.7842e-05) (hash(x)=47585616) +44500 val loss 5.8550 +44500 val perplexity 348.9717 +44500 train 5.986432 (lr=8.8771e-06) (hash(x)=52855559) +44200 val loss 5.7011 +44200 val perplexity 299.1825 +44200 train 5.744282 (lr=6.4894e-06) (hash(x)=51065522) +40600 val loss 5.9388 +40600 val perplexity 379.4912 +40600 train 5.744999 (lr=1.7683e-05) (hash(x)=50075832) +44600 val loss 5.8543 +44600 val perplexity 348.7167 +44600 train 5.761628 (lr=8.8101e-06) (hash(x)=44014306) +44300 val loss 5.7041 +44300 val perplexity 300.0847 +44300 train 5.803062 (lr=6.4390e-06) (hash(x)=56163183) +40700 val loss 5.9241 +40700 val perplexity 373.9476 +40700 train 5.948530 (lr=1.7525e-05) (hash(x)=52764040) +44700 val loss 5.8532 +44700 val perplexity 348.3551 +44700 train 6.016201 (lr=8.7443e-06) (hash(x)=53648768) +44400 val loss 5.7043 +44400 val perplexity 300.1653 +44400 train 5.741302 (lr=6.3895e-06) (hash(x)=50818020) +40800 val loss 5.9185 +40800 val perplexity 371.8546 +40800 train 6.221800 (lr=1.7368e-05) (hash(x)=56727171) +44800 val loss 5.8530 +44800 val perplexity 348.2740 +44800 train 5.767185 (lr=8.6797e-06) (hash(x)=42436477) +44500 val loss 5.7031 +44500 val perplexity 299.7960 +44500 train 5.825619 (lr=6.3408e-06) (hash(x)=52855559) +40900 val loss 5.9173 +40900 val perplexity 371.4223 +40900 train 5.929842 (lr=1.7214e-05) (hash(x)=53111899) +44900 val loss 5.8536 +44900 val perplexity 348.4800 +44900 train 5.579908 (lr=8.6163e-06) (hash(x)=48409149) +44600 val loss 5.7044 +44600 val perplexity 300.1988 +44600 train 5.614123 (lr=6.2929e-06) (hash(x)=44014306) +45000 val loss 5.8550 +45000 val perplexity 348.9794 +41000 val loss 5.9133 +41000 val perplexity 369.9288 +41000 train 5.789283 (lr=1.7060e-05) (hash(x)=45951630) +45000 train 5.766385 (lr=8.5540e-06) (hash(x)=53084805) +44700 val loss 5.7035 +44700 val perplexity 299.9299 +44700 train 5.847026 (lr=6.2459e-06) (hash(x)=53648768) +45100 val loss 5.8559 +45100 val perplexity 349.2931 +45100 train 5.713241 (lr=8.4930e-06) (hash(x)=56386155) +41100 val loss 5.9142 +41100 val perplexity 370.2535 +41100 train 6.271932 (lr=1.6908e-05) (hash(x)=59664516) +44800 val loss 5.7036 +44800 val perplexity 299.9361 +44800 train 5.658469 (lr=6.1998e-06) (hash(x)=42436477) +45200 val loss 5.8540 +45200 val perplexity 348.6206 +45200 train 5.865363 (lr=8.4331e-06) (hash(x)=53389864) +41200 val loss 5.9227 +41200 val perplexity 373.4152 +41200 train 5.812696 (lr=1.6758e-05) (hash(x)=50414471) +44900 val loss 5.7056 +44900 val perplexity 300.5589 +44900 train 5.430418 (lr=6.1545e-06) (hash(x)=48409149) +45300 val loss 5.8582 +45300 val perplexity 350.1080 +45300 train 5.890651 (lr=8.3745e-06) (hash(x)=50817456) +45000 val loss 5.7020 +45000 val perplexity 299.4788 +45000 train 5.613445 (lr=6.1100e-06) (hash(x)=53084805) +41300 val loss 5.9105 +41300 val perplexity 368.9034 +41300 train 5.231784 (lr=1.6609e-05) (hash(x)=33823056) +45400 val loss 5.8611 +45400 val perplexity 351.1030 +45400 train 5.894712 (lr=8.3170e-06) (hash(x)=50081660) +45100 val loss 5.7031 +45100 val perplexity 299.8070 +45100 train 5.568305 (lr=6.0664e-06) (hash(x)=56386155) +41400 val loss 5.9102 +41400 val perplexity 368.7863 +41400 train 6.018604 (lr=1.6462e-05) (hash(x)=52911940) +45500 val loss 5.8623 +45500 val perplexity 351.5262 +45500 train 5.613654 (lr=8.2607e-06) (hash(x)=55567121) +45200 val loss 5.7030 +45200 val perplexity 299.7704 +45200 train 5.727294 (lr=6.0237e-06) (hash(x)=53389864) +41500 val loss 5.9153 +41500 val perplexity 370.6497 +41500 train 5.463718 (lr=1.6316e-05) (hash(x)=40005218) +45600 val loss 5.8531 +45600 val perplexity 348.3091 +45600 train 5.933183 (lr=8.2057e-06) (hash(x)=48902854) +45300 val loss 5.7054 +45300 val perplexity 300.4919 +45300 train 5.737086 (lr=5.9818e-06) (hash(x)=50817456) +41600 val loss 5.9086 +41600 val perplexity 368.1989 +41600 train 6.333064 (lr=1.6172e-05) (hash(x)=55223673) +45700 val loss 5.8465 +45700 val perplexity 346.0248 +45700 train 6.066646 (lr=8.1518e-06) (hash(x)=53803301) +45400 val loss 5.7068 +45400 val perplexity 300.9168 +45400 train 5.736176 (lr=5.9407e-06) (hash(x)=50081660) +41700 val loss 5.9105 +41700 val perplexity 368.8865 +41700 train 5.812880 (lr=1.6029e-05) (hash(x)=49603559) +45800 val loss 5.8459 +45800 val perplexity 345.8038 +45800 train 5.908492 (lr=8.0992e-06) (hash(x)=47576606) +45500 val loss 5.7068 +45500 val perplexity 300.9174 +45500 train 5.440077 (lr=5.9005e-06) (hash(x)=55567121) +41800 val loss 5.9084 +41800 val perplexity 368.1216 +41800 train 5.941528 (lr=1.5888e-05) (hash(x)=51692850) +45900 val loss 5.8443 +45900 val perplexity 345.2653 +45900 train 5.754907 (lr=8.0478e-06) (hash(x)=56276981) +45600 val loss 5.7003 +45600 val perplexity 298.9519 +45600 train 5.764146 (lr=5.8612e-06) (hash(x)=48902854) +41900 val loss 5.9105 +41900 val perplexity 368.9075 +41900 train 5.696601 (lr=1.5748e-05) (hash(x)=51210799) +46000 val loss 5.8472 +46000 val perplexity 346.2800 +46000 train 5.789150 (lr=7.9976e-06) (hash(x)=52174902) +45700 val loss 5.6972 +45700 val perplexity 298.0337 +45700 train 5.943999 (lr=5.8227e-06) (hash(x)=53803301) +42000 val loss 5.9139 +42000 val perplexity 370.1391 +42000 train 6.437364 (lr=1.5610e-05) (hash(x)=60592263) +46100 val loss 5.8457 +46100 val perplexity 345.7564 +46100 train 5.966497 (lr=7.9485e-06) (hash(x)=52155490) +45800 val loss 5.6954 +45800 val perplexity 297.5070 +45800 train 5.750324 (lr=5.7851e-06) (hash(x)=47576606) +46200 val loss 5.8436 +46200 val perplexity 345.0224 +42100 val loss 5.9121 +42100 val perplexity 369.4916 +46200 train 5.706237 (lr=7.9008e-06) (hash(x)=46489984) +42100 train 5.783825 (lr=1.5474e-05) (hash(x)=48159119) +45900 val loss 5.6947 +45900 val perplexity 297.2770 +45900 train 5.579243 (lr=5.7484e-06) (hash(x)=56276981) +46300 val loss 5.8429 +46300 val perplexity 344.7708 +46300 train 5.817455 (lr=7.8542e-06) (hash(x)=56603976) +42200 val loss 5.9136 +42200 val perplexity 370.0398 +42200 train 5.816652 (lr=1.5339e-05) (hash(x)=48619712) +46000 val loss 5.6938 +46000 val perplexity 297.0068 +46000 train 5.646109 (lr=5.7125e-06) (hash(x)=52174902) +46400 val loss 5.8429 +46400 val perplexity 344.7834 +46400 train 5.657983 (lr=7.8088e-06) (hash(x)=51228412) +46100 val loss 5.6935 +46100 val perplexity 296.9176 +46100 train 5.822449 (lr=5.6775e-06) (hash(x)=52155490) +42300 val loss 5.9119 +42300 val perplexity 369.4113 +42300 train 5.644996 (lr=1.5205e-05) (hash(x)=56574312) +46500 val loss 5.8424 +46500 val perplexity 344.6148 +46500 train 5.694198 (lr=7.7647e-06) (hash(x)=48511186) +46200 val loss 5.6927 +46200 val perplexity 296.6807 +46200 train 5.518863 (lr=5.6434e-06) (hash(x)=46489984) +42400 val loss 5.9162 +42400 val perplexity 370.9913 +42400 train 5.995991 (lr=1.5074e-05) (hash(x)=47020520) +46600 val loss 5.8406 +46600 val perplexity 343.9942 +46600 train 5.857633 (lr=7.7218e-06) (hash(x)=48832039) +46300 val loss 5.6930 +46300 val perplexity 296.7935 +46300 train 5.699072 (lr=5.6101e-06) (hash(x)=56603976) +42500 val loss 5.9106 +42500 val perplexity 368.9411 +42500 train 5.795770 (lr=1.4943e-05) (hash(x)=48787571) +46700 val loss 5.8418 +46700 val perplexity 344.3911 +46700 train 6.526180 (lr=7.6801e-06) (hash(x)=65821271) +46400 val loss 5.6924 +46400 val perplexity 296.5966 +46400 train 5.499797 (lr=5.5777e-06) (hash(x)=51228412) +42600 val loss 5.9168 +42600 val perplexity 371.2081 +42600 train 5.945784 (lr=1.4815e-05) (hash(x)=53734398) +46800 val loss 5.8418 +46800 val perplexity 344.3912 +46800 train 5.541548 (lr=7.6397e-06) (hash(x)=48005593) +46500 val loss 5.6914 +46500 val perplexity 296.3133 +46500 train 5.526657 (lr=5.5462e-06) (hash(x)=48511186) +42700 val loss 5.9162 +42700 val perplexity 370.9931 +42700 train 5.741552 (lr=1.4688e-05) (hash(x)=52179072) +46900 val loss 5.8433 +46900 val perplexity 344.9227 +46900 train 6.121469 (lr=7.6004e-06) (hash(x)=53182453) +46600 val loss 5.6906 +46600 val perplexity 296.0728 +46600 train 5.720109 (lr=5.5156e-06) (hash(x)=48832039) +42800 val loss 5.9187 +42800 val perplexity 371.9264 +42800 train 5.617833 (lr=1.4563e-05) (hash(x)=45303025) +47000 val loss 5.8440 +47000 val perplexity 345.1522 +47000 train 5.734328 (lr=7.5624e-06) (hash(x)=49385983) +46700 val loss 5.6914 +46700 val perplexity 296.2935 +46700 train 6.375406 (lr=5.4858e-06) (hash(x)=65821271) +47100 val loss 5.8438 +47100 val perplexity 345.0918 +47100 train 5.823684 (lr=7.5257e-06) (hash(x)=48801622) +42900 val loss 5.9158 +42900 val perplexity 370.8459 +42900 train 6.020298 (lr=1.4439e-05) (hash(x)=53649191) +46800 val loss 5.6914 +46800 val perplexity 296.3135 +46800 train 5.400339 (lr=5.4569e-06) (hash(x)=48005593) +47200 val loss 5.8434 +47200 val perplexity 344.9362 +47200 train 5.573141 (lr=7.4901e-06) (hash(x)=45216690) +43000 val loss 5.9187 +43000 val perplexity 371.9230 +43000 train 6.334957 (lr=1.4317e-05) (hash(x)=61985066) +46900 val loss 5.6906 +46900 val perplexity 296.0840 +46900 train 5.951502 (lr=5.4289e-06) (hash(x)=53182453) +47300 val loss 5.8429 +47300 val perplexity 344.7614 +47300 train 5.664711 (lr=7.4558e-06) (hash(x)=51185601) +43100 val loss 5.9161 +43100 val perplexity 370.9705 +43100 train 5.899559 (lr=1.4196e-05) (hash(x)=48533037) +47000 val loss 5.6906 +47000 val perplexity 296.0736 +47000 train 5.593194 (lr=5.4017e-06) (hash(x)=49385983) +47400 val loss 5.8412 +47400 val perplexity 344.1809 +47400 train 5.535730 (lr=7.4228e-06) (hash(x)=47743508) +47100 val loss 5.6921 +47100 val perplexity 296.5078 +47100 train 5.685345 (lr=5.3755e-06) (hash(x)=48801622) +43200 val loss 5.9092 +43200 val perplexity 368.4084 +43200 train 6.050465 (lr=1.4077e-05) (hash(x)=50630327) +47500 val loss 5.8437 +47500 val perplexity 345.0433 +47500 train 5.638478 (lr=7.3909e-06) (hash(x)=49318833) +47200 val loss 5.6920 +47200 val perplexity 296.4918 +47200 train 5.443398 (lr=5.3501e-06) (hash(x)=45216690) +43300 val loss 5.9195 +43300 val perplexity 372.2409 +43300 train 5.995340 (lr=1.3960e-05) (hash(x)=54845917) +47600 val loss 5.8437 +47600 val perplexity 345.0602 +47600 train 5.822316 (lr=7.3603e-06) (hash(x)=50430826) +47300 val loss 5.6924 +47300 val perplexity 296.6056 +47300 train 5.513380 (lr=5.3256e-06) (hash(x)=51185601) +43400 val loss 5.9105 +43400 val perplexity 368.9018 +43400 train 5.850887 (lr=1.3844e-05) (hash(x)=52858739) +47700 val loss 5.8444 +47700 val perplexity 345.2808 +47700 train 5.769873 (lr=7.3310e-06) (hash(x)=51966931) +47400 val loss 5.6915 +47400 val perplexity 296.3458 +47400 train 5.389669 (lr=5.3020e-06) (hash(x)=47743508) +43500 val loss 5.9053 +43500 val perplexity 366.9831 +43500 train 5.984348 (lr=1.3730e-05) (hash(x)=49761319) +47800 val loss 5.8448 +47800 val perplexity 345.4438 +47800 train 5.589402 (lr=7.3029e-06) (hash(x)=49517209) +47500 val loss 5.6937 +47500 val perplexity 296.9857 +47500 train 5.459077 (lr=5.2792e-06) (hash(x)=49318833) +43600 val loss 5.9012 +43600 val perplexity 365.4760 +43600 train 6.143999 (lr=1.3618e-05) (hash(x)=64879186) +47900 val loss 5.8478 +47900 val perplexity 346.4768 +47900 train 5.651646 (lr=7.2760e-06) (hash(x)=49749702) +47600 val loss 5.6958 +47600 val perplexity 297.6037 +47600 train 5.656210 (lr=5.2574e-06) (hash(x)=50430826) +43700 val loss 5.9007 +43700 val perplexity 365.3025 +43700 train 6.098925 (lr=1.3507e-05) (hash(x)=55246427) +48000 val loss 5.8472 +48000 val perplexity 346.2651 +48000 train 5.738358 (lr=7.2504e-06) (hash(x)=52535448) +47700 val loss 5.6924 +47700 val perplexity 296.5990 +47700 train 5.622097 (lr=5.2364e-06) (hash(x)=51966931) +43800 val loss 5.9012 +43800 val perplexity 365.4811 +43800 train 5.845619 (lr=1.3398e-05) (hash(x)=51238313) +48100 val loss 5.8393 +48100 val perplexity 343.5267 +48100 train 6.504196 (lr=7.2260e-06) (hash(x)=53739181) +47800 val loss 5.6929 +47800 val perplexity 296.7492 +47800 train 5.451366 (lr=5.2163e-06) (hash(x)=49517209) +48200 val loss 5.8382 +48200 val perplexity 343.1531 +48200 train 5.818700 (lr=7.2029e-06) (hash(x)=48086710) +43900 val loss 5.8992 +43900 val perplexity 364.7461 +43900 train 5.942101 (lr=1.3291e-05) (hash(x)=51664512) +47900 val loss 5.6976 +47900 val perplexity 298.1566 +47900 train 5.511366 (lr=5.1972e-06) (hash(x)=49749702) +48300 val loss 5.8370 +48300 val perplexity 342.7467 +48300 train 5.848775 (lr=7.1810e-06) (hash(x)=43991942) +44000 val loss 5.9200 +44000 val perplexity 372.3936 +44000 train 6.194808 (lr=1.3185e-05) (hash(x)=64172296) +48000 val loss 5.6964 +48000 val perplexity 297.7842 +48000 train 5.567257 (lr=5.1788e-06) (hash(x)=52535448) +48400 val loss 5.8352 +48400 val perplexity 342.1305 +48400 train 5.733413 (lr=7.1603e-06) (hash(x)=50801906) +48100 val loss 5.6909 +48100 val perplexity 296.1606 +48100 train 6.392676 (lr=5.1614e-06) (hash(x)=53739181) +44100 val loss 5.8970 +44100 val perplexity 363.9280 +44100 train 5.794561 (lr=1.3081e-05) (hash(x)=50205960) +48500 val loss 5.8337 +48500 val perplexity 341.6209 +48500 train 5.901326 (lr=7.1409e-06) (hash(x)=48915599) +48200 val loss 5.6897 +48200 val perplexity 295.7967 +48200 train 5.674656 (lr=5.1449e-06) (hash(x)=48086710) +44200 val loss 5.8998 +44200 val perplexity 364.9543 +44200 train 5.935834 (lr=1.2979e-05) (hash(x)=51065522) +48600 val loss 5.8336 +48600 val perplexity 341.6013 +48600 train 5.971982 (lr=7.1228e-06) (hash(x)=54450172) +48300 val loss 5.6892 +48300 val perplexity 295.6688 +48300 train 5.667455 (lr=5.1293e-06) (hash(x)=43991942) +44300 val loss 5.8949 +44300 val perplexity 363.1784 +44300 train 5.992972 (lr=1.2878e-05) (hash(x)=56163183) +48700 val loss 5.8338 +48700 val perplexity 341.6383 +48700 train 5.711917 (lr=7.1059e-06) (hash(x)=48755509) +48400 val loss 5.6864 +48400 val perplexity 294.8432 +48400 train 5.564478 (lr=5.1145e-06) (hash(x)=50801906) +44400 val loss 5.8974 +44400 val perplexity 364.0868 +44400 train 5.945497 (lr=1.2779e-05) (hash(x)=50818020) +48800 val loss 5.8392 +48800 val perplexity 343.5165 +48800 train 5.730361 (lr=7.0902e-06) (hash(x)=51380096) +48500 val loss 5.6856 +48500 val perplexity 294.5995 +48500 train 5.748676 (lr=5.1007e-06) (hash(x)=48915599) +44500 val loss 5.8986 +44500 val perplexity 364.5092 +44500 train 6.022935 (lr=1.2682e-05) (hash(x)=52855559) +48900 val loss 5.8356 +48900 val perplexity 342.2801 +48900 train 5.701429 (lr=7.0758e-06) (hash(x)=43404218) +48600 val loss 5.6854 +48600 val perplexity 294.5244 +48600 train 5.860590 (lr=5.0877e-06) (hash(x)=54450172) +44600 val loss 5.8999 +44600 val perplexity 364.9890 +44600 train 5.808752 (lr=1.2586e-05) (hash(x)=44014306) +49000 val loss 5.8345 +49000 val perplexity 341.8860 +49000 train 5.789010 (lr=7.0627e-06) (hash(x)=55059739) +48700 val loss 5.6858 +48700 val perplexity 294.6566 +48700 train 5.584652 (lr=5.0756e-06) (hash(x)=48755509) +49100 val loss 5.8366 +49100 val perplexity 342.6208 +49100 train 6.049414 (lr=7.0508e-06) (hash(x)=47889309) +44700 val loss 5.9001 +44700 val perplexity 365.0735 +44700 train 6.067726 (lr=1.2492e-05) (hash(x)=53648768) +48800 val loss 5.6946 +48800 val perplexity 297.2485 +48800 train 5.575311 (lr=5.0644e-06) (hash(x)=51380096) +49200 val loss 5.8351 +49200 val perplexity 342.0996 +49200 train 5.590982 (lr=7.0401e-06) (hash(x)=46202589) +44800 val loss 5.8991 +44800 val perplexity 364.7090 +44800 train 5.813978 (lr=1.2400e-05) (hash(x)=42436477) +48900 val loss 5.6870 +48900 val perplexity 295.0107 +48900 train 5.563878 (lr=5.0542e-06) (hash(x)=43404218) +49300 val loss 5.8364 +49300 val perplexity 342.5402 +49300 train 5.780857 (lr=7.0307e-06) (hash(x)=49161813) +49000 val loss 5.6857 +49000 val perplexity 294.6233 +49000 train 5.617337 (lr=5.0448e-06) (hash(x)=55059739) +44900 val loss 5.9009 +44900 val perplexity 365.3680 +44900 train 5.626163 (lr=1.2309e-05) (hash(x)=48409149) +49400 val loss 5.8356 +49400 val perplexity 342.2803 +49400 train 5.664212 (lr=7.0226e-06) (hash(x)=49505044) +49100 val loss 5.6893 +49100 val perplexity 295.6787 +49100 train 5.932079 (lr=5.0363e-06) (hash(x)=47889309) +49500 val loss 5.8357 +49500 val perplexity 342.3185 +49500 train 5.609909 (lr=7.0157e-06) (hash(x)=46610327) +45000 val loss 5.9028 +45000 val perplexity 366.0562 +45000 train 5.809747 (lr=1.2220e-05) (hash(x)=53084805) +49600 val loss 5.8377 +49600 val perplexity 342.9799 +49600 train 6.178606 (lr=7.0100e-06) (hash(x)=51188240) +49200 val loss 5.6863 +49200 val perplexity 294.8148 +49200 train 5.418609 (lr=5.0286e-06) (hash(x)=46202589) +45100 val loss 5.9046 +45100 val perplexity 366.7288 +45100 train 5.750527 (lr=1.2133e-05) (hash(x)=56386155) +49700 val loss 5.8351 +49700 val perplexity 342.0913 +49700 train 5.623811 (lr=7.0056e-06) (hash(x)=41492016) +49300 val loss 5.6890 +49300 val perplexity 295.5885 +49300 train 5.639915 (lr=5.0219e-06) (hash(x)=49161813) +45200 val loss 5.9095 +45200 val perplexity 368.5215 +45200 train 5.916084 (lr=1.2047e-05) (hash(x)=53389864) +49800 val loss 5.8371 +49800 val perplexity 342.7840 +49800 train 5.768189 (lr=7.0025e-06) (hash(x)=53488833) +49400 val loss 5.6843 +49400 val perplexity 294.2065 +49400 train 5.520928 (lr=5.0161e-06) (hash(x)=49505044) +49900 val loss 5.8337 +49900 val perplexity 341.6134 +45300 val loss 5.9062 +45300 val perplexity 367.2926 +49900 train 6.150034 (lr=7.0006e-06) (hash(x)=60260935) +45300 train 5.941709 (lr=1.1964e-05) (hash(x)=50817456) +49500 val loss 5.6846 +49500 val perplexity 294.3100 +49500 train 5.475948 (lr=5.0112e-06) (hash(x)=46610327) +49999 val loss 5.8301 +49999 val perplexity 340.3875 +45400 val loss 5.9098 +45400 val perplexity 368.6502 +45400 train 5.938283 (lr=1.1881e-05) (hash(x)=50081660) +49600 val loss 5.6854 +49600 val perplexity 294.5216 +49600 train 6.014633 (lr=5.0072e-06) (hash(x)=51188240) +45500 val loss 5.9098 +45500 val perplexity 368.6245 +45500 train 5.674471 (lr=1.1801e-05) (hash(x)=55567121) +49700 val loss 5.6844 +49700 val perplexity 294.2508 +49700 train 5.471631 (lr=5.0040e-06) (hash(x)=41492016) +45600 val loss 5.9013 +45600 val perplexity 365.5045 +45600 train 6.000695 (lr=1.1722e-05) (hash(x)=48902854) +49800 val loss 5.6848 +49800 val perplexity 294.3614 +49800 train 5.601730 (lr=5.0018e-06) (hash(x)=53488833) +45700 val loss 5.8965 +45700 val perplexity 363.7566 +45700 train 6.110654 (lr=1.1645e-05) (hash(x)=53803301) +49900 val loss 5.6843 +49900 val perplexity 294.2244 +49900 train 6.015575 (lr=5.0004e-06) (hash(x)=60260935) +49999 val loss 5.6840 +49999 val perplexity 294.1094 +45800 val loss 5.8942 +45800 val perplexity 362.9217 +45800 train 5.937474 (lr=1.1570e-05) (hash(x)=47576606) +45900 val loss 5.8893 +45900 val perplexity 361.1489 +45900 train 5.784353 (lr=1.1497e-05) (hash(x)=56276981) +46000 val loss 5.8933 +46000 val perplexity 362.6179 +46000 train 5.853423 (lr=1.1425e-05) (hash(x)=52174902) +46100 val loss 5.8956 +46100 val perplexity 363.4301 +46100 train 6.022799 (lr=1.1355e-05) (hash(x)=52155490) +46200 val loss 5.8915 +46200 val perplexity 361.9449 +46200 train 5.770359 (lr=1.1287e-05) (hash(x)=46489984) +46300 val loss 5.8893 +46300 val perplexity 361.1601 +46300 train 5.820558 (lr=1.1220e-05) (hash(x)=56603976) +46400 val loss 5.8879 +46400 val perplexity 360.6450 +46400 train 5.693027 (lr=1.1155e-05) (hash(x)=51228412) diff --git a/attention_kindselective_n_heads2_seed1341/model_02500.pt b/attention_kindselective_n_heads2_seed1341/model_02500.pt index 9217d7d7c508a271c2ef26f11c1af80bb6dd2fc6..e128fb5f0e5060c751f21dca34ad0abed31c66b4 100644 --- a/attention_kindselective_n_heads2_seed1341/model_02500.pt +++ b/attention_kindselective_n_heads2_seed1341/model_02500.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f81c5305e28766ffe17b41d892c4195cc93a4d18969824c3a0f91c8db6cf9c61 +oid sha256:df2dbff4078c4b902e52c1b7e5066b645d7034b614be5e59574471f8960ecc8d size 38587970 diff --git a/attention_kindselective_n_heads2_seed1341/model_05000.pt b/attention_kindselective_n_heads2_seed1341/model_05000.pt index c4919a9e9b85ef98ccf99b69d9c1ec4881bf90b0..2dbf6f890a8b65027cefce698f03da91fbe6321b 100644 --- a/attention_kindselective_n_heads2_seed1341/model_05000.pt +++ b/attention_kindselective_n_heads2_seed1341/model_05000.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f1981d296a42527ba71384319070268006aa51ed1ce3f34d492953367c7d791d +oid sha256:2b8b2db67768b1b7c4f36a2f59bdbf2ce5cdb8709c2f1a8aab106c8f26270d14 size 38587970 diff --git a/attention_kindselective_n_heads2_seed1341/model_07500.pt b/attention_kindselective_n_heads2_seed1341/model_07500.pt index dd7276075cfe3e851160cff547d1bb9fa5e07893..3d3ed034710983a56c5018413de7b42c2c4ceebf 100644 --- a/attention_kindselective_n_heads2_seed1341/model_07500.pt +++ b/attention_kindselective_n_heads2_seed1341/model_07500.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1a38e1ba00e798ca0b0a8d8e9524e6b25782fda42839c6c43fc8de932eaaa2f8 +oid sha256:364f24f243b3307cbb71c22973ac7657870fb5d5ab6db86c6b6ce16187242859 size 38587970 diff --git a/attention_kindselective_n_heads2_seed1341/model_10000.pt b/attention_kindselective_n_heads2_seed1341/model_10000.pt new file mode 100644 index 0000000000000000000000000000000000000000..7768e426e2ce40d20d6c25b4a17533867d73742c --- /dev/null +++ b/attention_kindselective_n_heads2_seed1341/model_10000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39597fb5f366edff6ba2340cd9532f6013379941b9f4fa74758f88d9c97ae137 +size 38587970 diff --git a/attention_kindselective_n_heads2_seed1341/model_12500.pt b/attention_kindselective_n_heads2_seed1341/model_12500.pt new file mode 100644 index 0000000000000000000000000000000000000000..f2d8055562d9eae03be94eeb78dbd00c35967ce1 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1341/model_12500.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d31a60ee89698642b0ebc621b75f58a692275ef26044e594d7268780509fd84 +size 38587970 diff --git a/attention_kindselective_n_heads2_seed1341/model_15000.pt b/attention_kindselective_n_heads2_seed1341/model_15000.pt new file mode 100644 index 0000000000000000000000000000000000000000..6ddb1112a848772aff3eebcfaf07b576169a74ef --- /dev/null +++ b/attention_kindselective_n_heads2_seed1341/model_15000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1af66647de9c5fc855d02d637ba618c8144799d5efe8ddc623aa3b3de6f726c +size 38587970 diff --git a/attention_kindselective_n_heads2_seed1341/model_17500.pt b/attention_kindselective_n_heads2_seed1341/model_17500.pt new file mode 100644 index 0000000000000000000000000000000000000000..07e657cd7cec48be9dd11fe49587e843245ea6f8 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1341/model_17500.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e82ddeade8b883e1bec9ae78a0caacae82fc16e39d9c5e40a814ee4b400df35 +size 38587970 diff --git a/attention_kindselective_n_heads2_seed1341/model_20000.pt b/attention_kindselective_n_heads2_seed1341/model_20000.pt new file mode 100644 index 0000000000000000000000000000000000000000..9d63abf174444af1582fc82f9409bfa6a80b137d --- /dev/null +++ b/attention_kindselective_n_heads2_seed1341/model_20000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be0d57d46256da655ab0efb88dce7b9010af64ff833c4c08ae94bcb3b7138cc8 +size 38587970 diff --git a/attention_kindselective_n_heads2_seed1341/model_22500.pt b/attention_kindselective_n_heads2_seed1341/model_22500.pt new file mode 100644 index 0000000000000000000000000000000000000000..e625862ed12a6752eec8f48f9349472216417a32 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1341/model_22500.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e93ca9b8574d2d714f7cae55c176cb3e64cb31d5ab43da6c34b0b03b0cbead07 +size 38587970 diff --git a/attention_kindselective_n_heads2_seed1341/model_25000.pt b/attention_kindselective_n_heads2_seed1341/model_25000.pt new file mode 100644 index 0000000000000000000000000000000000000000..041aac67fe2d152db5a666f8d0ae9b4de05c716c --- /dev/null +++ b/attention_kindselective_n_heads2_seed1341/model_25000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:454c04312ec7cba780576b7269421be5981dbee4e8a4d91baec0e8013e07cc02 +size 38587970 diff --git a/attention_kindselective_n_heads2_seed1341/model_27500.pt b/attention_kindselective_n_heads2_seed1341/model_27500.pt new file mode 100644 index 0000000000000000000000000000000000000000..94bebb488ffe11ca341559213409e460db771fa3 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1341/model_27500.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8961199b8c739f6e36b00526c87a6b5246765f4d1c0dbe1093faede9f3344450 +size 38587970 diff --git a/attention_kindselective_n_heads2_seed1341/model_30000.pt b/attention_kindselective_n_heads2_seed1341/model_30000.pt new file mode 100644 index 0000000000000000000000000000000000000000..a804ad2f601d5cb2173b101e7ed3c0ba428be30f --- /dev/null +++ b/attention_kindselective_n_heads2_seed1341/model_30000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:197046029793284598988dd04e5df15c623eaef6f155707df97b86154436a148 +size 38587970 diff --git a/attention_kindselective_n_heads2_seed1341/model_32500.pt b/attention_kindselective_n_heads2_seed1341/model_32500.pt new file mode 100644 index 0000000000000000000000000000000000000000..e8451cdf53bfcf69a02c55d700c713289b262a35 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1341/model_32500.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96771b15cd62ac259acfeaa8c3bea3c6a0311b32a69fc5f4cbdf396812aaa6f2 +size 38587970 diff --git a/attention_kindselective_n_heads2_seed1341/model_35000.pt b/attention_kindselective_n_heads2_seed1341/model_35000.pt new file mode 100644 index 0000000000000000000000000000000000000000..2a8eddeb3043eba9e375d56da7a4f3e745c734d3 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1341/model_35000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64439ba3a2d2759349369c61c46122f5f8d43d494d81def5c4c784414d39443e +size 38587970 diff --git a/attention_kindselective_n_heads2_seed1341/model_37500.pt b/attention_kindselective_n_heads2_seed1341/model_37500.pt new file mode 100644 index 0000000000000000000000000000000000000000..848f780b62b294916b918f86e004fb05f221b4e5 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1341/model_37500.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:974dfee447c5c353591f60ad35df9d68980e9c6f37f94c1beca686db8af86a80 +size 38587970 diff --git a/attention_kindselective_n_heads2_seed1341/model_40000.pt b/attention_kindselective_n_heads2_seed1341/model_40000.pt new file mode 100644 index 0000000000000000000000000000000000000000..39cbdaf62cdec03962771bed5603c27a182774a9 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1341/model_40000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92e9a07a5d7a3a5bbe8777892da0e16dbe8d52cfe28c66d9263c00ba04a7a708 +size 38587970 diff --git a/attention_kindselective_n_heads2_seed1341/model_42500.pt b/attention_kindselective_n_heads2_seed1341/model_42500.pt new file mode 100644 index 0000000000000000000000000000000000000000..f79511e67d3cc46e9186650ae50ddfc42b2b8eed --- /dev/null +++ b/attention_kindselective_n_heads2_seed1341/model_42500.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff5074eedcb82cb39bd1c3cc5b79ff90173c0a1ed1cc59315acda50a131de0f9 +size 38587970 diff --git a/attention_kindselective_n_heads2_seed1341/model_45000.pt b/attention_kindselective_n_heads2_seed1341/model_45000.pt new file mode 100644 index 0000000000000000000000000000000000000000..37bf3b37beb0e770a42a976b2fcb94e3efaccccb --- /dev/null +++ b/attention_kindselective_n_heads2_seed1341/model_45000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd0be6b5f6053bea40260d382a66627f2572e0d23f93145bd41b03959b43fffb +size 38587970 diff --git a/attention_kindselective_n_heads2_seed1341/model_47500.pt b/attention_kindselective_n_heads2_seed1341/model_47500.pt new file mode 100644 index 0000000000000000000000000000000000000000..cd22e4be43f645b4f6ab551ade7482587c37e375 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1341/model_47500.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f10489eeeefc32b2aec0615aaaedae12a844b5c9ea48c80427d8873fd45e4bb +size 38587970 diff --git a/attention_kindselective_n_heads2_seed1341/model_49999.pt b/attention_kindselective_n_heads2_seed1341/model_49999.pt new file mode 100644 index 0000000000000000000000000000000000000000..0b55e017ce52f1d3d7c34b56104554815bfa743f --- /dev/null +++ b/attention_kindselective_n_heads2_seed1341/model_49999.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa2f3cec42d9036e2f87260a9e0795ade9b256a480f4f1261f7cffdf84d202db +size 38587970 diff --git a/attention_kindselective_n_heads2_seed1341/optimizer_02500.pt b/attention_kindselective_n_heads2_seed1341/optimizer_02500.pt index b50205beb5b70fc7eb0d7625b50f231b650a8000..96eff12e37a0283c681f965d61364100e0e3efcf 100644 --- a/attention_kindselective_n_heads2_seed1341/optimizer_02500.pt +++ b/attention_kindselective_n_heads2_seed1341/optimizer_02500.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fbe352394c98f7aeb470becf95bc3d2fadc1b26eaace9a300ecb094670cdc944 +oid sha256:08f1a057ee80dea9056a043af4613f7d138e6c121717d5b995f40421c2185b5c size 70895430 diff --git a/attention_kindselective_n_heads2_seed1341/optimizer_05000.pt b/attention_kindselective_n_heads2_seed1341/optimizer_05000.pt index 2c8d4350129d0bccf61e075c171c80b509385673..829295d728805c89ee7a9cc5df6097d9dee4c887 100644 --- a/attention_kindselective_n_heads2_seed1341/optimizer_05000.pt +++ b/attention_kindselective_n_heads2_seed1341/optimizer_05000.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1b8de7fcbdc412e17dba8f4280f46014e7504b0ac36b8318f82ce0046bc68161 +oid sha256:04707c9dc1a915ce8844444f88086b021a84ba76d2a51ff88b231c3a89315907 size 70895430 diff --git a/attention_kindselective_n_heads2_seed1341/optimizer_07500.pt b/attention_kindselective_n_heads2_seed1341/optimizer_07500.pt index 6c5f1b755a29075693b50215f564dfd504dc2c73..dd7b8999ff3012874a12316c62404aa5107d42c0 100644 --- a/attention_kindselective_n_heads2_seed1341/optimizer_07500.pt +++ b/attention_kindselective_n_heads2_seed1341/optimizer_07500.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9d1fdfd4cbdb5969528ced4c2cc8e4fab4c83f4ad557140cab2cdaecb053a8fc +oid sha256:8bde31ee1de5e0b28258329d8e1bfc745047816434565ddeb118534f3321b382 size 70895430 diff --git a/attention_kindselective_n_heads2_seed1341/optimizer_10000.pt b/attention_kindselective_n_heads2_seed1341/optimizer_10000.pt new file mode 100644 index 0000000000000000000000000000000000000000..9021a85d0299df657d4d0a8a76af7e677d8b6e07 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1341/optimizer_10000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e5e3a84f6d97d753668237ae05b7fa519625213870b1b982e72889351306dd8 +size 70895430 diff --git a/attention_kindselective_n_heads2_seed1341/optimizer_12500.pt b/attention_kindselective_n_heads2_seed1341/optimizer_12500.pt new file mode 100644 index 0000000000000000000000000000000000000000..95f42668d7b872639a13688bfce41b3bb68088d5 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1341/optimizer_12500.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7c7772956f413b64e0b2580b4b54fa679916524a74e477187123032eff1ae02 +size 70895430 diff --git a/attention_kindselective_n_heads2_seed1341/optimizer_15000.pt b/attention_kindselective_n_heads2_seed1341/optimizer_15000.pt new file mode 100644 index 0000000000000000000000000000000000000000..c94d881724e1311858f309163a2433d66cee4d12 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1341/optimizer_15000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17b316437951257f7981be6620775537819e53492b0b26786967acf06aaba24f +size 70895430 diff --git a/attention_kindselective_n_heads2_seed1341/optimizer_17500.pt b/attention_kindselective_n_heads2_seed1341/optimizer_17500.pt new file mode 100644 index 0000000000000000000000000000000000000000..f5c6d3cfafaeacad4ba747c70a468c6e0a732a70 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1341/optimizer_17500.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37b5c575b4a0eceaad61e429a71cacabbb2b7157606d6b02c3a150f792a4a780 +size 70895430 diff --git a/attention_kindselective_n_heads2_seed1341/optimizer_20000.pt b/attention_kindselective_n_heads2_seed1341/optimizer_20000.pt new file mode 100644 index 0000000000000000000000000000000000000000..a27632d8a3d6d1239703531df898c01a7caf49d1 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1341/optimizer_20000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35fcaaa4f7c9226e4469c3c8c567557d0558d617c0ee65e7084bc28b23fcdb08 +size 70895430 diff --git a/attention_kindselective_n_heads2_seed1341/optimizer_22500.pt b/attention_kindselective_n_heads2_seed1341/optimizer_22500.pt new file mode 100644 index 0000000000000000000000000000000000000000..25e561a7781a4aa4f8e461915b3b083ab131330d --- /dev/null +++ b/attention_kindselective_n_heads2_seed1341/optimizer_22500.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01a74d2065d62b5f96d30821d64a6cb8d83df3adc4a127d9141f548214c01d10 +size 70895430 diff --git a/attention_kindselective_n_heads2_seed1341/optimizer_25000.pt b/attention_kindselective_n_heads2_seed1341/optimizer_25000.pt new file mode 100644 index 0000000000000000000000000000000000000000..12832baa649bba8703cabd7e23dbb435a9e53046 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1341/optimizer_25000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f69c4fa5bd5d85abce331c063b8f8661ea0716d83655b11b5c59e07943ea8a7a +size 70895430 diff --git a/attention_kindselective_n_heads2_seed1341/optimizer_27500.pt b/attention_kindselective_n_heads2_seed1341/optimizer_27500.pt new file mode 100644 index 0000000000000000000000000000000000000000..f6e131ef25884c46ff005355c3e0c9baa413121a --- /dev/null +++ b/attention_kindselective_n_heads2_seed1341/optimizer_27500.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b93749c7b464e5db7708c3a19920f5e874f533484b74a8caea9b3b2c5f1598b +size 70895430 diff --git a/attention_kindselective_n_heads2_seed1341/optimizer_30000.pt b/attention_kindselective_n_heads2_seed1341/optimizer_30000.pt new file mode 100644 index 0000000000000000000000000000000000000000..cab2a88b7932b35fdd041d9ff750a22bf9d2816a --- /dev/null +++ b/attention_kindselective_n_heads2_seed1341/optimizer_30000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:392b205b5156664bfcdad140f817d7ee6bf4b37f656b83cb5ed96c9c2306f7c7 +size 70895430 diff --git a/attention_kindselective_n_heads2_seed1341/optimizer_32500.pt b/attention_kindselective_n_heads2_seed1341/optimizer_32500.pt new file mode 100644 index 0000000000000000000000000000000000000000..fe2ec2b89b15054b9ee2fbd6116f2670a7a61fb2 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1341/optimizer_32500.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fb0c0ea688e2d87f36438404ab30d956ddfb8f310e01cc4809cbc8ed4bcedd3 +size 70895430 diff --git a/attention_kindselective_n_heads2_seed1341/optimizer_35000.pt b/attention_kindselective_n_heads2_seed1341/optimizer_35000.pt new file mode 100644 index 0000000000000000000000000000000000000000..02f07cdb3f08403ec4357e7f1e740fe8dde67b87 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1341/optimizer_35000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecb017bdba3cd06c70679a5a079b90587239f0df90789e9bd2aae6dd8ce5e1be +size 70895430 diff --git a/attention_kindselective_n_heads2_seed1341/optimizer_37500.pt b/attention_kindselective_n_heads2_seed1341/optimizer_37500.pt new file mode 100644 index 0000000000000000000000000000000000000000..aa353573ed1f0ae28832570e692a65e069763bb1 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1341/optimizer_37500.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:610caea9a22bd843ba3bbc9a9b0c98cc8865d9e51f81e576865ccd2ee860315c +size 70895430 diff --git a/attention_kindselective_n_heads2_seed1341/optimizer_40000.pt b/attention_kindselective_n_heads2_seed1341/optimizer_40000.pt new file mode 100644 index 0000000000000000000000000000000000000000..9f27945f0686852a4fdcee9fada2a09bf3bd1afa --- /dev/null +++ b/attention_kindselective_n_heads2_seed1341/optimizer_40000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c9fc6c8996145b248f831d6a856c99c569135e92fdadb4a9f8c234776bbb338 +size 70895430 diff --git a/attention_kindselective_n_heads2_seed1341/optimizer_42500.pt b/attention_kindselective_n_heads2_seed1341/optimizer_42500.pt new file mode 100644 index 0000000000000000000000000000000000000000..debe7c68f9f9a3eddb6c911073c09e70aa3caf85 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1341/optimizer_42500.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ce5b637f0f687de66c681ad9bf5b28775497b12beed5490ae4e2f3ef9fa2239 +size 70895430 diff --git a/attention_kindselective_n_heads2_seed1341/optimizer_45000.pt b/attention_kindselective_n_heads2_seed1341/optimizer_45000.pt new file mode 100644 index 0000000000000000000000000000000000000000..a5caa6ccc404af48d0c4f4bba571ac4011ca6635 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1341/optimizer_45000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e219bed0d1290d31e6fe1c5c0024faf85f4d71699c8592408b56e2059cfb5f7 +size 70895430 diff --git a/attention_kindselective_n_heads2_seed1341/optimizer_47500.pt b/attention_kindselective_n_heads2_seed1341/optimizer_47500.pt new file mode 100644 index 0000000000000000000000000000000000000000..72d11234bc5444037b42f96133505ed2410d4200 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1341/optimizer_47500.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40db19e0e2056eef8cf8b772f3c21365474b8d9d8725dee8528b75cd13754308 +size 70895430 diff --git a/attention_kindselective_n_heads2_seed1341/optimizer_49999.pt b/attention_kindselective_n_heads2_seed1341/optimizer_49999.pt new file mode 100644 index 0000000000000000000000000000000000000000..b4361050de0851b51e9f4018f710248459d78076 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1341/optimizer_49999.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbd88d144afa2cebd143ee7b83af343275bd2fb3add7cc3461fd7b3376010628 +size 70895430