diff --git a/attention_kindselective_n_heads2_seed1338/args.json b/attention_kindselective_n_heads2_seed1338/args.json index 1ef962ea71a9044b62216e18292e195c2084e775..306f897da8e2e2170bd0f0747c96efbad7d6a65e 100644 --- a/attention_kindselective_n_heads2_seed1338/args.json +++ b/attention_kindselective_n_heads2_seed1338/args.json @@ -1 +1 @@ -{"hellaswag": true, "attention_kind": "selective", "log_dir": "wider_is_better_6/attention_kindselective_n_heads2_seed1338", "resume_checkpoint": null, "resume_optimizer": false, "add_a_head": false, "add_head_to_start": true, "new_head_init": "normal", "n_heads": 2, "protect_bos_token": true, "prevent_from_masking_myself": true, "max_steps": 10000, "warmup_steps": 200, "group": "wider_is_better_6", "use_wandb": true, "kill_self_after_run": false, "random_seed": 1338, "memory_penalty_epsilon": 0.1, "selection_head_linear_combo": "none", "selection_head_linear_combo_scale": 1.0, "protection_kind": "none", "leaky_relu_alpha": null, "leaky_relu_bias": null, "use_compile": true, "use_mini_model": false, "upload_to_hf": true, "seq_len": 256, "batch_size": 40, "total_batch_size": 10240, "protection_head_scaling_factor": 1.0, "protection_head_bias": 0.0, "n_sliced_masks": null, "n_latent_masks": null, "mask_layernorm": false, "residual_attention_masks": false, "compute_base_shapes": false, "base_shapes_savefile": null, "mup": true, "disable_selection": false, "mup_enable_coord_check_logging": false, "max_lr": 0.0001, "decay_lr": true, "readout_zero_init": false, "query_zero_init": false, "l1_loss": false, "debugpy": false, "key": "10e-5_10240_2_1338", "n_embd": 128} \ No newline at end of file +{"hellaswag": true, "attention_kind": "selective", "log_dir": "wider_is_better_7/attention_kindselective_n_heads2_seed1338", "resume_checkpoint": null, "resume_optimizer": false, "add_a_head": false, "add_head_to_start": true, "new_head_init": "normal", "n_heads": 2, "protect_bos_token": true, "prevent_from_masking_myself": true, "max_steps": 50000, "warmup_steps": 200, "group": "wider_is_better_7", "use_wandb": true, "kill_self_after_run": false, "random_seed": 1338, "memory_penalty_epsilon": 0.1, "selection_head_linear_combo": "none", "selection_head_linear_combo_scale": 1.0, "protection_kind": "none", "leaky_relu_alpha": null, "leaky_relu_bias": null, "use_compile": true, "use_mini_model": false, "upload_to_hf": true, "seq_len": 256, "batch_size": 40, "total_batch_size": 10240, "protection_head_scaling_factor": 1.0, "protection_head_bias": 0.0, "n_sliced_masks": null, "n_latent_masks": null, "mask_layernorm": false, "residual_attention_masks": false, "compute_base_shapes": false, "base_shapes_savefile": null, "mup": true, "disable_selection": false, "mup_enable_coord_check_logging": false, "max_lr": 7e-05, "decay_lr": true, "readout_zero_init": false, "query_zero_init": false, "l1_loss": false, "debugpy": false, "key": "7e-5_10240_2_1338", "n_embd": 128} \ No newline at end of file diff --git a/attention_kindselective_n_heads2_seed1338/dataloader_10000.pt b/attention_kindselective_n_heads2_seed1338/dataloader_10000.pt new file mode 100644 index 0000000000000000000000000000000000000000..e1452fd777edd0ccb65d6b47710c50208c14b312 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1338/dataloader_10000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3858f6c832feea78a674d8c5c384061cc7d4f22cddbd0a2be6de33bc91e2c72 +size 964 diff --git a/attention_kindselective_n_heads2_seed1338/dataloader_12500.pt b/attention_kindselective_n_heads2_seed1338/dataloader_12500.pt new file mode 100644 index 0000000000000000000000000000000000000000..f63806424a0177a7f2d678c2c63138219ed021f3 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1338/dataloader_12500.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab3779d33c2e0a7873fcd8c39402e44260740665950323ad1445480ec339965a +size 964 diff --git a/attention_kindselective_n_heads2_seed1338/dataloader_15000.pt b/attention_kindselective_n_heads2_seed1338/dataloader_15000.pt new file mode 100644 index 0000000000000000000000000000000000000000..0ea0f5a1bfab75667c4ebb0ca01b358cdc836a54 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1338/dataloader_15000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:450020c7c306c73e5c07c463518ab937102d657515ea5a38da6f2e7291f20324 +size 964 diff --git a/attention_kindselective_n_heads2_seed1338/dataloader_17500.pt b/attention_kindselective_n_heads2_seed1338/dataloader_17500.pt new file mode 100644 index 0000000000000000000000000000000000000000..eb9392348b1209c827e3e376b05eeda80e779aa8 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1338/dataloader_17500.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0696a655d7c8a9d7d275c7489f74a5a948ee029ac3941b045d6abaf12544a5b1 +size 964 diff --git a/attention_kindselective_n_heads2_seed1338/dataloader_20000.pt b/attention_kindselective_n_heads2_seed1338/dataloader_20000.pt new file mode 100644 index 0000000000000000000000000000000000000000..ba32fca059aec40e4f758de96bdac6df23b9d9f5 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1338/dataloader_20000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4eb226bdcee777fd1ca493533704dae226c077ef79c842fc9dc59a534d5381c1 +size 964 diff --git a/attention_kindselective_n_heads2_seed1338/dataloader_22500.pt b/attention_kindselective_n_heads2_seed1338/dataloader_22500.pt new file mode 100644 index 0000000000000000000000000000000000000000..498dc444f528d893090328a5bd1e2f37da46dc12 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1338/dataloader_22500.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88833bfd821adac4edd1dd0772083ae007c7b8d33041f66e53a679e1fa8993e0 +size 964 diff --git a/attention_kindselective_n_heads2_seed1338/dataloader_25000.pt b/attention_kindselective_n_heads2_seed1338/dataloader_25000.pt new file mode 100644 index 0000000000000000000000000000000000000000..b657bc134192d0ea956f984c289d0c682979a1f4 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1338/dataloader_25000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:554555a425bac43d626c36f1c81c2b0aba51eda3281dab27a9cb56b61f413354 +size 964 diff --git a/attention_kindselective_n_heads2_seed1338/dataloader_27500.pt b/attention_kindselective_n_heads2_seed1338/dataloader_27500.pt new file mode 100644 index 0000000000000000000000000000000000000000..d92d43d89390714f43db4f0782e49af0145b4a90 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1338/dataloader_27500.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a52940b3b45414e6cdbec0dbaeba848f52d681c2daf78c269027057332d7fbd +size 964 diff --git a/attention_kindselective_n_heads2_seed1338/dataloader_30000.pt b/attention_kindselective_n_heads2_seed1338/dataloader_30000.pt new file mode 100644 index 0000000000000000000000000000000000000000..06856f53253b6c8cbefd7d595d9b9b7266b22621 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1338/dataloader_30000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:775519ea26122cb70d533c496bcdbbc19f759f3d096e83e98cca1dc10275fe8e +size 964 diff --git a/attention_kindselective_n_heads2_seed1338/dataloader_32500.pt b/attention_kindselective_n_heads2_seed1338/dataloader_32500.pt new file mode 100644 index 0000000000000000000000000000000000000000..dc0b4b40e57f41aa1046b3bd2697256635160c09 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1338/dataloader_32500.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab3422c8205fe45210246fed3dd6c317b75df02228cd8b75fba669574ce3b2d9 +size 964 diff --git a/attention_kindselective_n_heads2_seed1338/dataloader_35000.pt b/attention_kindselective_n_heads2_seed1338/dataloader_35000.pt new file mode 100644 index 0000000000000000000000000000000000000000..62647b540fa7925361626b9f8dfa3959eebb7608 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1338/dataloader_35000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:199ed3be67b88981f686112c97a2261729a37e0be3d4b0f4a289985a95d3cdf1 +size 964 diff --git a/attention_kindselective_n_heads2_seed1338/dataloader_37500.pt b/attention_kindselective_n_heads2_seed1338/dataloader_37500.pt new file mode 100644 index 0000000000000000000000000000000000000000..475d8b538138a8e39b76a4cf04c8eaeac074d295 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1338/dataloader_37500.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f77178b3be9dd3f8cd03c935236251f73fde6da7948ba9feda0c888fb8912dfe +size 964 diff --git a/attention_kindselective_n_heads2_seed1338/dataloader_40000.pt b/attention_kindselective_n_heads2_seed1338/dataloader_40000.pt new file mode 100644 index 0000000000000000000000000000000000000000..50ec00ef6be330bbdb4cdf88e9a1097345da0d4d --- /dev/null +++ b/attention_kindselective_n_heads2_seed1338/dataloader_40000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84f58ba3b4a1f9be7da4e697e06782f0e1ce4d3aca49f1997087fc83aa466dd9 +size 964 diff --git a/attention_kindselective_n_heads2_seed1338/dataloader_42500.pt b/attention_kindselective_n_heads2_seed1338/dataloader_42500.pt new file mode 100644 index 0000000000000000000000000000000000000000..74e133f73a7293d5f4d6407784703c91f705d6e3 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1338/dataloader_42500.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf6d24c78d89100d146bce9f26be940db3d71092473d9b55db97d6b35531eac2 +size 964 diff --git a/attention_kindselective_n_heads2_seed1338/dataloader_45000.pt b/attention_kindselective_n_heads2_seed1338/dataloader_45000.pt new file mode 100644 index 0000000000000000000000000000000000000000..718bc149d695b1e9498bdd0693053d7417207818 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1338/dataloader_45000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22bb90b43d81f3da5454f91a70e1ed29aeb2f470a727ce38390ff8a5c4924889 +size 964 diff --git a/attention_kindselective_n_heads2_seed1338/dataloader_47500.pt b/attention_kindselective_n_heads2_seed1338/dataloader_47500.pt new file mode 100644 index 0000000000000000000000000000000000000000..3eab97c7c6d4b86d90405bf1c4f3435727495da4 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1338/dataloader_47500.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55507725e6988f190e4963078652fafa6b68e8d4f79221387612612babf3e1c1 +size 964 diff --git a/attention_kindselective_n_heads2_seed1338/dataloader_49999.pt b/attention_kindselective_n_heads2_seed1338/dataloader_49999.pt new file mode 100644 index 0000000000000000000000000000000000000000..d87f88b62a343a49411f8a6feee8f527879fcd1f --- /dev/null +++ b/attention_kindselective_n_heads2_seed1338/dataloader_49999.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47776cddb8021172f048a950b83f25b692cb340214b800ce3837c15ceb58907c +size 964 diff --git a/attention_kindselective_n_heads2_seed1338/log2.txt b/attention_kindselective_n_heads2_seed1338/log2.txt index bec6a8359d4ae920b63a5dddb22a04d5b6aac9b0..ecaf9b229526ef9bd4dc2b4ec13a7bec621233f6 100644 --- a/attention_kindselective_n_heads2_seed1338/log2.txt +++ b/attention_kindselective_n_heads2_seed1338/log2.txt @@ -1,582 +1,4378 @@ -max_steps: 10000 -100 val loss 9.3442 -100 val perplexity 11431.8779 -100 train 9.297195 (lr=7.5750e-05) (hash(x)=52740221) -200 val loss 7.7377 -200 val perplexity 2293.1240 -200 train 7.742150 (lr=1.5000e-04) (hash(x)=49034180) +max_steps: 50000 +max_steps: 50000 +0 val loss 11.7640 +0 val perplexity 128545.8516 +0 val loss 11.7640 +0 val perplexity 128545.8516 0 val loss 11.7640 0 val perplexity 128545.8516 -300 val loss 7.6958 -300 val perplexity 2199.1255 -300 train 7.961709 (lr=1.4997e-04) (hash(x)=63180688) -400 val loss 7.6637 -400 val perplexity 2129.5801 -400 train 7.680656 (lr=1.4986e-04) (hash(x)=50373500) 0 train 11.762399 (lr=5.0000e-07) (hash(x)=50671684) -500 val loss 7.6519 -500 val perplexity 2104.6372 -500 train 7.527692 (lr=1.4969e-04) (hash(x)=44547422) -100 val loss 9.8118 -100 val perplexity 18248.5859 -600 val loss 7.7028 -600 val perplexity 2214.5046 -100 train 9.757762 (lr=5.0500e-05) (hash(x)=52740221) -600 train 7.582813 (lr=1.4945e-04) (hash(x)=47184699) -700 val loss 7.6679 -700 val perplexity 2138.6357 -700 train 7.571599 (lr=1.4913e-04) (hash(x)=51374582) -200 val loss 8.1949 -200 val perplexity 3622.3582 -200 train 8.195848 (lr=1.0000e-04) (hash(x)=49034180) -300 val loss 7.7078 -300 val perplexity 2225.6382 -800 val loss 7.6399 -800 val perplexity 2079.5740 -300 train 7.954569 (lr=9.9977e-05) (hash(x)=63180688) -800 train 7.360402 (lr=1.4876e-04) (hash(x)=46264805) -900 val loss 7.6276 -900 val perplexity 2054.1938 -900 train 7.878948 (lr=1.4831e-04) (hash(x)=61178712) -400 val loss 7.6837 -400 val perplexity 2172.6035 -400 train 7.705904 (lr=9.9908e-05) (hash(x)=50373500) -1000 val loss 7.6159 -1000 val perplexity 2030.1835 -1000 train 7.582415 (lr=1.4779e-04) (hash(x)=50886520) -500 val loss 7.6574 -500 val perplexity 2116.2241 -500 train 7.541328 (lr=9.9792e-05) (hash(x)=44547422) -1100 val loss 7.5475 -1100 val perplexity 1895.9528 -1100 train 7.305693 (lr=1.4721e-04) (hash(x)=48600099) -600 val loss 7.6596 -600 val perplexity 2120.9651 -600 train 7.551372 (lr=9.9631e-05) (hash(x)=47184699) -1200 val loss 7.5112 -1200 val perplexity 1828.3958 -1200 train 7.210708 (lr=1.4656e-04) (hash(x)=50146792) -700 val loss 7.6287 -700 val perplexity 2056.4128 -700 train 7.550183 (lr=9.9423e-05) (hash(x)=51374582) -1300 val loss 7.4972 -1300 val perplexity 1802.9724 -1300 train 7.355508 (lr=1.4585e-04) (hash(x)=52617313) -800 val loss 7.6292 -800 val perplexity 2057.5054 -800 train 7.363524 (lr=9.9170e-05) (hash(x)=46264805) -1400 val loss 7.4674 -1400 val perplexity 1749.9814 -1400 train 7.136637 (lr=1.4507e-04) (hash(x)=49794446) -900 val loss 7.6407 -900 val perplexity 2081.2048 -900 train 7.929552 (lr=9.8872e-05) (hash(x)=61178712) -1500 val loss 7.4519 -1500 val perplexity 1723.1036 -1500 train 7.113922 (lr=1.4422e-04) (hash(x)=50766317) -1000 val loss 7.6259 -1000 val perplexity 2050.5642 -1000 train 7.627928 (lr=9.8528e-05) (hash(x)=50886520) -1600 val loss 7.4349 -1600 val perplexity 1694.0500 -1600 train 7.307274 (lr=1.4332e-04) (hash(x)=55551175) -1100 val loss 7.6154 -1100 val perplexity 2029.2988 -1100 train 7.433861 (lr=9.8140e-05) (hash(x)=48600099) -1700 val loss 7.3709 -1700 val perplexity 1589.0658 -1700 train 7.524827 (lr=1.4235e-04) (hash(x)=56717172) -1200 val loss 7.6086 -1200 val perplexity 2015.4730 -1200 train 7.319845 (lr=9.7708e-05) (hash(x)=50146792) -1800 val loss 7.3132 -1800 val perplexity 1499.9122 -1800 train 7.543727 (lr=1.4131e-04) (hash(x)=55376447) -1300 val loss 7.6147 -1300 val perplexity 2027.8499 -1300 train 7.483940 (lr=9.7231e-05) (hash(x)=52617313) -1900 val loss 7.2845 -1900 val perplexity 1457.4705 -1900 train 7.016429 (lr=1.4022e-04) (hash(x)=43810837) -1400 val loss 7.6027 -1400 val perplexity 2003.5959 -1400 train 7.282799 (lr=9.6711e-05) (hash(x)=49794446) -2000 val loss 7.2760 -2000 val perplexity 1445.2384 -2000 train 7.324104 (lr=1.3907e-04) (hash(x)=50881655) -1500 val loss 7.5973 -1500 val perplexity 1992.9032 -1500 train 7.251457 (lr=9.6149e-05) (hash(x)=50766317) -2100 val loss 7.2904 -2100 val perplexity 1466.1522 -2100 train 7.182340 (lr=1.3786e-04) (hash(x)=49386015) -2200 val loss 7.2854 -2200 val perplexity 1458.8618 -2200 train 7.207294 (lr=1.3660e-04) (hash(x)=48572079) -1600 val loss 7.5756 -1600 val perplexity 1949.9363 -1600 train 7.480996 (lr=9.5544e-05) (hash(x)=55551175) -2300 val loss 7.2263 -2300 val perplexity 1375.1674 -2300 train 7.262286 (lr=1.3527e-04) (hash(x)=54950719) -1700 val loss 7.5376 -1700 val perplexity 1877.3765 -1700 train 7.684578 (lr=9.4897e-05) (hash(x)=56717172) -2400 val loss 7.2174 -2400 val perplexity 1362.8987 -2400 train 6.879970 (lr=1.3390e-04) (hash(x)=42190240) -1800 val loss 7.4586 -1800 val perplexity 1734.6367 -1800 train 7.710993 (lr=9.4209e-05) (hash(x)=55376447) -2500 val loss 7.1974 -2500 val perplexity 1335.8898 -2500 train 7.284463 (lr=1.3247e-04) (hash(x)=45223539) -1900 val loss 7.4584 -1900 val perplexity 1734.4408 -1900 train 7.186811 (lr=9.3481e-05) (hash(x)=43810837) -2600 val loss 7.1936 -2600 val perplexity 1330.8302 -2600 train 7.223955 (lr=1.3099e-04) (hash(x)=54037353) -2000 val loss 7.4416 -2000 val perplexity 1705.4586 -2000 train 7.528937 (lr=9.2714e-05) (hash(x)=50881655) -2700 val loss 7.2011 -2700 val perplexity 1340.8951 -2700 train 7.754804 (lr=1.2946e-04) (hash(x)=59131616) -2100 val loss 7.4330 -2100 val perplexity 1690.8485 -2100 train 7.312185 (lr=9.1908e-05) (hash(x)=49386015) -2800 val loss 7.1920 -2800 val perplexity 1328.6990 -2800 train 7.032724 (lr=1.2788e-04) (hash(x)=45882743) -2200 val loss 7.3995 -2200 val perplexity 1635.2444 -2200 train 7.314746 (lr=9.1064e-05) (hash(x)=48572079) -2900 val loss 7.1831 -2900 val perplexity 1316.9196 -2900 train 6.815891 (lr=1.2626e-04) (hash(x)=43758910) -2300 val loss 7.3938 -2300 val perplexity 1625.8793 -2300 train 7.450395 (lr=9.0182e-05) (hash(x)=54950719) -3000 val loss 7.1706 -3000 val perplexity 1300.5831 -3000 train 7.098728 (lr=1.2459e-04) (hash(x)=47965974) -2400 val loss 7.3578 -2400 val perplexity 1568.3666 -2400 train 7.028030 (lr=8.9265e-05) (hash(x)=42190240) -3100 val loss 7.1539 -3100 val perplexity 1279.1355 -3100 train 7.044363 (lr=1.2287e-04) (hash(x)=48205243) -2500 val loss 7.3452 -2500 val perplexity 1548.7137 -2500 train 7.422577 (lr=8.8313e-05) (hash(x)=45223539) -3200 val loss 7.1652 -3200 val perplexity 1293.6329 -3200 train 7.250436 (lr=1.2112e-04) (hash(x)=54511383) -2600 val loss 7.3222 -2600 val perplexity 1513.4851 -2600 train 7.332448 (lr=8.7326e-05) (hash(x)=54037353) -3300 val loss 7.1444 -3300 val perplexity 1266.9409 -3300 train 7.101440 (lr=1.1932e-04) (hash(x)=54428388) -2700 val loss 7.3374 -2700 val perplexity 1536.7114 -2700 train 7.783456 (lr=8.6306e-05) (hash(x)=59131616) -3400 val loss 7.1267 -3400 val perplexity 1244.7963 -3400 train 7.159663 (lr=1.1749e-04) (hash(x)=48115990) -2800 val loss 7.3442 -2800 val perplexity 1547.1792 -2800 train 7.180212 (lr=8.5254e-05) (hash(x)=45882743) -3500 val loss 7.1118 -3500 val perplexity 1226.2948 -3500 train 6.734808 (lr=1.1562e-04) (hash(x)=41137345) -2900 val loss 7.3278 -2900 val perplexity 1522.0278 -2900 train 6.986289 (lr=8.4170e-05) (hash(x)=43758910) -3600 val loss 7.1108 -3600 val perplexity 1225.1632 -3600 train 7.007893 (lr=1.1372e-04) (hash(x)=55186224) -3000 val loss 7.3364 -3000 val perplexity 1535.1770 -3000 train 7.256237 (lr=8.3057e-05) (hash(x)=47965974) -3700 val loss 7.1188 -3700 val perplexity 1235.0267 -3700 train 6.970138 (lr=1.1179e-04) (hash(x)=54990049) -3100 val loss 7.2971 -3100 val perplexity 1476.0162 -3100 train 7.206101 (lr=8.1915e-05) (hash(x)=48205243) -3800 val loss 7.0919 -3800 val perplexity 1202.2446 -3800 train 6.816278 (lr=1.0982e-04) (hash(x)=46288812) -3200 val loss 7.2821 -3200 val perplexity 1454.0240 -3200 train 7.345427 (lr=8.0745e-05) (hash(x)=54511383) -3900 val loss 7.0675 -3900 val perplexity 1173.2322 -3900 train 6.737766 (lr=1.0783e-04) (hash(x)=45829773) -3300 val loss 7.2780 -3300 val perplexity 1448.0736 -3300 train 7.247321 (lr=7.9549e-05) (hash(x)=54428388) -4000 val loss 7.0704 -4000 val perplexity 1176.6615 -4000 train 6.859371 (lr=1.0581e-04) (hash(x)=52499943) -3400 val loss 7.2686 -3400 val perplexity 1434.5167 -3400 train 7.313385 (lr=7.8328e-05) (hash(x)=48115990) -4100 val loss 7.0087 -4100 val perplexity 1106.1769 -4100 train 6.920537 (lr=1.0377e-04) (hash(x)=48563796) -3500 val loss 7.2712 -3500 val perplexity 1438.2900 -3500 train 6.913240 (lr=7.7082e-05) (hash(x)=41137345) -4200 val loss 6.9935 -4200 val perplexity 1089.5476 -4200 train 6.965348 (lr=1.0171e-04) (hash(x)=49165143) -3600 val loss 7.2623 -3600 val perplexity 1425.5751 -3600 train 7.169919 (lr=7.5814e-05) (hash(x)=55186224) -4300 val loss 6.9627 -4300 val perplexity 1056.4453 -4300 train 7.010408 (lr=9.9622e-05) (hash(x)=50973176) -3700 val loss 7.2633 -3700 val perplexity 1426.9270 -3700 train 7.101009 (lr=7.4525e-05) (hash(x)=54990049) -4400 val loss 6.9507 -4400 val perplexity 1043.8556 -4400 train 7.006516 (lr=9.7520e-05) (hash(x)=55275124) -4500 val loss 6.9582 -4500 val perplexity 1051.7567 -4500 train 7.286173 (lr=9.5403e-05) (hash(x)=58646505) -3800 val loss 7.2887 -3800 val perplexity 1463.6641 -3800 train 7.029315 (lr=7.3215e-05) (hash(x)=46288812) -4600 val loss 6.9107 -4600 val perplexity 1002.9967 -4600 train 6.718947 (lr=9.3273e-05) (hash(x)=42554666) -3900 val loss 7.2681 -3900 val perplexity 1433.8232 -3900 train 6.927856 (lr=7.1887e-05) (hash(x)=45829773) -4700 val loss 6.9037 -4700 val perplexity 995.9628 -4700 train 6.815775 (lr=9.1132e-05) (hash(x)=47846764) -4000 val loss 7.2776 -4000 val perplexity 1447.4598 -4000 train 7.064430 (lr=7.0541e-05) (hash(x)=52499943) -4800 val loss 6.8718 -4800 val perplexity 964.6489 -4800 train 7.392079 (lr=8.8982e-05) (hash(x)=58239019) -4100 val loss 7.2279 -4100 val perplexity 1377.2673 -4100 train 7.127439 (lr=6.9180e-05) (hash(x)=48563796) -4900 val loss 6.8548 -4900 val perplexity 948.4195 -4900 train 6.890259 (lr=8.6825e-05) (hash(x)=50711220) -4200 val loss 7.2032 -4200 val perplexity 1343.6794 -4200 train 7.175355 (lr=6.7804e-05) (hash(x)=49165143) -5000 val loss 6.8666 -5000 val perplexity 959.6856 -5000 train 6.797673 (lr=8.4663e-05) (hash(x)=45994194) -4300 val loss 7.1964 -4300 val perplexity 1334.5923 -4300 train 7.246315 (lr=6.6414e-05) (hash(x)=50973176) -5100 val loss 6.8380 -5100 val perplexity 932.5778 -5100 train 6.689332 (lr=8.2500e-05) (hash(x)=48659050) -4400 val loss 7.1871 -4400 val perplexity 1322.2667 -4400 train 7.213579 (lr=6.5013e-05) (hash(x)=55275124) -5200 val loss 6.8171 -5200 val perplexity 913.3361 -5200 train 6.733484 (lr=8.0337e-05) (hash(x)=49369682) -4500 val loss 7.1887 -4500 val perplexity 1324.3516 -4500 train 7.501039 (lr=6.3602e-05) (hash(x)=58646505) -5300 val loss 6.8009 -5300 val perplexity 898.6852 -5300 train 7.195892 (lr=7.8175e-05) (hash(x)=57787700) -4600 val loss 7.1673 -4600 val perplexity 1296.2893 -4600 train 6.993137 (lr=6.2182e-05) (hash(x)=42554666) -5400 val loss 6.8112 -5400 val perplexity 907.9326 -5400 train 6.721237 (lr=7.6018e-05) (hash(x)=49365400) -4700 val loss 7.1780 -4700 val perplexity 1310.3413 -4700 train 7.083921 (lr=6.0754e-05) (hash(x)=47846764) -5500 val loss 6.7819 -5500 val perplexity 881.7007 -5500 train 6.746484 (lr=7.3868e-05) (hash(x)=48720412) -4800 val loss 7.1616 -4800 val perplexity 1289.0020 -4800 train 7.689332 (lr=5.9321e-05) (hash(x)=58239019) -5600 val loss 6.7828 -5600 val perplexity 882.5264 -5600 train 7.133316 (lr=7.1727e-05) (hash(x)=55784800) -4900 val loss 7.1571 -4900 val perplexity 1283.2035 -4900 train 7.220632 (lr=5.7883e-05) (hash(x)=50711220) -5700 val loss 6.7763 -5700 val perplexity 876.8293 -5700 train 6.594879 (lr=6.9597e-05) (hash(x)=50073634) -5000 val loss 7.1550 -5000 val perplexity 1280.5098 -5000 train 7.076353 (lr=5.6442e-05) (hash(x)=45994194) -5800 val loss 6.7572 -5800 val perplexity 860.2529 -5800 train 6.577003 (lr=6.7480e-05) (hash(x)=50170324) -5100 val loss 7.1429 -5100 val perplexity 1265.0339 -5100 train 7.000565 (lr=5.5000e-05) (hash(x)=48659050) -5900 val loss 6.7658 -5900 val perplexity 867.6870 -5900 train 6.507083 (lr=6.5378e-05) (hash(x)=48410268) -5200 val loss 7.1410 -5200 val perplexity 1262.6818 -5200 train 7.094159 (lr=5.3558e-05) (hash(x)=49369682) -6000 val loss 6.7626 -6000 val perplexity 864.9288 -6000 train 6.629282 (lr=6.3294e-05) (hash(x)=49527342) -5300 val loss 7.1292 -5300 val perplexity 1247.8777 -5300 train 7.497270 (lr=5.2117e-05) (hash(x)=57787700) -6100 val loss 6.7591 -6100 val perplexity 861.8850 -6100 train 6.519784 (lr=6.1230e-05) (hash(x)=49550294) -5400 val loss 7.1447 -5400 val perplexity 1267.4213 -5400 train 7.050255 (lr=5.0679e-05) (hash(x)=49365400) -6200 val loss 6.7512 -6200 val perplexity 855.1146 -6200 train 6.288379 (lr=5.9188e-05) (hash(x)=42126106) -5500 val loss 7.1224 -5500 val perplexity 1239.3687 -5500 train 7.068456 (lr=4.9246e-05) (hash(x)=48720412) -6300 val loss 6.7449 -6300 val perplexity 849.7552 -6300 train 6.486180 (lr=5.7169e-05) (hash(x)=49608772) -5600 val loss 7.1197 -5600 val perplexity 1236.0317 -5600 train 7.443053 (lr=4.7818e-05) (hash(x)=55784800) -6400 val loss 6.7319 -6400 val perplexity 838.7435 -6400 train 6.273153 (lr=5.5177e-05) (hash(x)=52324417) -5700 val loss 7.1170 -5700 val perplexity 1232.7433 -5700 train 6.958042 (lr=4.6398e-05) (hash(x)=50073634) -6500 val loss 6.6949 -6500 val perplexity 808.3016 -6500 train 6.726796 (lr=5.3213e-05) (hash(x)=46207215) -5800 val loss 7.1130 -5800 val perplexity 1227.8770 -5800 train 6.936908 (lr=4.4987e-05) (hash(x)=50170324) -6600 val loss 6.6733 -6600 val perplexity 791.0082 -6600 train 6.626161 (lr=5.1279e-05) (hash(x)=49027014) -5900 val loss 7.1241 -5900 val perplexity 1241.5690 -5900 train 6.890422 (lr=4.3586e-05) (hash(x)=48410268) -6700 val loss 6.6657 -6700 val perplexity 784.9918 -6700 train 6.616896 (lr=4.9377e-05) (hash(x)=46232513) -6000 val loss 7.1221 -6000 val perplexity 1239.0070 -6000 train 7.008251 (lr=4.2196e-05) (hash(x)=49527342) -6800 val loss 6.6663 -6800 val perplexity 785.5156 -6800 train 6.590093 (lr=4.7509e-05) (hash(x)=47348403) -6100 val loss 7.1235 -6100 val perplexity 1240.8168 -6100 train 6.934540 (lr=4.0820e-05) (hash(x)=49550294) -6900 val loss 6.6790 -6900 val perplexity 795.5314 -6900 train 6.656285 (lr=4.5676e-05) (hash(x)=49806647) -6200 val loss 7.1133 -6200 val perplexity 1228.1627 -6200 train 6.688022 (lr=3.9459e-05) (hash(x)=42126106) -7000 val loss 6.6343 -7000 val perplexity 760.7117 -7000 train 6.703082 (lr=4.3882e-05) (hash(x)=50893018) -6300 val loss 7.1152 -6300 val perplexity 1230.5397 -6300 train 6.896497 (lr=3.8113e-05) (hash(x)=49608772) -7100 val loss 6.6442 -7100 val perplexity 768.3418 -7100 train 6.674629 (lr=4.2128e-05) (hash(x)=49157639) -6400 val loss 7.1097 -6400 val perplexity 1223.7263 -6400 train 6.698913 (lr=3.6785e-05) (hash(x)=52324417) -7200 val loss 6.6269 -7200 val perplexity 755.1032 -7200 train 6.619589 (lr=4.0414e-05) (hash(x)=47014759) -7300 val loss 6.6245 -7300 val perplexity 753.3093 -7300 train 6.609889 (lr=3.8745e-05) (hash(x)=47325591) -6500 val loss 7.0836 -6500 val perplexity 1192.3104 -6500 train 7.098672 (lr=3.5475e-05) (hash(x)=46207215) -7400 val loss 6.6178 -7400 val perplexity 748.2969 -7400 train 6.509352 (lr=3.7120e-05) (hash(x)=49184604) -6600 val loss 7.0634 -6600 val perplexity 1168.4180 -6600 train 6.992548 (lr=3.4186e-05) (hash(x)=49027014) -7500 val loss 6.6078 -7500 val perplexity 740.8351 -7500 train 6.835536 (lr=3.5541e-05) (hash(x)=55053584) -6700 val loss 7.0569 -6700 val perplexity 1160.8925 -6700 train 6.979507 (lr=3.2918e-05) (hash(x)=46232513) -7600 val loss 6.6067 -7600 val perplexity 740.0150 -7600 train 6.554074 (lr=3.4011e-05) (hash(x)=48693923) -6800 val loss 7.0579 -6800 val perplexity 1162.0583 -6800 train 6.994317 (lr=3.1672e-05) (hash(x)=47348403) -7700 val loss 6.6059 -7700 val perplexity 739.4333 -7700 train 6.199418 (lr=3.2531e-05) (hash(x)=40952882) -6900 val loss 7.0640 -6900 val perplexity 1169.1509 -6900 train 7.008790 (lr=3.0451e-05) (hash(x)=49806647) -7800 val loss 6.6001 -7800 val perplexity 735.1403 -7800 train 6.685646 (lr=3.1102e-05) (hash(x)=52487845) -7000 val loss 7.0359 -7000 val perplexity 1136.6608 -7000 train 7.066860 (lr=2.9255e-05) (hash(x)=50893018) -7900 val loss 6.5936 -7900 val perplexity 730.3727 -7900 train 6.667073 (lr=2.9726e-05) (hash(x)=50221547) -7100 val loss 7.0304 -7100 val perplexity 1130.4474 -7100 train 7.072404 (lr=2.8085e-05) (hash(x)=49157639) -8000 val loss 6.5985 -8000 val perplexity 733.9662 -8000 train 6.816906 (lr=2.8405e-05) (hash(x)=62294204) -7200 val loss 7.0286 -7200 val perplexity 1128.4714 -7200 train 6.968489 (lr=2.6943e-05) (hash(x)=47014759) -8100 val loss 6.5919 -8100 val perplexity 729.1611 -8100 train 6.290696 (lr=2.7138e-05) (hash(x)=44401967) -7300 val loss 7.0301 -7300 val perplexity 1130.1548 -7300 train 7.023178 (lr=2.5830e-05) (hash(x)=47325591) -8200 val loss 6.5940 -8200 val perplexity 730.6852 -8200 train 6.534984 (lr=2.5929e-05) (hash(x)=52769095) -7400 val loss 7.0179 -7400 val perplexity 1116.3906 -7400 train 6.888594 (lr=2.4746e-05) (hash(x)=49184604) -8300 val loss 6.6070 -8300 val perplexity 740.2457 -8300 train 6.528926 (lr=2.4778e-05) (hash(x)=56829883) -7500 val loss 7.0154 -7500 val perplexity 1113.6748 -7500 train 7.263641 (lr=2.3694e-05) (hash(x)=55053584) -8400 val loss 6.6083 -8400 val perplexity 741.1849 -8400 train 6.546304 (lr=2.3686e-05) (hash(x)=52147375) -7600 val loss 7.0119 -7600 val perplexity 1109.7356 -7600 train 6.966333 (lr=2.2674e-05) (hash(x)=48693923) -8500 val loss 6.6043 -8500 val perplexity 738.2313 -8500 train 6.838852 (lr=2.2655e-05) (hash(x)=60197820) -7700 val loss 7.0081 -7700 val perplexity 1105.5388 -7700 train 6.596260 (lr=2.1687e-05) (hash(x)=40952882) -8600 val loss 6.5924 -8600 val perplexity 729.5060 -8600 train 6.325205 (lr=2.1685e-05) (hash(x)=49377068) -7800 val loss 7.0164 -7800 val perplexity 1114.7831 -7800 train 7.092906 (lr=2.0735e-05) (hash(x)=52487845) -8700 val loss 6.6001 -8700 val perplexity 735.1347 -8700 train 6.556189 (lr=2.0777e-05) (hash(x)=51092724) -7900 val loss 7.0032 -7900 val perplexity 1100.1108 -7900 train 7.059808 (lr=1.9818e-05) (hash(x)=50221547) -8800 val loss 6.5955 -8800 val perplexity 731.8264 -8800 train 6.565369 (lr=1.9933e-05) (hash(x)=48642928) -8000 val loss 7.0017 -8000 val perplexity 1098.5424 -8000 train 7.273840 (lr=1.8936e-05) (hash(x)=62294204) -8900 val loss 6.5768 -8900 val perplexity 718.2245 -8900 train 6.758434 (lr=1.9153e-05) (hash(x)=55342246) -8100 val loss 6.9990 -8100 val perplexity 1095.5623 -8100 train 6.701060 (lr=1.8092e-05) (hash(x)=44401967) -9000 val loss 6.5617 -9000 val perplexity 707.4598 -9000 train 6.568685 (lr=1.8439e-05) (hash(x)=48093368) -8200 val loss 7.0037 -8200 val perplexity 1100.6927 -8200 train 6.968071 (lr=1.7286e-05) (hash(x)=52769095) -9100 val loss 6.5556 -9100 val perplexity 703.1831 -9100 train 6.602783 (lr=1.7790e-05) (hash(x)=48578183) -8300 val loss 7.0071 -8300 val perplexity 1104.4213 -8300 train 6.974947 (lr=1.6519e-05) (hash(x)=56829883) -9200 val loss 6.5544 -9200 val perplexity 702.3309 -9200 train 6.698106 (lr=1.7208e-05) (hash(x)=50794720) -8400 val loss 7.0149 -8400 val perplexity 1113.1216 -8400 train 6.988824 (lr=1.5791e-05) (hash(x)=52147375) -9300 val loss 6.5496 -9300 val perplexity 698.9913 -9300 train 6.316019 (lr=1.6692e-05) (hash(x)=46513190) -8500 val loss 7.0100 -8500 val perplexity 1107.6707 -8500 train 7.226442 (lr=1.5103e-05) (hash(x)=60197820) -9400 val loss 6.5440 -9400 val perplexity 695.0316 -9400 train 6.202390 (lr=1.6245e-05) (hash(x)=43808238) -8600 val loss 7.0079 -8600 val perplexity 1105.2942 -8600 train 6.780334 (lr=1.4456e-05) (hash(x)=49377068) -9500 val loss 6.5441 -9500 val perplexity 695.0981 -9500 train 6.356673 (lr=1.5865e-05) (hash(x)=45021888) -8700 val loss 7.0063 -8700 val perplexity 1103.5695 -8700 train 6.962779 (lr=1.3851e-05) (hash(x)=51092724) -9600 val loss 6.5407 -9600 val perplexity 692.7766 -9600 train 6.637218 (lr=1.5554e-05) (hash(x)=56525570) -9700 val loss 6.5368 -9700 val perplexity 690.1021 -9700 train 6.719944 (lr=1.5312e-05) (hash(x)=52585913) -8800 val loss 7.0045 -8800 val perplexity 1101.6248 -8800 train 7.003560 (lr=1.3289e-05) (hash(x)=48642928) -9800 val loss 6.5355 -9800 val perplexity 689.1559 -9800 train 6.706218 (lr=1.5139e-05) (hash(x)=52344698) -8900 val loss 6.9983 -8900 val perplexity 1094.7372 -8900 train 7.272505 (lr=1.2769e-05) (hash(x)=55342246) -9900 val loss 6.5324 -9900 val perplexity 687.0436 -9900 train 6.540698 (lr=1.5035e-05) (hash(x)=51740945) -9000 val loss 6.9807 -9000 val perplexity 1075.6921 -9000 train 6.939584 (lr=1.2292e-05) (hash(x)=48093368) -9999 val loss 6.5308 -9999 val perplexity 685.9659 -9100 val loss 6.9772 -9100 val perplexity 1071.9318 -9100 train 7.052067 (lr=1.1860e-05) (hash(x)=48578183) -9200 val loss 6.9744 -9200 val perplexity 1068.9447 -9200 train 7.153374 (lr=1.1472e-05) (hash(x)=50794720) -9300 val loss 6.9714 -9300 val perplexity 1065.6759 -9300 train 6.762959 (lr=1.1128e-05) (hash(x)=46513190) +0 train 11.762399 (lr=2.5000e-07) (hash(x)=50671684) +0 train 11.762399 (lr=3.5000e-07) (hash(x)=50671684) +100 val loss 9.8877 +100 val perplexity 19685.9785 +100 train 9.834713 (lr=5.0500e-05) (hash(x)=52740221) +100 val loss 10.1155 +100 val perplexity 24724.3379 +100 train 10.057404 (lr=3.5350e-05) (hash(x)=52740221) +100 val loss 10.2749 +100 val perplexity 28994.3867 +100 train 10.212666 (lr=2.5250e-05) (hash(x)=52740221) +200 val loss 8.2391 +200 val perplexity 3786.0645 +200 train 8.239470 (lr=1.0000e-04) (hash(x)=49034180) +200 val loss 8.5000 +200 val perplexity 4914.6611 +200 train 8.505946 (lr=7.0000e-05) (hash(x)=49034180) +200 val loss 9.0197 +200 val perplexity 8264.4795 +200 train 9.037632 (lr=5.0000e-05) (hash(x)=49034180) +300 val loss 7.7361 +300 val perplexity 2289.6355 +300 train 7.985001 (lr=9.9999e-05) (hash(x)=63180688) +300 val loss 7.7912 +300 val perplexity 2419.1409 +300 train 7.982556 (lr=6.9999e-05) (hash(x)=63180688) +300 val loss 8.1875 +300 val perplexity 3595.5947 +300 train 8.279997 (lr=5.0000e-05) (hash(x)=63180688) +400 val loss 7.6703 +400 val perplexity 2143.6765 +400 train 7.682755 (lr=9.9996e-05) (hash(x)=50373500) +400 val loss 7.6569 +400 val perplexity 2115.2031 +400 train 7.678177 (lr=6.9997e-05) (hash(x)=50373500) +400 val loss 7.7811 +400 val perplexity 2394.9141 +400 train 7.765651 (lr=4.9998e-05) (hash(x)=50373500) +500 val loss 7.6664 +500 val perplexity 2135.3586 +500 train 7.563447 (lr=9.9992e-05) (hash(x)=44547422) +500 val loss 7.6387 +500 val perplexity 2077.1033 +500 train 7.531584 (lr=6.9994e-05) (hash(x)=44547422) +600 val loss 7.6587 +600 val perplexity 2118.9907 +600 train 7.547817 (lr=9.9986e-05) (hash(x)=47184699) +500 val loss 7.6779 +500 val perplexity 2160.0034 +500 train 7.585442 (lr=4.9996e-05) (hash(x)=44547422) +600 val loss 7.6341 +600 val perplexity 2067.4236 +600 train 7.524572 (lr=6.9990e-05) (hash(x)=47184699) +700 val loss 7.6595 +700 val perplexity 2120.7214 +700 train 7.572439 (lr=9.9978e-05) (hash(x)=51374582) +600 val loss 7.6584 +600 val perplexity 2118.3877 +600 train 7.562192 (lr=4.9993e-05) (hash(x)=47184699) +700 val loss 7.6126 +700 val perplexity 2023.5380 +700 train 7.519081 (lr=6.9984e-05) (hash(x)=51374582) +800 val loss 7.6776 +800 val perplexity 2159.5378 +800 train 7.400121 (lr=9.9968e-05) (hash(x)=46264805) +700 val loss 7.6542 +700 val perplexity 2109.3865 +700 train 7.578816 (lr=4.9989e-05) (hash(x)=51374582) +800 val loss 7.5999 +800 val perplexity 1998.0842 +800 train 7.334561 (lr=6.9977e-05) (hash(x)=46264805) +900 val loss 7.6368 +900 val perplexity 2073.0603 +900 train 7.910016 (lr=9.9956e-05) (hash(x)=61178712) +800 val loss 7.6512 +800 val perplexity 2103.2065 +800 train 7.409334 (lr=4.9984e-05) (hash(x)=46264805) +1000 val loss 7.6373 +1000 val perplexity 2074.1538 +1000 train 7.641561 (lr=9.9943e-05) (hash(x)=50886520) +900 val loss 7.5833 +900 val perplexity 1965.0724 +900 train 7.806512 (lr=6.9969e-05) (hash(x)=61178712) +900 val loss 7.6487 +900 val perplexity 2097.9651 +900 train 7.925972 (lr=4.9978e-05) (hash(x)=61178712) +1100 val loss 7.6400 +1100 val perplexity 2079.6553 +1100 train 7.427147 (lr=9.9927e-05) (hash(x)=48600099) +1000 val loss 7.5656 +1000 val perplexity 1930.5631 +1000 train 7.541768 (lr=6.9960e-05) (hash(x)=50886520) +1200 val loss 7.6088 +1200 val perplexity 2015.8815 +1200 train 7.294816 (lr=9.9910e-05) (hash(x)=50146792) +1000 val loss 7.6447 +1000 val perplexity 2089.4600 +1000 train 7.667188 (lr=4.9971e-05) (hash(x)=50886520) +1100 val loss 7.5427 +1100 val perplexity 1886.9976 +1100 train 7.309322 (lr=6.9949e-05) (hash(x)=48600099) +1300 val loss 7.5447 +1300 val perplexity 1890.6191 +1300 train 7.401575 (lr=9.9892e-05) (hash(x)=52617313) +1100 val loss 7.6332 +1100 val perplexity 2065.7444 +1100 train 7.424823 (lr=4.9964e-05) (hash(x)=48600099) +1200 val loss 7.5453 +1200 val perplexity 1891.7679 +1200 train 7.256618 (lr=6.9937e-05) (hash(x)=50146792) +1400 val loss 7.4660 +1400 val perplexity 1747.6057 +1400 train 7.097398 (lr=9.9871e-05) (hash(x)=49794446) +1200 val loss 7.6416 +1200 val perplexity 2083.1062 +1200 train 7.400117 (lr=4.9955e-05) (hash(x)=50146792) +1300 val loss 7.5460 +1300 val perplexity 1893.1306 +1300 train 7.416545 (lr=6.9924e-05) (hash(x)=52617313) +1500 val loss 7.4002 +1500 val perplexity 1636.2826 +1500 train 6.987760 (lr=9.9849e-05) (hash(x)=50766317) +1300 val loss 7.6387 +1300 val perplexity 2077.0261 +1400 val loss 7.5491 +1400 val perplexity 1898.9385 +1300 train 7.525934 (lr=4.9946e-05) (hash(x)=52617313) +1400 train 7.220326 (lr=6.9910e-05) (hash(x)=49794446) +1600 val loss 7.3653 +1600 val perplexity 1580.1919 +1600 train 7.228467 (lr=9.9825e-05) (hash(x)=55551175) +1500 val loss 7.5755 +1500 val perplexity 1949.7792 +1500 train 7.219270 (lr=6.9894e-05) (hash(x)=50766317) +1400 val loss 7.6248 +1400 val perplexity 2048.3652 +1400 train 7.307927 (lr=4.9936e-05) (hash(x)=49794446) +1700 val loss 7.3389 +1700 val perplexity 1539.0521 +1700 train 7.492824 (lr=9.9799e-05) (hash(x)=56717172) +1600 val loss 7.5483 +1600 val perplexity 1897.6061 +1600 train 7.424455 (lr=6.9877e-05) (hash(x)=55551175) +1500 val loss 7.6215 +1500 val perplexity 2041.5687 +1500 train 7.282590 (lr=4.9924e-05) (hash(x)=50766317) +1800 val loss 7.2144 +1800 val perplexity 1358.8981 +1800 train 7.465829 (lr=9.9771e-05) (hash(x)=55376447) +1700 val loss 7.5245 +1700 val perplexity 1852.8307 +1700 train 7.673537 (lr=6.9859e-05) (hash(x)=56717172) +1600 val loss 7.6104 +1600 val perplexity 2019.1783 +1600 train 7.509243 (lr=4.9912e-05) (hash(x)=55551175) +1900 val loss 7.1630 +1900 val perplexity 1290.7191 +1900 train 6.876475 (lr=9.9741e-05) (hash(x)=43810837) +1800 val loss 7.5103 +1800 val perplexity 1826.7809 +1800 train 7.776741 (lr=6.9840e-05) (hash(x)=55376447) +2000 val loss 7.1165 +2000 val perplexity 1232.0780 +2000 train 7.163915 (lr=9.9710e-05) (hash(x)=50881655) +1700 val loss 7.5891 +1700 val perplexity 1976.5883 +1700 train 7.751419 (lr=4.9899e-05) (hash(x)=56717172) +1900 val loss 7.5662 +1900 val perplexity 1931.7455 +1900 train 7.293945 (lr=6.9819e-05) (hash(x)=43810837) +2100 val loss 7.0829 +2100 val perplexity 1191.4312 +2100 train 6.958876 (lr=9.9677e-05) (hash(x)=49386015) +1800 val loss 7.5521 +1800 val perplexity 1904.6635 +1800 train 7.867538 (lr=4.9885e-05) (hash(x)=55376447) +2000 val loss 7.4294 +2000 val perplexity 1684.8317 +2000 train 7.495223 (lr=6.9797e-05) (hash(x)=50881655) +2200 val loss 7.0590 +2200 val perplexity 1163.2664 +2200 train 6.966722 (lr=9.9642e-05) (hash(x)=48572079) +1900 val loss 7.5329 +1900 val perplexity 1868.5758 +1900 train 7.252420 (lr=4.9871e-05) (hash(x)=43810837) +2100 val loss 7.4149 +2100 val perplexity 1660.5729 +2100 train 7.292357 (lr=6.9774e-05) (hash(x)=49386015) +2300 val loss 7.0086 +2300 val perplexity 1106.0740 +2300 train 7.061727 (lr=9.9606e-05) (hash(x)=54950719) +2000 val loss 7.5129 +2000 val perplexity 1831.4402 +2000 train 7.594842 (lr=4.9855e-05) (hash(x)=50881655) +2200 val loss 7.3681 +2200 val perplexity 1584.5834 +2200 train 7.287286 (lr=6.9750e-05) (hash(x)=48572079) +2400 val loss 6.9859 +2400 val perplexity 1081.2404 +2400 train 6.613460 (lr=9.9567e-05) (hash(x)=42190240) +2100 val loss 7.4876 +2100 val perplexity 1785.7191 +2100 train 7.368810 (lr=4.9839e-05) (hash(x)=49386015) +2300 val loss 7.3500 +2300 val perplexity 1556.1429 +2300 train 7.414636 (lr=6.9724e-05) (hash(x)=54950719) +2500 val loss 6.9351 +2500 val perplexity 1027.6794 +2500 train 7.017910 (lr=9.9527e-05) (hash(x)=45223539) +2200 val loss 7.4640 +2200 val perplexity 1744.0586 +2200 train 7.389656 (lr=4.9821e-05) (hash(x)=48572079) +2600 val loss 6.8874 +2600 val perplexity 979.8730 +2600 train 6.912551 (lr=9.9485e-05) (hash(x)=54037353) +2400 val loss 7.3418 +2400 val perplexity 1543.5338 +2400 train 6.997776 (lr=6.9697e-05) (hash(x)=42190240) +2300 val loss 7.4442 +2300 val perplexity 1709.8500 +2300 train 7.501417 (lr=4.9803e-05) (hash(x)=54950719) +2700 val loss 6.8539 +2700 val perplexity 947.5999 +2700 train 7.300050 (lr=9.9442e-05) (hash(x)=59131616) +2500 val loss 7.3159 +2500 val perplexity 1504.0726 +2500 train 7.402029 (lr=6.9669e-05) (hash(x)=45223539) +2400 val loss 7.4315 +2400 val perplexity 1688.2817 +2400 train 7.095111 (lr=4.9784e-05) (hash(x)=42190240) +2800 val loss 6.8345 +2800 val perplexity 929.3577 +2800 train 6.668094 (lr=9.9396e-05) (hash(x)=45882743) +2600 val loss 7.3030 +2600 val perplexity 1484.7148 +2600 train 7.319759 (lr=6.9640e-05) (hash(x)=54037353) +2900 val loss 6.8112 +2900 val perplexity 907.9317 +2900 train 6.406235 (lr=9.9349e-05) (hash(x)=43758910) +2500 val loss 7.4064 +2500 val perplexity 1646.5193 +2500 train 7.476944 (lr=4.9764e-05) (hash(x)=45223539) +2700 val loss 7.2484 +2700 val perplexity 1405.8245 +2700 train 7.753444 (lr=6.9609e-05) (hash(x)=59131616) +3000 val loss 6.7988 +3000 val perplexity 896.8077 +3000 train 6.690326 (lr=9.9300e-05) (hash(x)=47965974) +2600 val loss 7.3925 +2600 val perplexity 1623.7417 +2600 train 7.406113 (lr=4.9743e-05) (hash(x)=54037353) +2800 val loss 7.2179 +2800 val perplexity 1363.6807 +2800 train 7.053506 (lr=6.9577e-05) (hash(x)=45882743) +3100 val loss 6.7826 +3100 val perplexity 882.4036 +3100 train 6.643602 (lr=9.9249e-05) (hash(x)=48205243) +2700 val loss 7.3712 +2700 val perplexity 1589.4659 +2700 train 7.893200 (lr=4.9721e-05) (hash(x)=59131616) +2900 val loss 7.1955 +2900 val perplexity 1333.4130 +2900 train 6.830922 (lr=6.9544e-05) (hash(x)=43758910) +3200 val loss 6.7688 +3200 val perplexity 870.2900 +3200 train 6.844539 (lr=9.9197e-05) (hash(x)=54511383) +2800 val loss 7.3503 +2800 val perplexity 1556.6750 +2800 train 7.189213 (lr=4.9698e-05) (hash(x)=45882743) +3000 val loss 7.1931 +3000 val perplexity 1330.2014 +3000 train 7.087551 (lr=6.9510e-05) (hash(x)=47965974) +3300 val loss 6.7786 +3300 val perplexity 878.7949 +3300 train 6.720613 (lr=9.9142e-05) (hash(x)=54428388) +2900 val loss 7.3208 +2900 val perplexity 1511.4541 +2900 train 6.946739 (lr=4.9674e-05) (hash(x)=43758910) +3100 val loss 7.1654 +3100 val perplexity 1293.8717 +3100 train 7.053102 (lr=6.9474e-05) (hash(x)=48205243) +3400 val loss 6.7422 +3400 val perplexity 847.4072 +3400 train 6.728989 (lr=9.9086e-05) (hash(x)=48115990) +3000 val loss 7.3214 +3000 val perplexity 1512.3827 +3000 train 7.244324 (lr=4.9650e-05) (hash(x)=47965974) +3200 val loss 7.1058 +3200 val perplexity 1219.0188 +3200 train 7.179151 (lr=6.9438e-05) (hash(x)=54511383) +3500 val loss 6.7449 +3500 val perplexity 849.7297 +3500 train 6.323328 (lr=9.9028e-05) (hash(x)=41137345) +3100 val loss 7.2901 +3100 val perplexity 1465.7279 +3100 train 7.179041 (lr=4.9625e-05) (hash(x)=48205243) +3300 val loss 7.0775 +3300 val perplexity 1184.9874 +3300 train 7.032140 (lr=6.9400e-05) (hash(x)=54428388) +3600 val loss 6.7404 +3600 val perplexity 845.8860 +3600 train 6.597991 (lr=9.8969e-05) (hash(x)=55186224) +3200 val loss 7.2804 +3200 val perplexity 1451.5530 +3400 val loss 7.0413 +3400 val perplexity 1142.8607 +3700 val loss 6.7670 +3700 val perplexity 868.6776 +3200 train 7.354766 (lr=4.9598e-05) (hash(x)=54511383) +3400 train 7.043338 (lr=6.9360e-05) (hash(x)=48115990) +3700 train 6.623077 (lr=9.8908e-05) (hash(x)=54990049) +3800 val loss 6.7232 +3800 val perplexity 831.5076 +3500 val loss 7.0094 +3500 val perplexity 1106.9911 +3800 train 6.410282 (lr=9.8845e-05) (hash(x)=46288812) +3500 train 6.610696 (lr=6.9320e-05) (hash(x)=41137345) +3300 val loss 7.2697 +3300 val perplexity 1436.1538 +3300 train 7.230228 (lr=4.9571e-05) (hash(x)=54428388) +3900 val loss 6.6952 +3900 val perplexity 808.5306 +3900 train 6.336929 (lr=9.8780e-05) (hash(x)=45829773) +3600 val loss 6.9960 +3600 val perplexity 1092.2891 +3600 train 6.866971 (lr=6.9278e-05) (hash(x)=55186224) +3400 val loss 7.2358 +3400 val perplexity 1388.2415 +3400 train 7.295737 (lr=4.9543e-05) (hash(x)=48115990) +4000 val loss 6.6906 +4000 val perplexity 804.8053 +4000 train 6.430512 (lr=9.8713e-05) (hash(x)=52499943) +3700 val loss 6.9571 +3700 val perplexity 1050.6265 +3700 train 6.810654 (lr=6.9235e-05) (hash(x)=54990049) +3500 val loss 7.2205 +3500 val perplexity 1367.1313 +3500 train 6.860225 (lr=4.9514e-05) (hash(x)=41137345) +4100 val loss 6.6367 +4100 val perplexity 762.5439 +4100 train 6.530697 (lr=9.8645e-05) (hash(x)=48563796) +3800 val loss 6.9252 +3800 val perplexity 1017.5701 +3800 train 6.651012 (lr=6.9191e-05) (hash(x)=46288812) +3600 val loss 7.2252 +3600 val perplexity 1373.6575 +3600 train 7.141061 (lr=4.9484e-05) (hash(x)=55186224) +4200 val loss 6.5980 +4200 val perplexity 733.6395 +4200 train 6.553297 (lr=9.8575e-05) (hash(x)=49165143) +3900 val loss 6.9015 +3900 val perplexity 993.7474 +3900 train 6.529973 (lr=6.9146e-05) (hash(x)=45829773) +3700 val loss 7.2039 +3700 val perplexity 1344.7120 +3700 train 7.037359 (lr=4.9454e-05) (hash(x)=54990049) +4300 val loss 6.5850 +4300 val perplexity 724.1366 +4300 train 6.682248 (lr=9.8503e-05) (hash(x)=50973176) +4000 val loss 6.8807 +4000 val perplexity 973.3345 +4000 train 6.645677 (lr=6.9099e-05) (hash(x)=52499943) +3800 val loss 7.1986 +3800 val perplexity 1337.5675 +3800 train 6.923230 (lr=4.9422e-05) (hash(x)=46288812) +4400 val loss 6.5625 +4400 val perplexity 708.0693 +4400 train 6.599442 (lr=9.8430e-05) (hash(x)=55275124) +4100 val loss 6.8402 +4100 val perplexity 934.6296 +4100 train 6.740988 (lr=6.9051e-05) (hash(x)=48563796) +3900 val loss 7.1692 +3900 val perplexity 1298.8608 +3900 train 6.805551 (lr=4.9390e-05) (hash(x)=45829773) +4500 val loss 6.5717 +4500 val perplexity 714.5932 +4500 train 6.896565 (lr=9.8355e-05) (hash(x)=58646505) +4200 val loss 6.7917 +4200 val perplexity 890.4531 +4200 train 6.752344 (lr=6.9002e-05) (hash(x)=49165143) +4000 val loss 7.1552 +4000 val perplexity 1280.7351 +4000 train 6.940578 (lr=4.9357e-05) (hash(x)=52499943) +4600 val loss 6.5544 +4600 val perplexity 702.3109 +4600 train 6.353131 (lr=9.8278e-05) (hash(x)=42554666) +4300 val loss 6.7639 +4300 val perplexity 866.0373 +4300 train 6.839080 (lr=6.8952e-05) (hash(x)=50973176) +4700 val loss 6.5413 +4700 val perplexity 693.1827 +4700 train 6.489670 (lr=9.8199e-05) (hash(x)=47846764) +4100 val loss 7.1129 +4100 val perplexity 1227.7599 +4100 train 7.014871 (lr=4.9322e-05) (hash(x)=48563796) +4400 val loss 6.7408 +4400 val perplexity 846.2604 +4400 train 6.786955 (lr=6.8901e-05) (hash(x)=55275124) +4800 val loss 6.5281 +4800 val perplexity 684.1279 +4800 train 7.040569 (lr=9.8119e-05) (hash(x)=58239019) +4200 val loss 7.0831 +4200 val perplexity 1191.7068 +4200 train 7.052643 (lr=4.9287e-05) (hash(x)=49165143) +4500 val loss 6.7296 +4500 val perplexity 836.7865 +4500 train 7.079769 (lr=6.8848e-05) (hash(x)=58646505) +4900 val loss 6.5555 +4900 val perplexity 703.0883 +4900 train 6.609111 (lr=9.8036e-05) (hash(x)=50711220) +4300 val loss 7.0455 +4300 val perplexity 1147.6954 +4300 train 7.109828 (lr=4.9252e-05) (hash(x)=50973176) +4600 val loss 6.6999 +4600 val perplexity 812.3441 +4600 train 6.505454 (lr=6.8794e-05) (hash(x)=42554666) +5000 val loss 6.5689 +5000 val perplexity 712.6074 +5000 train 6.496807 (lr=9.7953e-05) (hash(x)=45994194) +4400 val loss 7.0436 +4400 val perplexity 1145.4834 +4400 train 7.090459 (lr=4.9215e-05) (hash(x)=55275124) +4700 val loss 6.6921 +4700 val perplexity 805.9858 +4700 train 6.613596 (lr=6.8739e-05) (hash(x)=47846764) +5100 val loss 6.5192 +5100 val perplexity 678.0689 +5100 train 6.397672 (lr=9.7867e-05) (hash(x)=48659050) +4500 val loss 7.0142 +4500 val perplexity 1112.2759 +4500 train 7.360765 (lr=4.9177e-05) (hash(x)=58646505) +4800 val loss 6.6668 +4800 val perplexity 785.8689 +5200 val loss 6.5427 +5200 val perplexity 694.1628 +4800 train 7.199380 (lr=6.8683e-05) (hash(x)=58239019) +5200 train 6.452981 (lr=9.7780e-05) (hash(x)=49369682) +4600 val loss 6.9867 +4600 val perplexity 1082.1002 +4600 train 6.789534 (lr=4.9139e-05) (hash(x)=42554666) +5300 val loss 6.5256 +5300 val perplexity 682.4128 +5300 train 6.916782 (lr=9.7691e-05) (hash(x)=57787700) +4900 val loss 6.6509 +4900 val perplexity 773.4701 +4900 train 6.689444 (lr=6.8626e-05) (hash(x)=50711220) +4700 val loss 6.9696 +4700 val perplexity 1063.7797 +4700 train 6.875963 (lr=4.9099e-05) (hash(x)=47846764) +5400 val loss 6.5071 +5400 val perplexity 669.8989 +5400 train 6.419180 (lr=9.7600e-05) (hash(x)=49365400) +5000 val loss 6.6467 +5000 val perplexity 770.2419 +5000 train 6.595458 (lr=6.8567e-05) (hash(x)=45994194) +4800 val loss 6.9343 +4800 val perplexity 1026.9167 +4800 train 7.461365 (lr=4.9059e-05) (hash(x)=58239019) +5500 val loss 6.5101 +5500 val perplexity 671.8862 +5500 train 6.487110 (lr=9.7508e-05) (hash(x)=48720412) +5100 val loss 6.6408 +5100 val perplexity 765.7249 +5100 train 6.512000 (lr=6.8507e-05) (hash(x)=48659050) +4900 val loss 6.9144 +4900 val perplexity 1006.7140 +4900 train 6.959520 (lr=4.9018e-05) (hash(x)=50711220) +5200 val loss 6.6411 +5200 val perplexity 765.9323 +5600 val loss 6.5060 +5600 val perplexity 669.1685 +5200 train 6.528730 (lr=6.8446e-05) (hash(x)=49369682) +5600 train 6.865009 (lr=9.7414e-05) (hash(x)=55784800) +5000 val loss 6.9258 +5000 val perplexity 1018.2156 +5000 train 6.861471 (lr=4.8976e-05) (hash(x)=45994194) +5700 val loss 6.4975 +5700 val perplexity 663.4482 +5700 train 6.313526 (lr=9.7318e-05) (hash(x)=50073634) +5300 val loss 6.6297 +5300 val perplexity 757.2255 +5300 train 7.026625 (lr=6.8384e-05) (hash(x)=57787700) +5100 val loss 6.8771 +5100 val perplexity 969.8417 +5100 train 6.742400 (lr=4.8934e-05) (hash(x)=48659050) +5800 val loss 6.5029 +5800 val perplexity 667.0438 +5800 train 6.311048 (lr=9.7221e-05) (hash(x)=50170324) +5400 val loss 6.6259 +5400 val perplexity 754.4118 +5400 train 6.534162 (lr=6.8320e-05) (hash(x)=49365400) +5900 val loss 6.5182 +5900 val perplexity 677.3871 +5900 train 6.257247 (lr=9.7122e-05) (hash(x)=48410268) +5500 val loss 6.6200 +5500 val perplexity 749.9457 +5500 train 6.589300 (lr=6.8256e-05) (hash(x)=48720412) +5200 val loss 6.8380 +5200 val perplexity 932.5850 +5200 train 6.764578 (lr=4.8890e-05) (hash(x)=49369682) +6000 val loss 6.5475 +6000 val perplexity 697.4980 +6000 train 6.368574 (lr=9.7021e-05) (hash(x)=49527342) +5600 val loss 6.5915 +5600 val perplexity 728.8882 +5600 train 6.939466 (lr=6.8190e-05) (hash(x)=55784800) +5300 val loss 6.8141 +5300 val perplexity 910.6025 +5300 train 7.221012 (lr=4.8846e-05) (hash(x)=57787700) +6100 val loss 6.5278 +6100 val perplexity 683.8751 +6100 train 6.265100 (lr=9.6919e-05) (hash(x)=49550294) +5700 val loss 6.5930 +5700 val perplexity 729.9883 +5700 train 6.428129 (lr=6.8123e-05) (hash(x)=50073634) +5400 val loss 6.8054 +5400 val perplexity 902.6747 +5400 train 6.728540 (lr=4.8800e-05) (hash(x)=49365400) +6200 val loss 6.5260 +6200 val perplexity 682.6302 +6200 train 6.053761 (lr=9.6815e-05) (hash(x)=42126106) +5800 val loss 6.5845 +5800 val perplexity 723.8096 +5800 train 6.401678 (lr=6.8055e-05) (hash(x)=50170324) +5500 val loss 6.7811 +5500 val perplexity 881.0124 +5500 train 6.739115 (lr=4.8754e-05) (hash(x)=48720412) +6300 val loss 6.5189 +6300 val perplexity 677.8010 +6300 train 6.206003 (lr=9.6709e-05) (hash(x)=49608772) +5900 val loss 6.5860 +5900 val perplexity 724.8828 +5900 train 6.317381 (lr=6.7985e-05) (hash(x)=48410268) +5600 val loss 6.7626 +5600 val perplexity 864.9143 +5600 train 7.118996 (lr=4.8707e-05) (hash(x)=55784800) +6400 val loss 6.5002 +6400 val perplexity 665.2526 +6400 train 6.008442 (lr=9.6602e-05) (hash(x)=52324417) +6000 val loss 6.5948 +6000 val perplexity 731.2484 +6000 train 6.441565 (lr=6.7915e-05) (hash(x)=49527342) +5700 val loss 6.7444 +5700 val perplexity 849.2736 +5700 train 6.583510 (lr=4.8659e-05) (hash(x)=50073634) +6500 val loss 6.4622 +6500 val perplexity 640.4863 +6500 train 6.500179 (lr=9.6493e-05) (hash(x)=46207215) +6100 val loss 6.5960 +6100 val perplexity 732.1500 +6100 train 6.352420 (lr=6.7843e-05) (hash(x)=49550294) +5800 val loss 6.7344 +5800 val perplexity 840.8555 +5800 train 6.546459 (lr=4.8611e-05) (hash(x)=50170324) +6600 val loss 6.4539 +6600 val perplexity 635.1934 +6600 train 6.427457 (lr=9.6382e-05) (hash(x)=49027014) +6200 val loss 6.5994 +6200 val perplexity 734.6638 +6200 train 6.108905 (lr=6.7770e-05) (hash(x)=42126106) +6700 val loss 6.4478 +6700 val perplexity 631.3096 +6700 train 6.456262 (lr=9.6270e-05) (hash(x)=46232513) +5900 val loss 6.7407 +5900 val perplexity 846.1248 +5900 train 6.480339 (lr=4.8561e-05) (hash(x)=48410268) +6300 val loss 6.5874 +6300 val perplexity 725.9111 +6300 train 6.287298 (lr=6.7696e-05) (hash(x)=49608772) +6800 val loss 6.4340 +6800 val perplexity 622.6804 +6800 train 6.348957 (lr=9.6156e-05) (hash(x)=47348403) +6000 val loss 6.7194 +6000 val perplexity 828.2938 +6000 train 6.608191 (lr=4.8511e-05) (hash(x)=49527342) +6400 val loss 6.5683 +6400 val perplexity 712.1362 +6400 train 6.091096 (lr=6.7621e-05) (hash(x)=52324417) +6900 val loss 6.5070 +6900 val perplexity 669.8286 +6900 train 6.517502 (lr=9.6040e-05) (hash(x)=49806647) +6100 val loss 6.7156 +6100 val perplexity 825.1382 +6100 train 6.508733 (lr=4.8459e-05) (hash(x)=49550294) +7000 val loss 6.4438 +7000 val perplexity 628.7797 +7000 train 6.509642 (lr=9.5923e-05) (hash(x)=50893018) +6500 val loss 6.5369 +6500 val perplexity 690.1267 +6500 train 6.572875 (lr=6.7545e-05) (hash(x)=46207215) +6200 val loss 6.6996 +6200 val perplexity 812.1140 +6200 train 6.209795 (lr=4.8407e-05) (hash(x)=42126106) +7100 val loss 6.4454 +7100 val perplexity 629.8203 +7100 train 6.465027 (lr=9.5804e-05) (hash(x)=49157639) +6600 val loss 6.5250 +6600 val perplexity 681.9464 +6600 train 6.487035 (lr=6.7467e-05) (hash(x)=49027014) +6300 val loss 6.6951 +6300 val perplexity 808.4281 +6300 train 6.397457 (lr=4.8355e-05) (hash(x)=49608772) +7200 val loss 6.4361 +7200 val perplexity 623.9918 +7200 train 6.399066 (lr=9.5683e-05) (hash(x)=47014759) +6700 val loss 6.5086 +6700 val perplexity 670.9149 +6700 train 6.488557 (lr=6.7389e-05) (hash(x)=46232513) +6400 val loss 6.6795 +6400 val perplexity 795.8832 +6400 train 6.217346 (lr=4.8301e-05) (hash(x)=52324417) +7300 val loss 6.4366 +7300 val perplexity 624.2891 +7300 train 6.448583 (lr=9.5561e-05) (hash(x)=47325591) +6800 val loss 6.4992 +6800 val perplexity 664.5931 +6800 train 6.414500 (lr=6.7309e-05) (hash(x)=47348403) +6500 val loss 6.6300 +6500 val perplexity 757.4920 +6500 train 6.684907 (lr=4.8246e-05) (hash(x)=46207215) +7400 val loss 6.4386 +7400 val perplexity 625.5496 +7400 train 6.322974 (lr=9.5437e-05) (hash(x)=49184604) +6900 val loss 6.5452 +6900 val perplexity 695.8759 +6900 train 6.564537 (lr=6.7228e-05) (hash(x)=49806647) +7500 val loss 6.4192 +7500 val perplexity 613.5089 +7500 train 6.575971 (lr=9.5312e-05) (hash(x)=55053584) +6600 val loss 6.6089 +6600 val perplexity 741.6689 +6600 train 6.555489 (lr=4.8191e-05) (hash(x)=49027014) +7000 val loss 6.4731 +7000 val perplexity 647.4790 +7000 train 6.543548 (lr=6.7146e-05) (hash(x)=50893018) +7600 val loss 6.4436 +7600 val perplexity 628.6702 +7600 train 6.398893 (lr=9.5185e-05) (hash(x)=48693923) +6700 val loss 6.5908 +6700 val perplexity 728.3979 +6700 train 6.561954 (lr=4.8135e-05) (hash(x)=46232513) +7100 val loss 6.4636 +7100 val perplexity 641.3405 +7100 train 6.479076 (lr=6.7063e-05) (hash(x)=49157639) +7700 val loss 6.4177 +7700 val perplexity 612.6000 +7700 train 6.011420 (lr=9.5057e-05) (hash(x)=40952882) +6800 val loss 6.5809 +6800 val perplexity 721.1910 +6800 train 6.512127 (lr=4.8078e-05) (hash(x)=47348403) +7200 val loss 6.4610 +7200 val perplexity 639.6901 +7200 train 6.458044 (lr=6.6978e-05) (hash(x)=47014759) +7800 val loss 6.4294 +7800 val perplexity 619.8010 +7800 train 6.530966 (lr=9.4926e-05) (hash(x)=52487845) +6900 val loss 6.6260 +6900 val perplexity 754.4222 +6900 train 6.638486 (lr=4.8020e-05) (hash(x)=49806647) +7300 val loss 6.4570 +7300 val perplexity 637.1199 +7300 train 6.465302 (lr=6.6893e-05) (hash(x)=47325591) +7900 val loss 6.4228 +7900 val perplexity 615.7307 +7900 train 6.504926 (lr=9.4795e-05) (hash(x)=50221547) +7000 val loss 6.5531 +7000 val perplexity 701.4340 +7000 train 6.622217 (lr=4.7961e-05) (hash(x)=50893018) +8000 val loss 6.4295 +8000 val perplexity 619.8445 +8000 train 6.581017 (lr=9.4661e-05) (hash(x)=62294204) +7400 val loss 6.4536 +7400 val perplexity 634.9674 +7400 train 6.332539 (lr=6.6806e-05) (hash(x)=49184604) +7100 val loss 6.5496 +7100 val perplexity 698.9306 +7100 train 6.592482 (lr=4.7902e-05) (hash(x)=49157639) +8100 val loss 6.4141 +8100 val perplexity 610.3732 +8100 train 6.115232 (lr=9.4526e-05) (hash(x)=44401967) +7500 val loss 6.4302 +7500 val perplexity 620.3099 +7500 train 6.616399 (lr=6.6718e-05) (hash(x)=55053584) +7200 val loss 6.5452 +7200 val perplexity 695.8708 +7200 train 6.524742 (lr=4.7842e-05) (hash(x)=47014759) +8200 val loss 6.4258 +8200 val perplexity 617.5896 +8200 train 6.343907 (lr=9.4390e-05) (hash(x)=52769095) +7600 val loss 6.4444 +7600 val perplexity 629.1561 +7600 train 6.411209 (lr=6.6630e-05) (hash(x)=48693923) +7300 val loss 6.5266 +7300 val perplexity 683.0805 +7300 train 6.523350 (lr=4.7781e-05) (hash(x)=47325591) +8300 val loss 6.4401 +8300 val perplexity 626.4806 +8300 train 6.359773 (lr=9.4252e-05) (hash(x)=56829883) +7700 val loss 6.4377 +7700 val perplexity 624.9366 +7700 train 6.003014 (lr=6.6540e-05) (hash(x)=40952882) +8400 val loss 6.4204 +8400 val perplexity 614.2641 +8400 train 6.337412 (lr=9.4112e-05) (hash(x)=52147375) +7400 val loss 6.5141 +7400 val perplexity 674.5975 +7400 train 6.393788 (lr=4.7719e-05) (hash(x)=49184604) +7800 val loss 6.4467 +7800 val perplexity 630.6269 +7800 train 6.548743 (lr=6.6448e-05) (hash(x)=52487845) +8500 val loss 6.4369 +8500 val perplexity 624.4469 +8500 train 6.697473 (lr=9.3971e-05) (hash(x)=60197820) +7500 val loss 6.5090 +7500 val perplexity 671.1542 +7500 train 6.779480 (lr=4.7656e-05) (hash(x)=55053584) +7900 val loss 6.4387 +7900 val perplexity 625.5776 +7900 train 6.493108 (lr=6.6356e-05) (hash(x)=50221547) +8600 val loss 6.4427 +8600 val perplexity 628.0919 +8600 train 6.142499 (lr=9.3828e-05) (hash(x)=49377068) +7600 val loss 6.5040 +7600 val perplexity 667.8242 +7600 train 6.463983 (lr=4.7593e-05) (hash(x)=48693923) +8000 val loss 6.4375 +8000 val perplexity 624.8457 +8000 train 6.634599 (lr=6.6263e-05) (hash(x)=62294204) +8700 val loss 6.4772 +8700 val perplexity 650.1678 +8700 train 6.385171 (lr=9.3684e-05) (hash(x)=51092724) +7700 val loss 6.4977 +7700 val perplexity 663.6159 +7700 train 6.047739 (lr=4.7528e-05) (hash(x)=40952882) +8100 val loss 6.4269 +8100 val perplexity 618.2664 +8100 train 6.128141 (lr=6.6169e-05) (hash(x)=44401967) +8800 val loss 6.4397 +8800 val perplexity 626.1981 +8800 train 6.399339 (lr=9.3538e-05) (hash(x)=48642928) +8200 val loss 6.4379 +8200 val perplexity 625.0704 +8200 train 6.342501 (lr=6.6073e-05) (hash(x)=52769095) +7800 val loss 6.4949 +7800 val perplexity 661.7847 +7800 train 6.592265 (lr=4.7463e-05) (hash(x)=52487845) +8900 val loss 6.4906 +8900 val perplexity 658.9429 +8900 train 6.636524 (lr=9.3391e-05) (hash(x)=55342246) +8300 val loss 6.4344 +8300 val perplexity 622.9052 +8300 train 6.376162 (lr=6.5976e-05) (hash(x)=56829883) +7900 val loss 6.4934 +7900 val perplexity 660.7817 +7900 train 6.563413 (lr=4.7397e-05) (hash(x)=50221547) +9000 val loss 6.3977 +9000 val perplexity 600.4503 +9000 train 6.384066 (lr=9.3242e-05) (hash(x)=48093368) +8400 val loss 6.4385 +8400 val perplexity 625.4803 +8400 train 6.354476 (lr=6.5879e-05) (hash(x)=52147375) +8000 val loss 6.4764 +8000 val perplexity 649.6057 +8000 train 6.703435 (lr=4.7331e-05) (hash(x)=62294204) +9100 val loss 6.3892 +9100 val perplexity 595.4094 +9100 train 6.443357 (lr=9.3092e-05) (hash(x)=48578183) +8500 val loss 6.4415 +8500 val perplexity 627.3625 +8500 train 6.720816 (lr=6.5780e-05) (hash(x)=60197820) +8100 val loss 6.4722 +8100 val perplexity 646.8797 +8100 train 6.155170 (lr=4.7263e-05) (hash(x)=44401967) +9200 val loss 6.3970 +9200 val perplexity 600.0333 +9200 train 6.539145 (lr=9.2940e-05) (hash(x)=50794720) +8600 val loss 6.4340 +8600 val perplexity 622.6287 +8600 train 6.148525 (lr=6.5680e-05) (hash(x)=49377068) +9300 val loss 6.3945 +9300 val perplexity 598.5162 +9300 train 6.156699 (lr=9.2786e-05) (hash(x)=46513190) +8200 val loss 6.4748 +8200 val perplexity 648.6202 +8200 train 6.398126 (lr=4.7195e-05) (hash(x)=52769095) +8700 val loss 6.4341 +8700 val perplexity 622.7172 +8700 train 6.372913 (lr=6.5579e-05) (hash(x)=51092724) +9400 val loss 6.3957 +9400 val perplexity 599.2844 +9400 train 6.077312 (lr=9.2632e-05) (hash(x)=43808238) +8300 val loss 6.4782 +8300 val perplexity 650.7882 +8300 train 6.405579 (lr=4.7126e-05) (hash(x)=56829883) +8800 val loss 6.4297 +8800 val perplexity 620.0068 +8800 train 6.407724 (lr=6.5477e-05) (hash(x)=48642928) +9500 val loss 6.3885 +9500 val perplexity 594.9573 +9500 train 6.205388 (lr=9.2475e-05) (hash(x)=45021888) +8400 val loss 6.4793 +8400 val perplexity 651.4886 +8400 train 6.414195 (lr=4.7056e-05) (hash(x)=52147375) +8900 val loss 6.4926 +8900 val perplexity 660.2506 +8900 train 6.566921 (lr=6.5374e-05) (hash(x)=55342246) +9600 val loss 6.3796 +9600 val perplexity 589.7011 +9600 train 6.475274 (lr=9.2317e-05) (hash(x)=56525570) +8500 val loss 6.4731 +8500 val perplexity 647.5182 +8500 train 6.728217 (lr=4.6986e-05) (hash(x)=60197820) +9000 val loss 6.3875 +9000 val perplexity 594.3539 +9000 train 6.377201 (lr=6.5270e-05) (hash(x)=48093368) +9700 val loss 6.3735 +9700 val perplexity 586.0873 +9700 train 6.562270 (lr=9.2158e-05) (hash(x)=52585913) +8600 val loss 6.4728 +8600 val perplexity 647.3222 +8600 train 6.187857 (lr=4.6914e-05) (hash(x)=49377068) +9100 val loss 6.3731 +9100 val perplexity 585.8459 +9100 train 6.425647 (lr=6.5164e-05) (hash(x)=48578183) +9800 val loss 6.3693 +9800 val perplexity 583.6546 +9800 train 6.567293 (lr=9.1997e-05) (hash(x)=52344698) +8700 val loss 6.4632 +8700 val perplexity 641.1121 +8700 train 6.412605 (lr=4.6842e-05) (hash(x)=51092724) +9900 val loss 6.3574 +9900 val perplexity 576.7660 +9900 train 6.348727 (lr=9.1835e-05) (hash(x)=51740945) +9200 val loss 6.4086 +9200 val perplexity 607.0344 +9200 train 6.556032 (lr=6.5058e-05) (hash(x)=50794720) +8800 val loss 6.4556 +8800 val perplexity 636.2337 +8800 train 6.428358 (lr=4.6769e-05) (hash(x)=48642928) +10000 val loss 6.3627 +10000 val perplexity 579.7943 +10000 train 5.984843 (lr=9.1671e-05) (hash(x)=43208422) +9300 val loss 6.3758 +9300 val perplexity 587.4757 +9300 train 6.138883 (lr=6.4951e-05) (hash(x)=46513190) +8900 val loss 6.5076 +8900 val perplexity 670.2025 +8900 train 6.623878 (lr=4.6696e-05) (hash(x)=55342246) +10100 val loss 6.3752 +10100 val perplexity 587.0868 +10100 train 6.571931 (lr=9.1506e-05) (hash(x)=53125597) +9400 val loss 6.3617 +9400 val perplexity 579.2454 +9400 train 6.042396 (lr=6.4842e-05) (hash(x)=43808238) +10200 val loss 6.3612 +10200 val perplexity 578.9157 +10200 train 6.123744 (lr=9.1339e-05) (hash(x)=47716902) +9000 val loss 6.4190 +9000 val perplexity 613.3840 +9000 train 6.414150 (lr=4.6621e-05) (hash(x)=48093368) +9500 val loss 6.3485 +9500 val perplexity 571.6222 +9500 train 6.159647 (lr=6.4733e-05) (hash(x)=45021888) +10300 val loss 6.3563 +10300 val perplexity 576.1033 +10300 train 6.177398 (lr=9.1171e-05) (hash(x)=49594750) +9600 val loss 6.3473 +9600 val perplexity 570.9622 +9600 train 6.419503 (lr=6.4622e-05) (hash(x)=56525570) +9100 val loss 6.4128 +9100 val perplexity 609.6286 +9100 train 6.491930 (lr=4.6546e-05) (hash(x)=48578183) +10400 val loss 6.3358 +10400 val perplexity 564.4308 +10400 train 6.339329 (lr=9.1001e-05) (hash(x)=52793881) +9700 val loss 6.3426 +9700 val perplexity 568.2821 +9700 train 6.518075 (lr=6.4511e-05) (hash(x)=52585913) +9200 val loss 6.3946 +9200 val perplexity 598.6269 +9200 train 6.557541 (lr=4.6470e-05) (hash(x)=50794720) +10500 val loss 6.3488 +10500 val perplexity 571.8327 +10500 train 6.065459 (lr=9.0830e-05) (hash(x)=47821631) +9800 val loss 6.3421 +9800 val perplexity 567.9776 +9800 train 6.534833 (lr=6.4398e-05) (hash(x)=52344698) +9300 val loss 6.3776 +9300 val perplexity 588.5210 +9300 train 6.137945 (lr=4.6393e-05) (hash(x)=46513190) +10600 val loss 6.3452 +10600 val perplexity 569.7288 +10600 train 6.053771 (lr=9.0658e-05) (hash(x)=48882778) +9900 val loss 6.3410 +9900 val perplexity 567.3494 +9900 train 6.332654 (lr=6.4284e-05) (hash(x)=51740945) +9400 val loss 6.3750 +9400 val perplexity 587.0048 +9400 train 6.055810 (lr=4.6316e-05) (hash(x)=43808238) +10700 val loss 6.3658 +10700 val perplexity 581.6204 +10700 train 6.341011 (lr=9.0484e-05) (hash(x)=51648831) +10000 val loss 6.3392 +10000 val perplexity 566.3406 +10000 train 5.959545 (lr=6.4170e-05) (hash(x)=43208422) +9500 val loss 6.3694 +9500 val perplexity 583.6925 +9500 train 6.180200 (lr=4.6238e-05) (hash(x)=45021888) +10800 val loss 6.3531 +10800 val perplexity 574.2545 +10800 train 6.053787 (lr=9.0308e-05) (hash(x)=45427969) +10100 val loss 6.3547 +10100 val perplexity 575.1865 +10100 train 6.541080 (lr=6.4054e-05) (hash(x)=53125597) +10900 val loss 6.3420 +10900 val perplexity 567.9251 +10900 train 6.444570 (lr=9.0132e-05) (hash(x)=61149696) +9600 val loss 6.3622 +9600 val perplexity 579.5065 +9600 train 6.455026 (lr=4.6159e-05) (hash(x)=56525570) +10200 val loss 6.3568 +10200 val perplexity 576.4001 +10200 train 6.124059 (lr=6.3937e-05) (hash(x)=47716902) +11000 val loss 6.3613 +11000 val perplexity 579.0264 +11000 train 5.927875 (lr=8.9954e-05) (hash(x)=46764869) +9700 val loss 6.3529 +9700 val perplexity 574.1816 +9700 train 6.534677 (lr=4.6079e-05) (hash(x)=52585913) +10300 val loss 6.3512 +10300 val perplexity 573.1906 +10300 train 6.160101 (lr=6.3820e-05) (hash(x)=49594750) +11100 val loss 6.3480 +11100 val perplexity 571.3323 +11100 train 6.408381 (lr=8.9774e-05) (hash(x)=55407730) +9800 val loss 6.3488 +9800 val perplexity 571.8160 +9800 train 6.539922 (lr=4.5999e-05) (hash(x)=52344698) +10400 val loss 6.3472 +10400 val perplexity 570.9167 +10400 train 6.344323 (lr=6.3701e-05) (hash(x)=52793881) +11200 val loss 6.3485 +11200 val perplexity 571.6244 +11200 train 6.299277 (lr=8.9593e-05) (hash(x)=48597723) +9900 val loss 6.3415 +9900 val perplexity 567.6246 +9900 train 6.329786 (lr=4.5917e-05) (hash(x)=51740945) +10500 val loss 6.3294 +10500 val perplexity 560.8275 +10500 train 6.041739 (lr=6.3581e-05) (hash(x)=47821631) +11300 val loss 6.3390 +11300 val perplexity 566.2021 +11300 train 6.188590 (lr=8.9411e-05) (hash(x)=49768071) +10000 val loss 6.3388 +10000 val perplexity 566.0963 +10000 train 5.936035 (lr=4.5835e-05) (hash(x)=43208422) +11400 val loss 6.3368 +11400 val perplexity 565.0054 +11400 train 6.155240 (lr=8.9227e-05) (hash(x)=44825330) +10600 val loss 6.3269 +10600 val perplexity 559.4210 +10600 train 6.010536 (lr=6.3460e-05) (hash(x)=48882778) +10100 val loss 6.3508 +10100 val perplexity 572.9632 +10100 train 6.543714 (lr=4.5753e-05) (hash(x)=53125597) +11500 val loss 6.3175 +11500 val perplexity 554.2041 +11500 train 6.180076 (lr=8.9043e-05) (hash(x)=47923000) +10700 val loss 6.3121 +10700 val perplexity 551.2183 +10700 train 6.264770 (lr=6.3339e-05) (hash(x)=51648831) +11600 val loss 6.3106 +11600 val perplexity 550.3655 +10200 val loss 6.3387 +10200 val perplexity 566.0820 +11600 train 6.452470 (lr=8.8856e-05) (hash(x)=54438279) +10200 train 6.107014 (lr=4.5669e-05) (hash(x)=47716902) +10800 val loss 6.3108 +10800 val perplexity 550.5038 +10800 train 6.009974 (lr=6.3216e-05) (hash(x)=45427969) +11700 val loss 6.3260 +11700 val perplexity 558.9051 +11700 train 6.448069 (lr=8.8668e-05) (hash(x)=52221804) +10300 val loss 6.3289 +10300 val perplexity 560.5639 +10300 train 6.161600 (lr=4.5585e-05) (hash(x)=49594750) +10900 val loss 6.3116 +10900 val perplexity 550.9158 +10900 train 6.429803 (lr=6.3092e-05) (hash(x)=61149696) +11800 val loss 6.2993 +11800 val perplexity 544.1850 +11800 train 6.342713 (lr=8.8479e-05) (hash(x)=56224922) +10400 val loss 6.3269 +10400 val perplexity 559.4200 +10400 train 6.333057 (lr=4.5501e-05) (hash(x)=52793881) +11000 val loss 6.3291 +11000 val perplexity 560.6751 +11000 train 5.884190 (lr=6.2968e-05) (hash(x)=46764869) +11900 val loss 6.3130 +11900 val perplexity 551.7108 +11900 train 6.339056 (lr=8.8289e-05) (hash(x)=48424178) +10500 val loss 6.3233 +10500 val perplexity 557.4075 +11100 val loss 6.3075 +11100 val perplexity 548.6846 +10500 train 6.031397 (lr=4.5415e-05) (hash(x)=47821631) +11100 train 6.344897 (lr=6.2842e-05) (hash(x)=55407730) +12000 val loss 6.3101 +12000 val perplexity 550.0740 +12000 train 5.964257 (lr=8.8097e-05) (hash(x)=43777764) +11200 val loss 6.3103 +11200 val perplexity 550.2007 +11200 train 6.256108 (lr=6.2715e-05) (hash(x)=48597723) +10600 val loss 6.3293 +10600 val perplexity 560.7473 +10600 train 5.997926 (lr=4.5329e-05) (hash(x)=48882778) +12100 val loss 6.3048 +12100 val perplexity 547.1997 +12100 train 6.144682 (lr=8.7904e-05) (hash(x)=45400058) +11300 val loss 6.3155 +11300 val perplexity 553.0821 +11300 train 6.158976 (lr=6.2588e-05) (hash(x)=49768071) +10700 val loss 6.3161 +10700 val perplexity 553.4058 +10700 train 6.274066 (lr=4.5242e-05) (hash(x)=51648831) +12200 val loss 6.3136 +12200 val perplexity 552.0535 +12200 train 6.335985 (lr=8.7710e-05) (hash(x)=48249623) +11400 val loss 6.2904 +11400 val perplexity 539.3922 +11400 train 6.108674 (lr=6.2459e-05) (hash(x)=44825330) +10800 val loss 6.3367 +10800 val perplexity 564.9362 +10800 train 6.028230 (lr=4.5154e-05) (hash(x)=45427969) +12300 val loss 6.2957 +12300 val perplexity 542.2234 +12300 train 6.346626 (lr=8.7515e-05) (hash(x)=46027842) +11500 val loss 6.2798 +11500 val perplexity 533.6780 +11500 train 6.128276 (lr=6.2330e-05) (hash(x)=47923000) +12400 val loss 6.3056 +12400 val perplexity 547.6388 +10900 val loss 6.3233 +10900 val perplexity 557.4349 +12400 train 6.295945 (lr=8.7318e-05) (hash(x)=52450928) +10900 train 6.443906 (lr=4.5066e-05) (hash(x)=61149696) +11600 val loss 6.2719 +11600 val perplexity 529.4833 +11600 train 6.415489 (lr=6.2199e-05) (hash(x)=54438279) +12500 val loss 6.3274 +12500 val perplexity 559.6817 +12500 train 6.332880 (lr=8.7119e-05) (hash(x)=49013162) +11000 val loss 6.3366 +11000 val perplexity 564.8891 +11000 train 5.882841 (lr=4.4977e-05) (hash(x)=46764869) +11700 val loss 6.2685 +11700 val perplexity 527.6814 +11700 train 6.394846 (lr=6.2068e-05) (hash(x)=52221804) +12600 val loss 6.3038 +12600 val perplexity 546.6348 +12600 train 6.127820 (lr=8.6920e-05) (hash(x)=47111570) +11100 val loss 6.3171 +11100 val perplexity 553.9795 +11100 train 6.349979 (lr=4.4887e-05) (hash(x)=55407730) +11800 val loss 6.2612 +11800 val perplexity 523.8317 +11800 train 6.315621 (lr=6.1936e-05) (hash(x)=56224922) +12700 val loss 6.3205 +12700 val perplexity 555.8489 +12700 train 6.361087 (lr=8.6719e-05) (hash(x)=49555316) +11200 val loss 6.3112 +11200 val perplexity 550.6852 +11200 train 6.254844 (lr=4.4797e-05) (hash(x)=48597723) +11900 val loss 6.2633 +11900 val perplexity 524.9381 +11900 train 6.275504 (lr=6.1802e-05) (hash(x)=48424178) +12800 val loss 6.3324 +12800 val perplexity 562.4918 +12800 train 6.419411 (lr=8.6517e-05) (hash(x)=52979217) +11300 val loss 6.3252 +11300 val perplexity 558.4772 +11300 train 6.173868 (lr=4.4706e-05) (hash(x)=49768071) +12900 val loss 6.3164 +12900 val perplexity 553.5958 +12900 train 6.214180 (lr=8.6314e-05) (hash(x)=45065917) +12000 val loss 6.2708 +12000 val perplexity 528.8857 +12000 train 5.941896 (lr=6.1668e-05) (hash(x)=43777764) +11400 val loss 6.3069 +11400 val perplexity 548.3644 +11400 train 6.124208 (lr=4.4614e-05) (hash(x)=44825330) +13000 val loss 6.3286 +13000 val perplexity 560.3442 +13000 train 6.285290 (lr=8.6110e-05) (hash(x)=50001095) +12100 val loss 6.2643 +12100 val perplexity 525.4525 +12100 train 6.130254 (lr=6.1533e-05) (hash(x)=45400058) +11500 val loss 6.2812 +11500 val perplexity 534.4171 +11500 train 6.126912 (lr=4.4521e-05) (hash(x)=47923000) +13100 val loss 6.3240 +13100 val perplexity 557.8164 +13100 train 6.330221 (lr=8.5904e-05) (hash(x)=50790791) +12200 val loss 6.2461 +12200 val perplexity 516.0073 +12200 train 6.267683 (lr=6.1397e-05) (hash(x)=48249623) +11600 val loss 6.2803 +11600 val perplexity 533.9229 +11600 train 6.432261 (lr=4.4428e-05) (hash(x)=54438279) +13200 val loss 6.3360 +13200 val perplexity 564.5304 +13200 train 6.010869 (lr=8.5697e-05) (hash(x)=48219262) +12300 val loss 6.2526 +12300 val perplexity 519.3716 +12300 train 6.331329 (lr=6.1260e-05) (hash(x)=46027842) +11700 val loss 6.2676 +11700 val perplexity 527.2144 +11700 train 6.399320 (lr=4.4334e-05) (hash(x)=52221804) +13300 val loss 6.3244 +13300 val perplexity 558.0354 +13300 train 6.008586 (lr=8.5489e-05) (hash(x)=44618283) +12400 val loss 6.2559 +12400 val perplexity 521.0679 +12400 train 6.230907 (lr=6.1122e-05) (hash(x)=52450928) +13400 val loss 6.3295 +13400 val perplexity 560.8917 +13400 train 5.896322 (lr=8.5279e-05) (hash(x)=47358268) +11800 val loss 6.2568 +11800 val perplexity 521.5240 +11800 train 6.299860 (lr=4.4240e-05) (hash(x)=56224922) +12500 val loss 6.2426 +12500 val perplexity 514.2000 +12500 train 6.262283 (lr=6.0984e-05) (hash(x)=49013162) +13500 val loss 6.3351 +13500 val perplexity 564.0213 +13500 train 6.050931 (lr=8.5069e-05) (hash(x)=51257598) +11900 val loss 6.2533 +11900 val perplexity 519.7339 +11900 train 6.269313 (lr=4.4145e-05) (hash(x)=48424178) +12600 val loss 6.2428 +12600 val perplexity 514.3055 +12600 train 6.063722 (lr=6.0844e-05) (hash(x)=47111570) +13600 val loss 6.3584 +13600 val perplexity 577.2943 +13600 train 6.320657 (lr=8.4857e-05) (hash(x)=50136230) +12700 val loss 6.2437 +12700 val perplexity 514.7748 +12700 train 6.256295 (lr=6.0703e-05) (hash(x)=49555316) +12000 val loss 6.2539 +12000 val perplexity 520.0192 +12000 train 5.911357 (lr=4.4049e-05) (hash(x)=43777764) +13700 val loss 6.3369 +13700 val perplexity 565.0488 +13700 train 6.151567 (lr=8.4644e-05) (hash(x)=42107291) +12800 val loss 6.2530 +12800 val perplexity 519.5899 +12800 train 6.354881 (lr=6.0562e-05) (hash(x)=52979217) +12100 val loss 6.2436 +12100 val perplexity 514.6960 +12100 train 6.094068 (lr=4.3952e-05) (hash(x)=45400058) +13800 val loss 6.3254 +13800 val perplexity 558.5819 +13800 train 6.263063 (lr=8.4430e-05) (hash(x)=48954460) +12900 val loss 6.2371 +12900 val perplexity 511.3949 +12900 train 6.121927 (lr=6.0420e-05) (hash(x)=45065917) +12200 val loss 6.2529 +12200 val perplexity 519.5089 +12200 train 6.278914 (lr=4.3855e-05) (hash(x)=48249623) +13900 val loss 6.3189 +13900 val perplexity 554.9599 +13900 train 6.458710 (lr=8.4214e-05) (hash(x)=55095079) +13000 val loss 6.2384 +13000 val perplexity 512.0415 +13000 train 6.205297 (lr=6.0277e-05) (hash(x)=50001095) +12300 val loss 6.2445 +12300 val perplexity 515.1473 +12300 train 6.310076 (lr=4.3757e-05) (hash(x)=46027842) +14000 val loss 6.3233 +14000 val perplexity 557.3982 +14000 train 6.482796 (lr=8.3998e-05) (hash(x)=51056080) +13100 val loss 6.2426 +13100 val perplexity 514.2123 +13100 train 6.267772 (lr=6.0133e-05) (hash(x)=50790791) +12400 val loss 6.2432 +12400 val perplexity 514.5027 +12400 train 6.241460 (lr=4.3659e-05) (hash(x)=52450928) +14100 val loss 6.3012 +14100 val perplexity 545.2481 +14100 train 6.445115 (lr=8.3780e-05) (hash(x)=54360223) +13200 val loss 6.2417 +13200 val perplexity 513.7547 +13200 train 5.921969 (lr=5.9988e-05) (hash(x)=48219262) +12500 val loss 6.2370 +12500 val perplexity 511.3459 +12500 train 6.266775 (lr=4.3560e-05) (hash(x)=49013162) +14200 val loss 6.2865 +14200 val perplexity 537.2568 +14200 train 6.175780 (lr=8.3561e-05) (hash(x)=47798999) +13300 val loss 6.2357 +13300 val perplexity 510.6719 +13300 train 5.942407 (lr=5.9842e-05) (hash(x)=44618283) +12600 val loss 6.2456 +12600 val perplexity 515.7429 +12600 train 6.055700 (lr=4.3460e-05) (hash(x)=47111570) +14300 val loss 6.2690 +14300 val perplexity 527.9459 +14300 train 6.919203 (lr=8.3341e-05) (hash(x)=52507818) +13400 val loss 6.2512 +13400 val perplexity 518.6438 +13400 train 5.792593 (lr=5.9695e-05) (hash(x)=47358268) +14400 val loss 6.2662 +14400 val perplexity 526.4896 +12700 val loss 6.2209 +12700 val perplexity 503.1422 +14400 train 6.160919 (lr=8.3120e-05) (hash(x)=46397021) +12700 train 6.246767 (lr=4.3360e-05) (hash(x)=49555316) +13500 val loss 6.2548 +13500 val perplexity 520.4948 +13500 train 5.971861 (lr=5.9548e-05) (hash(x)=51257598) +14500 val loss 6.2694 +14500 val perplexity 528.1348 +14500 train 6.216896 (lr=8.2898e-05) (hash(x)=45287760) +12800 val loss 6.2286 +12800 val perplexity 507.0580 +12800 train 6.341779 (lr=4.3259e-05) (hash(x)=52979217) +13600 val loss 6.2553 +13600 val perplexity 520.7875 +13600 train 6.220039 (lr=5.9400e-05) (hash(x)=50136230) +14600 val loss 6.2567 +14600 val perplexity 521.4820 +14600 train 6.130570 (lr=8.2675e-05) (hash(x)=43800709) +12900 val loss 6.2283 +12900 val perplexity 506.9061 +12900 train 6.128829 (lr=4.3157e-05) (hash(x)=45065917) +13700 val loss 6.2503 +13700 val perplexity 518.1618 +13700 train 6.130480 (lr=5.9251e-05) (hash(x)=42107291) +14700 val loss 6.2803 +14700 val perplexity 533.9303 +14700 train 6.678439 (lr=8.2451e-05) (hash(x)=48830076) +13000 val loss 6.2187 +13000 val perplexity 502.0652 +13000 train 6.184701 (lr=4.3055e-05) (hash(x)=50001095) +13800 val loss 6.2261 +13800 val perplexity 505.7779 +13800 train 6.159091 (lr=5.9101e-05) (hash(x)=48954460) +14800 val loss 6.2665 +14800 val perplexity 526.6156 +14800 train 6.460537 (lr=8.2225e-05) (hash(x)=50972325) +13100 val loss 6.2132 +13100 val perplexity 499.2741 +13100 train 6.245650 (lr=4.2952e-05) (hash(x)=50790791) +13900 val loss 6.2355 +13900 val perplexity 510.5735 +13900 train 6.367650 (lr=5.8950e-05) (hash(x)=55095079) +14900 val loss 6.2593 +14900 val perplexity 522.8469 +14900 train 6.228427 (lr=8.1998e-05) (hash(x)=48405555) +13200 val loss 6.2192 +13200 val perplexity 502.3260 +13200 train 5.890996 (lr=4.2848e-05) (hash(x)=48219262) +14000 val loss 6.2070 +14000 val perplexity 496.1873 +14000 train 6.371080 (lr=5.8799e-05) (hash(x)=51056080) +15000 val loss 6.2446 +15000 val perplexity 515.2245 +15000 train 6.339480 (lr=8.1771e-05) (hash(x)=56833393) +13300 val loss 6.2202 +13300 val perplexity 502.8127 +13300 train 5.912935 (lr=4.2744e-05) (hash(x)=44618283) +14100 val loss 6.2021 +14100 val perplexity 493.7900 +14100 train 6.327524 (lr=5.8646e-05) (hash(x)=54360223) +15100 val loss 6.2542 +15100 val perplexity 520.1876 +15100 train 5.980985 (lr=8.1542e-05) (hash(x)=45305882) +13400 val loss 6.2234 +13400 val perplexity 504.4079 +13400 train 5.755128 (lr=4.2640e-05) (hash(x)=47358268) +14200 val loss 6.1930 +14200 val perplexity 489.3155 +14200 train 6.092063 (lr=5.8493e-05) (hash(x)=47798999) +15200 val loss 6.2475 +15200 val perplexity 516.7031 +15200 train 6.409235 (lr=8.1312e-05) (hash(x)=51788382) +13500 val loss 6.2110 +13500 val perplexity 498.1802 +13500 train 5.924446 (lr=4.2534e-05) (hash(x)=51257598) +14300 val loss 6.1935 +14300 val perplexity 489.5615 +14300 train 6.850293 (lr=5.8339e-05) (hash(x)=52507818) +15300 val loss 6.2695 +15300 val perplexity 528.2030 +15300 train 5.996936 (lr=8.1082e-05) (hash(x)=47167384) +13600 val loss 6.2255 +13600 val perplexity 505.4575 +13600 train 6.187968 (lr=4.2428e-05) (hash(x)=50136230) +14400 val loss 6.1927 +14400 val perplexity 489.1818 +14400 train 6.076591 (lr=5.8184e-05) (hash(x)=46397021) +15400 val loss 6.2532 +15400 val perplexity 519.6954 +15400 train 6.133811 (lr=8.0850e-05) (hash(x)=49618676) +13700 val loss 6.2100 +13700 val perplexity 497.6951 +13700 train 6.062501 (lr=4.2322e-05) (hash(x)=42107291) +14500 val loss 6.1932 +14500 val perplexity 489.3923 +14500 train 6.135532 (lr=5.8029e-05) (hash(x)=45287760) +15500 val loss 6.2827 +15500 val perplexity 535.2444 +15500 train 6.313911 (lr=8.0617e-05) (hash(x)=52917163) +13800 val loss 6.2008 +13800 val perplexity 493.1311 +13800 train 6.133417 (lr=4.2215e-05) (hash(x)=48954460) +14600 val loss 6.1988 +14600 val perplexity 492.1476 +14600 train 6.075559 (lr=5.7872e-05) (hash(x)=43800709) +15600 val loss 6.2696 +15600 val perplexity 528.2620 +15600 train 5.885012 (lr=8.0383e-05) (hash(x)=48982912) +14700 val loss 6.1895 +14700 val perplexity 487.6022 +14700 train 6.724518 (lr=5.7715e-05) (hash(x)=48830076) +13900 val loss 6.1980 +13900 val perplexity 491.7605 +13900 train 6.349672 (lr=4.2107e-05) (hash(x)=55095079) +15700 val loss 6.2694 +15700 val perplexity 528.1771 +15700 train 5.941476 (lr=8.0148e-05) (hash(x)=47744851) +14800 val loss 6.1740 +14800 val perplexity 480.1193 +14800 train 6.389884 (lr=5.7558e-05) (hash(x)=50972325) +14000 val loss 6.1733 +14000 val perplexity 479.7833 +14000 train 6.365474 (lr=4.1999e-05) (hash(x)=51056080) +15800 val loss 6.2424 +15800 val perplexity 514.1122 +15800 train 6.165201 (lr=7.9912e-05) (hash(x)=49110208) +14900 val loss 6.1774 +14900 val perplexity 481.7503 +14900 train 6.147580 (lr=5.7399e-05) (hash(x)=48405555) +14100 val loss 6.1699 +14100 val perplexity 478.1175 +14100 train 6.322916 (lr=4.1890e-05) (hash(x)=54360223) +15900 val loss 6.2558 +15900 val perplexity 521.0473 +15900 train 6.240242 (lr=7.9675e-05) (hash(x)=54936056) +15000 val loss 6.1697 +15000 val perplexity 478.0475 +15000 train 6.257786 (lr=5.7240e-05) (hash(x)=56833393) +14200 val loss 6.1651 +14200 val perplexity 475.8318 +14200 train 6.079196 (lr=4.1781e-05) (hash(x)=47798999) +16000 val loss 6.2675 +16000 val perplexity 527.1827 +16000 train 6.009063 (lr=7.9437e-05) (hash(x)=50672725) +15100 val loss 6.1859 +15100 val perplexity 485.8390 +15100 train 5.934335 (lr=5.7079e-05) (hash(x)=45305882) +16100 val loss 6.2848 +16100 val perplexity 536.3506 +16100 train 6.427574 (lr=7.9198e-05) (hash(x)=53551267) +14300 val loss 6.1653 +14300 val perplexity 475.9575 +14300 train 6.850716 (lr=4.1671e-05) (hash(x)=52507818) +15200 val loss 6.1781 +15200 val perplexity 482.0568 +15200 train 6.327695 (lr=5.6919e-05) (hash(x)=51788382) +16200 val loss 6.2744 +16200 val perplexity 530.7889 +16200 train 6.056026 (lr=7.8959e-05) (hash(x)=48583391) +14400 val loss 6.1652 +14400 val perplexity 475.8726 +14400 train 6.053350 (lr=4.1560e-05) (hash(x)=46397021) +15300 val loss 6.1892 +15300 val perplexity 487.4771 +15300 train 5.923156 (lr=5.6757e-05) (hash(x)=47167384) +16300 val loss 6.2651 +16300 val perplexity 525.8939 +16300 train 6.275931 (lr=7.8718e-05) (hash(x)=53347671) +14500 val loss 6.1604 +14500 val perplexity 473.6183 +14500 train 6.102114 (lr=4.1449e-05) (hash(x)=45287760) +15400 val loss 6.1689 +15400 val perplexity 477.6488 +15400 train 6.043196 (lr=5.6595e-05) (hash(x)=49618676) +16400 val loss 6.2415 +16400 val perplexity 513.6060 +16400 train 6.257770 (lr=7.8476e-05) (hash(x)=48869650) +14600 val loss 6.1592 +14600 val perplexity 473.0383 +14600 train 6.037646 (lr=4.1337e-05) (hash(x)=43800709) +15500 val loss 6.1835 +15500 val perplexity 484.6613 +15500 train 6.183216 (lr=5.6432e-05) (hash(x)=52917163) +16500 val loss 6.2435 +16500 val perplexity 514.6379 +16500 train 6.120698 (lr=7.8233e-05) (hash(x)=47065719) +14700 val loss 6.1613 +14700 val perplexity 474.0244 +14700 train 6.651090 (lr=4.1225e-05) (hash(x)=48830076) +15600 val loss 6.2063 +15600 val perplexity 495.8761 +15600 train 5.821689 (lr=5.6268e-05) (hash(x)=48982912) +16600 val loss 6.2346 +16600 val perplexity 510.0954 +16600 train 6.445741 (lr=7.7990e-05) (hash(x)=45114119) +14800 val loss 6.1683 +14800 val perplexity 477.3874 +14800 train 6.382801 (lr=4.1113e-05) (hash(x)=50972325) +15700 val loss 6.1739 +15700 val perplexity 480.0710 +15700 train 5.875499 (lr=5.6104e-05) (hash(x)=47744851) +16700 val loss 6.2311 +16700 val perplexity 508.3282 +16700 train 6.147142 (lr=7.7745e-05) (hash(x)=50876875) +14900 val loss 6.1578 +14900 val perplexity 472.3707 +14900 train 6.125040 (lr=4.0999e-05) (hash(x)=48405555) +15800 val loss 6.1699 +15800 val perplexity 478.1342 +15800 train 6.076444 (lr=5.5938e-05) (hash(x)=49110208) +16800 val loss 6.2341 +16800 val perplexity 509.8490 +16800 train 6.094985 (lr=7.7500e-05) (hash(x)=44462298) +15000 val loss 6.1623 +15000 val perplexity 474.5140 +15000 train 6.274699 (lr=4.0885e-05) (hash(x)=56833393) +15900 val loss 6.1823 +15900 val perplexity 484.1205 +15900 train 6.139167 (lr=5.5773e-05) (hash(x)=54936056) +16900 val loss 6.2454 +16900 val perplexity 515.6467 +16900 train 6.342187 (lr=7.7254e-05) (hash(x)=50199238) +16000 val loss 6.1916 +16000 val perplexity 488.6496 +16000 train 5.936776 (lr=5.5606e-05) (hash(x)=50672725) +15100 val loss 6.1556 +15100 val perplexity 471.3685 +15100 train 5.890138 (lr=4.0771e-05) (hash(x)=45305882) +17000 val loss 6.2428 +17000 val perplexity 514.3160 +17000 train 6.215977 (lr=7.7007e-05) (hash(x)=53419360) +16100 val loss 6.1853 +16100 val perplexity 485.5771 +16100 train 6.334539 (lr=5.5439e-05) (hash(x)=53551267) +15200 val loss 6.1538 +15200 val perplexity 470.5050 +15200 train 6.333811 (lr=4.0656e-05) (hash(x)=51788382) +17100 val loss 6.2870 +17100 val perplexity 537.5348 +17100 train 6.192267 (lr=7.6758e-05) (hash(x)=49594197) +16200 val loss 6.1804 +16200 val perplexity 483.1817 +16200 train 5.956752 (lr=5.5271e-05) (hash(x)=48583391) +15300 val loss 6.1553 +15300 val perplexity 471.1907 +15300 train 5.891527 (lr=4.0541e-05) (hash(x)=47167384) +17200 val loss 6.2530 +17200 val perplexity 519.5584 +17200 train 6.190289 (lr=7.6510e-05) (hash(x)=49844973) +16300 val loss 6.1557 +16300 val perplexity 471.3799 +16300 train 6.154063 (lr=5.5102e-05) (hash(x)=53347671) +17300 val loss 6.2259 +17300 val perplexity 505.6614 +17300 train 6.218332 (lr=7.6260e-05) (hash(x)=46087936) +15400 val loss 6.1429 +15400 val perplexity 465.3926 +15400 train 6.011472 (lr=4.0425e-05) (hash(x)=49618676) +16400 val loss 6.1575 +16400 val perplexity 472.2405 +16400 train 6.194392 (lr=5.4933e-05) (hash(x)=48869650) +17400 val loss 6.2448 +17400 val perplexity 515.3048 +17400 train 6.442092 (lr=7.6009e-05) (hash(x)=51117461) +15500 val loss 6.1471 +15500 val perplexity 467.3795 +15500 train 6.173469 (lr=4.0308e-05) (hash(x)=52917163) +16500 val loss 6.1645 +16500 val perplexity 475.5763 +16500 train 6.061948 (lr=5.4763e-05) (hash(x)=47065719) +17500 val loss 6.2453 +17500 val perplexity 515.6044 +17500 train 6.660619 (lr=7.5758e-05) (hash(x)=56114885) +15600 val loss 6.1569 +15600 val perplexity 471.9854 +15600 train 5.796254 (lr=4.0191e-05) (hash(x)=48982912) +16600 val loss 6.1461 +16600 val perplexity 466.8703 +16600 train 6.374580 (lr=5.4593e-05) (hash(x)=45114119) +17600 val loss 6.2538 +17600 val perplexity 519.9639 +17600 train 6.103666 (lr=7.5505e-05) (hash(x)=48553957) +15700 val loss 6.1419 +15700 val perplexity 464.9157 +15700 train 5.834342 (lr=4.0074e-05) (hash(x)=47744851) +16700 val loss 6.1453 +16700 val perplexity 466.5067 +16700 train 6.041038 (lr=5.4422e-05) (hash(x)=50876875) +17700 val loss 6.2644 +17700 val perplexity 525.5197 +17700 train 6.070863 (lr=7.5252e-05) (hash(x)=47104071) +15800 val loss 6.1289 +15800 val perplexity 458.9337 +15800 train 6.029960 (lr=3.9956e-05) (hash(x)=49110208) +16800 val loss 6.1584 +16800 val perplexity 472.6588 +16800 train 6.021567 (lr=5.4250e-05) (hash(x)=44462298) +17800 val loss 6.2764 +17800 val perplexity 531.8454 +17800 train 6.058308 (lr=7.4998e-05) (hash(x)=47700580) +15900 val loss 6.1355 +15900 val perplexity 461.9583 +15900 train 6.114843 (lr=3.9838e-05) (hash(x)=54936056) +16900 val loss 6.1304 +16900 val perplexity 459.5984 +16900 train 6.255143 (lr=5.4078e-05) (hash(x)=50199238) +17900 val loss 6.2790 +17900 val perplexity 533.2805 +17900 train 6.324335 (lr=7.4744e-05) (hash(x)=51592695) +16000 val loss 6.1388 +16000 val perplexity 463.4922 +16000 train 5.890932 (lr=3.9719e-05) (hash(x)=50672725) +18000 val loss 6.2561 +18000 val perplexity 521.1583 +18000 train 5.979774 (lr=7.4488e-05) (hash(x)=49564057) +17000 val loss 6.1309 +17000 val perplexity 459.8375 +17000 train 6.097711 (lr=5.3905e-05) (hash(x)=53419360) +16100 val loss 6.1366 +16100 val perplexity 462.4644 +16100 train 6.291780 (lr=3.9599e-05) (hash(x)=53551267) +18100 val loss 6.3057 +18100 val perplexity 547.6952 +18100 train 6.317039 (lr=7.4232e-05) (hash(x)=62327245) +17100 val loss 6.1431 +17100 val perplexity 465.4827 +17100 train 6.060675 (lr=5.3731e-05) (hash(x)=49594197) +16200 val loss 6.1307 +16200 val perplexity 459.7570 +16200 train 5.930923 (lr=3.9479e-05) (hash(x)=48583391) +18200 val loss 6.2823 +18200 val perplexity 535.0313 +18200 train 6.083569 (lr=7.3975e-05) (hash(x)=49515314) +17200 val loss 6.1452 +17200 val perplexity 466.4606 +17200 train 6.092839 (lr=5.3557e-05) (hash(x)=49844973) +16300 val loss 6.1273 +16300 val perplexity 458.2191 +16300 train 6.131173 (lr=3.9359e-05) (hash(x)=53347671) +18300 val loss 6.2961 +18300 val perplexity 542.4391 +18300 train 6.081135 (lr=7.3717e-05) (hash(x)=47161038) +17300 val loss 6.1261 +17300 val perplexity 457.6477 +17300 train 6.099728 (lr=5.3382e-05) (hash(x)=46087936) +18400 val loss 6.2821 +18400 val perplexity 534.8964 +18400 train 6.333702 (lr=7.3459e-05) (hash(x)=50557702) +16400 val loss 6.1200 +16400 val perplexity 454.8473 +16400 train 6.149902 (lr=3.9238e-05) (hash(x)=48869650) +17400 val loss 6.1275 +17400 val perplexity 458.3054 +17400 train 6.357106 (lr=5.3206e-05) (hash(x)=51117461) +18500 val loss 6.2943 +18500 val perplexity 541.4602 +18500 train 6.006214 (lr=7.3199e-05) (hash(x)=48089267) +16500 val loss 6.1180 +16500 val perplexity 453.9344 +16500 train 6.021972 (lr=3.9117e-05) (hash(x)=47065719) +17500 val loss 6.1416 +17500 val perplexity 464.8011 +17500 train 6.578073 (lr=5.3030e-05) (hash(x)=56114885) +18600 val loss 6.2541 +18600 val perplexity 520.1402 +18600 train 6.032664 (lr=7.2939e-05) (hash(x)=51754176) +16600 val loss 6.1049 +16600 val perplexity 448.0405 +16600 train 6.339742 (lr=3.8995e-05) (hash(x)=45114119) +17600 val loss 6.1223 +17600 val perplexity 455.8961 +17600 train 5.979539 (lr=5.2854e-05) (hash(x)=48553957) +18700 val loss 6.2385 +18700 val perplexity 512.1147 +18700 train 6.146007 (lr=7.2679e-05) (hash(x)=53780391) +16700 val loss 6.0934 +16700 val perplexity 442.9100 +17700 val loss 6.1506 +17700 val perplexity 468.9987 +16700 train 5.994986 (lr=3.8873e-05) (hash(x)=50876875) +17700 train 5.939078 (lr=5.2677e-05) (hash(x)=47104071) +18800 val loss 6.2396 +18800 val perplexity 512.6584 +18800 train 6.408882 (lr=7.2417e-05) (hash(x)=55893758) +17800 val loss 6.1395 +17800 val perplexity 463.8409 +17800 train 5.899301 (lr=5.2499e-05) (hash(x)=47700580) +16800 val loss 6.1072 +16800 val perplexity 449.0772 +16800 train 5.973709 (lr=3.8750e-05) (hash(x)=44462298) +18900 val loss 6.2435 +18900 val perplexity 514.6337 +18900 train 6.175440 (lr=7.2155e-05) (hash(x)=40042039) +17900 val loss 6.1377 +17900 val perplexity 462.9979 +17900 train 6.174848 (lr=5.2321e-05) (hash(x)=51592695) +16900 val loss 6.0912 +16900 val perplexity 441.9689 +16900 train 6.220161 (lr=3.8627e-05) (hash(x)=50199238) +19000 val loss 6.2168 +19000 val perplexity 501.0884 +19000 train 5.789830 (lr=7.1892e-05) (hash(x)=42042761) +18000 val loss 6.1265 +18000 val perplexity 457.8474 +18000 train 5.876102 (lr=5.2142e-05) (hash(x)=49564057) +17000 val loss 6.0882 +17000 val perplexity 440.6490 +17000 train 6.067819 (lr=3.8503e-05) (hash(x)=53419360) +19100 val loss 6.2055 +19100 val perplexity 495.4785 +19100 train 6.693513 (lr=7.1629e-05) (hash(x)=65156250) +18100 val loss 6.1619 +18100 val perplexity 474.3282 +18100 train 6.165946 (lr=5.1962e-05) (hash(x)=62327245) +17100 val loss 6.0962 +17100 val perplexity 444.1568 +17100 train 5.997578 (lr=3.8379e-05) (hash(x)=49594197) +19200 val loss 6.2055 +19200 val perplexity 495.4535 +19200 train 6.322432 (lr=7.1365e-05) (hash(x)=49335995) +18200 val loss 6.1369 +18200 val perplexity 462.6190 +18200 train 5.921450 (lr=5.1782e-05) (hash(x)=49515314) +17200 val loss 6.1099 +17200 val perplexity 450.2782 +17200 train 6.063042 (lr=3.8255e-05) (hash(x)=49844973) +19300 val loss 6.2677 +19300 val perplexity 527.2568 +19300 train 6.051413 (lr=7.1100e-05) (hash(x)=43136846) +18300 val loss 6.1355 +18300 val perplexity 461.9896 +18300 train 5.908185 (lr=5.1602e-05) (hash(x)=47161038) +17300 val loss 6.0830 +17300 val perplexity 438.3377 +17300 train 6.073275 (lr=3.8130e-05) (hash(x)=46087936) +19400 val loss 6.2263 +19400 val perplexity 505.8830 +19400 train 6.246346 (lr=7.0835e-05) (hash(x)=56752517) +18400 val loss 6.1358 +18400 val perplexity 462.1247 +18400 train 6.218168 (lr=5.1421e-05) (hash(x)=50557702) +17400 val loss 6.0849 +17400 val perplexity 439.1905 +17400 train 6.307970 (lr=3.8005e-05) (hash(x)=51117461) +19500 val loss 6.2109 +19500 val perplexity 498.1614 +19500 train 6.101372 (lr=7.0569e-05) (hash(x)=48636220) +18500 val loss 6.1427 +18500 val perplexity 465.3114 +18500 train 5.862716 (lr=5.1240e-05) (hash(x)=48089267) +17500 val loss 6.0909 +17500 val perplexity 441.8323 +17500 train 6.535150 (lr=3.7879e-05) (hash(x)=56114885) +19600 val loss 6.2141 +19600 val perplexity 499.7660 +19600 train 6.341275 (lr=7.0302e-05) (hash(x)=51384134) +18600 val loss 6.1613 +18600 val perplexity 474.0325 +18600 train 5.916649 (lr=5.1058e-05) (hash(x)=51754176) +17600 val loss 6.0784 +17600 val perplexity 436.3499 +17600 train 5.919236 (lr=3.7753e-05) (hash(x)=48553957) +19700 val loss 6.2128 +19700 val perplexity 499.1006 +19700 train 6.108931 (lr=7.0035e-05) (hash(x)=51035878) +18700 val loss 6.1296 +18700 val perplexity 459.2522 +18700 train 6.064434 (lr=5.0875e-05) (hash(x)=53780391) +17700 val loss 6.0767 +17700 val perplexity 435.6024 +19800 val loss 6.2126 +19800 val perplexity 498.9840 +19800 train 6.077251 (lr=6.9767e-05) (hash(x)=49445049) +17700 train 5.879614 (lr=3.7626e-05) (hash(x)=47104071) +18800 val loss 6.1183 +18800 val perplexity 454.0735 +18800 train 6.284585 (lr=5.0692e-05) (hash(x)=55893758) +19900 val loss 6.2128 +19900 val perplexity 499.0813 +19900 train 6.075774 (lr=6.9498e-05) (hash(x)=50392920) +17800 val loss 6.0874 +17800 val perplexity 440.2626 +17800 train 5.840871 (lr=3.7499e-05) (hash(x)=47700580) +18900 val loss 6.1242 +18900 val perplexity 456.7891 +18900 train 6.053334 (lr=5.0509e-05) (hash(x)=40042039) +20000 val loss 6.2081 +20000 val perplexity 496.7500 +20000 train 6.221522 (lr=6.9229e-05) (hash(x)=48751803) +17900 val loss 6.0715 +17900 val perplexity 433.3167 +19000 val loss 6.1117 +19000 val perplexity 451.1258 +17900 train 6.108027 (lr=3.7372e-05) (hash(x)=51592695) +19000 train 5.712022 (lr=5.0325e-05) (hash(x)=42042761) +20100 val loss 6.2396 +20100 val perplexity 512.6290 +20100 train 6.295040 (lr=6.8960e-05) (hash(x)=51953753) +19100 val loss 6.1029 +19100 val perplexity 447.1473 +19100 train 6.628739 (lr=5.0140e-05) (hash(x)=65156250) +18000 val loss 6.0719 +18000 val perplexity 433.5180 +18000 train 5.802731 (lr=3.7244e-05) (hash(x)=49564057) +20200 val loss 6.1897 +20200 val perplexity 487.7208 +20200 train 6.011936 (lr=6.8690e-05) (hash(x)=45426126) +19200 val loss 6.1080 +19200 val perplexity 449.4311 +19200 train 6.212978 (lr=4.9955e-05) (hash(x)=49335995) +18100 val loss 6.0945 +18100 val perplexity 443.4146 +18100 train 6.098621 (lr=3.7116e-05) (hash(x)=62327245) +20300 val loss 6.2015 +20300 val perplexity 493.4689 +20300 train 6.400400 (lr=6.8419e-05) (hash(x)=54319573) +19300 val loss 6.1450 +19300 val perplexity 466.3567 +19300 train 5.944755 (lr=4.9770e-05) (hash(x)=43136846) +18200 val loss 6.0712 +18200 val perplexity 433.1884 +18200 train 5.863793 (lr=3.6987e-05) (hash(x)=49515314) +20400 val loss 6.2131 +20400 val perplexity 499.2615 +20400 train 6.272861 (lr=6.8148e-05) (hash(x)=47063446) +19400 val loss 6.1025 +19400 val perplexity 446.9559 +19400 train 6.090486 (lr=4.9584e-05) (hash(x)=56752517) +18300 val loss 6.0789 +18300 val perplexity 436.5687 +18300 train 5.853120 (lr=3.6859e-05) (hash(x)=47161038) +20500 val loss 6.2192 +20500 val perplexity 502.3228 +20500 train 6.288722 (lr=6.7876e-05) (hash(x)=45471457) +19500 val loss 6.1133 +19500 val perplexity 451.8130 +19500 train 6.026237 (lr=4.9398e-05) (hash(x)=48636220) +18400 val loss 6.0720 +18400 val perplexity 433.5525 +18400 train 6.157125 (lr=3.6729e-05) (hash(x)=50557702) +20600 val loss 6.2156 +20600 val perplexity 500.5084 +20600 train 6.116632 (lr=6.7604e-05) (hash(x)=46388051) +19600 val loss 6.1042 +19600 val perplexity 447.7404 +19600 train 6.247583 (lr=4.9211e-05) (hash(x)=51384134) +18500 val loss 6.0772 +18500 val perplexity 435.8242 +18500 train 5.807039 (lr=3.6600e-05) (hash(x)=48089267) +20700 val loss 6.2356 +20700 val perplexity 510.6264 +20700 train 6.426775 (lr=6.7331e-05) (hash(x)=52392468) +19700 val loss 6.1066 +19700 val perplexity 448.8024 +19700 train 5.995576 (lr=4.9024e-05) (hash(x)=51035878) +18600 val loss 6.0818 +18600 val perplexity 437.8040 +18600 train 5.835917 (lr=3.6470e-05) (hash(x)=51754176) +20800 val loss 6.1823 +20800 val perplexity 484.0857 +20800 train 5.827569 (lr=6.7058e-05) (hash(x)=41360694) +19800 val loss 6.1347 +19800 val perplexity 461.5928 +19800 train 6.003241 (lr=4.8837e-05) (hash(x)=49445049) +18700 val loss 6.0629 +18700 val perplexity 429.6390 +18700 train 5.987162 (lr=3.6339e-05) (hash(x)=53780391) +20900 val loss 6.2006 +20900 val perplexity 493.0554 +20900 train 6.312609 (lr=6.6784e-05) (hash(x)=45677814) +19900 val loss 6.1238 +19900 val perplexity 456.5966 +19900 train 5.987473 (lr=4.8649e-05) (hash(x)=50392920) +18800 val loss 6.0548 +18800 val perplexity 426.1504 +18800 train 6.212285 (lr=3.6209e-05) (hash(x)=55893758) +21000 val loss 6.2004 +21000 val perplexity 492.9304 +21000 train 6.068942 (lr=6.6510e-05) (hash(x)=47366348) +20000 val loss 6.1109 +20000 val perplexity 450.7543 +20000 train 6.132699 (lr=4.8461e-05) (hash(x)=48751803) +21100 val loss 6.2030 +21100 val perplexity 494.2060 +21100 train 6.203363 (lr=6.6235e-05) (hash(x)=51212611) +18900 val loss 6.0562 +18900 val perplexity 426.7650 +18900 train 5.977198 (lr=3.6078e-05) (hash(x)=40042039) +20100 val loss 6.1519 +20100 val perplexity 469.6217 +20100 train 6.206024 (lr=4.8272e-05) (hash(x)=51953753) +21200 val loss 6.2010 +21200 val perplexity 493.2381 +21200 train 6.095848 (lr=6.5960e-05) (hash(x)=50634699) +19000 val loss 6.0354 +19000 val perplexity 417.9707 +19000 train 5.619792 (lr=3.5946e-05) (hash(x)=42042761) +20200 val loss 6.1028 +20200 val perplexity 447.1298 +20200 train 5.956930 (lr=4.8083e-05) (hash(x)=45426126) +21300 val loss 6.2074 +21300 val perplexity 496.4223 +21300 train 6.016956 (lr=6.5684e-05) (hash(x)=46312642) +19100 val loss 6.0344 +19100 val perplexity 417.5637 +19100 train 6.541178 (lr=3.5814e-05) (hash(x)=65156250) +20300 val loss 6.0956 +20300 val perplexity 443.9171 +20300 train 6.286335 (lr=4.7893e-05) (hash(x)=54319573) +21400 val loss 6.2123 +21400 val perplexity 498.8298 +21400 train 6.004327 (lr=6.5408e-05) (hash(x)=47897143) +19200 val loss 6.0317 +19200 val perplexity 416.4428 +19200 train 6.113360 (lr=3.5682e-05) (hash(x)=49335995) +20400 val loss 6.1197 +20400 val perplexity 454.7104 +20400 train 6.174566 (lr=4.7703e-05) (hash(x)=47063446) +21500 val loss 6.2203 +21500 val perplexity 502.8573 +21500 train 6.116334 (lr=6.5132e-05) (hash(x)=48575558) +19300 val loss 6.0598 +19300 val perplexity 428.2739 +19300 train 5.907915 (lr=3.5550e-05) (hash(x)=43136846) +20500 val loss 6.0939 +20500 val perplexity 443.1610 +20500 train 6.169994 (lr=4.7513e-05) (hash(x)=45471457) +21600 val loss 6.2154 +21600 val perplexity 500.3976 +21600 train 6.264028 (lr=6.4855e-05) (hash(x)=50213016) +19400 val loss 6.0280 +19400 val perplexity 414.8974 +19400 train 6.036599 (lr=3.5417e-05) (hash(x)=56752517) +20600 val loss 6.1221 +20600 val perplexity 455.8435 +20600 train 6.031940 (lr=4.7323e-05) (hash(x)=46388051) +21700 val loss 6.2074 +21700 val perplexity 496.4334 +21700 train 5.981525 (lr=6.4578e-05) (hash(x)=46495383) +19500 val loss 6.0405 +19500 val perplexity 420.0873 +19500 train 5.951756 (lr=3.5284e-05) (hash(x)=48636220) +20700 val loss 6.0989 +20700 val perplexity 445.3514 +20700 train 6.310101 (lr=4.7132e-05) (hash(x)=52392468) +21800 val loss 6.2141 +21800 val perplexity 499.7676 +21800 train 6.096678 (lr=6.4300e-05) (hash(x)=46012376) +20800 val loss 6.1063 +20800 val perplexity 448.6849 +20800 train 5.744279 (lr=4.6940e-05) (hash(x)=41360694) +19600 val loss 6.0314 +19600 val perplexity 416.2836 +19600 train 6.173935 (lr=3.5151e-05) (hash(x)=51384134) +21900 val loss 6.2195 +21900 val perplexity 502.4323 +21900 train 5.997425 (lr=6.4023e-05) (hash(x)=47192863) +20900 val loss 6.1004 +20900 val perplexity 446.0223 +20900 train 6.203234 (lr=4.6749e-05) (hash(x)=45677814) +19700 val loss 6.0245 +19700 val perplexity 413.4191 +19700 train 5.909437 (lr=3.5017e-05) (hash(x)=51035878) +22000 val loss 6.2150 +22000 val perplexity 500.1846 +22000 train 6.163370 (lr=6.3744e-05) (hash(x)=48441371) +21000 val loss 6.1111 +21000 val perplexity 450.8299 +21000 train 6.010984 (lr=4.6557e-05) (hash(x)=47366348) +19800 val loss 6.0235 +19800 val perplexity 413.0013 +19800 train 5.879798 (lr=3.4883e-05) (hash(x)=49445049) +22100 val loss 6.1970 +22100 val perplexity 491.2875 +22100 train 6.042936 (lr=6.3466e-05) (hash(x)=47608136) +21100 val loss 6.0839 +21100 val perplexity 438.7388 +21100 train 6.099598 (lr=4.6365e-05) (hash(x)=51212611) +22200 val loss 6.2128 +22200 val perplexity 499.1072 +22200 train 5.961196 (lr=6.3187e-05) (hash(x)=49878086) +19900 val loss 6.0350 +19900 val perplexity 417.8063 +19900 train 5.902787 (lr=3.4749e-05) (hash(x)=50392920) +21200 val loss 6.0898 +21200 val perplexity 441.3548 +21200 train 5.969763 (lr=4.6172e-05) (hash(x)=50634699) +22300 val loss 6.2030 +22300 val perplexity 494.2444 +22300 train 5.964618 (lr=6.2907e-05) (hash(x)=47412357) +20000 val loss 6.0195 +20000 val perplexity 411.3737 +20000 train 6.045093 (lr=3.4615e-05) (hash(x)=48751803) +21300 val loss 6.0835 +21300 val perplexity 438.5658 +21300 train 5.891289 (lr=4.5979e-05) (hash(x)=46312642) +22400 val loss 6.2056 +22400 val perplexity 495.5281 +22400 train 6.219774 (lr=6.2628e-05) (hash(x)=53490122) +20100 val loss 6.0833 +20100 val perplexity 438.4830 +20100 train 6.141337 (lr=3.4480e-05) (hash(x)=51953753) +21400 val loss 6.0881 +21400 val perplexity 440.5995 +21400 train 5.895478 (lr=4.5786e-05) (hash(x)=47897143) +22500 val loss 6.2006 +22500 val perplexity 493.0364 +22500 train 6.528661 (lr=6.2348e-05) (hash(x)=54719499) +20200 val loss 6.0116 +20200 val perplexity 408.1531 +20200 train 5.854043 (lr=3.4345e-05) (hash(x)=45426126) +21500 val loss 6.1038 +21500 val perplexity 447.5633 +21500 train 6.021411 (lr=4.5592e-05) (hash(x)=48575558) +22600 val loss 6.1947 +22600 val perplexity 490.1417 +22600 train 6.173838 (lr=6.2068e-05) (hash(x)=48357283) +20300 val loss 5.9985 +20300 val perplexity 402.8260 +20300 train 6.214244 (lr=3.4209e-05) (hash(x)=54319573) +21600 val loss 6.1023 +21600 val perplexity 446.8636 +21600 train 6.146419 (lr=4.5399e-05) (hash(x)=50213016) +22700 val loss 6.1753 +22700 val perplexity 480.7339 +22700 train 5.989986 (lr=6.1787e-05) (hash(x)=45320429) +20400 val loss 5.9980 +20400 val perplexity 402.6055 +20400 train 6.063937 (lr=3.4074e-05) (hash(x)=47063446) +21700 val loss 6.0924 +21700 val perplexity 442.4688 +21700 train 5.841685 (lr=4.5205e-05) (hash(x)=46495383) +22800 val loss 6.1922 +22800 val perplexity 488.9213 +22800 train 6.343354 (lr=6.1506e-05) (hash(x)=50298012) +20500 val loss 5.9888 +20500 val perplexity 398.9497 +20500 train 6.082604 (lr=3.3938e-05) (hash(x)=45471457) +21800 val loss 6.1031 +21800 val perplexity 447.2601 +21800 train 5.985949 (lr=4.5010e-05) (hash(x)=46012376) +22900 val loss 6.1654 +22900 val perplexity 475.9718 +22900 train 6.306526 (lr=6.1225e-05) (hash(x)=52461697) +21900 val loss 6.0970 +21900 val perplexity 444.5373 +20600 val loss 5.9961 +20600 val perplexity 401.8475 +21900 train 5.889640 (lr=4.4816e-05) (hash(x)=47192863) +20600 train 5.901207 (lr=3.3802e-05) (hash(x)=46388051) +23000 val loss 6.1760 +23000 val perplexity 481.0781 +23000 train 6.332297 (lr=6.0944e-05) (hash(x)=53400656) +22000 val loss 6.1057 +22000 val perplexity 448.4009 +22000 train 6.068190 (lr=4.4621e-05) (hash(x)=48441371) +20700 val loss 5.9855 +20700 val perplexity 397.6262 +20700 train 6.175598 (lr=3.3665e-05) (hash(x)=52392468) +23100 val loss 6.1797 +23100 val perplexity 482.8374 +23100 train 5.957465 (lr=6.0663e-05) (hash(x)=43571321) +22100 val loss 6.1047 +22100 val perplexity 447.9532 +22100 train 5.959590 (lr=4.4426e-05) (hash(x)=47608136) +23200 val loss 6.1696 +23200 val perplexity 477.9933 +23200 train 6.135411 (lr=6.0381e-05) (hash(x)=53841978) +20800 val loss 5.9818 +20800 val perplexity 396.1430 +20800 train 5.644462 (lr=3.3529e-05) (hash(x)=41360694) +22200 val loss 6.0896 +22200 val perplexity 441.2561 +22200 train 5.839798 (lr=4.4231e-05) (hash(x)=49878086) +23300 val loss 6.1661 +23300 val perplexity 476.3425 +23300 train 6.047857 (lr=6.0099e-05) (hash(x)=43302880) +20900 val loss 5.9784 +20900 val perplexity 394.7907 +20900 train 6.103519 (lr=3.3392e-05) (hash(x)=45677814) +22300 val loss 6.0937 +22300 val perplexity 443.0748 +22300 train 5.861329 (lr=4.4035e-05) (hash(x)=47412357) +23400 val loss 6.1601 +23400 val perplexity 473.4856 +23400 train 6.166147 (lr=5.9817e-05) (hash(x)=50174060) +21000 val loss 5.9815 +21000 val perplexity 396.0265 +21000 train 5.900531 (lr=3.3255e-05) (hash(x)=47366348) +23500 val loss 6.1540 +23500 val perplexity 470.5811 +23500 train 6.111275 (lr=5.9534e-05) (hash(x)=50773603) +22400 val loss 6.0910 +22400 val perplexity 441.8635 +22400 train 6.130796 (lr=4.3839e-05) (hash(x)=53490122) +21100 val loss 5.9731 +21100 val perplexity 392.7354 +21100 train 5.990324 (lr=3.3118e-05) (hash(x)=51212611) +23600 val loss 6.1589 +23600 val perplexity 472.9199 +23600 train 5.930746 (lr=5.9252e-05) (hash(x)=42904407) +22500 val loss 6.0909 +22500 val perplexity 441.8389 +22500 train 6.404861 (lr=4.3643e-05) (hash(x)=54719499) +21200 val loss 5.9759 +21200 val perplexity 393.8363 +21200 train 5.864847 (lr=3.2980e-05) (hash(x)=50634699) +23700 val loss 6.1692 +23700 val perplexity 477.7937 +23700 train 5.990158 (lr=5.8969e-05) (hash(x)=48638482) +22600 val loss 6.0888 +22600 val perplexity 440.8792 +22600 train 6.110184 (lr=4.3447e-05) (hash(x)=48357283) +21300 val loss 5.9822 +21300 val perplexity 396.3267 +21300 train 5.778918 (lr=3.2842e-05) (hash(x)=46312642) +23800 val loss 6.1620 +23800 val perplexity 474.3950 +23800 train 6.122009 (lr=5.8686e-05) (hash(x)=51586262) +22700 val loss 6.0708 +22700 val perplexity 433.0360 +22700 train 5.868220 (lr=4.3251e-05) (hash(x)=45320429) +21400 val loss 5.9722 +21400 val perplexity 392.3838 +21400 train 5.785509 (lr=3.2704e-05) (hash(x)=47897143) +23900 val loss 6.1731 +23900 val perplexity 479.6902 +23900 train 5.825511 (lr=5.8403e-05) (hash(x)=42002764) +22800 val loss 6.0976 +22800 val perplexity 444.7782 +22800 train 6.245061 (lr=4.3054e-05) (hash(x)=50298012) +24000 val loss 6.1669 +24000 val perplexity 476.7043 +24000 train 6.099452 (lr=5.8120e-05) (hash(x)=50286035) +21500 val loss 5.9788 +21500 val perplexity 394.9581 +21500 train 5.903249 (lr=3.2566e-05) (hash(x)=48575558) +22900 val loss 6.0685 +22900 val perplexity 432.0254 +22900 train 6.230075 (lr=4.2858e-05) (hash(x)=52461697) +24100 val loss 6.1619 +24100 val perplexity 474.3246 +24100 train 5.819885 (lr=5.7837e-05) (hash(x)=42370601) +21600 val loss 5.9872 +21600 val perplexity 398.2949 +21600 train 6.055421 (lr=3.2428e-05) (hash(x)=50213016) +23000 val loss 6.0723 +23000 val perplexity 433.6588 +23000 train 6.258580 (lr=4.2661e-05) (hash(x)=53400656) +24200 val loss 6.1956 +24200 val perplexity 490.5764 +24200 train 6.255916 (lr=5.7554e-05) (hash(x)=51085788) +21700 val loss 5.9697 +21700 val perplexity 391.3951 +21700 train 5.724377 (lr=3.2289e-05) (hash(x)=46495383) +23100 val loss 6.0747 +23100 val perplexity 434.7184 +23100 train 5.887076 (lr=4.2464e-05) (hash(x)=43571321) +24300 val loss 6.1631 +24300 val perplexity 474.8908 +24300 train 6.365586 (lr=5.7270e-05) (hash(x)=55218710) +21800 val loss 5.9716 +21800 val perplexity 392.1156 +21800 train 5.839164 (lr=3.2150e-05) (hash(x)=46012376) +23200 val loss 6.0666 +23200 val perplexity 431.1964 +23200 train 6.019709 (lr=4.2267e-05) (hash(x)=53841978) +24400 val loss 6.1684 +24400 val perplexity 477.4406 +24400 train 6.643548 (lr=5.6987e-05) (hash(x)=57711994) +21900 val loss 5.9690 +21900 val perplexity 391.1208 +21900 train 5.770936 (lr=3.2011e-05) (hash(x)=47192863) +24500 val loss 6.1679 +24500 val perplexity 477.1823 +24500 train 5.855380 (lr=5.6703e-05) (hash(x)=52233904) +23300 val loss 6.0516 +23300 val perplexity 424.8008 +23300 train 5.937085 (lr=4.2069e-05) (hash(x)=43302880) +22000 val loss 5.9721 +22000 val perplexity 392.3389 +22000 train 5.963995 (lr=3.1872e-05) (hash(x)=48441371) +24600 val loss 6.1594 +24600 val perplexity 473.1436 +24600 train 6.336185 (lr=5.6419e-05) (hash(x)=55555682) +23400 val loss 6.0522 +23400 val perplexity 425.0440 +23400 train 6.043977 (lr=4.1872e-05) (hash(x)=50174060) +22100 val loss 5.9623 +22100 val perplexity 388.5137 +22100 train 5.833640 (lr=3.1733e-05) (hash(x)=47608136) +24700 val loss 6.1714 +24700 val perplexity 478.8638 +24700 train 6.102612 (lr=5.6135e-05) (hash(x)=52403558) +23500 val loss 6.0413 +23500 val perplexity 420.4576 +23500 train 6.000183 (lr=4.1674e-05) (hash(x)=50773603) +22200 val loss 5.9675 +22200 val perplexity 390.5455 +22200 train 5.724321 (lr=3.1593e-05) (hash(x)=49878086) +24800 val loss 6.1438 +24800 val perplexity 465.7989 +24800 train 6.178074 (lr=5.5852e-05) (hash(x)=52740488) +23600 val loss 6.0581 +23600 val perplexity 427.5616 +23600 train 5.831712 (lr=4.1476e-05) (hash(x)=42904407) +22300 val loss 5.9671 +22300 val perplexity 390.3843 +22300 train 5.716329 (lr=3.1454e-05) (hash(x)=47412357) +24900 val loss 6.1404 +24900 val perplexity 464.2483 +24900 train 6.085851 (lr=5.5568e-05) (hash(x)=51355910) +23700 val loss 6.0577 +23700 val perplexity 427.3916 +23700 train 5.891452 (lr=4.1278e-05) (hash(x)=48638482) +25000 val loss 6.1414 +25000 val perplexity 464.7207 +25000 train 5.912818 (lr=5.5284e-05) (hash(x)=45069649) +22400 val loss 5.9600 +22400 val perplexity 387.6020 +22400 train 5.996737 (lr=3.1314e-05) (hash(x)=53490122) +23800 val loss 6.0427 +23800 val perplexity 421.0252 +23800 train 6.012309 (lr=4.1080e-05) (hash(x)=51586262) +25100 val loss 6.1587 +25100 val perplexity 472.8144 +25100 train 6.539679 (lr=5.5000e-05) (hash(x)=58181708) +23900 val loss 6.0952 +23900 val perplexity 443.7275 +23900 train 5.736462 (lr=4.0882e-05) (hash(x)=42002764) +22500 val loss 5.9603 +22500 val perplexity 387.7321 +22500 train 6.282959 (lr=3.1174e-05) (hash(x)=54719499) +25200 val loss 6.1299 +25200 val perplexity 459.4018 +25200 train 6.235977 (lr=5.4716e-05) (hash(x)=50361074) +24000 val loss 6.0451 +24000 val perplexity 422.0195 +24000 train 5.978853 (lr=4.0684e-05) (hash(x)=50286035) +22600 val loss 5.9660 +22600 val perplexity 389.9309 +22600 train 5.966257 (lr=3.1034e-05) (hash(x)=48357283) +25300 val loss 6.1320 +25300 val perplexity 460.3434 +25300 train 6.032018 (lr=5.4432e-05) (hash(x)=47401253) +24100 val loss 6.0361 +24100 val perplexity 418.2490 +24100 train 5.688564 (lr=4.0486e-05) (hash(x)=42370601) +22700 val loss 5.9469 +22700 val perplexity 382.5836 +22700 train 5.721082 (lr=3.0894e-05) (hash(x)=45320429) +25400 val loss 6.1344 +25400 val perplexity 461.4645 +25400 train 6.091487 (lr=5.4148e-05) (hash(x)=51076549) +24200 val loss 6.0453 +24200 val perplexity 422.1143 +24200 train 6.107017 (lr=4.0287e-05) (hash(x)=51085788) +22800 val loss 5.9479 +22800 val perplexity 382.9355 +22800 train 6.103741 (lr=3.0753e-05) (hash(x)=50298012) +25500 val loss 6.1348 +25500 val perplexity 461.6624 +25500 train 5.944718 (lr=5.3865e-05) (hash(x)=49420987) +24300 val loss 6.0508 +24300 val perplexity 424.4558 +24300 train 6.281925 (lr=4.0089e-05) (hash(x)=55218710) +22900 val loss 5.9421 +22900 val perplexity 380.7473 +22900 train 6.121511 (lr=3.0613e-05) (hash(x)=52461697) +25600 val loss 6.1328 +25600 val perplexity 460.7031 +25600 train 6.157736 (lr=5.3581e-05) (hash(x)=48566570) +24400 val loss 6.0439 +24400 val perplexity 421.5368 +24400 train 6.530954 (lr=3.9891e-05) (hash(x)=57711994) +23000 val loss 5.9385 +23000 val perplexity 379.3815 +25700 val loss 6.1504 +25700 val perplexity 468.9032 +23000 train 6.115480 (lr=3.0472e-05) (hash(x)=53400656) +25700 train 6.214624 (lr=5.3297e-05) (hash(x)=50677911) +24500 val loss 6.0579 +24500 val perplexity 427.4564 +24500 train 5.743776 (lr=3.9692e-05) (hash(x)=52233904) +25800 val loss 6.1401 +25800 val perplexity 464.0787 +25800 train 6.459859 (lr=5.3013e-05) (hash(x)=58929577) +23100 val loss 5.9341 +23100 val perplexity 377.6832 +23100 train 5.739134 (lr=3.0331e-05) (hash(x)=43571321) +24600 val loss 6.0549 +24600 val perplexity 426.1768 +24600 train 6.278240 (lr=3.9493e-05) (hash(x)=55555682) +25900 val loss 6.1282 +25900 val perplexity 458.6285 +25900 train 5.968525 (lr=5.2730e-05) (hash(x)=52002864) +23200 val loss 5.9336 +23200 val perplexity 377.5275 +23200 train 5.893770 (lr=3.0190e-05) (hash(x)=53841978) +24700 val loss 6.0532 +24700 val perplexity 425.4619 +24700 train 6.014715 (lr=3.9295e-05) (hash(x)=52403558) +26000 val loss 6.1373 +26000 val perplexity 462.8034 +26000 train 5.851420 (lr=5.2446e-05) (hash(x)=46014218) +23300 val loss 5.9248 +23300 val perplexity 374.2180 +23300 train 5.810924 (lr=3.0049e-05) (hash(x)=43302880) +24800 val loss 6.0362 +24800 val perplexity 418.3037 +24800 train 6.055576 (lr=3.9096e-05) (hash(x)=52740488) +26100 val loss 6.1450 +26100 val perplexity 466.3578 +26100 train 5.874826 (lr=5.2163e-05) (hash(x)=52740192) +23400 val loss 5.9271 +23400 val perplexity 375.0691 +23400 train 5.942403 (lr=2.9908e-05) (hash(x)=50174060) +26200 val loss 6.1363 +26200 val perplexity 462.3365 +24900 val loss 6.0253 +24900 val perplexity 413.7603 +26200 train 6.275943 (lr=5.1880e-05) (hash(x)=49738391) +24900 train 5.932157 (lr=3.8897e-05) (hash(x)=51355910) +23500 val loss 5.9215 +23500 val perplexity 372.9738 +23500 train 5.873097 (lr=2.9767e-05) (hash(x)=50773603) +26300 val loss 6.1284 +26300 val perplexity 458.7029 +26300 train 5.846912 (lr=5.1597e-05) (hash(x)=46377771) +25000 val loss 6.0242 +25000 val perplexity 413.3270 +25000 train 5.782950 (lr=3.8699e-05) (hash(x)=45069649) +23600 val loss 5.9278 +23600 val perplexity 375.3310 +23600 train 5.717057 (lr=2.9626e-05) (hash(x)=42904407) +26400 val loss 6.1266 +26400 val perplexity 457.8734 +26400 train 6.240741 (lr=5.1314e-05) (hash(x)=51066219) +25100 val loss 6.0380 +25100 val perplexity 419.0378 +25100 train 6.393308 (lr=3.8500e-05) (hash(x)=58181708) +23700 val loss 5.9217 +23700 val perplexity 373.0286 +23700 train 5.737006 (lr=2.9485e-05) (hash(x)=48638482) +26500 val loss 6.1540 +26500 val perplexity 470.5946 +26500 train 6.252472 (lr=5.1031e-05) (hash(x)=48437514) +25200 val loss 6.0253 +25200 val perplexity 413.7577 +25200 train 6.145757 (lr=3.8301e-05) (hash(x)=50361074) +26600 val loss 6.1611 +26600 val perplexity 473.9527 +26600 train 6.073585 (lr=5.0748e-05) (hash(x)=45911595) +23800 val loss 5.9237 +23800 val perplexity 373.8007 +23800 train 5.888617 (lr=2.9343e-05) (hash(x)=51586262) +25300 val loss 6.0182 +25300 val perplexity 410.8266 +25300 train 5.964751 (lr=3.8103e-05) (hash(x)=47401253) +26700 val loss 6.1345 +26700 val perplexity 461.5096 +26700 train 6.000814 (lr=5.0466e-05) (hash(x)=52464591) +23900 val loss 5.9380 +23900 val perplexity 379.1757 +23900 train 5.580739 (lr=2.9202e-05) (hash(x)=42002764) +25400 val loss 6.0101 +25400 val perplexity 407.5157 +25400 train 5.969099 (lr=3.7904e-05) (hash(x)=51076549) +26800 val loss 6.1449 +26800 val perplexity 466.3398 +26800 train 6.089945 (lr=5.0183e-05) (hash(x)=42096943) +24000 val loss 5.9273 +24000 val perplexity 375.1578 +24000 train 5.888164 (lr=2.9060e-05) (hash(x)=50286035) +25500 val loss 6.0142 +25500 val perplexity 409.1867 +25500 train 5.826285 (lr=3.7705e-05) (hash(x)=49420987) +26900 val loss 6.1494 +26900 val perplexity 468.4381 +26900 train 6.809120 (lr=4.9901e-05) (hash(x)=54163186) +24100 val loss 5.9177 +24100 val perplexity 371.5695 +24100 train 5.567852 (lr=2.8918e-05) (hash(x)=42370601) +25600 val loss 6.0085 +25600 val perplexity 406.8588 +25600 train 6.022813 (lr=3.7507e-05) (hash(x)=48566570) +27000 val loss 6.1287 +27000 val perplexity 458.8401 +27000 train 5.979765 (lr=4.9619e-05) (hash(x)=46451215) +24200 val loss 5.9299 +24200 val perplexity 376.1286 +24200 train 5.991234 (lr=2.8777e-05) (hash(x)=51085788) +25700 val loss 6.0168 +25700 val perplexity 410.2765 +25700 train 6.101202 (lr=3.7308e-05) (hash(x)=50677911) +27100 val loss 6.1562 +27100 val perplexity 471.6284 +27100 train 5.950190 (lr=4.9337e-05) (hash(x)=47581088) +24300 val loss 5.9196 +24300 val perplexity 372.2800 +25800 val loss 6.0141 +25800 val perplexity 409.1383 +24300 train 6.138320 (lr=2.8635e-05) (hash(x)=55218710) +25800 train 6.364690 (lr=3.7109e-05) (hash(x)=58929577) +27200 val loss 6.1324 +27200 val perplexity 460.5413 +27200 train 6.208570 (lr=4.9056e-05) (hash(x)=54367680) +25900 val loss 6.0207 +25900 val perplexity 411.8674 +25900 train 5.857312 (lr=3.6911e-05) (hash(x)=52002864) +24400 val loss 5.9177 +24400 val perplexity 371.5526 +24400 train 6.413055 (lr=2.8493e-05) (hash(x)=57711994) +27300 val loss 6.1418 +27300 val perplexity 464.8734 +27300 train 6.212245 (lr=4.8775e-05) (hash(x)=46570420) +26000 val loss 6.0101 +26000 val perplexity 407.5114 +26000 train 5.721799 (lr=3.6713e-05) (hash(x)=46014218) +24500 val loss 5.9206 +24500 val perplexity 372.6336 +24500 train 5.616885 (lr=2.8351e-05) (hash(x)=52233904) +27400 val loss 6.1303 +27400 val perplexity 459.5611 +27400 train 6.064974 (lr=4.8494e-05) (hash(x)=41750168) +26100 val loss 6.0232 +26100 val perplexity 412.8966 +26100 train 5.782398 (lr=3.6514e-05) (hash(x)=52740192) +27500 val loss 6.1182 +27500 val perplexity 454.0665 +27500 train 5.999010 (lr=4.8213e-05) (hash(x)=48741121) +24600 val loss 5.9226 +24600 val perplexity 373.3805 +24600 train 6.110901 (lr=2.8210e-05) (hash(x)=55555682) +26200 val loss 6.0071 +26200 val perplexity 406.2844 +26200 train 6.126141 (lr=3.6316e-05) (hash(x)=49738391) +27600 val loss 6.1267 +27600 val perplexity 457.9000 +27600 train 6.315853 (lr=4.7932e-05) (hash(x)=46656553) +24700 val loss 5.9290 +24700 val perplexity 375.7771 +24700 train 5.872314 (lr=2.8068e-05) (hash(x)=52403558) +26300 val loss 6.0087 +26300 val perplexity 406.9572 +27700 val loss 6.1235 +27700 val perplexity 456.4407 +27700 train 6.270436 (lr=4.7652e-05) (hash(x)=57938850) +26300 train 5.710693 (lr=3.6118e-05) (hash(x)=46377771) +24800 val loss 5.9096 +24800 val perplexity 368.5623 +24800 train 5.950855 (lr=2.7926e-05) (hash(x)=52740488) +27800 val loss 6.1270 +27800 val perplexity 458.0760 +27800 train 6.140831 (lr=4.7372e-05) (hash(x)=50415088) +26400 val loss 6.0060 +26400 val perplexity 405.8673 +26400 train 6.139177 (lr=3.5920e-05) (hash(x)=51066219) +24900 val loss 5.9097 +24900 val perplexity 368.5989 +24900 train 5.827918 (lr=2.7784e-05) (hash(x)=51355910) +27900 val loss 6.1222 +27900 val perplexity 455.8752 +27900 train 6.106221 (lr=4.7093e-05) (hash(x)=45350717) +26500 val loss 6.0014 +26500 val perplexity 404.0032 +26500 train 6.087047 (lr=3.5722e-05) (hash(x)=48437514) +25000 val loss 5.9087 +25000 val perplexity 368.2373 +25000 train 5.669620 (lr=2.7642e-05) (hash(x)=45069649) +28000 val loss 6.1481 +28000 val perplexity 467.8354 +28000 train 6.620662 (lr=4.6813e-05) (hash(x)=57695005) +26600 val loss 6.0171 +26600 val perplexity 410.3889 +26600 train 5.930305 (lr=3.5524e-05) (hash(x)=45911595) +25100 val loss 5.8988 +25100 val perplexity 364.5856 +25100 train 6.251714 (lr=2.7500e-05) (hash(x)=58181708) +28100 val loss 6.1375 +28100 val perplexity 462.8765 +28100 train 6.130723 (lr=4.6534e-05) (hash(x)=47727816) +26700 val loss 6.0089 +26700 val perplexity 407.0282 +26700 train 5.870820 (lr=3.5326e-05) (hash(x)=52464591) +28200 val loss 6.1117 +28200 val perplexity 451.1219 +28200 train 6.230977 (lr=4.6256e-05) (hash(x)=48551638) +25200 val loss 5.8940 +25200 val perplexity 362.8538 +25200 train 6.013651 (lr=2.7358e-05) (hash(x)=50361074) +26800 val loss 6.0165 +26800 val perplexity 410.1531 +26800 train 6.003063 (lr=3.5128e-05) (hash(x)=42096943) +28300 val loss 6.1147 +28300 val perplexity 452.4641 +28300 train 6.416711 (lr=4.5977e-05) (hash(x)=54559586) +25300 val loss 5.8939 +25300 val perplexity 362.8159 +25300 train 5.824848 (lr=2.7216e-05) (hash(x)=47401253) +26900 val loss 6.0176 +26900 val perplexity 410.6000 +26900 train 6.661689 (lr=3.4931e-05) (hash(x)=54163186) +28400 val loss 6.1327 +28400 val perplexity 460.6761 +28400 train 5.921173 (lr=4.5700e-05) (hash(x)=48389484) +25400 val loss 5.8885 +25400 val perplexity 360.8470 +25400 train 5.850532 (lr=2.7074e-05) (hash(x)=51076549) +27000 val loss 6.0070 +27000 val perplexity 406.2610 +27000 train 5.858972 (lr=3.4733e-05) (hash(x)=46451215) +28500 val loss 6.1320 +28500 val perplexity 460.3603 +28500 train 5.953368 (lr=4.5422e-05) (hash(x)=47688252) +25500 val loss 5.8936 +25500 val perplexity 362.6914 +25500 train 5.702950 (lr=2.6932e-05) (hash(x)=49420987) +27100 val loss 6.0040 +27100 val perplexity 405.0645 +27100 train 5.760614 (lr=3.4536e-05) (hash(x)=47581088) +28600 val loss 6.1196 +28600 val perplexity 454.6896 +28600 train 5.904134 (lr=4.5145e-05) (hash(x)=49302361) +27200 val loss 5.9976 +27200 val perplexity 402.4794 +27200 train 6.068856 (lr=3.4339e-05) (hash(x)=54367680) +25600 val loss 5.8880 +25600 val perplexity 360.6891 +25600 train 5.906493 (lr=2.6790e-05) (hash(x)=48566570) +28700 val loss 6.1380 +28700 val perplexity 463.1343 +28700 train 6.114074 (lr=4.4868e-05) (hash(x)=48942466) +27300 val loss 6.0023 +27300 val perplexity 404.3422 +27300 train 6.055133 (lr=3.4142e-05) (hash(x)=46570420) +25700 val loss 5.8884 +25700 val perplexity 360.8452 +25700 train 5.976617 (lr=2.6649e-05) (hash(x)=50677911) +28800 val loss 6.1323 +28800 val perplexity 460.4984 +28800 train 6.349658 (lr=4.4592e-05) (hash(x)=50092454) +27400 val loss 5.9872 +27400 val perplexity 398.2831 +27400 train 5.948752 (lr=3.3946e-05) (hash(x)=41750168) +25800 val loss 5.8807 +25800 val perplexity 358.0662 +25800 train 6.231009 (lr=2.6507e-05) (hash(x)=58929577) +28900 val loss 6.1264 +28900 val perplexity 457.7797 +28900 train 6.018862 (lr=4.4316e-05) (hash(x)=50719587) +27500 val loss 5.9894 +27500 val perplexity 399.1740 +27500 train 5.861801 (lr=3.3749e-05) (hash(x)=48741121) +25900 val loss 5.8812 +25900 val perplexity 358.2557 +25900 train 5.684187 (lr=2.6365e-05) (hash(x)=52002864) +29000 val loss 6.1327 +29000 val perplexity 460.6888 +29000 train 5.860716 (lr=4.4040e-05) (hash(x)=45143738) +27600 val loss 5.9859 +27600 val perplexity 397.7961 +27600 train 6.176133 (lr=3.3553e-05) (hash(x)=46656553) +26000 val loss 5.8761 +26000 val perplexity 356.4133 +26000 train 5.615215 (lr=2.6223e-05) (hash(x)=46014218) +29100 val loss 6.1412 +29100 val perplexity 464.6318 +29100 train 5.942729 (lr=4.3765e-05) (hash(x)=46786056) +27700 val loss 5.9771 +27700 val perplexity 394.2883 +27700 train 6.101024 (lr=3.3357e-05) (hash(x)=57938850) +29200 val loss 6.1274 +29200 val perplexity 458.2244 +29200 train 6.004820 (lr=4.3490e-05) (hash(x)=48740943) +26100 val loss 5.8825 +26100 val perplexity 358.7226 +26100 train 5.634572 (lr=2.6082e-05) (hash(x)=52740192) +27800 val loss 5.9687 +27800 val perplexity 390.9868 +27800 train 5.990761 (lr=3.3161e-05) (hash(x)=50415088) +26200 val loss 5.8756 +26200 val perplexity 356.2531 +29300 val loss 6.1632 +29300 val perplexity 474.9504 +29300 train 5.773337 (lr=4.3216e-05) (hash(x)=41648736) +26200 train 5.999710 (lr=2.5940e-05) (hash(x)=49738391) +27900 val loss 5.9671 +27900 val perplexity 390.3619 +27900 train 5.955680 (lr=3.2965e-05) (hash(x)=45350717) +29400 val loss 6.1196 +29400 val perplexity 454.6873 +29400 train 5.846704 (lr=4.2942e-05) (hash(x)=44218648) +26300 val loss 5.8794 +26300 val perplexity 357.6070 +26300 train 5.579252 (lr=2.5798e-05) (hash(x)=46377771) +28000 val loss 6.0019 +28000 val perplexity 404.1919 +28000 train 6.465910 (lr=3.2769e-05) (hash(x)=57695005) +29500 val loss 6.1140 +29500 val perplexity 452.1522 +29500 train 6.557173 (lr=4.2669e-05) (hash(x)=53334368) +26400 val loss 5.8725 +26400 val perplexity 355.1274 +26400 train 6.023158 (lr=2.5657e-05) (hash(x)=51066219) +28100 val loss 5.9785 +28100 val perplexity 394.8363 +28100 train 5.975891 (lr=3.2574e-05) (hash(x)=47727816) +29600 val loss 6.1093 +29600 val perplexity 450.0159 +29600 train 5.888193 (lr=4.2396e-05) (hash(x)=47618123) +26500 val loss 5.8797 +26500 val perplexity 357.7184 +26500 train 6.014191 (lr=2.5515e-05) (hash(x)=48437514) +28200 val loss 5.9620 +28200 val perplexity 388.3857 +28200 train 6.111741 (lr=3.2379e-05) (hash(x)=48551638) +29700 val loss 6.1133 +29700 val perplexity 451.8378 +29700 train 5.910227 (lr=4.2124e-05) (hash(x)=50661355) +26600 val loss 5.8916 +26600 val perplexity 361.9666 +26600 train 5.805231 (lr=2.5374e-05) (hash(x)=45911595) +28300 val loss 5.9668 +28300 val perplexity 390.2417 +28300 train 6.275928 (lr=3.2184e-05) (hash(x)=54559586) +29800 val loss 6.1145 +29800 val perplexity 452.3907 +29800 train 6.080892 (lr=4.1852e-05) (hash(x)=57016691) +26700 val loss 5.8783 +26700 val perplexity 357.1976 +26700 train 5.758441 (lr=2.5233e-05) (hash(x)=52464591) +28400 val loss 5.9767 +28400 val perplexity 394.1362 +28400 train 5.769044 (lr=3.1990e-05) (hash(x)=48389484) +29900 val loss 6.1145 +29900 val perplexity 452.3530 +29900 train 6.122662 (lr=4.1581e-05) (hash(x)=48125356) +26800 val loss 5.8836 +26800 val perplexity 359.0818 +26800 train 5.851243 (lr=2.5092e-05) (hash(x)=42096943) +28500 val loss 5.9739 +28500 val perplexity 393.0212 +28500 train 5.808036 (lr=3.1795e-05) (hash(x)=47688252) +30000 val loss 6.1206 +30000 val perplexity 455.1415 +30000 train 5.906503 (lr=4.1310e-05) (hash(x)=49406425) +28600 val loss 5.9659 +28600 val perplexity 389.9049 +28600 train 5.715998 (lr=3.1601e-05) (hash(x)=49302361) +26900 val loss 5.8946 +26900 val perplexity 363.0536 +26900 train 6.537422 (lr=2.4951e-05) (hash(x)=54163186) +30100 val loss 6.1106 +30100 val perplexity 450.6094 +30100 train 5.859414 (lr=4.1040e-05) (hash(x)=46392910) +28700 val loss 5.9611 +28700 val perplexity 388.0345 +28700 train 5.957855 (lr=3.1408e-05) (hash(x)=48942466) +27000 val loss 5.8811 +27000 val perplexity 358.2159 +27000 train 5.737806 (lr=2.4810e-05) (hash(x)=46451215) +30200 val loss 6.1105 +30200 val perplexity 450.5759 +30200 train 5.953054 (lr=4.0771e-05) (hash(x)=49414134) +28800 val loss 5.9640 +28800 val perplexity 389.1515 +28800 train 6.162807 (lr=3.1214e-05) (hash(x)=50092454) +30300 val loss 6.0921 +30300 val perplexity 442.3638 +30300 train 6.551651 (lr=4.0502e-05) (hash(x)=68695385) +27100 val loss 5.8769 +27100 val perplexity 356.7003 +27100 train 5.633742 (lr=2.4669e-05) (hash(x)=47581088) +28900 val loss 5.9752 +28900 val perplexity 393.5444 +28900 train 5.873323 (lr=3.1021e-05) (hash(x)=50719587) +30400 val loss 6.0920 +30400 val perplexity 442.3085 +30400 train 5.988531 (lr=4.0233e-05) (hash(x)=47071276) +27200 val loss 5.8699 +27200 val perplexity 354.2182 +27200 train 5.921309 (lr=2.4528e-05) (hash(x)=54367680) +29000 val loss 5.9667 +29000 val perplexity 390.2053 +29000 train 5.710151 (lr=3.0828e-05) (hash(x)=45143738) +30500 val loss 6.0963 +30500 val perplexity 444.2328 +30500 train 6.054086 (lr=3.9965e-05) (hash(x)=51160964) +27300 val loss 5.8766 +27300 val perplexity 356.6117 +27300 train 5.928796 (lr=2.4387e-05) (hash(x)=46570420) +29100 val loss 5.9712 +29100 val perplexity 391.9629 +29100 train 5.763094 (lr=3.0635e-05) (hash(x)=46786056) +30600 val loss 6.0838 +30600 val perplexity 438.6756 +30600 train 6.024662 (lr=3.9698e-05) (hash(x)=50809494) +27400 val loss 5.8569 +27400 val perplexity 349.6440 +27400 train 5.843185 (lr=2.4247e-05) (hash(x)=41750168) +29200 val loss 5.9753 +29200 val perplexity 393.5728 +29200 train 5.857784 (lr=3.0443e-05) (hash(x)=48740943) +30700 val loss 6.0967 +30700 val perplexity 444.3961 +30700 train 6.153099 (lr=3.9431e-05) (hash(x)=53029700) +27500 val loss 5.8598 +27500 val perplexity 350.6605 +27500 train 5.736267 (lr=2.4106e-05) (hash(x)=48741121) +30800 val loss 6.0809 +30800 val perplexity 437.4324 +30800 train 6.012371 (lr=3.9165e-05) (hash(x)=52075304) +29300 val loss 5.9725 +29300 val perplexity 392.4856 +29300 train 5.602182 (lr=3.0251e-05) (hash(x)=41648736) +27600 val loss 5.8650 +27600 val perplexity 352.4982 +27600 train 6.067800 (lr=2.3966e-05) (hash(x)=46656553) +30900 val loss 6.0801 +30900 val perplexity 437.0946 +30900 train 5.882274 (lr=3.8900e-05) (hash(x)=48077975) +29400 val loss 5.9633 +29400 val perplexity 388.8792 +29400 train 5.692812 (lr=3.0060e-05) (hash(x)=44218648) +27700 val loss 5.8559 +27700 val perplexity 349.2807 +27700 train 5.974777 (lr=2.3826e-05) (hash(x)=57938850) +31000 val loss 6.0865 +31000 val perplexity 439.8834 +31000 train 6.002836 (lr=3.8635e-05) (hash(x)=47114906) +29500 val loss 5.9605 +29500 val perplexity 387.8205 +29500 train 6.444823 (lr=2.9868e-05) (hash(x)=53334368) +27800 val loss 5.8494 +27800 val perplexity 347.0331 +27800 train 5.875774 (lr=2.3686e-05) (hash(x)=50415088) +31100 val loss 6.0770 +31100 val perplexity 435.7264 +31100 train 5.963897 (lr=3.8371e-05) (hash(x)=46927606) +29600 val loss 5.9613 +29600 val perplexity 388.1221 +29600 train 5.726595 (lr=2.9677e-05) (hash(x)=47618123) +31200 val loss 6.0839 +31200 val perplexity 438.7549 +31200 train 6.035295 (lr=3.8108e-05) (hash(x)=48254087) +27900 val loss 5.8569 +27900 val perplexity 349.6452 +27900 train 5.863978 (lr=2.3546e-05) (hash(x)=45350717) +29700 val loss 5.9681 +29700 val perplexity 390.7542 +29700 train 5.770926 (lr=2.9487e-05) (hash(x)=50661355) +31300 val loss 6.0799 +31300 val perplexity 436.9987 +31300 train 5.842219 (lr=3.7845e-05) (hash(x)=44959564) +28000 val loss 5.8815 +28000 val perplexity 358.3430 +28000 train 6.301733 (lr=2.3407e-05) (hash(x)=57695005) +29800 val loss 5.9626 +29800 val perplexity 388.6247 +29800 train 5.901276 (lr=2.9297e-05) (hash(x)=57016691) +31400 val loss 6.0878 +31400 val perplexity 440.4591 +31400 train 5.893678 (lr=3.7583e-05) (hash(x)=49444115) +29900 val loss 5.9565 +29900 val perplexity 386.2723 +29900 train 5.963272 (lr=2.9107e-05) (hash(x)=48125356) +28100 val loss 5.8544 +28100 val perplexity 348.7510 +28100 train 5.868701 (lr=2.3267e-05) (hash(x)=47727816) +31500 val loss 6.0943 +31500 val perplexity 443.3438 +31500 train 5.885118 (lr=3.7321e-05) (hash(x)=47585912) +30000 val loss 5.9575 +30000 val perplexity 386.6535 +30000 train 5.737274 (lr=2.8917e-05) (hash(x)=49406425) +28200 val loss 5.8485 +28200 val perplexity 346.7132 +28200 train 5.998513 (lr=2.3128e-05) (hash(x)=48551638) +31600 val loss 6.0810 +31600 val perplexity 437.4689 +31600 train 6.118282 (lr=3.7061e-05) (hash(x)=53844713) +30100 val loss 5.9605 +30100 val perplexity 387.7896 +30100 train 5.701468 (lr=2.8728e-05) (hash(x)=46392910) +28300 val loss 5.8473 +28300 val perplexity 346.2963 +28300 train 6.163150 (lr=2.2989e-05) (hash(x)=54559586) +31700 val loss 6.0893 +31700 val perplexity 441.0937 +31700 train 5.801013 (lr=3.6801e-05) (hash(x)=48596811) +30200 val loss 5.9626 +30200 val perplexity 388.6095 +30200 train 5.813945 (lr=2.8539e-05) (hash(x)=49414134) +28400 val loss 5.8448 +28400 val perplexity 345.4237 +28400 train 5.627731 (lr=2.2850e-05) (hash(x)=48389484) +31800 val loss 6.0955 +31800 val perplexity 443.8430 +31800 train 6.044539 (lr=3.6541e-05) (hash(x)=49976057) +30300 val loss 5.9552 +30300 val perplexity 385.7714 +30300 train 6.417881 (lr=2.8351e-05) (hash(x)=68695385) +28500 val loss 5.8446 +28500 val perplexity 345.3720 +28500 train 5.687553 (lr=2.2711e-05) (hash(x)=47688252) +31900 val loss 6.1030 +31900 val perplexity 447.2155 +31900 train 6.087860 (lr=3.6283e-05) (hash(x)=50115370) +30400 val loss 5.9506 +30400 val perplexity 383.9826 +30400 train 5.833694 (lr=2.8163e-05) (hash(x)=47071276) +28600 val loss 5.8453 +28600 val perplexity 345.6028 +28600 train 5.620392 (lr=2.2572e-05) (hash(x)=49302361) +32000 val loss 6.0952 +32000 val perplexity 443.7279 +32000 train 5.753112 (lr=3.6025e-05) (hash(x)=43312539) +30500 val loss 5.9578 +30500 val perplexity 386.7672 +30500 train 5.880878 (lr=2.7976e-05) (hash(x)=51160964) +28700 val loss 5.8456 +28700 val perplexity 345.7035 +28700 train 5.830254 (lr=2.2434e-05) (hash(x)=48942466) +32100 val loss 6.0926 +32100 val perplexity 442.5874 +32100 train 6.149951 (lr=3.5768e-05) (hash(x)=52783562) +30600 val loss 5.9366 +30600 val perplexity 378.6490 +30600 train 5.876342 (lr=2.7789e-05) (hash(x)=50809494) +28800 val loss 5.8470 +28800 val perplexity 346.1964 +28800 train 6.069959 (lr=2.2296e-05) (hash(x)=50092454) +32200 val loss 6.0940 +32200 val perplexity 443.1880 +32200 train 6.082386 (lr=3.5512e-05) (hash(x)=45183919) +30700 val loss 5.9414 +30700 val perplexity 380.4513 +30700 train 6.004590 (lr=2.7602e-05) (hash(x)=53029700) +28900 val loss 5.8492 +28900 val perplexity 346.9521 +28900 train 5.759587 (lr=2.2158e-05) (hash(x)=50719587) +32300 val loss 6.0924 +32300 val perplexity 442.4631 +32300 train 6.363565 (lr=3.5256e-05) (hash(x)=56847952) +30800 val loss 5.9393 +30800 val perplexity 379.6700 +30800 train 5.872514 (lr=2.7416e-05) (hash(x)=52075304) +32400 val loss 6.0733 +32400 val perplexity 434.1164 +32400 train 5.848068 (lr=3.5002e-05) (hash(x)=44188499) +29000 val loss 5.8461 +29000 val perplexity 345.8768 +29000 train 5.581179 (lr=2.2020e-05) (hash(x)=45143738) +30900 val loss 5.9461 +30900 val perplexity 382.2438 +30900 train 5.765499 (lr=2.7230e-05) (hash(x)=48077975) +32500 val loss 6.0802 +32500 val perplexity 437.1317 +32500 train 5.936266 (lr=3.4748e-05) (hash(x)=48357180) +31000 val loss 5.9387 +31000 val perplexity 379.4568 +31000 train 5.869512 (lr=2.7045e-05) (hash(x)=47114906) +29100 val loss 5.8487 +29100 val perplexity 346.7712 +29100 train 5.643409 (lr=2.1882e-05) (hash(x)=46786056) +32600 val loss 6.0796 +32600 val perplexity 436.8573 +32600 train 6.135863 (lr=3.4495e-05) (hash(x)=47861550) +31100 val loss 5.9304 +31100 val perplexity 376.3037 +31100 train 5.816771 (lr=2.6860e-05) (hash(x)=46927606) +29200 val loss 5.8546 +29200 val perplexity 348.8220 +29200 train 5.757584 (lr=2.1745e-05) (hash(x)=48740943) +32700 val loss 6.0764 +32700 val perplexity 435.4480 +32700 train 6.184589 (lr=3.4242e-05) (hash(x)=55305367) +31200 val loss 5.9388 +31200 val perplexity 379.4979 +31200 train 5.899786 (lr=2.6675e-05) (hash(x)=48254087) +29300 val loss 5.8593 +29300 val perplexity 350.4930 +29300 train 5.495536 (lr=2.1608e-05) (hash(x)=41648736) +32800 val loss 6.1101 +32800 val perplexity 450.3955 +32800 train 6.003266 (lr=3.3991e-05) (hash(x)=51097743) +31300 val loss 5.9291 +31300 val perplexity 375.8101 +31300 train 5.691813 (lr=2.6491e-05) (hash(x)=44959564) +29400 val loss 5.8383 +29400 val perplexity 343.1790 +29400 train 5.574665 (lr=2.1471e-05) (hash(x)=44218648) +32900 val loss 6.0801 +32900 val perplexity 437.0688 +32900 train 5.915703 (lr=3.3740e-05) (hash(x)=46776597) +31400 val loss 5.9269 +31400 val perplexity 374.9973 +31400 train 5.745422 (lr=2.6308e-05) (hash(x)=49444115) +29500 val loss 5.8411 +29500 val perplexity 344.1704 +29500 train 6.328853 (lr=2.1335e-05) (hash(x)=53334368) +33000 val loss 6.0806 +33000 val perplexity 437.3102 +33000 train 6.031677 (lr=3.3490e-05) (hash(x)=52689521) +31500 val loss 5.9295 +31500 val perplexity 375.9681 +31500 train 5.722143 (lr=2.6125e-05) (hash(x)=47585912) +29600 val loss 5.8420 +29600 val perplexity 344.4577 +29600 train 5.593561 (lr=2.1198e-05) (hash(x)=47618123) +33100 val loss 6.0945 +33100 val perplexity 443.4136 +33100 train 6.521582 (lr=3.3242e-05) (hash(x)=65258285) +31600 val loss 5.9331 +31600 val perplexity 377.3200 +31600 train 5.961406 (lr=2.5942e-05) (hash(x)=53844713) +33200 val loss 6.0642 +33200 val perplexity 430.1810 +29700 val loss 5.8379 +29700 val perplexity 343.0661 +33200 train 5.913850 (lr=3.2993e-05) (hash(x)=50703450) +29700 train 5.647583 (lr=2.1062e-05) (hash(x)=50661355) +31700 val loss 5.9291 +31700 val perplexity 375.8326 +31700 train 5.633407 (lr=2.5760e-05) (hash(x)=48596811) +33300 val loss 6.0818 +33300 val perplexity 437.8080 +33300 train 6.649391 (lr=3.2746e-05) (hash(x)=61774174) +29800 val loss 5.8428 +29800 val perplexity 344.7279 +29800 train 5.742423 (lr=2.0926e-05) (hash(x)=57016691) +31800 val loss 5.9302 +31800 val perplexity 376.2255 +31800 train 5.871071 (lr=2.5579e-05) (hash(x)=49976057) +33400 val loss 6.0782 +33400 val perplexity 436.2488 +33400 train 5.954467 (lr=3.2500e-05) (hash(x)=49173286) +29900 val loss 5.8398 +29900 val perplexity 343.7264 +29900 train 5.868258 (lr=2.0791e-05) (hash(x)=48125356) +31900 val loss 5.9322 +31900 val perplexity 377.0004 +31900 train 5.949881 (lr=2.5398e-05) (hash(x)=50115370) +33500 val loss 6.0819 +33500 val perplexity 437.8767 +33500 train 5.999985 (lr=3.2255e-05) (hash(x)=47446418) +30000 val loss 5.8423 +30000 val perplexity 344.5857 +32000 val loss 5.9269 +32000 val perplexity 374.9918 +32000 train 5.588292 (lr=2.5218e-05) (hash(x)=43312539) +30000 train 5.616962 (lr=2.0655e-05) (hash(x)=49406425) +33600 val loss 6.0835 +33600 val perplexity 438.5677 +33600 train 5.889129 (lr=3.2010e-05) (hash(x)=51614932) +32100 val loss 5.9182 +32100 val perplexity 371.7551 +32100 train 5.963882 (lr=2.5038e-05) (hash(x)=52783562) +30100 val loss 5.8403 +30100 val perplexity 343.8996 +30100 train 5.581110 (lr=2.0520e-05) (hash(x)=46392910) +33700 val loss 6.0700 +33700 val perplexity 432.6781 +33700 train 6.024288 (lr=3.1767e-05) (hash(x)=45913195) +32200 val loss 5.9128 +32200 val perplexity 369.7292 +32200 train 5.884846 (lr=2.4858e-05) (hash(x)=45183919) +30200 val loss 5.8394 +30200 val perplexity 343.5665 +30200 train 5.697734 (lr=2.0385e-05) (hash(x)=49414134) +33800 val loss 6.0750 +33800 val perplexity 434.8602 +33800 train 5.938345 (lr=3.1524e-05) (hash(x)=49792786) +32300 val loss 5.9144 +32300 val perplexity 370.3252 +32300 train 6.214451 (lr=2.4679e-05) (hash(x)=56847952) +30300 val loss 5.8307 +30300 val perplexity 340.5966 +30300 train 6.312660 (lr=2.0251e-05) (hash(x)=68695385) +33900 val loss 6.0719 +33900 val perplexity 433.5217 +33900 train 6.235732 (lr=3.1282e-05) (hash(x)=45744584) +32400 val loss 5.9114 +32400 val perplexity 369.2306 +32400 train 5.648042 (lr=2.4501e-05) (hash(x)=44188499) +34000 val loss 6.0600 +34000 val perplexity 428.3619 +34000 train 6.192116 (lr=3.1041e-05) (hash(x)=52120372) +30400 val loss 5.8361 +30400 val perplexity 342.4538 +30400 train 5.703335 (lr=2.0117e-05) (hash(x)=47071276) +32500 val loss 5.9094 +32500 val perplexity 368.4924 +32500 train 5.743899 (lr=2.4323e-05) (hash(x)=48357180) +34100 val loss 6.0626 +34100 val perplexity 429.4907 +34100 train 6.109167 (lr=3.0802e-05) (hash(x)=50537607) +30500 val loss 5.8610 +30500 val perplexity 351.0637 +30500 train 5.792782 (lr=1.9983e-05) (hash(x)=51160964) +32600 val loss 5.9089 +32600 val perplexity 368.2856 +32600 train 5.933915 (lr=2.4146e-05) (hash(x)=47861550) +34200 val loss 6.0575 +34200 val perplexity 427.3078 +34200 train 6.215541 (lr=3.0563e-05) (hash(x)=60272478) +30600 val loss 5.8234 +30600 val perplexity 338.1342 +30600 train 5.761587 (lr=1.9849e-05) (hash(x)=50809494) +32700 val loss 5.9043 +32700 val perplexity 366.6011 +32700 train 6.037351 (lr=2.3970e-05) (hash(x)=55305367) +34300 val loss 6.0580 +34300 val perplexity 427.5349 +34300 train 6.094839 (lr=3.0325e-05) (hash(x)=53956932) +30700 val loss 5.8197 +30700 val perplexity 336.8789 +30700 train 5.885623 (lr=1.9716e-05) (hash(x)=53029700) +32800 val loss 5.9092 +32800 val perplexity 368.3965 +32800 train 5.764788 (lr=2.3794e-05) (hash(x)=51097743) +34400 val loss 6.0495 +34400 val perplexity 423.8846 +34400 train 6.144381 (lr=3.0088e-05) (hash(x)=50945849) +30800 val loss 5.8197 +30800 val perplexity 336.8786 +30800 train 5.780815 (lr=1.9583e-05) (hash(x)=52075304) +32900 val loss 5.9090 +32900 val perplexity 368.3501 +32900 train 5.768863 (lr=2.3618e-05) (hash(x)=46776597) +34500 val loss 6.0482 +34500 val perplexity 423.3603 +34500 train 5.877506 (lr=2.9852e-05) (hash(x)=48188401) +30900 val loss 5.8342 +30900 val perplexity 341.7789 +30900 train 5.651695 (lr=1.9450e-05) (hash(x)=48077975) +33000 val loss 5.9074 +33000 val perplexity 367.7583 +33000 train 5.860279 (lr=2.3443e-05) (hash(x)=52689521) +34600 val loss 6.0547 +34600 val perplexity 426.1279 +34600 train 6.054193 (lr=2.9617e-05) (hash(x)=49786659) +31000 val loss 5.8193 +31000 val perplexity 336.7403 +31000 train 5.759248 (lr=1.9318e-05) (hash(x)=47114906) +33100 val loss 5.9088 +33100 val perplexity 368.2554 +33100 train 6.310955 (lr=2.3269e-05) (hash(x)=65258285) +34700 val loss 6.0519 +34700 val perplexity 424.9346 +34700 train 5.902277 (lr=2.9383e-05) (hash(x)=46244488) +31100 val loss 5.8122 +31100 val perplexity 334.3466 +31100 train 5.677906 (lr=1.9186e-05) (hash(x)=46927606) +34800 val loss 6.0848 +34800 val perplexity 439.1124 +34800 train 5.994447 (lr=2.9150e-05) (hash(x)=48889546) +33200 val loss 5.9072 +33200 val perplexity 367.6724 +33200 train 5.749067 (lr=2.3095e-05) (hash(x)=50703450) +31200 val loss 5.8183 +31200 val perplexity 336.3913 +31200 train 5.783774 (lr=1.9054e-05) (hash(x)=48254087) +34900 val loss 6.0690 +34900 val perplexity 432.2597 +33300 val loss 5.9120 +33300 val perplexity 369.4292 +34900 train 5.977556 (lr=2.8918e-05) (hash(x)=50269369) +33300 train 6.483168 (lr=2.2922e-05) (hash(x)=61774174) +31300 val loss 5.8086 +31300 val perplexity 333.1616 +31300 train 5.590723 (lr=1.8922e-05) (hash(x)=44959564) +35000 val loss 6.0642 +35000 val perplexity 430.1905 +35000 train 6.092779 (lr=2.8688e-05) (hash(x)=49753823) +33400 val loss 5.9080 +33400 val perplexity 367.9518 +33400 train 5.777689 (lr=2.2750e-05) (hash(x)=49173286) +31400 val loss 5.8141 +31400 val perplexity 334.9996 +31400 train 5.642568 (lr=1.8791e-05) (hash(x)=49444115) +35100 val loss 6.0562 +35100 val perplexity 426.7505 +35100 train 6.183851 (lr=2.8458e-05) (hash(x)=49912642) +33500 val loss 5.9103 +33500 val perplexity 368.8199 +33500 train 5.854557 (lr=2.2578e-05) (hash(x)=47446418) +31500 val loss 5.8153 +31500 val perplexity 335.3806 +31500 train 5.609188 (lr=1.8661e-05) (hash(x)=47585912) +35200 val loss 6.0579 +35200 val perplexity 427.4584 +35200 train 5.669601 (lr=2.8229e-05) (hash(x)=47694231) +33600 val loss 5.9186 +33600 val perplexity 371.8971 +33600 train 5.721021 (lr=2.2407e-05) (hash(x)=51614932) +35300 val loss 6.0538 +35300 val perplexity 425.7278 +35300 train 6.063275 (lr=2.8002e-05) (hash(x)=46875736) +31600 val loss 5.8156 +31600 val perplexity 335.4849 +31600 train 5.850074 (lr=1.8530e-05) (hash(x)=53844713) +33700 val loss 5.9139 +33700 val perplexity 370.1312 +33700 train 5.867046 (lr=2.2237e-05) (hash(x)=45913195) +35400 val loss 6.0491 +35400 val perplexity 423.7268 +35400 train 5.661890 (lr=2.7775e-05) (hash(x)=43845506) +31700 val loss 5.8164 +31700 val perplexity 335.7496 +31700 train 5.530134 (lr=1.8400e-05) (hash(x)=48596811) +33800 val loss 5.9170 +33800 val perplexity 371.3019 +33800 train 5.779035 (lr=2.2067e-05) (hash(x)=49792786) +35500 val loss 6.0538 +35500 val perplexity 425.7209 +35500 train 5.939195 (lr=2.7549e-05) (hash(x)=50826809) +31800 val loss 5.8084 +31800 val perplexity 333.0763 +33900 val loss 5.9117 +33900 val perplexity 369.3190 +31800 train 5.754044 (lr=1.8271e-05) (hash(x)=49976057) +33900 train 6.017151 (lr=2.1898e-05) (hash(x)=45744584) +35600 val loss 6.0469 +35600 val perplexity 422.7970 +35600 train 6.135223 (lr=2.7325e-05) (hash(x)=50524291) +34000 val loss 5.8977 +34000 val perplexity 364.1823 +34000 train 6.022913 (lr=2.1729e-05) (hash(x)=52120372) +31900 val loss 5.8145 +31900 val perplexity 335.1290 +31900 train 5.859596 (lr=1.8141e-05) (hash(x)=50115370) +35700 val loss 6.0618 +35700 val perplexity 429.1443 +35700 train 6.273353 (lr=2.7102e-05) (hash(x)=52216987) +34100 val loss 5.9006 +34100 val perplexity 365.2421 +34100 train 5.962339 (lr=2.1561e-05) (hash(x)=50537607) +32000 val loss 5.8131 +32000 val perplexity 334.6646 +32000 train 5.468124 (lr=1.8013e-05) (hash(x)=43312539) +35800 val loss 6.0367 +35800 val perplexity 418.5175 +35800 train 5.860612 (lr=2.6880e-05) (hash(x)=50235576) +34200 val loss 5.8942 +34200 val perplexity 362.9442 +34200 train 6.056231 (lr=2.1394e-05) (hash(x)=60272478) +32100 val loss 5.8010 +32100 val perplexity 330.6225 +32100 train 5.843699 (lr=1.7884e-05) (hash(x)=52783562) +35900 val loss 6.0343 +35900 val perplexity 417.5143 +35900 train 6.108708 (lr=2.6659e-05) (hash(x)=50543237) +34300 val loss 5.8940 +34300 val perplexity 362.8556 +34300 train 5.896127 (lr=2.1227e-05) (hash(x)=53956932) +32200 val loss 5.7962 +32200 val perplexity 329.0571 +32200 train 5.786126 (lr=1.7756e-05) (hash(x)=45183919) +36000 val loss 6.0300 +36000 val perplexity 415.7175 +36000 train 5.974373 (lr=2.6439e-05) (hash(x)=51157351) +34400 val loss 5.8889 +34400 val perplexity 361.0084 +34400 train 6.019978 (lr=2.1062e-05) (hash(x)=50945849) +32300 val loss 5.8007 +32300 val perplexity 330.5468 +32300 train 6.101268 (lr=1.7628e-05) (hash(x)=56847952) +36100 val loss 6.0281 +36100 val perplexity 414.9087 +36100 train 6.208079 (lr=2.6220e-05) (hash(x)=56249961) +34500 val loss 5.8887 +34500 val perplexity 360.9193 +34500 train 5.725052 (lr=2.0896e-05) (hash(x)=48188401) +32400 val loss 5.7938 +32400 val perplexity 328.2621 +32400 train 5.494508 (lr=1.7501e-05) (hash(x)=44188499) +36200 val loss 6.0284 +36200 val perplexity 415.0435 +36200 train 6.047647 (lr=2.6002e-05) (hash(x)=47660647) +34600 val loss 5.8902 +34600 val perplexity 361.4904 +34600 train 5.885980 (lr=2.0732e-05) (hash(x)=49786659) +32500 val loss 5.7944 +32500 val perplexity 328.4642 +32500 train 5.615626 (lr=1.7374e-05) (hash(x)=48357180) +36300 val loss 6.0360 +36300 val perplexity 418.2199 +36300 train 5.883045 (lr=2.5786e-05) (hash(x)=47677413) +34700 val loss 5.8878 +34700 val perplexity 360.6175 +34700 train 5.738782 (lr=2.0568e-05) (hash(x)=46244488) +32600 val loss 5.7934 +32600 val perplexity 328.1205 +32600 train 5.845021 (lr=1.7247e-05) (hash(x)=47861550) +36400 val loss 6.0340 +36400 val perplexity 417.3993 +36400 train 5.990715 (lr=2.5570e-05) (hash(x)=49911324) +34800 val loss 5.9131 +34800 val perplexity 369.8688 +34800 train 5.841129 (lr=2.0405e-05) (hash(x)=48889546) +36500 val loss 6.0346 +36500 val perplexity 417.6215 +36500 train 5.786546 (lr=2.5356e-05) (hash(x)=44970078) +32700 val loss 5.7922 +32700 val perplexity 327.7263 +32700 train 5.950799 (lr=1.7121e-05) (hash(x)=55305367) +34900 val loss 5.8935 +34900 val perplexity 362.6881 +34900 train 5.806423 (lr=2.0243e-05) (hash(x)=50269369) +36600 val loss 6.0291 +36600 val perplexity 415.3379 +36600 train 6.138453 (lr=2.5143e-05) (hash(x)=50037127) +32800 val loss 5.7920 +32800 val perplexity 327.6837 +32800 train 5.637249 (lr=1.6995e-05) (hash(x)=51097743) +35000 val loss 5.8909 +35000 val perplexity 361.7392 +35000 train 5.956672 (lr=2.0081e-05) (hash(x)=49753823) +36700 val loss 6.0322 +36700 val perplexity 416.6345 +36700 train 6.234177 (lr=2.4931e-05) (hash(x)=54883021) +32900 val loss 5.7902 +32900 val perplexity 327.0726 +32900 train 5.650558 (lr=1.6870e-05) (hash(x)=46776597) +35100 val loss 5.8932 +35100 val perplexity 362.5643 +35100 train 6.001988 (lr=1.9921e-05) (hash(x)=49912642) +36800 val loss 6.0331 +36800 val perplexity 417.0042 +36800 train 5.888019 (lr=2.4721e-05) (hash(x)=46663356) +33000 val loss 5.7891 +33000 val perplexity 326.7310 +33000 train 5.755414 (lr=1.6745e-05) (hash(x)=52689521) +35200 val loss 5.8927 +35200 val perplexity 362.3875 +35200 train 5.496660 (lr=1.9760e-05) (hash(x)=47694231) +36900 val loss 6.0382 +36900 val perplexity 419.1367 +36900 train 6.160713 (lr=2.4511e-05) (hash(x)=48940046) +33100 val loss 5.7988 +33100 val perplexity 329.8875 +33100 train 6.203252 (lr=1.6621e-05) (hash(x)=65258285) +35300 val loss 5.8883 +35300 val perplexity 360.7856 +35300 train 5.921743 (lr=1.9601e-05) (hash(x)=46875736) +37000 val loss 6.0342 +37000 val perplexity 417.4529 +37000 train 5.855203 (lr=2.4303e-05) (hash(x)=46161293) +35400 val loss 5.8887 +35400 val perplexity 360.9275 +35400 train 5.493360 (lr=1.9442e-05) (hash(x)=43845506) +33200 val loss 5.7869 +33200 val perplexity 325.9870 +33200 train 5.625841 (lr=1.6497e-05) (hash(x)=50703450) +37100 val loss 6.0358 +37100 val perplexity 418.1170 +37100 train 5.873707 (lr=2.4096e-05) (hash(x)=42908688) +35500 val loss 5.8901 +35500 val perplexity 361.4282 +35500 train 5.765256 (lr=1.9285e-05) (hash(x)=50826809) +33300 val loss 5.7912 +33300 val perplexity 327.4146 +33300 train 6.370647 (lr=1.6373e-05) (hash(x)=61774174) +37200 val loss 6.0285 +37200 val perplexity 415.0825 +37200 train 6.009706 (lr=2.3890e-05) (hash(x)=52582908) +35600 val loss 5.8888 +35600 val perplexity 360.9549 +35600 train 6.004541 (lr=1.9128e-05) (hash(x)=50524291) +33400 val loss 5.7938 +33400 val perplexity 328.2541 +33400 train 5.669628 (lr=1.6250e-05) (hash(x)=49173286) +37300 val loss 6.0296 +37300 val perplexity 415.5611 +37300 train 5.693473 (lr=2.3686e-05) (hash(x)=44666734) +35700 val loss 5.8860 +35700 val perplexity 359.9513 +35700 train 6.078401 (lr=1.8971e-05) (hash(x)=52216987) +33500 val loss 5.7916 +33500 val perplexity 327.5302 +33500 train 5.733210 (lr=1.6127e-05) (hash(x)=47446418) +37400 val loss 6.0345 +37400 val perplexity 417.5850 +37400 train 5.905849 (lr=2.3483e-05) (hash(x)=47663948) +35800 val loss 5.8798 +35800 val perplexity 357.7310 +35800 train 5.675416 (lr=1.8816e-05) (hash(x)=50235576) +33600 val loss 5.7891 +33600 val perplexity 326.7303 +37500 val loss 6.0235 +37500 val perplexity 413.0074 +33600 train 5.602964 (lr=1.6005e-05) (hash(x)=51614932) +37500 train 5.769259 (lr=2.3281e-05) (hash(x)=48845252) +35900 val loss 5.8755 +35900 val perplexity 356.2043 +35900 train 5.949220 (lr=1.8661e-05) (hash(x)=50543237) +37600 val loss 6.0276 +37600 val perplexity 414.7076 +37600 train 6.024345 (lr=2.3080e-05) (hash(x)=48827420) +33700 val loss 5.7898 +33700 val perplexity 326.9430 +33700 train 5.764343 (lr=1.5883e-05) (hash(x)=45913195) +36000 val loss 5.8838 +36000 val perplexity 359.1731 +36000 train 5.843504 (lr=1.8507e-05) (hash(x)=51157351) +37700 val loss 6.0278 +37700 val perplexity 414.7928 +37700 train 5.775115 (lr=2.2881e-05) (hash(x)=42619596) +33800 val loss 5.7859 +33800 val perplexity 325.6591 +33800 train 5.655394 (lr=1.5762e-05) (hash(x)=49792786) +36100 val loss 5.8823 +36100 val perplexity 358.6499 +36100 train 6.057877 (lr=1.8354e-05) (hash(x)=56249961) +37800 val loss 6.0209 +37800 val perplexity 411.9432 +37800 train 5.823175 (lr=2.2682e-05) (hash(x)=46710820) +33900 val loss 5.7851 +33900 val perplexity 325.4233 +33900 train 5.849180 (lr=1.5641e-05) (hash(x)=45744584) +36200 val loss 5.8807 +36200 val perplexity 358.0562 +36200 train 5.917603 (lr=1.8201e-05) (hash(x)=47660647) +37900 val loss 6.0177 +37900 val perplexity 410.6476 +37900 train 5.842571 (lr=2.2485e-05) (hash(x)=46931675) +34000 val loss 5.7809 +34000 val perplexity 324.0402 +34000 train 5.905314 (lr=1.5521e-05) (hash(x)=52120372) +36300 val loss 5.8758 +36300 val perplexity 356.3149 +36300 train 5.735182 (lr=1.8050e-05) (hash(x)=47677413) +38000 val loss 6.0299 +38000 val perplexity 415.6608 +38000 train 6.357445 (lr=2.2290e-05) (hash(x)=57298851) +34100 val loss 5.7771 +34100 val perplexity 322.8132 +34100 train 5.826293 (lr=1.5401e-05) (hash(x)=50537607) +36400 val loss 5.8797 +36400 val perplexity 357.6982 +36400 train 5.847999 (lr=1.7899e-05) (hash(x)=49911324) +38100 val loss 6.0191 +38100 val perplexity 411.1990 +38100 train 5.991945 (lr=2.2096e-05) (hash(x)=50789082) +34200 val loss 5.7754 +34200 val perplexity 322.2574 +34200 train 5.927985 (lr=1.5281e-05) (hash(x)=60272478) +36500 val loss 5.8781 +36500 val perplexity 357.1174 +36500 train 5.599643 (lr=1.7749e-05) (hash(x)=44970078) +38200 val loss 6.0243 +38200 val perplexity 413.3505 +38200 train 5.857540 (lr=2.1903e-05) (hash(x)=48859326) +34300 val loss 5.7720 +34300 val perplexity 321.1782 +34300 train 5.768917 (lr=1.5162e-05) (hash(x)=53956932) +36600 val loss 5.8781 +36600 val perplexity 357.1343 +36600 train 6.013189 (lr=1.7600e-05) (hash(x)=50037127) +38300 val loss 6.0233 +38300 val perplexity 412.9192 +38300 train 5.825671 (lr=2.1711e-05) (hash(x)=44063292) +36700 val loss 5.8785 +36700 val perplexity 357.2593 +36700 train 6.104803 (lr=1.7452e-05) (hash(x)=54883021) +34400 val loss 5.7735 +34400 val perplexity 321.6600 +34400 train 5.918812 (lr=1.5044e-05) (hash(x)=50945849) +38400 val loss 6.0199 +38400 val perplexity 411.5505 +38400 train 6.051054 (lr=2.1521e-05) (hash(x)=49133775) +36800 val loss 5.8765 +36800 val perplexity 356.5641 +36800 train 5.754755 (lr=1.7305e-05) (hash(x)=46663356) +34500 val loss 5.7743 +34500 val perplexity 321.9273 +34500 train 5.601540 (lr=1.4926e-05) (hash(x)=48188401) +38500 val loss 6.0181 +38500 val perplexity 410.8045 +38500 train 6.339755 (lr=2.1332e-05) (hash(x)=55413093) +36900 val loss 5.8767 +36900 val perplexity 356.6270 +36900 train 5.987236 (lr=1.7158e-05) (hash(x)=48940046) +34600 val loss 5.7738 +34600 val perplexity 321.7661 +34600 train 5.775709 (lr=1.4809e-05) (hash(x)=49786659) +38600 val loss 6.0172 +38600 val perplexity 410.4198 +38600 train 6.173201 (lr=2.1144e-05) (hash(x)=55793283) +37000 val loss 5.8809 +37000 val perplexity 358.1488 +37000 train 5.700736 (lr=1.7012e-05) (hash(x)=46161293) +34700 val loss 5.7697 +34700 val perplexity 320.4531 +34700 train 5.627788 (lr=1.4692e-05) (hash(x)=46244488) +38700 val loss 6.0202 +38700 val perplexity 411.6750 +38700 train 6.140886 (lr=2.0957e-05) (hash(x)=53927557) +37100 val loss 5.8786 +37100 val perplexity 357.3019 +37100 train 5.733128 (lr=1.6867e-05) (hash(x)=42908688) +38800 val loss 6.0211 +38800 val perplexity 412.0184 +38800 train 6.147671 (lr=2.0773e-05) (hash(x)=43344108) +34800 val loss 5.7935 +34800 val perplexity 328.1708 +34800 train 5.718929 (lr=1.4575e-05) (hash(x)=48889546) +37200 val loss 5.8761 +37200 val perplexity 356.4123 +37200 train 5.864752 (lr=1.6723e-05) (hash(x)=52582908) +38900 val loss 6.0173 +38900 val perplexity 410.4677 +38900 train 6.138345 (lr=2.0589e-05) (hash(x)=53178884) +34900 val loss 5.7731 +34900 val perplexity 321.5447 +34900 train 5.682244 (lr=1.4459e-05) (hash(x)=50269369) +37300 val loss 5.8746 +37300 val perplexity 355.8726 +37300 train 5.545277 (lr=1.6580e-05) (hash(x)=44666734) +39000 val loss 6.0148 +39000 val perplexity 409.4432 +39000 train 5.828174 (lr=2.0407e-05) (hash(x)=46845908) +35000 val loss 5.7715 +35000 val perplexity 321.0240 +35000 train 5.853223 (lr=1.4344e-05) (hash(x)=49753823) +37400 val loss 5.8751 +37400 val perplexity 356.0641 +37400 train 5.769648 (lr=1.6438e-05) (hash(x)=47663948) +39100 val loss 6.0169 +39100 val perplexity 410.3182 +39100 train 5.741581 (lr=2.0226e-05) (hash(x)=42267775) +35100 val loss 5.7750 +35100 val perplexity 322.1578 +35100 train 5.853804 (lr=1.4229e-05) (hash(x)=49912642) +37500 val loss 5.8807 +37500 val perplexity 358.0759 +37500 train 5.625692 (lr=1.6297e-05) (hash(x)=48845252) +39200 val loss 6.0208 +39200 val perplexity 411.9021 +39200 train 5.818017 (lr=2.0046e-05) (hash(x)=47475117) +35200 val loss 5.7795 +35200 val perplexity 323.6026 +35200 train 5.396245 (lr=1.4115e-05) (hash(x)=47694231) +37600 val loss 5.8735 +37600 val perplexity 355.5076 +37600 train 5.842901 (lr=1.6156e-05) (hash(x)=48827420) +39300 val loss 6.0160 +39300 val perplexity 409.9376 +39300 train 5.838203 (lr=1.9868e-05) (hash(x)=47149754) +35300 val loss 5.7738 +35300 val perplexity 321.7464 +35300 train 5.824560 (lr=1.4001e-05) (hash(x)=46875736) +37700 val loss 5.8740 +37700 val perplexity 355.6829 +37700 train 5.643019 (lr=1.6016e-05) (hash(x)=42619596) +39400 val loss 6.0176 +39400 val perplexity 410.6106 +39400 train 5.873120 (lr=1.9692e-05) (hash(x)=52368687) +37800 val loss 5.8657 +37800 val perplexity 352.7237 +37800 train 5.676892 (lr=1.5878e-05) (hash(x)=46710820) +35400 val loss 5.7717 +35400 val perplexity 321.0892 +35400 train 5.388574 (lr=1.3887e-05) (hash(x)=43845506) +39500 val loss 6.0201 +39500 val perplexity 411.6263 +39500 train 6.025097 (lr=1.9516e-05) (hash(x)=47372926) +37900 val loss 5.8645 +37900 val perplexity 352.3048 +37900 train 5.704661 (lr=1.5740e-05) (hash(x)=46931675) +35500 val loss 5.7720 +35500 val perplexity 321.1681 +35500 train 5.665552 (lr=1.3775e-05) (hash(x)=50826809) +39600 val loss 6.0174 +39600 val perplexity 410.4912 +39600 train 5.912035 (lr=1.9342e-05) (hash(x)=47724175) +38000 val loss 5.8842 +38000 val perplexity 359.3195 +38000 train 6.250093 (lr=1.5603e-05) (hash(x)=57298851) +35600 val loss 5.7689 +35600 val perplexity 320.1819 +39700 val loss 6.0136 +39700 val perplexity 408.9589 +35600 train 5.883683 (lr=1.3663e-05) (hash(x)=50524291) +39700 train 6.339597 (lr=1.9170e-05) (hash(x)=60806190) +38100 val loss 5.8659 +38100 val perplexity 352.8086 +38100 train 5.858781 (lr=1.5467e-05) (hash(x)=50789082) +39800 val loss 6.0097 +39800 val perplexity 407.3647 +39800 train 6.532265 (lr=1.8999e-05) (hash(x)=62893661) +35700 val loss 5.7724 +35700 val perplexity 321.3131 +35700 train 5.985587 (lr=1.3551e-05) (hash(x)=52216987) +38200 val loss 5.8704 +38200 val perplexity 354.3875 +38200 train 5.695572 (lr=1.5332e-05) (hash(x)=48859326) +39900 val loss 6.0074 +39900 val perplexity 406.4237 +39900 train 5.907340 (lr=1.8829e-05) (hash(x)=48064474) +35800 val loss 5.7640 +35800 val perplexity 318.6151 +35800 train 5.542949 (lr=1.3440e-05) (hash(x)=50235576) +38300 val loss 5.8656 +38300 val perplexity 352.6771 +38300 train 5.675591 (lr=1.5198e-05) (hash(x)=44063292) +40000 val loss 6.0153 +40000 val perplexity 409.6641 +40000 train 5.917370 (lr=1.8661e-05) (hash(x)=41838449) +35900 val loss 5.7611 +35900 val perplexity 317.6818 +35900 train 5.893141 (lr=1.3329e-05) (hash(x)=50543237) +38400 val loss 5.8700 +38400 val perplexity 354.2466 +38400 train 5.914752 (lr=1.5064e-05) (hash(x)=49133775) +40100 val loss 6.0126 +40100 val perplexity 408.5424 +40100 train 6.161049 (lr=1.8494e-05) (hash(x)=54447981) +36000 val loss 5.7639 +36000 val perplexity 318.6034 +36000 train 5.702169 (lr=1.3219e-05) (hash(x)=51157351) +38500 val loss 5.8694 +38500 val perplexity 354.0274 +38500 train 6.215326 (lr=1.4932e-05) (hash(x)=55413093) +40200 val loss 6.0044 +40200 val perplexity 405.1955 +40200 train 6.114748 (lr=1.8329e-05) (hash(x)=52957764) +36100 val loss 5.7585 +36100 val perplexity 316.8869 +36100 train 5.943677 (lr=1.3110e-05) (hash(x)=56249961) +38600 val loss 5.8666 +38600 val perplexity 353.0400 +38600 train 6.001817 (lr=1.4801e-05) (hash(x)=55793283) +40300 val loss 6.0094 +40300 val perplexity 407.2220 +40300 train 5.793697 (lr=1.8165e-05) (hash(x)=46269236) +36200 val loss 5.7595 +36200 val perplexity 317.1833 +36200 train 5.792177 (lr=1.3001e-05) (hash(x)=47660647) +38700 val loss 5.8691 +38700 val perplexity 353.9442 +38700 train 6.014698 (lr=1.4670e-05) (hash(x)=53927557) +40400 val loss 6.0090 +40400 val perplexity 407.0680 +40400 train 5.709672 (lr=1.8003e-05) (hash(x)=50030150) +36300 val loss 5.7578 +36300 val perplexity 316.6529 +36300 train 5.585438 (lr=1.2893e-05) (hash(x)=47677413) +38800 val loss 5.8746 +38800 val perplexity 355.8702 +38800 train 5.981199 (lr=1.4541e-05) (hash(x)=43344108) +40500 val loss 6.0091 +40500 val perplexity 407.1092 +40500 train 5.945661 (lr=1.7842e-05) (hash(x)=50278486) +36400 val loss 5.7579 +36400 val perplexity 316.6913 +36400 train 5.734710 (lr=1.2785e-05) (hash(x)=49911324) +38900 val loss 5.8659 +38900 val perplexity 352.8017 +38900 train 5.980952 (lr=1.4412e-05) (hash(x)=53178884) +40600 val loss 6.0130 +40600 val perplexity 408.6992 +40600 train 6.012603 (lr=1.7683e-05) (hash(x)=52882717) +36500 val loss 5.7538 +36500 val perplexity 315.3861 +36500 train 5.428922 (lr=1.2678e-05) (hash(x)=44970078) +39000 val loss 5.8667 +39000 val perplexity 353.0644 +39000 train 5.695461 (lr=1.4285e-05) (hash(x)=46845908) +40700 val loss 6.0072 +40700 val perplexity 406.3611 +40700 train 6.340620 (lr=1.7525e-05) (hash(x)=60687159) +36600 val loss 5.7537 +36600 val perplexity 315.3617 +39100 val loss 5.8674 +39100 val perplexity 353.3388 +36600 train 5.872190 (lr=1.2572e-05) (hash(x)=50037127) +39100 train 5.587812 (lr=1.4158e-05) (hash(x)=42267775) +40800 val loss 6.0074 +40800 val perplexity 406.4348 +40800 train 5.728396 (lr=1.7368e-05) (hash(x)=46019249) +39200 val loss 5.8643 +39200 val perplexity 352.2324 +39200 train 5.671804 (lr=1.4032e-05) (hash(x)=47475117) +36700 val loss 5.7542 +36700 val perplexity 315.5269 +36700 train 5.990573 (lr=1.2466e-05) (hash(x)=54883021) +40900 val loss 6.0061 +40900 val perplexity 405.8801 +40900 train 5.923114 (lr=1.7214e-05) (hash(x)=52139240) +39300 val loss 5.8652 +39300 val perplexity 352.5483 +39300 train 5.703058 (lr=1.3908e-05) (hash(x)=47149754) +41000 val loss 6.0021 +41000 val perplexity 404.2904 +41000 train 6.110263 (lr=1.7060e-05) (hash(x)=54091062) +36800 val loss 5.7542 +36800 val perplexity 315.5049 +36800 train 5.683995 (lr=1.2360e-05) (hash(x)=46663356) +39400 val loss 5.8684 +39400 val perplexity 353.6965 +39400 train 5.731959 (lr=1.3784e-05) (hash(x)=52368687) +41100 val loss 6.0081 +41100 val perplexity 406.7184 +41100 train 5.649855 (lr=1.6908e-05) (hash(x)=47202820) +36900 val loss 5.7522 +36900 val perplexity 314.8864 +36900 train 5.908504 (lr=1.2256e-05) (hash(x)=48940046) +39500 val loss 5.8683 +39500 val perplexity 353.6447 +39500 train 5.902698 (lr=1.3661e-05) (hash(x)=47372926) +41200 val loss 6.0077 +41200 val perplexity 406.5328 +41200 train 6.080298 (lr=1.6758e-05) (hash(x)=49488627) +37000 val loss 5.7543 +37000 val perplexity 315.5443 +37000 train 5.573208 (lr=1.2152e-05) (hash(x)=46161293) +39600 val loss 5.8658 +39600 val perplexity 352.7728 +39600 train 5.778934 (lr=1.3540e-05) (hash(x)=47724175) +41300 val loss 6.0021 +41300 val perplexity 404.2684 +41300 train 5.831370 (lr=1.6609e-05) (hash(x)=41876005) +37100 val loss 5.7504 +37100 val perplexity 314.3089 +37100 train 5.594541 (lr=1.2048e-05) (hash(x)=42908688) +39700 val loss 5.8668 +39700 val perplexity 353.1139 +39700 train 6.198074 (lr=1.3419e-05) (hash(x)=60806190) +41400 val loss 6.0026 +41400 val perplexity 404.4610 +41400 train 5.797643 (lr=1.6462e-05) (hash(x)=48467862) +37200 val loss 5.7490 +37200 val perplexity 313.8653 +37200 train 5.729559 (lr=1.1945e-05) (hash(x)=52582908) +39800 val loss 5.8576 +39800 val perplexity 349.8755 +39800 train 6.381696 (lr=1.3299e-05) (hash(x)=62893661) +41500 val loss 6.0033 +41500 val perplexity 404.7620 +41500 train 5.890572 (lr=1.6316e-05) (hash(x)=45730470) +37300 val loss 5.7477 +37300 val perplexity 313.4812 +37300 train 5.416309 (lr=1.1843e-05) (hash(x)=44666734) +39900 val loss 5.8562 +39900 val perplexity 349.3998 +39900 train 5.746709 (lr=1.3180e-05) (hash(x)=48064474) +41600 val loss 5.9980 +41600 val perplexity 402.6295 +41600 train 6.483867 (lr=1.6172e-05) (hash(x)=55724900) +37400 val loss 5.7478 +37400 val perplexity 313.5030 +37400 train 5.602635 (lr=1.1741e-05) (hash(x)=47663948) +40000 val loss 5.8599 +40000 val perplexity 350.6727 +40000 train 5.795778 (lr=1.3063e-05) (hash(x)=41838449) +41700 val loss 5.9950 +41700 val perplexity 401.3987 +41700 train 5.756382 (lr=1.6029e-05) (hash(x)=50772972) +37500 val loss 5.7467 +37500 val perplexity 313.1615 +37500 train 5.478420 (lr=1.1640e-05) (hash(x)=48845252) +40100 val loss 5.8550 +40100 val perplexity 348.9914 +41800 val loss 5.9937 +41800 val perplexity 400.8847 +40100 train 6.017983 (lr=1.2946e-05) (hash(x)=54447981) +41800 train 5.950139 (lr=1.5888e-05) (hash(x)=46673554) +37600 val loss 5.7434 +37600 val perplexity 312.1137 +37600 train 5.727497 (lr=1.1540e-05) (hash(x)=48827420) +41900 val loss 5.9915 +41900 val perplexity 400.0077 +41900 train 5.779106 (lr=1.5748e-05) (hash(x)=40133757) +40200 val loss 5.8538 +40200 val perplexity 348.5566 +40200 train 5.964606 (lr=1.2830e-05) (hash(x)=52957764) +37700 val loss 5.7451 +37700 val perplexity 312.6564 +37700 train 5.531669 (lr=1.1440e-05) (hash(x)=42619596) +42000 val loss 5.9865 +42000 val perplexity 398.0369 +42000 train 5.929700 (lr=1.5610e-05) (hash(x)=49886199) +40300 val loss 5.8528 +40300 val perplexity 348.2002 +40300 train 5.652048 (lr=1.2716e-05) (hash(x)=46269236) +37800 val loss 5.7406 +37800 val perplexity 311.2387 +37800 train 5.557806 (lr=1.1341e-05) (hash(x)=46710820) +42100 val loss 5.9875 +42100 val perplexity 398.4136 +42100 train 5.939185 (lr=1.5474e-05) (hash(x)=47568153) +40400 val loss 5.8544 +40400 val perplexity 348.7828 +40400 train 5.554566 (lr=1.2602e-05) (hash(x)=50030150) +37900 val loss 5.7449 +37900 val perplexity 312.6023 +37900 train 5.555627 (lr=1.1243e-05) (hash(x)=46931675) +42200 val loss 5.9856 +42200 val perplexity 397.6518 +42200 train 5.843505 (lr=1.5339e-05) (hash(x)=48372799) +40500 val loss 5.8531 +40500 val perplexity 348.2988 +40500 train 5.803468 (lr=1.2489e-05) (hash(x)=50278486) +38000 val loss 5.7608 +38000 val perplexity 317.5871 +38000 train 6.116046 (lr=1.1145e-05) (hash(x)=57298851) +42300 val loss 5.9870 +42300 val perplexity 398.2326 +42300 train 5.843259 (lr=1.5205e-05) (hash(x)=48060142) +40600 val loss 5.8645 +40600 val perplexity 352.3066 +40600 train 5.861084 (lr=1.2378e-05) (hash(x)=52882717) +42400 val loss 5.9924 +42400 val perplexity 400.3693 +42400 train 5.941577 (lr=1.5074e-05) (hash(x)=50489549) +40700 val loss 5.8529 +40700 val perplexity 348.2407 +40700 train 6.192591 (lr=1.2267e-05) (hash(x)=60687159) +38100 val loss 5.7432 +38100 val perplexity 312.0733 +38100 train 5.756608 (lr=1.1048e-05) (hash(x)=50789082) +42500 val loss 5.9869 +42500 val perplexity 398.1690 +42500 train 5.923371 (lr=1.4943e-05) (hash(x)=45655508) +40800 val loss 5.8554 +40800 val perplexity 349.1304 +40800 train 5.579791 (lr=1.2158e-05) (hash(x)=46019249) +38200 val loss 5.7390 +38200 val perplexity 310.7654 +38200 train 5.550455 (lr=1.0951e-05) (hash(x)=48859326) +42600 val loss 5.9908 +42600 val perplexity 399.7300 +42600 train 5.923755 (lr=1.4815e-05) (hash(x)=44196591) +40900 val loss 5.8550 +40900 val perplexity 348.9842 +40900 train 5.795238 (lr=1.2049e-05) (hash(x)=52139240) +38300 val loss 5.7407 +38300 val perplexity 311.2957 +38300 train 5.570664 (lr=1.0855e-05) (hash(x)=44063292) +42700 val loss 5.9888 +42700 val perplexity 398.9510 +42700 train 5.928371 (lr=1.4688e-05) (hash(x)=52081281) +41000 val loss 5.8547 +41000 val perplexity 348.8621 +41000 train 5.958921 (lr=1.1942e-05) (hash(x)=54091062) +38400 val loss 5.7415 +38400 val perplexity 311.5218 +38400 train 5.811736 (lr=1.0760e-05) (hash(x)=49133775) +42800 val loss 5.9940 +42800 val perplexity 401.0011 +42800 train 5.928609 (lr=1.4563e-05) (hash(x)=49992716) +41100 val loss 5.8545 +41100 val perplexity 348.7994 +41100 train 5.478621 (lr=1.1836e-05) (hash(x)=47202820) +38500 val loss 5.7402 +38500 val perplexity 311.1176 +38500 train 6.089962 (lr=1.0666e-05) (hash(x)=55413093) +42900 val loss 5.9957 +42900 val perplexity 401.6918 +42900 train 5.810283 (lr=1.4439e-05) (hash(x)=49454997) +41200 val loss 5.8556 +41200 val perplexity 349.1941 +41200 train 5.921623 (lr=1.1730e-05) (hash(x)=49488627) +38600 val loss 5.7403 +38600 val perplexity 311.1495 +38600 train 5.870380 (lr=1.0572e-05) (hash(x)=55793283) +43000 val loss 5.9974 +43000 val perplexity 402.3698 +43000 train 5.917299 (lr=1.4317e-05) (hash(x)=50905292) +41300 val loss 5.8513 +41300 val perplexity 347.6880 +41300 train 5.720685 (lr=1.1626e-05) (hash(x)=41876005) +38700 val loss 5.7392 +38700 val perplexity 310.8302 +38700 train 5.858924 (lr=1.0479e-05) (hash(x)=53927557) +43100 val loss 5.9920 +43100 val perplexity 400.2118 +43100 train 5.737865 (lr=1.4196e-05) (hash(x)=49656461) +41400 val loss 5.8522 +41400 val perplexity 347.9904 +41400 train 5.644863 (lr=1.1523e-05) (hash(x)=48467862) +38800 val loss 5.7402 +38800 val perplexity 311.1225 +38800 train 5.856015 (lr=1.0386e-05) (hash(x)=43344108) +43200 val loss 5.9925 +43200 val perplexity 400.4185 +43200 train 5.857344 (lr=1.4077e-05) (hash(x)=50601940) +41500 val loss 5.8613 +41500 val perplexity 351.1702 +41500 train 5.738167 (lr=1.1421e-05) (hash(x)=45730470) +38900 val loss 5.7403 +38900 val perplexity 311.1609 +43300 val loss 5.9901 +43300 val perplexity 399.4362 +38900 train 5.856177 (lr=1.0294e-05) (hash(x)=53178884) +43300 train 5.737657 (lr=1.3960e-05) (hash(x)=42879970) +41600 val loss 5.8504 +41600 val perplexity 347.3882 +41600 train 6.378869 (lr=1.1320e-05) (hash(x)=55724900) +43400 val loss 5.9853 +43400 val perplexity 397.5317 +43400 train 6.352472 (lr=1.3844e-05) (hash(x)=50883335) +39000 val loss 5.7352 +39000 val perplexity 309.5599 +39000 train 5.555748 (lr=1.0203e-05) (hash(x)=46845908) +41700 val loss 5.8477 +41700 val perplexity 346.4245 +41700 train 5.620795 (lr=1.1220e-05) (hash(x)=50772972) +43500 val loss 5.9850 +43500 val perplexity 397.4409 +43500 train 5.836988 (lr=1.3730e-05) (hash(x)=55373094) +39100 val loss 5.7359 +39100 val perplexity 309.7995 +39100 train 5.444112 (lr=1.0113e-05) (hash(x)=42267775) +41800 val loss 5.8504 +41800 val perplexity 347.3832 +41800 train 5.811529 (lr=1.1121e-05) (hash(x)=46673554) +43600 val loss 5.9849 +43600 val perplexity 397.3957 +43600 train 5.569566 (lr=1.3618e-05) (hash(x)=37498029) +41900 val loss 5.8452 +41900 val perplexity 345.5646 +41900 train 5.664166 (lr=1.1024e-05) (hash(x)=40133757) +39200 val loss 5.7357 +39200 val perplexity 309.7195 +39200 train 5.551753 (lr=1.0023e-05) (hash(x)=47475117) +43700 val loss 5.9805 +43700 val perplexity 395.6265 +43700 train 5.672332 (lr=1.3507e-05) (hash(x)=33982416) +42000 val loss 5.8434 +42000 val perplexity 344.9615 +42000 train 5.788061 (lr=1.0927e-05) (hash(x)=49886199) +39300 val loss 5.7356 +39300 val perplexity 309.6834 +39300 train 5.576594 (lr=9.9341e-06) (hash(x)=47149754) +43800 val loss 5.9765 +43800 val perplexity 394.0681 +43800 train 5.896466 (lr=1.3398e-05) (hash(x)=49434495) +42100 val loss 5.8449 +42100 val perplexity 345.4708 +42100 train 5.816408 (lr=1.0831e-05) (hash(x)=47568153) +39400 val loss 5.7351 +39400 val perplexity 309.5369 +39400 train 5.576975 (lr=9.8458e-06) (hash(x)=52368687) +43900 val loss 5.9798 +43900 val perplexity 395.3504 +43900 train 5.775621 (lr=1.3291e-05) (hash(x)=48424180) +42200 val loss 5.8408 +42200 val perplexity 344.0426 +42200 train 5.707599 (lr=1.0737e-05) (hash(x)=48372799) +39500 val loss 5.7372 +39500 val perplexity 310.1913 +39500 train 5.776249 (lr=9.7581e-06) (hash(x)=47372926) +44000 val loss 5.9815 +44000 val perplexity 396.0456 +44000 train 5.882079 (lr=1.3185e-05) (hash(x)=51351008) +42300 val loss 5.8461 +42300 val perplexity 345.8743 +42300 train 5.709271 (lr=1.0644e-05) (hash(x)=48060142) +39600 val loss 5.7349 +39600 val perplexity 309.4761 +39600 train 5.649189 (lr=9.6712e-06) (hash(x)=47724175) +44100 val loss 5.9792 +44100 val perplexity 395.1207 +44100 train 5.945557 (lr=1.3081e-05) (hash(x)=49893042) +42400 val loss 5.8457 +42400 val perplexity 345.7350 +42400 train 5.818581 (lr=1.0552e-05) (hash(x)=50489549) +39700 val loss 5.7349 +39700 val perplexity 309.4860 +39700 train 6.062971 (lr=9.5849e-06) (hash(x)=60806190) +44200 val loss 5.9808 +44200 val perplexity 395.7624 +44200 train 5.939646 (lr=1.2979e-05) (hash(x)=45870126) +42500 val loss 5.8443 +42500 val perplexity 345.2706 +42500 train 5.798162 (lr=1.0460e-05) (hash(x)=45655508) +44300 val loss 5.9771 +44300 val perplexity 394.2862 +44300 train 7.410599 (lr=1.2878e-05) (hash(x)=51800290) +39800 val loss 5.7295 +39800 val perplexity 307.8076 +39800 train 6.293357 (lr=9.4994e-06) (hash(x)=62893661) +42600 val loss 5.8450 +42600 val perplexity 345.5148 +42600 train 5.808473 (lr=1.0370e-05) (hash(x)=44196591) +44400 val loss 5.9726 +44400 val perplexity 392.5373 +44400 train 5.851163 (lr=1.2779e-05) (hash(x)=50241793) +39900 val loss 5.7303 +39900 val perplexity 308.0634 +39900 train 5.617630 (lr=9.4146e-06) (hash(x)=48064474) +42700 val loss 5.8455 +42700 val perplexity 345.6656 +42700 train 5.790959 (lr=1.0282e-05) (hash(x)=52081281) +44500 val loss 5.9743 +44500 val perplexity 393.2105 +44500 train 5.719716 (lr=1.2682e-05) (hash(x)=46436406) +40000 val loss 5.7338 +40000 val perplexity 309.1326 +40000 train 5.694081 (lr=9.3305e-06) (hash(x)=41838449) +42800 val loss 5.8486 +42800 val perplexity 346.7446 +42800 train 5.774008 (lr=1.0194e-05) (hash(x)=49992716) +44600 val loss 5.9772 +44600 val perplexity 394.3202 +44600 train 5.760665 (lr=1.2586e-05) (hash(x)=47314250) +40100 val loss 5.7343 +40100 val perplexity 309.3001 +40100 train 5.900437 (lr=9.2472e-06) (hash(x)=54447981) +42900 val loss 5.8460 +42900 val perplexity 345.8562 +42900 train 5.670799 (lr=1.0107e-05) (hash(x)=49454997) +44700 val loss 5.9780 +44700 val perplexity 394.6680 +44700 train 5.985039 (lr=1.2492e-05) (hash(x)=53889483) +40200 val loss 5.7264 +40200 val perplexity 306.8614 +40200 train 5.843389 (lr=9.1646e-06) (hash(x)=52957764) +43000 val loss 5.8489 +43000 val perplexity 346.8461 +43000 train 5.793878 (lr=1.0022e-05) (hash(x)=50905292) +44800 val loss 5.9836 +44800 val perplexity 396.8853 +44800 train 5.908771 (lr=1.2400e-05) (hash(x)=49309774) +40300 val loss 5.7268 +40300 val perplexity 306.9865 +40300 train 5.527029 (lr=9.0827e-06) (hash(x)=46269236) +43100 val loss 5.8441 +43100 val perplexity 345.2067 +43100 train 5.567511 (lr=9.9373e-06) (hash(x)=49656461) +44900 val loss 5.9808 +44900 val perplexity 395.7626 +44900 train 5.561521 (lr=1.2309e-05) (hash(x)=47653415) +43200 val loss 5.8457 +43200 val perplexity 345.7274 +43200 train 5.721870 (lr=9.8541e-06) (hash(x)=50601940) +40400 val loss 5.7281 +40400 val perplexity 307.3922 +40400 train 5.424637 (lr=9.0015e-06) (hash(x)=50030150) +45000 val loss 5.9806 +45000 val perplexity 395.6942 +45000 train 5.746854 (lr=1.2220e-05) (hash(x)=45963722) +43300 val loss 5.8475 +43300 val perplexity 346.3643 +43300 train 5.619240 (lr=9.7720e-06) (hash(x)=42879970) +40500 val loss 5.7317 +40500 val perplexity 308.4851 +40500 train 5.699018 (lr=8.9211e-06) (hash(x)=50278486) +45100 val loss 5.9749 +45100 val perplexity 393.4371 +45100 train 5.881765 (lr=1.2133e-05) (hash(x)=49077200) +43400 val loss 5.8427 +43400 val perplexity 344.6929 +43400 train 6.192081 (lr=9.6911e-06) (hash(x)=50883335) +40600 val loss 5.7395 +40600 val perplexity 310.9015 +40600 train 5.734146 (lr=8.8414e-06) (hash(x)=52882717) +45200 val loss 5.9708 +45200 val perplexity 391.8199 +45200 train 5.868370 (lr=1.2047e-05) (hash(x)=51016172) +43500 val loss 5.8415 +43500 val perplexity 344.2968 +43500 train 5.687728 (lr=9.6113e-06) (hash(x)=55373094) +40700 val loss 5.7258 +40700 val perplexity 306.6927 +40700 train 6.067416 (lr=8.7624e-06) (hash(x)=60687159) +45300 val loss 5.9739 +45300 val perplexity 393.0523 +45300 train 5.854820 (lr=1.1964e-05) (hash(x)=50393646) +43600 val loss 5.8405 +43600 val perplexity 343.9503 +43600 train 5.443857 (lr=9.5326e-06) (hash(x)=37498029) +40800 val loss 5.7283 +40800 val perplexity 307.4521 +40800 train 5.473702 (lr=8.6842e-06) (hash(x)=46019249) +45400 val loss 5.9688 +45400 val perplexity 391.0414 +45400 train 5.658845 (lr=1.1881e-05) (hash(x)=47534030) +43700 val loss 5.8401 +43700 val perplexity 343.8250 +43700 train 5.549407 (lr=9.4552e-06) (hash(x)=33982416) +40900 val loss 5.7269 +40900 val perplexity 307.0238 +40900 train 5.683141 (lr=8.6068e-06) (hash(x)=52139240) +45500 val loss 5.9648 +45500 val perplexity 389.4927 +45500 train 5.951511 (lr=1.1801e-05) (hash(x)=46243393) +43800 val loss 5.8359 +43800 val perplexity 342.3639 +43800 train 5.776899 (lr=9.3788e-06) (hash(x)=49434495) +41000 val loss 5.7243 +41000 val perplexity 306.2267 +41000 train 5.863314 (lr=8.5301e-06) (hash(x)=54091062) +45600 val loss 5.9686 +45600 val perplexity 390.9642 +45600 train 5.800062 (lr=1.1722e-05) (hash(x)=47947250) +43900 val loss 5.8357 +43900 val perplexity 342.3116 +43900 train 5.617706 (lr=9.3036e-06) (hash(x)=48424180) +45700 val loss 5.9635 +45700 val perplexity 388.9521 +45700 train 5.715033 (lr=1.1645e-05) (hash(x)=50616094) +41100 val loss 5.7265 +41100 val perplexity 306.8941 +41100 train 5.350012 (lr=8.4541e-06) (hash(x)=47202820) +44000 val loss 5.8332 +44000 val perplexity 341.4414 +44000 train 5.751380 (lr=9.2296e-06) (hash(x)=51351008) +45800 val loss 5.9671 +45800 val perplexity 390.3875 +45800 train 5.656040 (lr=1.1570e-05) (hash(x)=40152774) +41200 val loss 5.7265 +41200 val perplexity 306.8855 +41200 train 5.821153 (lr=8.3789e-06) (hash(x)=49488627) +44100 val loss 5.8360 +44100 val perplexity 342.4148 +44100 train 5.823476 (lr=9.1568e-06) (hash(x)=49893042) +45900 val loss 5.9672 +45900 val perplexity 390.4044 +45900 train 6.158481 (lr=1.1497e-05) (hash(x)=51348210) +41300 val loss 5.7248 +41300 val perplexity 306.3751 +41300 train 5.612682 (lr=8.3045e-06) (hash(x)=41876005) +44200 val loss 5.8349 +44200 val perplexity 342.0165 +44200 train 5.773792 (lr=9.0851e-06) (hash(x)=45870126) +46000 val loss 5.9666 +46000 val perplexity 390.1935 +46000 train 5.982023 (lr=1.1425e-05) (hash(x)=56614325) +41400 val loss 5.7258 +41400 val perplexity 306.6705 +41400 train 5.503688 (lr=8.2308e-06) (hash(x)=48467862) +44300 val loss 5.8338 +44300 val perplexity 341.6414 +44300 train 7.350794 (lr=9.0146e-06) (hash(x)=51800290) +46100 val loss 5.9664 +46100 val perplexity 390.1102 +46100 train 6.000175 (lr=1.1355e-05) (hash(x)=52083304) +41500 val loss 5.7271 +41500 val perplexity 307.0893 +41500 train 5.581468 (lr=8.1579e-06) (hash(x)=45730470) +44400 val loss 5.8364 +44400 val perplexity 342.5448 +44400 train 5.720859 (lr=8.9453e-06) (hash(x)=50241793) +46200 val loss 5.9673 +46200 val perplexity 390.4327 +46200 train 6.059251 (lr=1.1287e-05) (hash(x)=53829283) +41600 val loss 5.7263 +41600 val perplexity 306.8271 +41600 train 6.254265 (lr=8.0858e-06) (hash(x)=55724900) +44500 val loss 5.8342 +44500 val perplexity 341.7926 +44500 train 5.605958 (lr=8.8771e-06) (hash(x)=46436406) +46300 val loss 5.9755 +46300 val perplexity 393.6768 +46300 train 5.750476 (lr=1.1220e-05) (hash(x)=49943891) +44600 val loss 5.8371 +44600 val perplexity 342.7841 +44600 train 5.638780 (lr=8.8101e-06) (hash(x)=47314250) +41700 val loss 5.7214 +41700 val perplexity 305.3274 +41700 train 5.457999 (lr=8.0144e-06) (hash(x)=50772972) +46400 val loss 5.9730 +46400 val perplexity 392.6848 +46400 train 6.081055 (lr=1.1155e-05) (hash(x)=55302927) +44700 val loss 5.8348 +44700 val perplexity 342.0028 +44700 train 5.848210 (lr=8.7443e-06) (hash(x)=53889483) +41800 val loss 5.7186 +41800 val perplexity 304.4911 +41800 train 5.674916 (lr=7.9438e-06) (hash(x)=46673554) +46500 val loss 5.9696 +46500 val perplexity 391.3410 +46500 train 5.777376 (lr=1.1092e-05) (hash(x)=50350943) +44800 val loss 5.8427 +44800 val perplexity 344.7078 +44800 train 5.786187 (lr=8.6797e-06) (hash(x)=49309774) +41900 val loss 5.7186 +41900 val perplexity 304.4642 +41900 train 5.563554 (lr=7.8740e-06) (hash(x)=40133757) +46600 val loss 5.9695 +46600 val perplexity 391.3094 +46600 train 5.651725 (lr=1.1031e-05) (hash(x)=50991478) +44900 val loss 5.8396 +44900 val perplexity 343.6291 +44900 train 5.404363 (lr=8.6163e-06) (hash(x)=47653415) +42000 val loss 5.7179 +42000 val perplexity 304.2689 +42000 train 5.663506 (lr=7.8050e-06) (hash(x)=49886199) +46700 val loss 5.9724 +46700 val perplexity 392.4334 +46700 train 6.189111 (lr=1.0972e-05) (hash(x)=52275285) +45000 val loss 5.8372 +45000 val perplexity 342.8085 +45000 train 5.589178 (lr=8.5540e-06) (hash(x)=45963722) +42100 val loss 5.7183 +42100 val perplexity 304.3767 +42100 train 5.704434 (lr=7.7368e-06) (hash(x)=47568153) +46800 val loss 5.9691 +46800 val perplexity 391.1719 +46800 train 5.882543 (lr=1.0914e-05) (hash(x)=52748351) +45100 val loss 5.8346 +45100 val perplexity 341.9164 +45100 train 5.755153 (lr=8.4930e-06) (hash(x)=49077200) +46900 val loss 5.9657 +46900 val perplexity 389.8285 +46900 train 6.084998 (lr=1.0858e-05) (hash(x)=49907987) +42200 val loss 5.7156 +42200 val perplexity 303.5565 +42200 train 5.587654 (lr=7.6693e-06) (hash(x)=48372799) +45200 val loss 5.8325 +45200 val perplexity 341.2095 +45200 train 5.735439 (lr=8.4331e-06) (hash(x)=51016172) +47000 val loss 5.9609 +47000 val perplexity 387.9504 +47000 train 6.129568 (lr=1.0803e-05) (hash(x)=58296973) +42300 val loss 5.7164 +42300 val perplexity 303.8190 +42300 train 5.596444 (lr=7.6027e-06) (hash(x)=48060142) +45300 val loss 5.8334 +45300 val perplexity 341.5025 +45300 train 5.711339 (lr=8.3745e-06) (hash(x)=50393646) +47100 val loss 5.9614 +47100 val perplexity 388.1385 +47100 train 5.758422 (lr=1.0751e-05) (hash(x)=46202543) +42400 val loss 5.7175 +42400 val perplexity 304.1399 +42400 train 5.710320 (lr=7.5368e-06) (hash(x)=50489549) +45400 val loss 5.8259 +45400 val perplexity 338.9763 +45400 train 5.497879 (lr=8.3170e-06) (hash(x)=47534030) +47200 val loss 5.9575 +47200 val perplexity 386.6454 +47200 train 6.411003 (lr=1.0700e-05) (hash(x)=57611994) +42500 val loss 5.7150 +42500 val perplexity 303.3913 +42500 train 5.700217 (lr=7.4717e-06) (hash(x)=45655508) +45500 val loss 5.8259 +45500 val perplexity 338.9807 +45500 train 5.817547 (lr=8.2607e-06) (hash(x)=46243393) +47300 val loss 5.9556 +47300 val perplexity 385.9221 +47300 train 6.066801 (lr=1.0651e-05) (hash(x)=56095511) +42600 val loss 5.7152 +42600 val perplexity 303.4330 +42600 train 5.693460 (lr=7.4074e-06) (hash(x)=44196591) +45600 val loss 5.8261 +45600 val perplexity 339.0476 +45600 train 5.661569 (lr=8.2057e-06) (hash(x)=47947250) +47400 val loss 5.9550 +47400 val perplexity 385.6896 +47400 train 6.089861 (lr=1.0604e-05) (hash(x)=51127773) +42700 val loss 5.7195 +42700 val perplexity 304.7380 +42700 train 5.663694 (lr=7.3440e-06) (hash(x)=52081281) +45700 val loss 5.8253 +45700 val perplexity 338.7662 +45700 train 5.559405 (lr=8.1518e-06) (hash(x)=50616094) +47500 val loss 5.9563 +47500 val perplexity 386.1671 +47500 train 5.861132 (lr=1.0558e-05) (hash(x)=54642108) +42800 val loss 5.7232 +42800 val perplexity 305.8865 +42800 train 5.663434 (lr=7.2813e-06) (hash(x)=49992716) +45800 val loss 5.8267 +45800 val perplexity 339.2483 +45800 train 5.514427 (lr=8.0992e-06) (hash(x)=40152774) +47600 val loss 5.9568 +47600 val perplexity 386.3706 +47600 train 5.800676 (lr=1.0515e-05) (hash(x)=47872131) +45900 val loss 5.8272 +45900 val perplexity 339.4232 +45900 train 6.030647 (lr=8.0478e-06) (hash(x)=51348210) +42900 val loss 5.7190 +42900 val perplexity 304.5939 +42900 train 5.549410 (lr=7.2194e-06) (hash(x)=49454997) +47700 val loss 5.9591 +47700 val perplexity 387.2549 +47700 train 5.937770 (lr=1.0473e-05) (hash(x)=45971021) +46000 val loss 5.8273 +46000 val perplexity 339.4408 +46000 train 5.850418 (lr=7.9976e-06) (hash(x)=56614325) +43000 val loss 5.7203 +43000 val perplexity 304.9880 +43000 train 5.659162 (lr=7.1583e-06) (hash(x)=50905292) +47800 val loss 5.9578 +47800 val perplexity 386.7474 +47800 train 5.779443 (lr=1.0433e-05) (hash(x)=49707099) +46100 val loss 5.8242 +46100 val perplexity 338.3926 +46100 train 5.854746 (lr=7.9485e-06) (hash(x)=52083304) +47900 val loss 5.9576 +47900 val perplexity 386.6694 +47900 train 5.733462 (lr=1.0394e-05) (hash(x)=50127863) +43100 val loss 5.7183 +43100 val perplexity 304.3931 +43100 train 5.437091 (lr=7.0981e-06) (hash(x)=49656461) +46200 val loss 5.8264 +46200 val perplexity 339.1287 +46200 train 5.914385 (lr=7.9008e-06) (hash(x)=53829283) +48000 val loss 5.9576 +48000 val perplexity 386.6626 +48000 train 5.802861 (lr=1.0358e-05) (hash(x)=46879177) +43200 val loss 5.7169 +43200 val perplexity 303.9756 +43200 train 5.604518 (lr=7.0386e-06) (hash(x)=50601940) +46300 val loss 5.8282 +46300 val perplexity 339.7559 +46300 train 5.615003 (lr=7.8542e-06) (hash(x)=49943891) +48100 val loss 5.9588 +48100 val perplexity 387.1308 +48100 train 5.737384 (lr=1.0323e-05) (hash(x)=49271148) +43300 val loss 5.7178 +43300 val perplexity 304.2271 +43300 train 5.492190 (lr=6.9800e-06) (hash(x)=42879970) +46400 val loss 5.8316 +46400 val perplexity 340.8872 +46400 train 5.930750 (lr=7.8088e-06) (hash(x)=55302927) +48200 val loss 5.9583 +48200 val perplexity 386.9623 +48200 train 5.501403 (lr=1.0290e-05) (hash(x)=40698784) +43400 val loss 5.7115 +43400 val perplexity 302.3269 +43400 train 6.040001 (lr=6.9222e-06) (hash(x)=50883335) +46500 val loss 5.8299 +46500 val perplexity 340.3283 +46500 train 5.657043 (lr=7.7647e-06) (hash(x)=50350943) +48300 val loss 5.9615 +48300 val perplexity 388.1891 +48300 train 5.764637 (lr=1.0259e-05) (hash(x)=51381202) +43500 val loss 5.7093 +43500 val perplexity 301.6588 +43500 train 5.615258 (lr=6.8652e-06) (hash(x)=55373094) +46600 val loss 5.8278 +46600 val perplexity 339.6255 +46600 train 5.504874 (lr=7.7218e-06) (hash(x)=50991478) +48400 val loss 5.9592 +48400 val perplexity 387.3171 +48400 train 5.646142 (lr=1.0229e-05) (hash(x)=46128392) +43600 val loss 5.7099 +43600 val perplexity 301.8439 +43600 train 5.311540 (lr=6.8090e-06) (hash(x)=37498029) +46700 val loss 5.8308 +46700 val perplexity 340.6428 +46700 train 6.068571 (lr=7.6801e-06) (hash(x)=52275285) +48500 val loss 5.9627 +48500 val perplexity 388.6603 +48500 train 5.629219 (lr=1.0201e-05) (hash(x)=45126703) +43700 val loss 5.7124 +43700 val perplexity 302.5842 +43700 train 5.427121 (lr=6.7537e-06) (hash(x)=33982416) +46800 val loss 5.8294 +46800 val perplexity 340.1641 +46800 train 5.744896 (lr=7.6397e-06) (hash(x)=52748351) +48600 val loss 5.9607 +48600 val perplexity 387.8623 +48600 train 5.786981 (lr=1.0175e-05) (hash(x)=48001878) +43800 val loss 5.7078 +43800 val perplexity 301.2163 +43800 train 5.656410 (lr=6.6992e-06) (hash(x)=49434495) +46900 val loss 5.8280 +46900 val perplexity 339.6885 +46900 train 5.966805 (lr=7.6004e-06) (hash(x)=49907987) +48700 val loss 5.9563 +48700 val perplexity 386.1835 +48700 train 5.790398 (lr=1.0151e-05) (hash(x)=50726237) +43900 val loss 5.7084 +43900 val perplexity 301.3849 +43900 train 5.504514 (lr=6.6455e-06) (hash(x)=48424180) +47000 val loss 5.8240 +47000 val perplexity 338.3255 +47000 train 5.996557 (lr=7.5624e-06) (hash(x)=58296973) +48800 val loss 5.9591 +48800 val perplexity 387.2560 +48800 train 5.930704 (lr=1.0129e-05) (hash(x)=53023918) +44000 val loss 5.7065 +44000 val perplexity 300.8104 +47100 val loss 5.8246 +47100 val perplexity 338.5108 +44000 train 5.630850 (lr=6.5926e-06) (hash(x)=51351008) +47100 train 5.621247 (lr=7.5257e-06) (hash(x)=46202543) +48900 val loss 5.9545 +48900 val perplexity 385.4752 +48900 train 5.696513 (lr=1.0108e-05) (hash(x)=46623158) +47200 val loss 5.8222 +47200 val perplexity 337.7066 +47200 train 6.286562 (lr=7.4901e-06) (hash(x)=57611994) +44100 val loss 5.7089 +44100 val perplexity 301.5485 +44100 train 5.696458 (lr=6.5406e-06) (hash(x)=49893042) +49000 val loss 5.9558 +49000 val perplexity 385.9745 +49000 train 5.784910 (lr=1.0090e-05) (hash(x)=48558395) +47300 val loss 5.8207 +47300 val perplexity 337.2145 +47300 train 5.948167 (lr=7.4558e-06) (hash(x)=56095511) +49100 val loss 5.9543 +49100 val perplexity 385.3882 +49100 train 5.769840 (lr=1.0073e-05) (hash(x)=48791085) +44200 val loss 5.7073 +44200 val perplexity 301.0651 +44200 train 5.642707 (lr=6.4894e-06) (hash(x)=45870126) +49200 val loss 5.9535 +49200 val perplexity 385.1021 +49200 train 6.634163 (lr=1.0057e-05) (hash(x)=58625942) +47400 val loss 5.8206 +47400 val perplexity 337.1643 +47400 train 5.982783 (lr=7.4228e-06) (hash(x)=51127773) +44300 val loss 5.7062 +44300 val perplexity 300.7272 +44300 train 7.239173 (lr=6.4390e-06) (hash(x)=51800290) +49300 val loss 5.9529 +49300 val perplexity 384.8654 +49300 train 5.908692 (lr=1.0044e-05) (hash(x)=52680896) +47500 val loss 5.8207 +47500 val perplexity 337.2129 +47500 train 5.715343 (lr=7.3909e-06) (hash(x)=54642108) +44400 val loss 5.7072 +44400 val perplexity 301.0246 +44400 train 5.587411 (lr=6.3895e-06) (hash(x)=50241793) +49400 val loss 5.9527 +49400 val perplexity 384.7845 +49400 train 6.085359 (lr=1.0032e-05) (hash(x)=59381598) +47600 val loss 5.8199 +47600 val perplexity 336.9422 +47600 train 5.682220 (lr=7.3603e-06) (hash(x)=47872131) +44500 val loss 5.7089 +44500 val perplexity 301.5303 +44500 train 5.464929 (lr=6.3408e-06) (hash(x)=46436406) +49500 val loss 5.9521 +49500 val perplexity 384.5719 +49500 train 6.251823 (lr=1.0022e-05) (hash(x)=51678773) +47700 val loss 5.8202 +47700 val perplexity 337.0356 +47700 train 5.819744 (lr=7.3310e-06) (hash(x)=45971021) +44600 val loss 5.7088 +44600 val perplexity 301.5209 +44600 train 5.502678 (lr=6.2929e-06) (hash(x)=47314250) +49600 val loss 5.9516 +49600 val perplexity 384.3549 +49600 train 5.705044 (lr=1.0014e-05) (hash(x)=49092923) +47800 val loss 5.8187 +47800 val perplexity 336.5439 +47800 train 5.620420 (lr=7.3029e-06) (hash(x)=49707099) +44700 val loss 5.7087 +44700 val perplexity 301.4715 +44700 train 5.710402 (lr=6.2459e-06) (hash(x)=53889483) +49700 val loss 5.9537 +49700 val perplexity 385.1722 +49700 train 6.194215 (lr=1.0008e-05) (hash(x)=55550116) +47900 val loss 5.8216 +47900 val perplexity 337.5248 +47900 train 5.619407 (lr=7.2760e-06) (hash(x)=50127863) +44800 val loss 5.7098 +44800 val perplexity 301.7980 +44800 train 5.642576 (lr=6.1998e-06) (hash(x)=49309774) +49800 val loss 5.9529 +49800 val perplexity 384.8697 +49800 train 5.801697 (lr=1.0004e-05) (hash(x)=48422352) +48000 val loss 5.8198 +48000 val perplexity 336.8892 +48000 train 5.675518 (lr=7.2504e-06) (hash(x)=46879177) +44900 val loss 5.7067 +44900 val perplexity 300.8901 +44900 train 5.258067 (lr=6.1545e-06) (hash(x)=47653415) +49900 val loss 5.9544 +49900 val perplexity 385.4561 +49900 train 6.041197 (lr=1.0001e-05) (hash(x)=52576880) +48100 val loss 5.8218 +48100 val perplexity 337.5779 +48100 train 5.608066 (lr=7.2260e-06) (hash(x)=49271148) +45000 val loss 5.7067 +45000 val perplexity 300.8669 +45000 train 5.444721 (lr=6.1100e-06) (hash(x)=45963722) +49999 val loss 5.9532 +49999 val perplexity 384.9947 +48200 val loss 5.8222 +48200 val perplexity 337.7130 +48200 train 5.358508 (lr=7.2029e-06) (hash(x)=40698784) +45100 val loss 5.7033 +45100 val perplexity 299.8424 +45100 train 5.622741 (lr=6.0664e-06) (hash(x)=49077200) +48300 val loss 5.8237 +48300 val perplexity 338.2278 +48300 train 5.616930 (lr=7.1810e-06) (hash(x)=51381202) +45200 val loss 5.7030 +45200 val perplexity 299.7675 +45200 train 5.592342 (lr=6.0237e-06) (hash(x)=51016172) +48400 val loss 5.8242 +48400 val perplexity 338.4068 +48400 train 5.515686 (lr=7.1603e-06) (hash(x)=46128392) +45300 val loss 5.7015 +45300 val perplexity 299.3142 +45300 train 5.566947 (lr=5.9818e-06) (hash(x)=50393646) +48500 val loss 5.8244 +48500 val perplexity 338.4699 +48500 train 5.506929 (lr=7.1409e-06) (hash(x)=45126703) +45400 val loss 5.6997 +45400 val perplexity 298.7663 +45400 train 5.368531 (lr=5.9407e-06) (hash(x)=47534030) +48600 val loss 5.8229 +48600 val perplexity 337.9637 +48600 train 5.647099 (lr=7.1228e-06) (hash(x)=48001878) +45500 val loss 5.6992 +45500 val perplexity 298.6272 +45500 train 5.702720 (lr=5.9005e-06) (hash(x)=46243393) +48700 val loss 5.8186 +48700 val perplexity 336.4957 +48700 train 5.643133 (lr=7.1059e-06) (hash(x)=50726237) +45600 val loss 5.6997 +45600 val perplexity 298.7830 +45600 train 5.531170 (lr=5.8612e-06) (hash(x)=47947250) +48800 val loss 5.8258 +48800 val perplexity 338.9241 +48800 train 5.823271 (lr=7.0902e-06) (hash(x)=53023918) +45700 val loss 5.7005 +45700 val perplexity 299.0294 +45700 train 5.432637 (lr=5.8227e-06) (hash(x)=50616094) +48900 val loss 5.8196 +48900 val perplexity 336.8214 +48900 train 5.570236 (lr=7.0758e-06) (hash(x)=46623158) +49000 val loss 5.8194 +49000 val perplexity 336.7809 +49000 train 5.652606 (lr=7.0627e-06) (hash(x)=48558395) +45800 val loss 5.7023 +45800 val perplexity 299.5463 +45800 train 5.420711 (lr=5.7851e-06) (hash(x)=40152774) +49100 val loss 5.8166 +49100 val perplexity 335.8358 +49100 train 5.638320 (lr=7.0508e-06) (hash(x)=48791085) +45900 val loss 5.6999 +45900 val perplexity 298.8448 +45900 train 5.921465 (lr=5.7484e-06) (hash(x)=51348210) +49200 val loss 5.8175 +49200 val perplexity 336.1171 +49200 train 6.548712 (lr=7.0401e-06) (hash(x)=58625942) +46000 val loss 5.7025 +46000 val perplexity 299.6086 +46000 train 5.720208 (lr=5.7125e-06) (hash(x)=56614325) +49300 val loss 5.8155 +49300 val perplexity 335.4521 +49300 train 5.764616 (lr=7.0307e-06) (hash(x)=52680896) +46100 val loss 5.6996 +46100 val perplexity 298.7516 +46100 train 5.737432 (lr=5.6775e-06) (hash(x)=52083304) +49400 val loss 5.8155 +49400 val perplexity 335.4525 +49400 train 5.938956 (lr=7.0226e-06) (hash(x)=59381598) +46200 val loss 5.6987 +46200 val perplexity 298.4711 +46200 train 5.791500 (lr=5.6434e-06) (hash(x)=53829283) diff --git a/attention_kindselective_n_heads2_seed1338/model_02500.pt b/attention_kindselective_n_heads2_seed1338/model_02500.pt index fd29cfb4715229a1a3374f31849c04a48699f802..61685ac54690ea5904ab9c1ddcdb1a77143236d1 100644 --- a/attention_kindselective_n_heads2_seed1338/model_02500.pt +++ b/attention_kindselective_n_heads2_seed1338/model_02500.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b9157bcc1377e68b286966c0a0341a5e69ef9fc2e367bcc48aba49860b5ba4d4 +oid sha256:a54d1f2b670087ba176c7561a6cf2d8ec26b363bcc6c9707a21c9a5980179b6f size 38587970 diff --git a/attention_kindselective_n_heads2_seed1338/model_05000.pt b/attention_kindselective_n_heads2_seed1338/model_05000.pt index 69c2382bf4cc39c701096a5d85a44c53b853d583..f558f1b83ef0d0e65020ba6964a9d9d1129cb266 100644 --- a/attention_kindselective_n_heads2_seed1338/model_05000.pt +++ b/attention_kindselective_n_heads2_seed1338/model_05000.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9f41aca9adf817c5b42c183649b46a97fcd1baba88fea3644adb57163bd39ec2 +oid sha256:3f928a89bb8241803129e09214de466b0e25be48f89b7e2934f99d6d5749d67c size 38587970 diff --git a/attention_kindselective_n_heads2_seed1338/model_07500.pt b/attention_kindselective_n_heads2_seed1338/model_07500.pt index 1e55d7bd8c78efd55ac32ca9427b1d43804c973d..7b6a7d3022f554da44e2a8d7be439a183ba66cb7 100644 --- a/attention_kindselective_n_heads2_seed1338/model_07500.pt +++ b/attention_kindselective_n_heads2_seed1338/model_07500.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f15805825428b7a60e7c78f2ff49c625a1b71822b49facc1d5e458f2e62df939 +oid sha256:193c8827a7354eef33e70e2ceba9a981bdc28b041466300f379b40cff44dc5f2 size 38587970 diff --git a/attention_kindselective_n_heads2_seed1338/model_10000.pt b/attention_kindselective_n_heads2_seed1338/model_10000.pt new file mode 100644 index 0000000000000000000000000000000000000000..16155174e9cf7b46e13e73521ee180e582d130fc --- /dev/null +++ b/attention_kindselective_n_heads2_seed1338/model_10000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1d94a8368a19c2219bdff3ede1b15f35e4186b2ee041e02eeab9cfcbe2d47bc +size 38587970 diff --git a/attention_kindselective_n_heads2_seed1338/model_12500.pt b/attention_kindselective_n_heads2_seed1338/model_12500.pt new file mode 100644 index 0000000000000000000000000000000000000000..c9403ada3ba9e00d638b39f4c3353834e1f564d8 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1338/model_12500.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a70054d4344f6fa14b7d23b7d8cd915dbe453e71455454765a255640317d713 +size 38587970 diff --git a/attention_kindselective_n_heads2_seed1338/model_15000.pt b/attention_kindselective_n_heads2_seed1338/model_15000.pt new file mode 100644 index 0000000000000000000000000000000000000000..8751947944f5d448a20aa748924cb47569f40875 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1338/model_15000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdc755cb32eae94febfe4656f80338626101e8d7cc9946ce8ce2c3bf03de8ea8 +size 38587970 diff --git a/attention_kindselective_n_heads2_seed1338/model_17500.pt b/attention_kindselective_n_heads2_seed1338/model_17500.pt new file mode 100644 index 0000000000000000000000000000000000000000..d9a2cbc287ceb66f113ed2490c4dc59627027cc3 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1338/model_17500.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2355f2bfc63b002de74251d03a24a176aa53512f7eaf6c2641e5f33290d8e80f +size 38587970 diff --git a/attention_kindselective_n_heads2_seed1338/model_20000.pt b/attention_kindselective_n_heads2_seed1338/model_20000.pt new file mode 100644 index 0000000000000000000000000000000000000000..79cace9b338b0529be25aefde2605fe350a5e732 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1338/model_20000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10a4d736bf77b416703a4797c1191ca35753f8493f5d4668711b9c95f183b3da +size 38587970 diff --git a/attention_kindselective_n_heads2_seed1338/model_22500.pt b/attention_kindselective_n_heads2_seed1338/model_22500.pt new file mode 100644 index 0000000000000000000000000000000000000000..b7142f98c77383c22fe5863a92b495d14ecf4fbe --- /dev/null +++ b/attention_kindselective_n_heads2_seed1338/model_22500.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c839040407e035acdd07369f6a7cacacdf771ee7182ba8d65658ed3500a4ba0 +size 38587970 diff --git a/attention_kindselective_n_heads2_seed1338/model_25000.pt b/attention_kindselective_n_heads2_seed1338/model_25000.pt new file mode 100644 index 0000000000000000000000000000000000000000..4fbbe507cb94e50bb5829c0e0f9bed74bd41d9c2 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1338/model_25000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:199ef7504e1bb9885ff6357018b4e74c7f0825a645526cfcb799d3d826826947 +size 38587970 diff --git a/attention_kindselective_n_heads2_seed1338/model_27500.pt b/attention_kindselective_n_heads2_seed1338/model_27500.pt new file mode 100644 index 0000000000000000000000000000000000000000..5ac2b4a1716257499b016d3014b2994136c11190 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1338/model_27500.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a019be3e9ddc6d464dd7a14b1e6170af67015c64b2949663d79def4b69860b15 +size 38587970 diff --git a/attention_kindselective_n_heads2_seed1338/model_30000.pt b/attention_kindselective_n_heads2_seed1338/model_30000.pt new file mode 100644 index 0000000000000000000000000000000000000000..e16d30656490eb18ede81d10affc80dc173ca372 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1338/model_30000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:845e4abf4f3ce7be46e04f77330a53b3c4267e819ddbc02dde4963588f5df254 +size 38587970 diff --git a/attention_kindselective_n_heads2_seed1338/model_32500.pt b/attention_kindselective_n_heads2_seed1338/model_32500.pt new file mode 100644 index 0000000000000000000000000000000000000000..d02abe10a437f159640ea6e494e9d5cad7737323 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1338/model_32500.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:872c0cf97d562fef9c0eecef1ef811dca604f09bf2632f5c08075360f48425b6 +size 38587970 diff --git a/attention_kindselective_n_heads2_seed1338/model_35000.pt b/attention_kindselective_n_heads2_seed1338/model_35000.pt new file mode 100644 index 0000000000000000000000000000000000000000..cf5d2d5e36e99b5e7a08989677b7eff0f704f2f2 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1338/model_35000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09f1131731afab9f36100fb3952537712567ab425d20a58900922e1915b4b8ca +size 38587970 diff --git a/attention_kindselective_n_heads2_seed1338/model_37500.pt b/attention_kindselective_n_heads2_seed1338/model_37500.pt new file mode 100644 index 0000000000000000000000000000000000000000..81509756012d5ccf94b64dd8640437e01e945c5e --- /dev/null +++ b/attention_kindselective_n_heads2_seed1338/model_37500.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83bd1e335e970a4ba1d9774c9192782c48865ff9b195b90ab4a04a8a5836506c +size 38587970 diff --git a/attention_kindselective_n_heads2_seed1338/model_40000.pt b/attention_kindselective_n_heads2_seed1338/model_40000.pt new file mode 100644 index 0000000000000000000000000000000000000000..da5598ac249e48d9bb27d44cb204e48aa9582b59 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1338/model_40000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31e467aae3098a92e7e473884dce191399b479efca63f963f5687f6a2b8d2fee +size 38587970 diff --git a/attention_kindselective_n_heads2_seed1338/model_42500.pt b/attention_kindselective_n_heads2_seed1338/model_42500.pt new file mode 100644 index 0000000000000000000000000000000000000000..eca674ebdb80b1ee38ad2801d9bdba0514ad24f0 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1338/model_42500.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0583628a4a6e9c9b032c427141c68cb9110d7d9d4011cf30fa9adbe6064f090b +size 38587970 diff --git a/attention_kindselective_n_heads2_seed1338/model_45000.pt b/attention_kindselective_n_heads2_seed1338/model_45000.pt new file mode 100644 index 0000000000000000000000000000000000000000..6dc11c02424989583c9f582f4e46c3370e5ba79c --- /dev/null +++ b/attention_kindselective_n_heads2_seed1338/model_45000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad45144a3fb89602eff01bc07342d70cc8afb3ad6bc2a5e404be5777c109e494 +size 38587970 diff --git a/attention_kindselective_n_heads2_seed1338/model_47500.pt b/attention_kindselective_n_heads2_seed1338/model_47500.pt new file mode 100644 index 0000000000000000000000000000000000000000..f1fabdc7c029d27c8fb2bc0a26d9d7732385baaa --- /dev/null +++ b/attention_kindselective_n_heads2_seed1338/model_47500.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b293468bbf9bdf1ff4b1c1d9ccc3eb09e6d7b5bfbac4c2312351919feacb287 +size 38587970 diff --git a/attention_kindselective_n_heads2_seed1338/model_49999.pt b/attention_kindselective_n_heads2_seed1338/model_49999.pt new file mode 100644 index 0000000000000000000000000000000000000000..fba420fbf7bd9d095f40228c34a0f4886cee587c --- /dev/null +++ b/attention_kindselective_n_heads2_seed1338/model_49999.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce9b65be8797af36d0449099a38afc632e8eea9f71b403705410b30e880b62ea +size 38587970 diff --git a/attention_kindselective_n_heads2_seed1338/optimizer_02500.pt b/attention_kindselective_n_heads2_seed1338/optimizer_02500.pt index 76ef78f5b2e7a4fe2aed05460c8e1b5f39812425..3ac8f75bef52180fbb97273b36911fb538239c44 100644 --- a/attention_kindselective_n_heads2_seed1338/optimizer_02500.pt +++ b/attention_kindselective_n_heads2_seed1338/optimizer_02500.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:47848b329fdc5de70e6f1d2c743be1b06f2fc0459c1128539106cc34eb7bd6cb +oid sha256:b7092b3753d70721281439387dd8c0466cf32802e18f2c8e3ebb74e373ad9d54 size 70895430 diff --git a/attention_kindselective_n_heads2_seed1338/optimizer_05000.pt b/attention_kindselective_n_heads2_seed1338/optimizer_05000.pt index d0839d6ff68fd5516530b3f0819a8a8398b28528..9e7984c958f750eaac35091821e434693e575d73 100644 --- a/attention_kindselective_n_heads2_seed1338/optimizer_05000.pt +++ b/attention_kindselective_n_heads2_seed1338/optimizer_05000.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:23d2ca9117d48314c932bb7aa5c430b045f768930d81a6a6a25c137857f81aad +oid sha256:777848a2f295ac1f4321dda85cf830242cd2621f9860c8db9ad5f9f9a02fbb67 size 70895430 diff --git a/attention_kindselective_n_heads2_seed1338/optimizer_07500.pt b/attention_kindselective_n_heads2_seed1338/optimizer_07500.pt index 06b1ef376a0bdd6503f26253a0b10b28ef2ca7f4..4497ce235cef2c9c92a06097ad20f21a3f96b1a8 100644 --- a/attention_kindselective_n_heads2_seed1338/optimizer_07500.pt +++ b/attention_kindselective_n_heads2_seed1338/optimizer_07500.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b8615fd9af3900fecade21ff7a416b6897eb44dab958718ebeb0d5baae84031b +oid sha256:c7a13322cf084a9889bde7b67a5341128f1bf6f74c17096982b119e271984a42 size 70895430 diff --git a/attention_kindselective_n_heads2_seed1338/optimizer_10000.pt b/attention_kindselective_n_heads2_seed1338/optimizer_10000.pt new file mode 100644 index 0000000000000000000000000000000000000000..b40ddc2fee34d8cd32a6c9d6db564779088b6cec --- /dev/null +++ b/attention_kindselective_n_heads2_seed1338/optimizer_10000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ef371e7f771a14b9fa2724183416ca81d90cde94a9714987887efb51463c96b +size 70895430 diff --git a/attention_kindselective_n_heads2_seed1338/optimizer_12500.pt b/attention_kindselective_n_heads2_seed1338/optimizer_12500.pt new file mode 100644 index 0000000000000000000000000000000000000000..4a0c9b9fc32e455d35e760ffb6f0b73f1d35a1d2 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1338/optimizer_12500.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a99f478b8c7d4ba9df1dead510f69ced94b7074669697b8edcbd10632e530b6 +size 70895430 diff --git a/attention_kindselective_n_heads2_seed1338/optimizer_15000.pt b/attention_kindselective_n_heads2_seed1338/optimizer_15000.pt new file mode 100644 index 0000000000000000000000000000000000000000..f043249f44e8d90381344624a20448f2cbe2126f --- /dev/null +++ b/attention_kindselective_n_heads2_seed1338/optimizer_15000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:868dae1e0f3077c15bc16ad0f8c175245e98ab505151c94f20694e0bca006442 +size 70895430 diff --git a/attention_kindselective_n_heads2_seed1338/optimizer_17500.pt b/attention_kindselective_n_heads2_seed1338/optimizer_17500.pt new file mode 100644 index 0000000000000000000000000000000000000000..7a8ebed017f28f7083364dc03da5d806cabf5a03 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1338/optimizer_17500.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:956ac4f5e89edc78d304613e83ac3273cbb525614c2379275873d34db6c7a3e9 +size 70895430 diff --git a/attention_kindselective_n_heads2_seed1338/optimizer_20000.pt b/attention_kindselective_n_heads2_seed1338/optimizer_20000.pt new file mode 100644 index 0000000000000000000000000000000000000000..a35aa04ebdb32956c461991eee8223c571d176f6 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1338/optimizer_20000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:519c8914bf3b0de76b351423fa89e323b48bf7518149bbbc9e5d66c998c8ac23 +size 70895430 diff --git a/attention_kindselective_n_heads2_seed1338/optimizer_22500.pt b/attention_kindselective_n_heads2_seed1338/optimizer_22500.pt new file mode 100644 index 0000000000000000000000000000000000000000..0e9cfb38bf027a60286bff168ea3ea1e464b4ecb --- /dev/null +++ b/attention_kindselective_n_heads2_seed1338/optimizer_22500.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:803e0bf8dd7fcbaa44b1bd613dad95c2e96ff9712b4d7f9787f4352ab4ce5da3 +size 70895430 diff --git a/attention_kindselective_n_heads2_seed1338/optimizer_25000.pt b/attention_kindselective_n_heads2_seed1338/optimizer_25000.pt new file mode 100644 index 0000000000000000000000000000000000000000..47b0cb05be57726c371f8766da7bfc77b6a80caa --- /dev/null +++ b/attention_kindselective_n_heads2_seed1338/optimizer_25000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20b8749ff3895e79986c24589212dbacb2058a06bfb9596ad5cd21599b9062d6 +size 70895430 diff --git a/attention_kindselective_n_heads2_seed1338/optimizer_27500.pt b/attention_kindselective_n_heads2_seed1338/optimizer_27500.pt new file mode 100644 index 0000000000000000000000000000000000000000..edaedb4434574160126aef994d9bad9507267699 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1338/optimizer_27500.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afd8a1b22bbba12a3c72efd70214ac15f5d304126134894d797fbed790ef6ef5 +size 70895430 diff --git a/attention_kindselective_n_heads2_seed1338/optimizer_30000.pt b/attention_kindselective_n_heads2_seed1338/optimizer_30000.pt new file mode 100644 index 0000000000000000000000000000000000000000..4c5a832dfa24aa9a8532198e2e89656998f26d97 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1338/optimizer_30000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16f2dec0aac9f1da12e2f06988a2be697955594ae225250f2a203de88498201a +size 70895430 diff --git a/attention_kindselective_n_heads2_seed1338/optimizer_32500.pt b/attention_kindselective_n_heads2_seed1338/optimizer_32500.pt new file mode 100644 index 0000000000000000000000000000000000000000..1810995d9e1f9d6d68ef58e72f22b7ceda2843f2 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1338/optimizer_32500.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93426294ec5f595bdb39a5517a2d0f740fe8993a4d81b04290c2da208f650b55 +size 70895430 diff --git a/attention_kindselective_n_heads2_seed1338/optimizer_35000.pt b/attention_kindselective_n_heads2_seed1338/optimizer_35000.pt new file mode 100644 index 0000000000000000000000000000000000000000..908ee39d04055a128e74480ca0c411f5baff18d7 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1338/optimizer_35000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e33c6faf2dcd2e2ec56ca1999a1be79b262979e57419fc53ec3e32916d57678b +size 70895430 diff --git a/attention_kindselective_n_heads2_seed1338/optimizer_37500.pt b/attention_kindselective_n_heads2_seed1338/optimizer_37500.pt new file mode 100644 index 0000000000000000000000000000000000000000..616d483ebab10e614b04eb0f98314aec16d56262 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1338/optimizer_37500.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ee5c0c3bdf5d64ebf3f0c7f1614361f69b9acb1a7938d41ed8eb3c0c86618ce +size 70895430 diff --git a/attention_kindselective_n_heads2_seed1338/optimizer_40000.pt b/attention_kindselective_n_heads2_seed1338/optimizer_40000.pt new file mode 100644 index 0000000000000000000000000000000000000000..196a6964c9045faae8b676740bbed2769427fc0d --- /dev/null +++ b/attention_kindselective_n_heads2_seed1338/optimizer_40000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:038da380a90d0f3a3ff1f8a8cc805cfa0790e4014a458c7138d8dd6a2b50e5fa +size 70895430 diff --git a/attention_kindselective_n_heads2_seed1338/optimizer_42500.pt b/attention_kindselective_n_heads2_seed1338/optimizer_42500.pt new file mode 100644 index 0000000000000000000000000000000000000000..bd526a552c644a8da44e722355e3dfccdd95fd89 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1338/optimizer_42500.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd996b57c32f76d090a9779436b1904f1829f2772b25933692896324d69cf9bc +size 70895430 diff --git a/attention_kindselective_n_heads2_seed1338/optimizer_45000.pt b/attention_kindselective_n_heads2_seed1338/optimizer_45000.pt new file mode 100644 index 0000000000000000000000000000000000000000..c798780da42523ca62c606ca342c2b1b6d6f0458 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1338/optimizer_45000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4254e56f80cb905e03718cbefc34fe7a5e8094876ee7e8c57d07edc92b5a629 +size 70895430 diff --git a/attention_kindselective_n_heads2_seed1338/optimizer_47500.pt b/attention_kindselective_n_heads2_seed1338/optimizer_47500.pt new file mode 100644 index 0000000000000000000000000000000000000000..710fe6e092b88e0a4f9c1c0b9e4eeeeaf499e38a --- /dev/null +++ b/attention_kindselective_n_heads2_seed1338/optimizer_47500.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c37c86b4e20e6b5bdb363fe3b9c85c6990f1c70148fe30d865985a64f890c5a +size 70895430 diff --git a/attention_kindselective_n_heads2_seed1338/optimizer_49999.pt b/attention_kindselective_n_heads2_seed1338/optimizer_49999.pt new file mode 100644 index 0000000000000000000000000000000000000000..98d2d046ba18b63abe5935a02b394704e13494d8 --- /dev/null +++ b/attention_kindselective_n_heads2_seed1338/optimizer_49999.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83c29e09bd070775f2d445f6d518fffce6ff64c7c8ddb501cf9f402c84006c5b +size 70895430