andrew-healey commited on
Commit
4015d3c
·
verified ·
1 Parent(s): b16008e

Upload folder using huggingface_hub

Browse files
attention_kindselective_n_heads2_seed1338/log2.txt CHANGED
@@ -4417,3 +4417,92 @@ max_steps: 50000
4417
  47000 val loss 5.6956
4418
  47000 val perplexity 297.5557
4419
  47000 train 5.862358 (lr=5.4017e-06) (hash(x)=58296973)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4417
  47000 val loss 5.6956
4418
  47000 val perplexity 297.5557
4419
  47000 train 5.862358 (lr=5.4017e-06) (hash(x)=58296973)
4420
+ 47100 val loss 5.6962
4421
+ 47100 val perplexity 297.7274
4422
+ 47100 train 5.498293 (lr=5.3755e-06) (hash(x)=46202543)
4423
+ 47200 val loss 5.6949
4424
+ 47200 val perplexity 297.3451
4425
+ 47200 train 6.162602 (lr=5.3501e-06) (hash(x)=57611994)
4426
+ 47300 val loss 5.6936
4427
+ 47300 val perplexity 296.9551
4428
+ 47300 train 5.818519 (lr=5.3256e-06) (hash(x)=56095511)
4429
+ 47400 val loss 5.6926
4430
+ 47400 val perplexity 296.6577
4431
+ 47400 train 5.885818 (lr=5.3020e-06) (hash(x)=51127773)
4432
+ 47500 val loss 5.6941
4433
+ 47500 val perplexity 297.1053
4434
+ 47500 train 5.578781 (lr=5.2792e-06) (hash(x)=54642108)
4435
+ 47600 val loss 5.6947
4436
+ 47600 val perplexity 297.2766
4437
+ 47600 train 5.554379 (lr=5.2574e-06) (hash(x)=47872131)
4438
+ 47700 val loss 5.6924
4439
+ 47700 val perplexity 296.6082
4440
+ 47700 train 5.694735 (lr=5.2364e-06) (hash(x)=45971021)
4441
+ 47800 val loss 5.6943
4442
+ 47800 val perplexity 297.1729
4443
+ 47800 train 5.494211 (lr=5.2163e-06) (hash(x)=49707099)
4444
+ 47900 val loss 5.6944
4445
+ 47900 val perplexity 297.1941
4446
+ 47900 train 5.501933 (lr=5.1972e-06) (hash(x)=50127863)
4447
+ 48000 val loss 5.6916
4448
+ 48000 val perplexity 296.3759
4449
+ 48000 train 5.535919 (lr=5.1788e-06) (hash(x)=46879177)
4450
+ 48100 val loss 5.6930
4451
+ 48100 val perplexity 296.7791
4452
+ 48100 train 5.493693 (lr=5.1614e-06) (hash(x)=49271148)
4453
+ 48200 val loss 5.6936
4454
+ 48200 val perplexity 296.9648
4455
+ 48200 train 5.231916 (lr=5.1449e-06) (hash(x)=40698784)
4456
+ 48300 val loss 5.6938
4457
+ 48300 val perplexity 297.0309
4458
+ 48300 train 5.482736 (lr=5.1293e-06) (hash(x)=51381202)
4459
+ 48400 val loss 5.6927
4460
+ 48400 val perplexity 296.6988
4461
+ 48400 train 5.372602 (lr=5.1145e-06) (hash(x)=46128392)
4462
+ 48500 val loss 5.6943
4463
+ 48500 val perplexity 297.1581
4464
+ 48500 train 5.393999 (lr=5.1007e-06) (hash(x)=45126703)
4465
+ 48600 val loss 5.6963
4466
+ 48600 val perplexity 297.7726
4467
+ 48600 train 5.534537 (lr=5.0877e-06) (hash(x)=48001878)
4468
+ 48700 val loss 5.6909
4469
+ 48700 val perplexity 296.1459
4470
+ 48700 train 5.510094 (lr=5.0756e-06) (hash(x)=50726237)
4471
+ 48800 val loss 5.6946
4472
+ 48800 val perplexity 297.2566
4473
+ 48800 train 5.679790 (lr=5.0644e-06) (hash(x)=53023918)
4474
+ 48900 val loss 5.6915
4475
+ 48900 val perplexity 296.3504
4476
+ 48900 train 5.466436 (lr=5.0542e-06) (hash(x)=46623158)
4477
+ 49000 val loss 5.6904
4478
+ 49000 val perplexity 296.0216
4479
+ 49000 train 5.534313 (lr=5.0448e-06) (hash(x)=48558395)
4480
+ 49100 val loss 5.6884
4481
+ 49100 val perplexity 295.4112
4482
+ 49100 train 5.531930 (lr=5.0363e-06) (hash(x)=48791085)
4483
+ 49200 val loss 5.6897
4484
+ 49200 val perplexity 295.7928
4485
+ 49200 train 6.411170 (lr=5.0286e-06) (hash(x)=58625942)
4486
+ 49300 val loss 5.6890
4487
+ 49300 val perplexity 295.5917
4488
+ 49300 train 5.664113 (lr=5.0219e-06) (hash(x)=52680896)
4489
+ 49400 val loss 5.6888
4490
+ 49400 val perplexity 295.5517
4491
+ 49400 train 5.797068 (lr=5.0161e-06) (hash(x)=59381598)
4492
+ 49500 val loss 5.6911
4493
+ 49500 val perplexity 296.2069
4494
+ 49500 train 5.969440 (lr=5.0112e-06) (hash(x)=51678773)
4495
+ 49600 val loss 5.6903
4496
+ 49600 val perplexity 295.9921
4497
+ 49600 train 5.465007 (lr=5.0072e-06) (hash(x)=49092923)
4498
+ 49700 val loss 5.6907
4499
+ 49700 val perplexity 296.1018
4500
+ 49700 train 5.918400 (lr=5.0040e-06) (hash(x)=55550116)
4501
+ 49800 val loss 5.6898
4502
+ 49800 val perplexity 295.8321
4503
+ 49800 train 5.565441 (lr=5.0018e-06) (hash(x)=48422352)
4504
+ 49900 val loss 5.6916
4505
+ 49900 val perplexity 296.3545
4506
+ 49900 train 5.764893 (lr=5.0004e-06) (hash(x)=52576880)
4507
+ 49999 val loss 5.6909
4508
+ 49999 val perplexity 296.1590
attention_kindselective_n_heads2_seed1338/model_47500.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5b293468bbf9bdf1ff4b1c1d9ccc3eb09e6d7b5bfbac4c2312351919feacb287
3
  size 38587970
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c61c7e55bc6fdc5a2b29941a2b66a41aec21ae86bcf0b7f65e27fce807d6198d
3
  size 38587970
attention_kindselective_n_heads2_seed1338/model_49999.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b40a2bad5cb6d06712afd9dd8b3e80440264db3a5252aafcb3940326cfd13cce
3
  size 38587970
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33163b2f060c6c5f5a1cd71a9bcd5d1e2acf53e0674e95c52e8495d8ecf19f49
3
  size 38587970
attention_kindselective_n_heads2_seed1338/optimizer_47500.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9c37c86b4e20e6b5bdb363fe3b9c85c6990f1c70148fe30d865985a64f890c5a
3
  size 70895430
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:336781bbe92c707410ef574515b5a29bed7ef2b59eae47fac04a5d57c5a08b0d
3
  size 70895430
attention_kindselective_n_heads2_seed1338/optimizer_49999.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a87ba1dab58634637df50543e586292f0c40067c9a7b0458888736cd9020686
3
  size 70895430
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f919e489d0aee4db2867bde0664bdf90b8304dc8b004630eb73a6f44d69afdf
3
  size 70895430