andrew-healey commited on
Commit
58697ab
·
verified ·
1 Parent(s): 4015d3c

Upload folder using huggingface_hub

Browse files
attention_kindselective_n_heads2_seed1341/log2.txt CHANGED
@@ -4402,3 +4402,107 @@ max_steps: 50000
4402
  46500 val loss 5.8856
4403
  46500 val perplexity 359.8179
4404
  46500 train 5.713169 (lr=1.1092e-05) (hash(x)=48511186)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4402
  46500 val loss 5.8856
4403
  46500 val perplexity 359.8179
4404
  46500 train 5.713169 (lr=1.1092e-05) (hash(x)=48511186)
4405
+ 46600 val loss 5.8860
4406
+ 46600 val perplexity 359.9745
4407
+ 46600 train 5.918826 (lr=1.1031e-05) (hash(x)=48832039)
4408
+ 46700 val loss 5.8864
4409
+ 46700 val perplexity 360.1195
4410
+ 46700 train 6.561471 (lr=1.0972e-05) (hash(x)=65821271)
4411
+ 46800 val loss 5.8886
4412
+ 46800 val perplexity 360.8994
4413
+ 46800 train 5.569217 (lr=1.0914e-05) (hash(x)=48005593)
4414
+ 46900 val loss 5.8879
4415
+ 46900 val perplexity 360.6612
4416
+ 46900 train 6.157232 (lr=1.0858e-05) (hash(x)=53182453)
4417
+ 47000 val loss 5.8908
4418
+ 47000 val perplexity 361.6933
4419
+ 47000 train 5.775409 (lr=1.0803e-05) (hash(x)=49385983)
4420
+ 47100 val loss 5.8881
4421
+ 47100 val perplexity 360.7214
4422
+ 47100 train 5.876601 (lr=1.0751e-05) (hash(x)=48801622)
4423
+ 47200 val loss 5.8882
4424
+ 47200 val perplexity 360.7730
4425
+ 47200 train 5.618414 (lr=1.0700e-05) (hash(x)=45216690)
4426
+ 47300 val loss 5.8918
4427
+ 47300 val perplexity 362.0417
4428
+ 47300 train 5.716314 (lr=1.0651e-05) (hash(x)=51185601)
4429
+ 47400 val loss 5.8878
4430
+ 47400 val perplexity 360.6154
4431
+ 47400 train 5.607826 (lr=1.0604e-05) (hash(x)=47743508)
4432
+ 47500 val loss 5.8948
4433
+ 47500 val perplexity 363.1413
4434
+ 47500 train 5.676246 (lr=1.0558e-05) (hash(x)=49318833)
4435
+ 47600 val loss 5.8933
4436
+ 47600 val perplexity 362.6100
4437
+ 47600 train 5.876511 (lr=1.0515e-05) (hash(x)=50430826)
4438
+ 47700 val loss 5.8931
4439
+ 47700 val perplexity 362.5116
4440
+ 47700 train 5.814244 (lr=1.0473e-05) (hash(x)=51966931)
4441
+ 47800 val loss 5.8937
4442
+ 47800 val perplexity 362.7275
4443
+ 47800 train 5.641396 (lr=1.0433e-05) (hash(x)=49517209)
4444
+ 47900 val loss 5.8987
4445
+ 47900 val perplexity 364.5595
4446
+ 47900 train 5.698562 (lr=1.0394e-05) (hash(x)=49749702)
4447
+ 48000 val loss 5.8991
4448
+ 48000 val perplexity 364.6937
4449
+ 48000 train 5.766129 (lr=1.0358e-05) (hash(x)=52535448)
4450
+ 48100 val loss 5.8882
4451
+ 48100 val perplexity 360.7525
4452
+ 48100 train 6.531771 (lr=1.0323e-05) (hash(x)=53739181)
4453
+ 48200 val loss 5.8824
4454
+ 48200 val perplexity 358.6651
4455
+ 48200 train 5.854337 (lr=1.0290e-05) (hash(x)=48086710)
4456
+ 48300 val loss 5.8826
4457
+ 48300 val perplexity 358.7256
4458
+ 48300 train 5.921894 (lr=1.0259e-05) (hash(x)=43991942)
4459
+ 48400 val loss 5.8792
4460
+ 48400 val perplexity 357.5202
4461
+ 48400 train 5.780863 (lr=1.0229e-05) (hash(x)=50801906)
4462
+ 48500 val loss 5.8788
4463
+ 48500 val perplexity 357.3769
4464
+ 48500 train 5.955594 (lr=1.0201e-05) (hash(x)=48915599)
4465
+ 48600 val loss 5.8765
4466
+ 48600 val perplexity 356.5590
4467
+ 48600 train 6.042453 (lr=1.0175e-05) (hash(x)=54450172)
4468
+ 48700 val loss 5.8783
4469
+ 48700 val perplexity 357.1961
4470
+ 48700 train 5.748240 (lr=1.0151e-05) (hash(x)=48755509)
4471
+ 48800 val loss 5.8904
4472
+ 48800 val perplexity 361.5333
4473
+ 48800 train 5.780968 (lr=1.0129e-05) (hash(x)=51380096)
4474
+ 48900 val loss 5.8791
4475
+ 48900 val perplexity 357.4912
4476
+ 48900 train 5.744174 (lr=1.0108e-05) (hash(x)=43404218)
4477
+ 49000 val loss 5.8779
4478
+ 49000 val perplexity 357.0585
4479
+ 49000 train 5.845617 (lr=1.0090e-05) (hash(x)=55059739)
4480
+ 49100 val loss 5.8824
4481
+ 49100 val perplexity 358.6769
4482
+ 49100 train 6.111704 (lr=1.0073e-05) (hash(x)=47889309)
4483
+ 49200 val loss 5.8832
4484
+ 49200 val perplexity 358.9541
4485
+ 49200 train 5.629024 (lr=1.0057e-05) (hash(x)=46202589)
4486
+ 49300 val loss 5.8825
4487
+ 49300 val perplexity 358.7058
4488
+ 49300 train 5.780267 (lr=1.0044e-05) (hash(x)=49161813)
4489
+ 49400 val loss 5.8826
4490
+ 49400 val perplexity 358.7585
4491
+ 49400 train 5.719767 (lr=1.0032e-05) (hash(x)=49505044)
4492
+ 49500 val loss 5.8832
4493
+ 49500 val perplexity 358.9488
4494
+ 49500 train 5.663615 (lr=1.0022e-05) (hash(x)=46610327)
4495
+ 49600 val loss 5.8839
4496
+ 49600 val perplexity 359.2018
4497
+ 49600 train 6.220163 (lr=1.0014e-05) (hash(x)=51188240)
4498
+ 49700 val loss 5.8821
4499
+ 49700 val perplexity 358.5739
4500
+ 49700 train 5.680971 (lr=1.0008e-05) (hash(x)=41492016)
4501
+ 49800 val loss 5.8812
4502
+ 49800 val perplexity 358.2258
4503
+ 49800 train 5.788856 (lr=1.0004e-05) (hash(x)=53488833)
4504
+ 49900 val loss 5.8811
4505
+ 49900 val perplexity 358.1883
4506
+ 49900 train 6.194769 (lr=1.0001e-05) (hash(x)=60260935)
4507
+ 49999 val loss 5.8753
4508
+ 49999 val perplexity 356.1245
attention_kindselective_n_heads2_seed1341/model_47500.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f10489eeeefc32b2aec0615aaaedae12a844b5c9ea48c80427d8873fd45e4bb
3
  size 38587970
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb22aa6f5947b15de1ac7135ec3e686429f487241f1a5524207c53012a4790f7
3
  size 38587970
attention_kindselective_n_heads2_seed1341/model_49999.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:db6fabd2091d123286c1aa08ccfbc214046b86add0bb88e105e13fcbaa156726
3
  size 38587970
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31806c8a239461981a224edaa0eb46f8d1f3413c143e682980dd953b9856c440
3
  size 38587970
attention_kindselective_n_heads2_seed1341/optimizer_47500.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:40db19e0e2056eef8cf8b772f3c21365474b8d9d8725dee8528b75cd13754308
3
  size 70895430
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43c587204270bfbb51bcc6f940266318ee64b48c78c0d8846062182ba107b060
3
  size 70895430
attention_kindselective_n_heads2_seed1341/optimizer_49999.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9fadeba3455a1c32c7d23175ef110cf852d10f450c42218ecd6c029b481bc691
3
  size 70895430
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d87df62bab55321fe1386a280814d4558ef6273be3157bbcccf05f163a097b2e
3
  size 70895430