andrew-healey commited on
Commit
aa984e8
·
verified ·
1 Parent(s): e7f1b9c

Upload folder using huggingface_hub

Browse files
attention_kindselective_n_heads2_seed1340/log2.txt CHANGED
@@ -421,3 +421,95 @@ max_steps: 10000
421
  6900 val loss 6.4899
422
  6900 val perplexity 658.4554
423
  6900 train 6.663884 (lr=4.5676e-05) (hash(x)=46534986)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
421
  6900 val loss 6.4899
422
  6900 val perplexity 658.4554
423
  6900 train 6.663884 (lr=4.5676e-05) (hash(x)=46534986)
424
+ 7000 val loss 6.4768
425
+ 7000 val perplexity 649.8740
426
+ 7000 train 6.840258 (lr=4.3882e-05) (hash(x)=49317888)
427
+ 7100 val loss 6.4738
428
+ 7100 val perplexity 647.9160
429
+ 7100 train 6.493526 (lr=4.2128e-05) (hash(x)=50360484)
430
+ 7200 val loss 6.4698
431
+ 7200 val perplexity 645.3282
432
+ 7200 train 6.339218 (lr=4.0414e-05) (hash(x)=49515094)
433
+ 7300 val loss 6.4730
434
+ 7300 val perplexity 647.4099
435
+ 7300 train 6.455276 (lr=3.8745e-05) (hash(x)=51546861)
436
+ 7400 val loss 6.4622
437
+ 7400 val perplexity 640.4369
438
+ 7400 train 6.428791 (lr=3.7120e-05) (hash(x)=48320948)
439
+ 7500 val loss 6.4678
440
+ 7500 val perplexity 644.0776
441
+ 7500 train 6.414234 (lr=3.5541e-05) (hash(x)=40167457)
442
+ 7600 val loss 6.4528
443
+ 7600 val perplexity 634.4956
444
+ 7600 train 6.400621 (lr=3.4011e-05) (hash(x)=49942165)
445
+ 7700 val loss 6.4515
446
+ 7700 val perplexity 633.6224
447
+ 7700 train 6.178792 (lr=3.2531e-05) (hash(x)=48853311)
448
+ 7800 val loss 6.4469
449
+ 7800 val perplexity 630.7529
450
+ 7800 train 6.332662 (lr=3.1102e-05) (hash(x)=48510117)
451
+ 7900 val loss 6.4452
452
+ 7900 val perplexity 629.6969
453
+ 7900 train 6.454893 (lr=2.9726e-05) (hash(x)=48339781)
454
+ 8000 val loss 6.4485
455
+ 8000 val perplexity 631.7830
456
+ 8000 train 6.561584 (lr=2.8405e-05) (hash(x)=54927320)
457
+ 8100 val loss 6.4404
458
+ 8100 val perplexity 626.6378
459
+ 8100 train 6.167403 (lr=2.7138e-05) (hash(x)=46461786)
460
+ 8200 val loss 6.4388
461
+ 8200 val perplexity 625.6752
462
+ 8200 train 6.347755 (lr=2.5929e-05) (hash(x)=51536260)
463
+ 8300 val loss 6.4365
464
+ 8300 val perplexity 624.2048
465
+ 8300 train 6.190253 (lr=2.4778e-05) (hash(x)=44770722)
466
+ 8400 val loss 6.4290
467
+ 8400 val perplexity 619.5564
468
+ 8400 train 6.434593 (lr=2.3686e-05) (hash(x)=50104957)
469
+ 8500 val loss 6.4245
470
+ 8500 val perplexity 616.7615
471
+ 8500 train 6.533162 (lr=2.2655e-05) (hash(x)=50132971)
472
+ 8600 val loss 6.4212
473
+ 8600 val perplexity 614.7573
474
+ 8600 train 6.378406 (lr=2.1685e-05) (hash(x)=52193699)
475
+ 8700 val loss 6.4183
476
+ 8700 val perplexity 612.9755
477
+ 8700 train 6.427931 (lr=2.0777e-05) (hash(x)=47902319)
478
+ 8800 val loss 6.4139
479
+ 8800 val perplexity 610.2626
480
+ 8800 train 6.709031 (lr=1.9933e-05) (hash(x)=54904230)
481
+ 8900 val loss 6.4051
482
+ 8900 val perplexity 604.9390
483
+ 8900 train 6.288378 (lr=1.9153e-05) (hash(x)=46311615)
484
+ 9000 val loss 6.4113
485
+ 9000 val perplexity 608.7029
486
+ 9000 train 6.270270 (lr=1.8439e-05) (hash(x)=48535188)
487
+ 9100 val loss 6.4026
488
+ 9100 val perplexity 603.3902
489
+ 9100 train 6.419470 (lr=1.7790e-05) (hash(x)=51757372)
490
+ 9200 val loss 6.4024
491
+ 9200 val perplexity 603.2843
492
+ 9200 train 6.288178 (lr=1.7208e-05) (hash(x)=51131708)
493
+ 9300 val loss 6.4021
494
+ 9300 val perplexity 603.1321
495
+ 9300 train 6.361725 (lr=1.6692e-05) (hash(x)=44784276)
496
+ 9400 val loss 6.4006
497
+ 9400 val perplexity 602.2206
498
+ 9400 train 6.529187 (lr=1.6245e-05) (hash(x)=51981169)
499
+ 9500 val loss 6.3956
500
+ 9500 val perplexity 599.1955
501
+ 9500 train 6.358074 (lr=1.5865e-05) (hash(x)=47232936)
502
+ 9600 val loss 6.3954
503
+ 9600 val perplexity 599.1069
504
+ 9600 train 6.410747 (lr=1.5554e-05) (hash(x)=53800450)
505
+ 9700 val loss 6.3937
506
+ 9700 val perplexity 598.0549
507
+ 9700 train 6.500488 (lr=1.5312e-05) (hash(x)=55768123)
508
+ 9800 val loss 6.3977
509
+ 9800 val perplexity 600.4852
510
+ 9800 train 6.321241 (lr=1.5139e-05) (hash(x)=47745177)
511
+ 9900 val loss 6.3977
512
+ 9900 val perplexity 600.4746
513
+ 9900 train 6.664391 (lr=1.5035e-05) (hash(x)=56592246)
514
+ 9999 val loss 6.4009
515
+ 9999 val perplexity 602.3967
attention_kindselective_n_heads2_seed1340/model_07500.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:43a6634ddb7efa2797ba98f1129965459e095430bafbbef119764211ecce5d4f
3
  size 38587970
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:933638ce43638ef9bd5900408f7bbcb30a32be65bd1c3e240216a1265da81d33
3
  size 38587970
attention_kindselective_n_heads2_seed1340/model_09999.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d3b59f7ce706fb6b80035e89fbc4a42332e13eb0a0bc36b121c007413b2e495b
3
  size 38587970
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c6f68fd809731ce63a07f277326cf637cffea9c9a2009b8829099fe17da7950
3
  size 38587970
attention_kindselective_n_heads2_seed1340/optimizer_07500.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:28aada8d81583aad9ff815ca63797851095f2d61265dbd7042c05b5071bebf1c
3
  size 70895430
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c475776587ad7676fd824bc7c9c9570e94a3ffb32776966605d3ccf70784164
3
  size 70895430
attention_kindselective_n_heads2_seed1340/optimizer_09999.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:93e34e940783142b8f4cb2c5281bcbe179e89f9432b789eb0baeb86a876f3e07
3
  size 70895430
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb9517903ac3b15db5cf8d112e5905aa1e0fd70df70394bae89e0ac4a5c21274
3
  size 70895430