andrew-healey commited on
Commit
af5c489
·
verified ·
1 Parent(s): 71bf3ac

Upload folder using huggingface_hub

Browse files
attention_kindselective_n_heads2_seed1341/log2.txt CHANGED
@@ -493,3 +493,68 @@ max_steps: 10000
493
  7800 val loss 6.4186
494
  7800 val perplexity 613.1287
495
  7800 train 6.243061 (lr=3.1102e-05) (hash(x)=48049749)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
493
  7800 val loss 6.4186
494
  7800 val perplexity 613.1287
495
  7800 train 6.243061 (lr=3.1102e-05) (hash(x)=48049749)
496
+ 7900 val loss 6.4043
497
+ 7900 val perplexity 604.4531
498
+ 7900 train 6.246456 (lr=2.9726e-05) (hash(x)=44768513)
499
+ 8000 val loss 6.4115
500
+ 8000 val perplexity 608.7827
501
+ 8000 train 6.265230 (lr=2.8405e-05) (hash(x)=46228039)
502
+ 8100 val loss 6.4030
503
+ 8100 val perplexity 603.6253
504
+ 8100 train 6.710750 (lr=2.7138e-05) (hash(x)=60017091)
505
+ 8200 val loss 6.3952
506
+ 8200 val perplexity 598.9441
507
+ 8200 train 6.407417 (lr=2.5929e-05) (hash(x)=49910198)
508
+ 8300 val loss 6.3936
509
+ 8300 val perplexity 598.0146
510
+ 8300 train 6.694312 (lr=2.4778e-05) (hash(x)=57919055)
511
+ 8400 val loss 6.3967
512
+ 8400 val perplexity 599.8673
513
+ 8400 train 6.466394 (lr=2.3686e-05) (hash(x)=49694964)
514
+ 8500 val loss 6.3893
515
+ 8500 val perplexity 595.4279
516
+ 8500 train 6.434396 (lr=2.2655e-05) (hash(x)=53762585)
517
+ 8600 val loss 6.3880
518
+ 8600 val perplexity 594.6692
519
+ 8600 train 6.429276 (lr=2.1685e-05) (hash(x)=51166973)
520
+ 8700 val loss 6.3826
521
+ 8700 val perplexity 591.4665
522
+ 8700 train 6.472390 (lr=2.0777e-05) (hash(x)=53968049)
523
+ 8800 val loss 6.3809
524
+ 8800 val perplexity 590.4394
525
+ 8800 train 6.468944 (lr=1.9933e-05) (hash(x)=59231056)
526
+ 8900 val loss 6.3768
527
+ 8900 val perplexity 588.0292
528
+ 8900 train 6.261299 (lr=1.9153e-05) (hash(x)=50488048)
529
+ 9000 val loss 6.3776
530
+ 9000 val perplexity 588.4850
531
+ 9000 train 6.119425 (lr=1.8439e-05) (hash(x)=44492956)
532
+ 9100 val loss 6.3821
533
+ 9100 val perplexity 591.1530
534
+ 9100 train 6.412546 (lr=1.7790e-05) (hash(x)=51134989)
535
+ 9200 val loss 6.3779
536
+ 9200 val perplexity 588.6931
537
+ 9200 train 6.194124 (lr=1.7208e-05) (hash(x)=48636056)
538
+ 9300 val loss 6.3750
539
+ 9300 val perplexity 587.0090
540
+ 9300 train 6.325751 (lr=1.6692e-05) (hash(x)=50200551)
541
+ 9400 val loss 6.3751
542
+ 9400 val perplexity 587.0705
543
+ 9400 train 6.198377 (lr=1.6245e-05) (hash(x)=48057228)
544
+ 9500 val loss 6.3753
545
+ 9500 val perplexity 587.1808
546
+ 9500 train 6.169206 (lr=1.5865e-05) (hash(x)=48125171)
547
+ 9600 val loss 6.3750
548
+ 9600 val perplexity 587.0090
549
+ 9600 train 6.290072 (lr=1.5554e-05) (hash(x)=53375853)
550
+ 9700 val loss 6.3654
551
+ 9700 val perplexity 581.3495
552
+ 9700 train 7.198069 (lr=1.5312e-05) (hash(x)=53924631)
553
+ 9800 val loss 6.3617
554
+ 9800 val perplexity 579.2410
555
+ 9800 train 6.411182 (lr=1.5139e-05) (hash(x)=48895047)
556
+ 9900 val loss 6.3613
557
+ 9900 val perplexity 579.0135
558
+ 9900 train 6.211933 (lr=1.5035e-05) (hash(x)=44269923)
559
+ 9999 val loss 6.3711
560
+ 9999 val perplexity 584.7159
attention_kindselective_n_heads2_seed1341/model_09999.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a259b68f4b982bba61e3791426a03173b6b6d7cf182ef648db0331096494d40
3
  size 38587970
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43bb848496fc20a758cf6d49ce282145dd82b89869f968b14ca22f6a77beab2a
3
  size 38587970
attention_kindselective_n_heads2_seed1341/optimizer_09999.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dcc01240e1af4fab2e549d468dffd2af1d74e3d805146895fa6e76f4aea91cae
3
  size 70895430
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f2cf6f588f31fa92cdbe2ab5c683118c1d1e79391ba5aba099a972d82549afc
3
  size 70895430