andrew-healey commited on
Commit
e7f1b9c
·
verified ·
1 Parent(s): b7ae9ea

Upload folder using huggingface_hub

Browse files
attention_kindselective_n_heads2_seed1339/log2.txt CHANGED
@@ -532,3 +532,47 @@ max_steps: 10000
532
  8500 val loss 6.4191
533
  8500 val perplexity 613.4542
534
  8500 train 6.478737 (lr=2.2655e-05) (hash(x)=56176595)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
532
  8500 val loss 6.4191
533
  8500 val perplexity 613.4542
534
  8500 train 6.478737 (lr=2.2655e-05) (hash(x)=56176595)
535
+ 8600 val loss 6.4165
536
+ 8600 val perplexity 611.8454
537
+ 8600 train 6.476886 (lr=2.1685e-05) (hash(x)=55184249)
538
+ 8700 val loss 6.4175
539
+ 8700 val perplexity 612.4998
540
+ 8700 train 6.344475 (lr=2.0777e-05) (hash(x)=46471646)
541
+ 8800 val loss 6.4215
542
+ 8800 val perplexity 614.9026
543
+ 8800 train 6.112791 (lr=1.9933e-05) (hash(x)=46233162)
544
+ 8900 val loss 6.4167
545
+ 8900 val perplexity 611.9857
546
+ 8900 train 6.344871 (lr=1.9153e-05) (hash(x)=47233684)
547
+ 9000 val loss 6.4089
548
+ 9000 val perplexity 607.2270
549
+ 9000 train 6.273678 (lr=1.8439e-05) (hash(x)=48374529)
550
+ 9100 val loss 6.3961
551
+ 9100 val perplexity 599.5264
552
+ 9100 train 6.349834 (lr=1.7790e-05) (hash(x)=48065371)
553
+ 9200 val loss 6.3860
554
+ 9200 val perplexity 593.4916
555
+ 9200 train 6.378371 (lr=1.7208e-05) (hash(x)=47408078)
556
+ 9300 val loss 6.3823
557
+ 9300 val perplexity 591.3016
558
+ 9300 train 6.362077 (lr=1.6692e-05) (hash(x)=50749781)
559
+ 9400 val loss 6.3772
560
+ 9400 val perplexity 588.2960
561
+ 9400 train 6.570877 (lr=1.6245e-05) (hash(x)=48560169)
562
+ 9500 val loss 6.3806
563
+ 9500 val perplexity 590.2719
564
+ 9500 train 6.477550 (lr=1.5865e-05) (hash(x)=50936392)
565
+ 9600 val loss 6.3755
566
+ 9600 val perplexity 587.2906
567
+ 9600 train 6.395058 (lr=1.5554e-05) (hash(x)=50651714)
568
+ 9700 val loss 6.3734
569
+ 9700 val perplexity 586.0524
570
+ 9700 train 6.412555 (lr=1.5312e-05) (hash(x)=47311384)
571
+ 9800 val loss 6.3752
572
+ 9800 val perplexity 587.0786
573
+ 9800 train 6.476438 (lr=1.5139e-05) (hash(x)=50921139)
574
+ 9900 val loss 6.3796
575
+ 9900 val perplexity 589.6859
576
+ 9900 train 6.516968 (lr=1.5035e-05) (hash(x)=48142455)
577
+ 9999 val loss 6.3698
578
+ 9999 val perplexity 583.9539
attention_kindselective_n_heads2_seed1339/model_09999.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a57a691fc3cbc9e4af5a526d24a0a868b7545da36257b4a8a890e820e336a821
3
  size 38587970
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c18e968e2e4cfd80b488a29f5c86cfbf6836e1e7c6aec84c0e3cac29e04433b
3
  size 38587970
attention_kindselective_n_heads2_seed1339/optimizer_09999.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dea87d02bb36351ad7cc4bbd01b389a9536dc03d472159789104875f6578d1e2
3
  size 70895430
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fbbdb2805e757033351f0ad60ce70888b71aa7093fd3548bd7d54145f2c8cac
3
  size 70895430