andrew-healey commited on
Commit
3cabd13
·
verified ·
1 Parent(s): a2aba29

Upload folder using huggingface_hub

Browse files
attention_kindselective_n_heads4_seed1339/log2.txt CHANGED
@@ -439,3 +439,65 @@ max_steps: 8750
439
  8700 train 5.756435 (lr=5.0041e-06) (hash(x)=146417632)
440
  8749 val loss 5.5336
441
  8749 val perplexity 253.0482
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
439
  8700 train 5.756435 (lr=5.0041e-06) (hash(x)=146417632)
440
  8749 val loss 5.5336
441
  8749 val perplexity 253.0482
442
+ 6800 val loss 5.7526
443
+ 6800 val perplexity 314.9956
444
+ 6800 train 5.682679 (lr=8.7387e-06) (hash(x)=155640155)
445
+ 6900 val loss 5.7481
446
+ 6900 val perplexity 313.5948
447
+ 6900 train 5.721629 (lr=8.2849e-06) (hash(x)=153722115)
448
+ 7000 val loss 5.7443
449
+ 7000 val perplexity 312.4002
450
+ 7000 train 5.650160 (lr=7.8510e-06) (hash(x)=146953450)
451
+ 7100 val loss 5.7435
452
+ 7100 val perplexity 312.1492
453
+ 7100 train 5.580120 (lr=7.4377e-06) (hash(x)=137663885)
454
+ 7200 val loss 5.7340
455
+ 7200 val perplexity 309.2181
456
+ 7200 train 5.792004 (lr=7.0455e-06) (hash(x)=146172950)
457
+ 7300 val loss 5.7297
458
+ 7300 val perplexity 307.8705
459
+ 7300 train 5.620568 (lr=6.6749e-06) (hash(x)=150018163)
460
+ 7400 val loss 5.7279
461
+ 7400 val perplexity 307.3379
462
+ 7400 train 5.719522 (lr=6.3266e-06) (hash(x)=145351166)
463
+ 7500 val loss 5.7288
464
+ 7500 val perplexity 307.5870
465
+ 7500 train 5.506716 (lr=6.0010e-06) (hash(x)=145292116)
466
+ 7600 val loss 5.7217
467
+ 7600 val perplexity 305.4134
468
+ 7600 train 5.636374 (lr=5.6986e-06) (hash(x)=150235132)
469
+ 7700 val loss 5.7184
470
+ 7700 val perplexity 304.4276
471
+ 7700 train 5.628303 (lr=5.4198e-06) (hash(x)=154543455)
472
+ 7800 val loss 5.7168
473
+ 7800 val perplexity 303.9220
474
+ 7800 train 5.608966 (lr=5.1650e-06) (hash(x)=142456852)
475
+ 7900 val loss 5.7176
476
+ 7900 val perplexity 304.1714
477
+ 7900 train 5.495376 (lr=4.9347e-06) (hash(x)=147363479)
478
+ 8000 val loss 5.7109
479
+ 8000 val perplexity 302.1479
480
+ 8000 train 5.746116 (lr=4.7291e-06) (hash(x)=156122973)
481
+ 8100 val loss 5.7083
482
+ 8100 val perplexity 301.3624
483
+ 8100 train 5.747890 (lr=4.5486e-06) (hash(x)=156153179)
484
+ 8200 val loss 5.7069
485
+ 8200 val perplexity 300.9515
486
+ 8200 train 5.745360 (lr=4.3933e-06) (hash(x)=146430698)
487
+ 8300 val loss 5.7069
488
+ 8300 val perplexity 300.9449
489
+ 8300 train 5.574670 (lr=4.2636e-06) (hash(x)=143507257)
490
+ 8400 val loss 5.7014
491
+ 8400 val perplexity 299.2712
492
+ 8400 train 5.766168 (lr=4.1596e-06) (hash(x)=166272643)
493
+ 8500 val loss 5.7005
494
+ 8500 val perplexity 299.0067
495
+ 8500 train 5.603253 (lr=4.0815e-06) (hash(x)=143887848)
496
+ 8600 val loss 5.6995
497
+ 8600 val perplexity 298.7304
498
+ 8600 train 5.748824 (lr=4.0294e-06) (hash(x)=156900341)
499
+ 8700 val loss 5.6953
500
+ 8700 val perplexity 297.4741
501
+ 8700 train 5.929177 (lr=4.0033e-06) (hash(x)=146417632)
502
+ 8749 val loss 5.6947
503
+ 8749 val perplexity 297.2797
attention_kindselective_n_heads4_seed1339/model_08749.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:60ed2548fcfa7fffe8d4b22ec9f8b7995758e15c929311acbe1a883e5373605f
3
  size 92843394
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7abe87c847229bf1684006b8bde723ef8c75a76d7a05ed400156a9bc400f66d2
3
  size 92843394
attention_kindselective_n_heads4_seed1339/optimizer_08749.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4f65d1d60081c55216179bc3356d012509461de93333d3ddfdd692c3fe5f2144
3
  size 179406214
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:523933e1721db59976135c171ccec9ef73e577aada970afe6a5478b35e1e05b1
3
  size 179406214