Upload folder using huggingface_hub
Browse files
attention_kindselective_n_heads2_seed1341/log2.txt
CHANGED
|
@@ -493,3 +493,68 @@ max_steps: 10000
|
|
| 493 |
7800 val loss 6.4186
|
| 494 |
7800 val perplexity 613.1287
|
| 495 |
7800 train 6.243061 (lr=3.1102e-05) (hash(x)=48049749)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 493 |
7800 val loss 6.4186
|
| 494 |
7800 val perplexity 613.1287
|
| 495 |
7800 train 6.243061 (lr=3.1102e-05) (hash(x)=48049749)
|
| 496 |
+
7900 val loss 6.4043
|
| 497 |
+
7900 val perplexity 604.4531
|
| 498 |
+
7900 train 6.246456 (lr=2.9726e-05) (hash(x)=44768513)
|
| 499 |
+
8000 val loss 6.4115
|
| 500 |
+
8000 val perplexity 608.7827
|
| 501 |
+
8000 train 6.265230 (lr=2.8405e-05) (hash(x)=46228039)
|
| 502 |
+
8100 val loss 6.4030
|
| 503 |
+
8100 val perplexity 603.6253
|
| 504 |
+
8100 train 6.710750 (lr=2.7138e-05) (hash(x)=60017091)
|
| 505 |
+
8200 val loss 6.3952
|
| 506 |
+
8200 val perplexity 598.9441
|
| 507 |
+
8200 train 6.407417 (lr=2.5929e-05) (hash(x)=49910198)
|
| 508 |
+
8300 val loss 6.3936
|
| 509 |
+
8300 val perplexity 598.0146
|
| 510 |
+
8300 train 6.694312 (lr=2.4778e-05) (hash(x)=57919055)
|
| 511 |
+
8400 val loss 6.3967
|
| 512 |
+
8400 val perplexity 599.8673
|
| 513 |
+
8400 train 6.466394 (lr=2.3686e-05) (hash(x)=49694964)
|
| 514 |
+
8500 val loss 6.3893
|
| 515 |
+
8500 val perplexity 595.4279
|
| 516 |
+
8500 train 6.434396 (lr=2.2655e-05) (hash(x)=53762585)
|
| 517 |
+
8600 val loss 6.3880
|
| 518 |
+
8600 val perplexity 594.6692
|
| 519 |
+
8600 train 6.429276 (lr=2.1685e-05) (hash(x)=51166973)
|
| 520 |
+
8700 val loss 6.3826
|
| 521 |
+
8700 val perplexity 591.4665
|
| 522 |
+
8700 train 6.472390 (lr=2.0777e-05) (hash(x)=53968049)
|
| 523 |
+
8800 val loss 6.3809
|
| 524 |
+
8800 val perplexity 590.4394
|
| 525 |
+
8800 train 6.468944 (lr=1.9933e-05) (hash(x)=59231056)
|
| 526 |
+
8900 val loss 6.3768
|
| 527 |
+
8900 val perplexity 588.0292
|
| 528 |
+
8900 train 6.261299 (lr=1.9153e-05) (hash(x)=50488048)
|
| 529 |
+
9000 val loss 6.3776
|
| 530 |
+
9000 val perplexity 588.4850
|
| 531 |
+
9000 train 6.119425 (lr=1.8439e-05) (hash(x)=44492956)
|
| 532 |
+
9100 val loss 6.3821
|
| 533 |
+
9100 val perplexity 591.1530
|
| 534 |
+
9100 train 6.412546 (lr=1.7790e-05) (hash(x)=51134989)
|
| 535 |
+
9200 val loss 6.3779
|
| 536 |
+
9200 val perplexity 588.6931
|
| 537 |
+
9200 train 6.194124 (lr=1.7208e-05) (hash(x)=48636056)
|
| 538 |
+
9300 val loss 6.3750
|
| 539 |
+
9300 val perplexity 587.0090
|
| 540 |
+
9300 train 6.325751 (lr=1.6692e-05) (hash(x)=50200551)
|
| 541 |
+
9400 val loss 6.3751
|
| 542 |
+
9400 val perplexity 587.0705
|
| 543 |
+
9400 train 6.198377 (lr=1.6245e-05) (hash(x)=48057228)
|
| 544 |
+
9500 val loss 6.3753
|
| 545 |
+
9500 val perplexity 587.1808
|
| 546 |
+
9500 train 6.169206 (lr=1.5865e-05) (hash(x)=48125171)
|
| 547 |
+
9600 val loss 6.3750
|
| 548 |
+
9600 val perplexity 587.0090
|
| 549 |
+
9600 train 6.290072 (lr=1.5554e-05) (hash(x)=53375853)
|
| 550 |
+
9700 val loss 6.3654
|
| 551 |
+
9700 val perplexity 581.3495
|
| 552 |
+
9700 train 7.198069 (lr=1.5312e-05) (hash(x)=53924631)
|
| 553 |
+
9800 val loss 6.3617
|
| 554 |
+
9800 val perplexity 579.2410
|
| 555 |
+
9800 train 6.411182 (lr=1.5139e-05) (hash(x)=48895047)
|
| 556 |
+
9900 val loss 6.3613
|
| 557 |
+
9900 val perplexity 579.0135
|
| 558 |
+
9900 train 6.211933 (lr=1.5035e-05) (hash(x)=44269923)
|
| 559 |
+
9999 val loss 6.3711
|
| 560 |
+
9999 val perplexity 584.7159
|
attention_kindselective_n_heads2_seed1341/model_09999.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 38587970
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:43bb848496fc20a758cf6d49ce282145dd82b89869f968b14ca22f6a77beab2a
|
| 3 |
size 38587970
|
attention_kindselective_n_heads2_seed1341/optimizer_09999.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 70895430
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4f2cf6f588f31fa92cdbe2ab5c683118c1d1e79391ba5aba099a972d82549afc
|
| 3 |
size 70895430
|