Upload folder using huggingface_hub
Browse files
attention_kindselective_n_heads2_seed1339/log2.txt
CHANGED
|
@@ -532,3 +532,47 @@ max_steps: 10000
|
|
| 532 |
8500 val loss 6.4191
|
| 533 |
8500 val perplexity 613.4542
|
| 534 |
8500 train 6.478737 (lr=2.2655e-05) (hash(x)=56176595)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 532 |
8500 val loss 6.4191
|
| 533 |
8500 val perplexity 613.4542
|
| 534 |
8500 train 6.478737 (lr=2.2655e-05) (hash(x)=56176595)
|
| 535 |
+
8600 val loss 6.4165
|
| 536 |
+
8600 val perplexity 611.8454
|
| 537 |
+
8600 train 6.476886 (lr=2.1685e-05) (hash(x)=55184249)
|
| 538 |
+
8700 val loss 6.4175
|
| 539 |
+
8700 val perplexity 612.4998
|
| 540 |
+
8700 train 6.344475 (lr=2.0777e-05) (hash(x)=46471646)
|
| 541 |
+
8800 val loss 6.4215
|
| 542 |
+
8800 val perplexity 614.9026
|
| 543 |
+
8800 train 6.112791 (lr=1.9933e-05) (hash(x)=46233162)
|
| 544 |
+
8900 val loss 6.4167
|
| 545 |
+
8900 val perplexity 611.9857
|
| 546 |
+
8900 train 6.344871 (lr=1.9153e-05) (hash(x)=47233684)
|
| 547 |
+
9000 val loss 6.4089
|
| 548 |
+
9000 val perplexity 607.2270
|
| 549 |
+
9000 train 6.273678 (lr=1.8439e-05) (hash(x)=48374529)
|
| 550 |
+
9100 val loss 6.3961
|
| 551 |
+
9100 val perplexity 599.5264
|
| 552 |
+
9100 train 6.349834 (lr=1.7790e-05) (hash(x)=48065371)
|
| 553 |
+
9200 val loss 6.3860
|
| 554 |
+
9200 val perplexity 593.4916
|
| 555 |
+
9200 train 6.378371 (lr=1.7208e-05) (hash(x)=47408078)
|
| 556 |
+
9300 val loss 6.3823
|
| 557 |
+
9300 val perplexity 591.3016
|
| 558 |
+
9300 train 6.362077 (lr=1.6692e-05) (hash(x)=50749781)
|
| 559 |
+
9400 val loss 6.3772
|
| 560 |
+
9400 val perplexity 588.2960
|
| 561 |
+
9400 train 6.570877 (lr=1.6245e-05) (hash(x)=48560169)
|
| 562 |
+
9500 val loss 6.3806
|
| 563 |
+
9500 val perplexity 590.2719
|
| 564 |
+
9500 train 6.477550 (lr=1.5865e-05) (hash(x)=50936392)
|
| 565 |
+
9600 val loss 6.3755
|
| 566 |
+
9600 val perplexity 587.2906
|
| 567 |
+
9600 train 6.395058 (lr=1.5554e-05) (hash(x)=50651714)
|
| 568 |
+
9700 val loss 6.3734
|
| 569 |
+
9700 val perplexity 586.0524
|
| 570 |
+
9700 train 6.412555 (lr=1.5312e-05) (hash(x)=47311384)
|
| 571 |
+
9800 val loss 6.3752
|
| 572 |
+
9800 val perplexity 587.0786
|
| 573 |
+
9800 train 6.476438 (lr=1.5139e-05) (hash(x)=50921139)
|
| 574 |
+
9900 val loss 6.3796
|
| 575 |
+
9900 val perplexity 589.6859
|
| 576 |
+
9900 train 6.516968 (lr=1.5035e-05) (hash(x)=48142455)
|
| 577 |
+
9999 val loss 6.3698
|
| 578 |
+
9999 val perplexity 583.9539
|
attention_kindselective_n_heads2_seed1339/model_09999.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 38587970
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2c18e968e2e4cfd80b488a29f5c86cfbf6836e1e7c6aec84c0e3cac29e04433b
|
| 3 |
size 38587970
|
attention_kindselective_n_heads2_seed1339/optimizer_09999.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 70895430
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5fbbdb2805e757033351f0ad60ce70888b71aa7093fd3548bd7d54145f2c8cac
|
| 3 |
size 70895430
|