Upload folder using huggingface_hub
Browse files
attention_kindselective_n_heads2_seed1340/log2.txt
CHANGED
|
@@ -4470,3 +4470,38 @@ max_steps: 50000
|
|
| 4470 |
48800 val loss 5.7038
|
| 4471 |
48800 val perplexity 299.9909
|
| 4472 |
48800 train 5.903813 (lr=5.0644e-06) (hash(x)=52737449)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4470 |
48800 val loss 5.7038
|
| 4471 |
48800 val perplexity 299.9909
|
| 4472 |
48800 train 5.903813 (lr=5.0644e-06) (hash(x)=52737449)
|
| 4473 |
+
48900 val loss 5.7047
|
| 4474 |
+
48900 val perplexity 300.2729
|
| 4475 |
+
48900 train 5.558086 (lr=5.0542e-06) (hash(x)=47057569)
|
| 4476 |
+
49000 val loss 5.7069
|
| 4477 |
+
49000 val perplexity 300.9343
|
| 4478 |
+
49000 train 5.658326 (lr=5.0448e-06) (hash(x)=49908975)
|
| 4479 |
+
49100 val loss 5.7053
|
| 4480 |
+
49100 val perplexity 300.4627
|
| 4481 |
+
49100 train 5.540577 (lr=5.0363e-06) (hash(x)=48427414)
|
| 4482 |
+
49200 val loss 5.7030
|
| 4483 |
+
49200 val perplexity 299.7681
|
| 4484 |
+
49200 train 5.508754 (lr=5.0286e-06) (hash(x)=50246074)
|
| 4485 |
+
49300 val loss 5.7049
|
| 4486 |
+
49300 val perplexity 300.3210
|
| 4487 |
+
49300 train 5.766322 (lr=5.0219e-06) (hash(x)=47715359)
|
| 4488 |
+
49400 val loss 5.7077
|
| 4489 |
+
49400 val perplexity 301.1862
|
| 4490 |
+
49400 train 5.698795 (lr=5.0161e-06) (hash(x)=50175867)
|
| 4491 |
+
49500 val loss 5.7092
|
| 4492 |
+
49500 val perplexity 301.6173
|
| 4493 |
+
49500 train 5.520982 (lr=5.0112e-06) (hash(x)=49336040)
|
| 4494 |
+
49600 val loss 5.7104
|
| 4495 |
+
49600 val perplexity 301.9982
|
| 4496 |
+
49600 train 5.768514 (lr=5.0072e-06) (hash(x)=52039357)
|
| 4497 |
+
49700 val loss 5.7126
|
| 4498 |
+
49700 val perplexity 302.6517
|
| 4499 |
+
49700 train 5.517511 (lr=5.0040e-06) (hash(x)=47568707)
|
| 4500 |
+
49800 val loss 5.7114
|
| 4501 |
+
49800 val perplexity 302.2973
|
| 4502 |
+
49800 train 5.637798 (lr=5.0018e-06) (hash(x)=48451274)
|
| 4503 |
+
49900 val loss 5.7126
|
| 4504 |
+
49900 val perplexity 302.6707
|
| 4505 |
+
49900 train 5.381332 (lr=5.0004e-06) (hash(x)=44523603)
|
| 4506 |
+
49999 val loss 5.7046
|
| 4507 |
+
49999 val perplexity 300.2593
|
attention_kindselective_n_heads2_seed1340/model_49999.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 38587970
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0690e7db1073037b0a065585d087f8a57e28fb7064301c18de326ae88ed005c0
|
| 3 |
size 38587970
|
attention_kindselective_n_heads2_seed1340/optimizer_49999.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 70895430
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7ea30a30de70605c60c23814323b757e98da6b58e66697ce3eefda8b5fa2214d
|
| 3 |
size 70895430
|