Upload folder using huggingface_hub
Browse files
attention_kindselective_n_heads4_seed1339/log2.txt
CHANGED
|
@@ -439,3 +439,65 @@ max_steps: 8750
|
|
| 439 |
8700 train 5.756435 (lr=5.0041e-06) (hash(x)=146417632)
|
| 440 |
8749 val loss 5.5336
|
| 441 |
8749 val perplexity 253.0482
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 439 |
8700 train 5.756435 (lr=5.0041e-06) (hash(x)=146417632)
|
| 440 |
8749 val loss 5.5336
|
| 441 |
8749 val perplexity 253.0482
|
| 442 |
+
6800 val loss 5.7526
|
| 443 |
+
6800 val perplexity 314.9956
|
| 444 |
+
6800 train 5.682679 (lr=8.7387e-06) (hash(x)=155640155)
|
| 445 |
+
6900 val loss 5.7481
|
| 446 |
+
6900 val perplexity 313.5948
|
| 447 |
+
6900 train 5.721629 (lr=8.2849e-06) (hash(x)=153722115)
|
| 448 |
+
7000 val loss 5.7443
|
| 449 |
+
7000 val perplexity 312.4002
|
| 450 |
+
7000 train 5.650160 (lr=7.8510e-06) (hash(x)=146953450)
|
| 451 |
+
7100 val loss 5.7435
|
| 452 |
+
7100 val perplexity 312.1492
|
| 453 |
+
7100 train 5.580120 (lr=7.4377e-06) (hash(x)=137663885)
|
| 454 |
+
7200 val loss 5.7340
|
| 455 |
+
7200 val perplexity 309.2181
|
| 456 |
+
7200 train 5.792004 (lr=7.0455e-06) (hash(x)=146172950)
|
| 457 |
+
7300 val loss 5.7297
|
| 458 |
+
7300 val perplexity 307.8705
|
| 459 |
+
7300 train 5.620568 (lr=6.6749e-06) (hash(x)=150018163)
|
| 460 |
+
7400 val loss 5.7279
|
| 461 |
+
7400 val perplexity 307.3379
|
| 462 |
+
7400 train 5.719522 (lr=6.3266e-06) (hash(x)=145351166)
|
| 463 |
+
7500 val loss 5.7288
|
| 464 |
+
7500 val perplexity 307.5870
|
| 465 |
+
7500 train 5.506716 (lr=6.0010e-06) (hash(x)=145292116)
|
| 466 |
+
7600 val loss 5.7217
|
| 467 |
+
7600 val perplexity 305.4134
|
| 468 |
+
7600 train 5.636374 (lr=5.6986e-06) (hash(x)=150235132)
|
| 469 |
+
7700 val loss 5.7184
|
| 470 |
+
7700 val perplexity 304.4276
|
| 471 |
+
7700 train 5.628303 (lr=5.4198e-06) (hash(x)=154543455)
|
| 472 |
+
7800 val loss 5.7168
|
| 473 |
+
7800 val perplexity 303.9220
|
| 474 |
+
7800 train 5.608966 (lr=5.1650e-06) (hash(x)=142456852)
|
| 475 |
+
7900 val loss 5.7176
|
| 476 |
+
7900 val perplexity 304.1714
|
| 477 |
+
7900 train 5.495376 (lr=4.9347e-06) (hash(x)=147363479)
|
| 478 |
+
8000 val loss 5.7109
|
| 479 |
+
8000 val perplexity 302.1479
|
| 480 |
+
8000 train 5.746116 (lr=4.7291e-06) (hash(x)=156122973)
|
| 481 |
+
8100 val loss 5.7083
|
| 482 |
+
8100 val perplexity 301.3624
|
| 483 |
+
8100 train 5.747890 (lr=4.5486e-06) (hash(x)=156153179)
|
| 484 |
+
8200 val loss 5.7069
|
| 485 |
+
8200 val perplexity 300.9515
|
| 486 |
+
8200 train 5.745360 (lr=4.3933e-06) (hash(x)=146430698)
|
| 487 |
+
8300 val loss 5.7069
|
| 488 |
+
8300 val perplexity 300.9449
|
| 489 |
+
8300 train 5.574670 (lr=4.2636e-06) (hash(x)=143507257)
|
| 490 |
+
8400 val loss 5.7014
|
| 491 |
+
8400 val perplexity 299.2712
|
| 492 |
+
8400 train 5.766168 (lr=4.1596e-06) (hash(x)=166272643)
|
| 493 |
+
8500 val loss 5.7005
|
| 494 |
+
8500 val perplexity 299.0067
|
| 495 |
+
8500 train 5.603253 (lr=4.0815e-06) (hash(x)=143887848)
|
| 496 |
+
8600 val loss 5.6995
|
| 497 |
+
8600 val perplexity 298.7304
|
| 498 |
+
8600 train 5.748824 (lr=4.0294e-06) (hash(x)=156900341)
|
| 499 |
+
8700 val loss 5.6953
|
| 500 |
+
8700 val perplexity 297.4741
|
| 501 |
+
8700 train 5.929177 (lr=4.0033e-06) (hash(x)=146417632)
|
| 502 |
+
8749 val loss 5.6947
|
| 503 |
+
8749 val perplexity 297.2797
|
attention_kindselective_n_heads4_seed1339/model_08749.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 92843394
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7abe87c847229bf1684006b8bde723ef8c75a76d7a05ed400156a9bc400f66d2
|
| 3 |
size 92843394
|
attention_kindselective_n_heads4_seed1339/optimizer_08749.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 179406214
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:523933e1721db59976135c171ccec9ef73e577aada970afe6a5478b35e1e05b1
|
| 3 |
size 179406214
|