Upload folder using huggingface_hub
Browse files- attention_kindselective_n_heads2_seed1338/log2.txt +89 -0
- attention_kindselective_n_heads2_seed1338/model_47500.pt +1 -1
- attention_kindselective_n_heads2_seed1338/model_49999.pt +1 -1
- attention_kindselective_n_heads2_seed1338/optimizer_47500.pt +1 -1
- attention_kindselective_n_heads2_seed1338/optimizer_49999.pt +1 -1
attention_kindselective_n_heads2_seed1338/log2.txt
CHANGED
|
@@ -4417,3 +4417,92 @@ max_steps: 50000
|
|
| 4417 |
47000 val loss 5.6956
|
| 4418 |
47000 val perplexity 297.5557
|
| 4419 |
47000 train 5.862358 (lr=5.4017e-06) (hash(x)=58296973)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4417 |
47000 val loss 5.6956
|
| 4418 |
47000 val perplexity 297.5557
|
| 4419 |
47000 train 5.862358 (lr=5.4017e-06) (hash(x)=58296973)
|
| 4420 |
+
47100 val loss 5.6962
|
| 4421 |
+
47100 val perplexity 297.7274
|
| 4422 |
+
47100 train 5.498293 (lr=5.3755e-06) (hash(x)=46202543)
|
| 4423 |
+
47200 val loss 5.6949
|
| 4424 |
+
47200 val perplexity 297.3451
|
| 4425 |
+
47200 train 6.162602 (lr=5.3501e-06) (hash(x)=57611994)
|
| 4426 |
+
47300 val loss 5.6936
|
| 4427 |
+
47300 val perplexity 296.9551
|
| 4428 |
+
47300 train 5.818519 (lr=5.3256e-06) (hash(x)=56095511)
|
| 4429 |
+
47400 val loss 5.6926
|
| 4430 |
+
47400 val perplexity 296.6577
|
| 4431 |
+
47400 train 5.885818 (lr=5.3020e-06) (hash(x)=51127773)
|
| 4432 |
+
47500 val loss 5.6941
|
| 4433 |
+
47500 val perplexity 297.1053
|
| 4434 |
+
47500 train 5.578781 (lr=5.2792e-06) (hash(x)=54642108)
|
| 4435 |
+
47600 val loss 5.6947
|
| 4436 |
+
47600 val perplexity 297.2766
|
| 4437 |
+
47600 train 5.554379 (lr=5.2574e-06) (hash(x)=47872131)
|
| 4438 |
+
47700 val loss 5.6924
|
| 4439 |
+
47700 val perplexity 296.6082
|
| 4440 |
+
47700 train 5.694735 (lr=5.2364e-06) (hash(x)=45971021)
|
| 4441 |
+
47800 val loss 5.6943
|
| 4442 |
+
47800 val perplexity 297.1729
|
| 4443 |
+
47800 train 5.494211 (lr=5.2163e-06) (hash(x)=49707099)
|
| 4444 |
+
47900 val loss 5.6944
|
| 4445 |
+
47900 val perplexity 297.1941
|
| 4446 |
+
47900 train 5.501933 (lr=5.1972e-06) (hash(x)=50127863)
|
| 4447 |
+
48000 val loss 5.6916
|
| 4448 |
+
48000 val perplexity 296.3759
|
| 4449 |
+
48000 train 5.535919 (lr=5.1788e-06) (hash(x)=46879177)
|
| 4450 |
+
48100 val loss 5.6930
|
| 4451 |
+
48100 val perplexity 296.7791
|
| 4452 |
+
48100 train 5.493693 (lr=5.1614e-06) (hash(x)=49271148)
|
| 4453 |
+
48200 val loss 5.6936
|
| 4454 |
+
48200 val perplexity 296.9648
|
| 4455 |
+
48200 train 5.231916 (lr=5.1449e-06) (hash(x)=40698784)
|
| 4456 |
+
48300 val loss 5.6938
|
| 4457 |
+
48300 val perplexity 297.0309
|
| 4458 |
+
48300 train 5.482736 (lr=5.1293e-06) (hash(x)=51381202)
|
| 4459 |
+
48400 val loss 5.6927
|
| 4460 |
+
48400 val perplexity 296.6988
|
| 4461 |
+
48400 train 5.372602 (lr=5.1145e-06) (hash(x)=46128392)
|
| 4462 |
+
48500 val loss 5.6943
|
| 4463 |
+
48500 val perplexity 297.1581
|
| 4464 |
+
48500 train 5.393999 (lr=5.1007e-06) (hash(x)=45126703)
|
| 4465 |
+
48600 val loss 5.6963
|
| 4466 |
+
48600 val perplexity 297.7726
|
| 4467 |
+
48600 train 5.534537 (lr=5.0877e-06) (hash(x)=48001878)
|
| 4468 |
+
48700 val loss 5.6909
|
| 4469 |
+
48700 val perplexity 296.1459
|
| 4470 |
+
48700 train 5.510094 (lr=5.0756e-06) (hash(x)=50726237)
|
| 4471 |
+
48800 val loss 5.6946
|
| 4472 |
+
48800 val perplexity 297.2566
|
| 4473 |
+
48800 train 5.679790 (lr=5.0644e-06) (hash(x)=53023918)
|
| 4474 |
+
48900 val loss 5.6915
|
| 4475 |
+
48900 val perplexity 296.3504
|
| 4476 |
+
48900 train 5.466436 (lr=5.0542e-06) (hash(x)=46623158)
|
| 4477 |
+
49000 val loss 5.6904
|
| 4478 |
+
49000 val perplexity 296.0216
|
| 4479 |
+
49000 train 5.534313 (lr=5.0448e-06) (hash(x)=48558395)
|
| 4480 |
+
49100 val loss 5.6884
|
| 4481 |
+
49100 val perplexity 295.4112
|
| 4482 |
+
49100 train 5.531930 (lr=5.0363e-06) (hash(x)=48791085)
|
| 4483 |
+
49200 val loss 5.6897
|
| 4484 |
+
49200 val perplexity 295.7928
|
| 4485 |
+
49200 train 6.411170 (lr=5.0286e-06) (hash(x)=58625942)
|
| 4486 |
+
49300 val loss 5.6890
|
| 4487 |
+
49300 val perplexity 295.5917
|
| 4488 |
+
49300 train 5.664113 (lr=5.0219e-06) (hash(x)=52680896)
|
| 4489 |
+
49400 val loss 5.6888
|
| 4490 |
+
49400 val perplexity 295.5517
|
| 4491 |
+
49400 train 5.797068 (lr=5.0161e-06) (hash(x)=59381598)
|
| 4492 |
+
49500 val loss 5.6911
|
| 4493 |
+
49500 val perplexity 296.2069
|
| 4494 |
+
49500 train 5.969440 (lr=5.0112e-06) (hash(x)=51678773)
|
| 4495 |
+
49600 val loss 5.6903
|
| 4496 |
+
49600 val perplexity 295.9921
|
| 4497 |
+
49600 train 5.465007 (lr=5.0072e-06) (hash(x)=49092923)
|
| 4498 |
+
49700 val loss 5.6907
|
| 4499 |
+
49700 val perplexity 296.1018
|
| 4500 |
+
49700 train 5.918400 (lr=5.0040e-06) (hash(x)=55550116)
|
| 4501 |
+
49800 val loss 5.6898
|
| 4502 |
+
49800 val perplexity 295.8321
|
| 4503 |
+
49800 train 5.565441 (lr=5.0018e-06) (hash(x)=48422352)
|
| 4504 |
+
49900 val loss 5.6916
|
| 4505 |
+
49900 val perplexity 296.3545
|
| 4506 |
+
49900 train 5.764893 (lr=5.0004e-06) (hash(x)=52576880)
|
| 4507 |
+
49999 val loss 5.6909
|
| 4508 |
+
49999 val perplexity 296.1590
|
attention_kindselective_n_heads2_seed1338/model_47500.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 38587970
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c61c7e55bc6fdc5a2b29941a2b66a41aec21ae86bcf0b7f65e27fce807d6198d
|
| 3 |
size 38587970
|
attention_kindselective_n_heads2_seed1338/model_49999.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 38587970
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:33163b2f060c6c5f5a1cd71a9bcd5d1e2acf53e0674e95c52e8495d8ecf19f49
|
| 3 |
size 38587970
|
attention_kindselective_n_heads2_seed1338/optimizer_47500.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 70895430
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:336781bbe92c707410ef574515b5a29bed7ef2b59eae47fac04a5d57c5a08b0d
|
| 3 |
size 70895430
|
attention_kindselective_n_heads2_seed1338/optimizer_49999.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 70895430
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4f919e489d0aee4db2867bde0664bdf90b8304dc8b004630eb73a6f44d69afdf
|
| 3 |
size 70895430
|