Upload folder using huggingface_hub
Browse files- attention_kindselective_n_heads2_seed1341/log2.txt +104 -0
- attention_kindselective_n_heads2_seed1341/model_47500.pt +1 -1
- attention_kindselective_n_heads2_seed1341/model_49999.pt +1 -1
- attention_kindselective_n_heads2_seed1341/optimizer_47500.pt +1 -1
- attention_kindselective_n_heads2_seed1341/optimizer_49999.pt +1 -1
attention_kindselective_n_heads2_seed1341/log2.txt
CHANGED
|
@@ -4402,3 +4402,107 @@ max_steps: 50000
|
|
| 4402 |
46500 val loss 5.8856
|
| 4403 |
46500 val perplexity 359.8179
|
| 4404 |
46500 train 5.713169 (lr=1.1092e-05) (hash(x)=48511186)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4402 |
46500 val loss 5.8856
|
| 4403 |
46500 val perplexity 359.8179
|
| 4404 |
46500 train 5.713169 (lr=1.1092e-05) (hash(x)=48511186)
|
| 4405 |
+
46600 val loss 5.8860
|
| 4406 |
+
46600 val perplexity 359.9745
|
| 4407 |
+
46600 train 5.918826 (lr=1.1031e-05) (hash(x)=48832039)
|
| 4408 |
+
46700 val loss 5.8864
|
| 4409 |
+
46700 val perplexity 360.1195
|
| 4410 |
+
46700 train 6.561471 (lr=1.0972e-05) (hash(x)=65821271)
|
| 4411 |
+
46800 val loss 5.8886
|
| 4412 |
+
46800 val perplexity 360.8994
|
| 4413 |
+
46800 train 5.569217 (lr=1.0914e-05) (hash(x)=48005593)
|
| 4414 |
+
46900 val loss 5.8879
|
| 4415 |
+
46900 val perplexity 360.6612
|
| 4416 |
+
46900 train 6.157232 (lr=1.0858e-05) (hash(x)=53182453)
|
| 4417 |
+
47000 val loss 5.8908
|
| 4418 |
+
47000 val perplexity 361.6933
|
| 4419 |
+
47000 train 5.775409 (lr=1.0803e-05) (hash(x)=49385983)
|
| 4420 |
+
47100 val loss 5.8881
|
| 4421 |
+
47100 val perplexity 360.7214
|
| 4422 |
+
47100 train 5.876601 (lr=1.0751e-05) (hash(x)=48801622)
|
| 4423 |
+
47200 val loss 5.8882
|
| 4424 |
+
47200 val perplexity 360.7730
|
| 4425 |
+
47200 train 5.618414 (lr=1.0700e-05) (hash(x)=45216690)
|
| 4426 |
+
47300 val loss 5.8918
|
| 4427 |
+
47300 val perplexity 362.0417
|
| 4428 |
+
47300 train 5.716314 (lr=1.0651e-05) (hash(x)=51185601)
|
| 4429 |
+
47400 val loss 5.8878
|
| 4430 |
+
47400 val perplexity 360.6154
|
| 4431 |
+
47400 train 5.607826 (lr=1.0604e-05) (hash(x)=47743508)
|
| 4432 |
+
47500 val loss 5.8948
|
| 4433 |
+
47500 val perplexity 363.1413
|
| 4434 |
+
47500 train 5.676246 (lr=1.0558e-05) (hash(x)=49318833)
|
| 4435 |
+
47600 val loss 5.8933
|
| 4436 |
+
47600 val perplexity 362.6100
|
| 4437 |
+
47600 train 5.876511 (lr=1.0515e-05) (hash(x)=50430826)
|
| 4438 |
+
47700 val loss 5.8931
|
| 4439 |
+
47700 val perplexity 362.5116
|
| 4440 |
+
47700 train 5.814244 (lr=1.0473e-05) (hash(x)=51966931)
|
| 4441 |
+
47800 val loss 5.8937
|
| 4442 |
+
47800 val perplexity 362.7275
|
| 4443 |
+
47800 train 5.641396 (lr=1.0433e-05) (hash(x)=49517209)
|
| 4444 |
+
47900 val loss 5.8987
|
| 4445 |
+
47900 val perplexity 364.5595
|
| 4446 |
+
47900 train 5.698562 (lr=1.0394e-05) (hash(x)=49749702)
|
| 4447 |
+
48000 val loss 5.8991
|
| 4448 |
+
48000 val perplexity 364.6937
|
| 4449 |
+
48000 train 5.766129 (lr=1.0358e-05) (hash(x)=52535448)
|
| 4450 |
+
48100 val loss 5.8882
|
| 4451 |
+
48100 val perplexity 360.7525
|
| 4452 |
+
48100 train 6.531771 (lr=1.0323e-05) (hash(x)=53739181)
|
| 4453 |
+
48200 val loss 5.8824
|
| 4454 |
+
48200 val perplexity 358.6651
|
| 4455 |
+
48200 train 5.854337 (lr=1.0290e-05) (hash(x)=48086710)
|
| 4456 |
+
48300 val loss 5.8826
|
| 4457 |
+
48300 val perplexity 358.7256
|
| 4458 |
+
48300 train 5.921894 (lr=1.0259e-05) (hash(x)=43991942)
|
| 4459 |
+
48400 val loss 5.8792
|
| 4460 |
+
48400 val perplexity 357.5202
|
| 4461 |
+
48400 train 5.780863 (lr=1.0229e-05) (hash(x)=50801906)
|
| 4462 |
+
48500 val loss 5.8788
|
| 4463 |
+
48500 val perplexity 357.3769
|
| 4464 |
+
48500 train 5.955594 (lr=1.0201e-05) (hash(x)=48915599)
|
| 4465 |
+
48600 val loss 5.8765
|
| 4466 |
+
48600 val perplexity 356.5590
|
| 4467 |
+
48600 train 6.042453 (lr=1.0175e-05) (hash(x)=54450172)
|
| 4468 |
+
48700 val loss 5.8783
|
| 4469 |
+
48700 val perplexity 357.1961
|
| 4470 |
+
48700 train 5.748240 (lr=1.0151e-05) (hash(x)=48755509)
|
| 4471 |
+
48800 val loss 5.8904
|
| 4472 |
+
48800 val perplexity 361.5333
|
| 4473 |
+
48800 train 5.780968 (lr=1.0129e-05) (hash(x)=51380096)
|
| 4474 |
+
48900 val loss 5.8791
|
| 4475 |
+
48900 val perplexity 357.4912
|
| 4476 |
+
48900 train 5.744174 (lr=1.0108e-05) (hash(x)=43404218)
|
| 4477 |
+
49000 val loss 5.8779
|
| 4478 |
+
49000 val perplexity 357.0585
|
| 4479 |
+
49000 train 5.845617 (lr=1.0090e-05) (hash(x)=55059739)
|
| 4480 |
+
49100 val loss 5.8824
|
| 4481 |
+
49100 val perplexity 358.6769
|
| 4482 |
+
49100 train 6.111704 (lr=1.0073e-05) (hash(x)=47889309)
|
| 4483 |
+
49200 val loss 5.8832
|
| 4484 |
+
49200 val perplexity 358.9541
|
| 4485 |
+
49200 train 5.629024 (lr=1.0057e-05) (hash(x)=46202589)
|
| 4486 |
+
49300 val loss 5.8825
|
| 4487 |
+
49300 val perplexity 358.7058
|
| 4488 |
+
49300 train 5.780267 (lr=1.0044e-05) (hash(x)=49161813)
|
| 4489 |
+
49400 val loss 5.8826
|
| 4490 |
+
49400 val perplexity 358.7585
|
| 4491 |
+
49400 train 5.719767 (lr=1.0032e-05) (hash(x)=49505044)
|
| 4492 |
+
49500 val loss 5.8832
|
| 4493 |
+
49500 val perplexity 358.9488
|
| 4494 |
+
49500 train 5.663615 (lr=1.0022e-05) (hash(x)=46610327)
|
| 4495 |
+
49600 val loss 5.8839
|
| 4496 |
+
49600 val perplexity 359.2018
|
| 4497 |
+
49600 train 6.220163 (lr=1.0014e-05) (hash(x)=51188240)
|
| 4498 |
+
49700 val loss 5.8821
|
| 4499 |
+
49700 val perplexity 358.5739
|
| 4500 |
+
49700 train 5.680971 (lr=1.0008e-05) (hash(x)=41492016)
|
| 4501 |
+
49800 val loss 5.8812
|
| 4502 |
+
49800 val perplexity 358.2258
|
| 4503 |
+
49800 train 5.788856 (lr=1.0004e-05) (hash(x)=53488833)
|
| 4504 |
+
49900 val loss 5.8811
|
| 4505 |
+
49900 val perplexity 358.1883
|
| 4506 |
+
49900 train 6.194769 (lr=1.0001e-05) (hash(x)=60260935)
|
| 4507 |
+
49999 val loss 5.8753
|
| 4508 |
+
49999 val perplexity 356.1245
|
attention_kindselective_n_heads2_seed1341/model_47500.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 38587970
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cb22aa6f5947b15de1ac7135ec3e686429f487241f1a5524207c53012a4790f7
|
| 3 |
size 38587970
|
attention_kindselective_n_heads2_seed1341/model_49999.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 38587970
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:31806c8a239461981a224edaa0eb46f8d1f3413c143e682980dd953b9856c440
|
| 3 |
size 38587970
|
attention_kindselective_n_heads2_seed1341/optimizer_47500.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 70895430
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:43c587204270bfbb51bcc6f940266318ee64b48c78c0d8846062182ba107b060
|
| 3 |
size 70895430
|
attention_kindselective_n_heads2_seed1341/optimizer_49999.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 70895430
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d87df62bab55321fe1386a280814d4558ef6273be3157bbcccf05f163a097b2e
|
| 3 |
size 70895430
|