Training in progress, step 750000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +2 -2
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:06a6243c9d4d4400f2fd4b2fe0137f4acacd18bc76af280a65557307175812a8
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ca21d56bc23134f1ffa9773f5c9a5a5bec62d83d95ecc451f1563d89bf1ca8c7
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:33d75ec6779189bfdd9065f08c6e9a994b1eb8b76967bfabdd6b27ee5e25887e
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1d18eaa545b11ae802428fcc84a292bf166dde7d82779209b7ff66360d3f3202
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:da273cbae47af7d864ade3a4ed313bbb87dd2d2870dcaadf34a903110facd670
|
| 3 |
+
size 14439
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c436a8d2150800bc376a9c6d9dcefc2480a69f8f3df22480e30232f440acef23
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3f706ed5b2c0294f08358e4528b0c33500a028d6a90aa7b815bde92b4347b626
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 11.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -5482,11 +5482,85 @@
|
|
| 5482 |
"eval_samples_per_second": 1289.598,
|
| 5483 |
"eval_steps_per_second": 20.634,
|
| 5484 |
"step": 740000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5485 |
}
|
| 5486 |
],
|
| 5487 |
"max_steps": 1000000,
|
| 5488 |
"num_train_epochs": 16,
|
| 5489 |
-
"total_flos": 5.
|
| 5490 |
"trial_name": null,
|
| 5491 |
"trial_params": null
|
| 5492 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 11.45265472536534,
|
| 5 |
+
"global_step": 750000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 5482 |
"eval_samples_per_second": 1289.598,
|
| 5483 |
"eval_steps_per_second": 20.634,
|
| 5484 |
"step": 740000
|
| 5485 |
+
},
|
| 5486 |
+
{
|
| 5487 |
+
"epoch": 11.32,
|
| 5488 |
+
"learning_rate": 3.4143846579608744e-05,
|
| 5489 |
+
"loss": 0.2411,
|
| 5490 |
+
"step": 741000
|
| 5491 |
+
},
|
| 5492 |
+
{
|
| 5493 |
+
"epoch": 11.33,
|
| 5494 |
+
"learning_rate": 3.396919794145629e-05,
|
| 5495 |
+
"loss": 0.2412,
|
| 5496 |
+
"step": 742000
|
| 5497 |
+
},
|
| 5498 |
+
{
|
| 5499 |
+
"epoch": 11.35,
|
| 5500 |
+
"learning_rate": 3.3795052688780345e-05,
|
| 5501 |
+
"loss": 0.241,
|
| 5502 |
+
"step": 743000
|
| 5503 |
+
},
|
| 5504 |
+
{
|
| 5505 |
+
"epoch": 11.36,
|
| 5506 |
+
"learning_rate": 3.362141272600552e-05,
|
| 5507 |
+
"loss": 0.2413,
|
| 5508 |
+
"step": 744000
|
| 5509 |
+
},
|
| 5510 |
+
{
|
| 5511 |
+
"epoch": 11.38,
|
| 5512 |
+
"learning_rate": 3.3448279952030615e-05,
|
| 5513 |
+
"loss": 0.241,
|
| 5514 |
+
"step": 745000
|
| 5515 |
+
},
|
| 5516 |
+
{
|
| 5517 |
+
"epoch": 11.38,
|
| 5518 |
+
"eval_runtime": 0.937,
|
| 5519 |
+
"eval_samples_per_second": 1067.221,
|
| 5520 |
+
"eval_steps_per_second": 17.076,
|
| 5521 |
+
"step": 745000
|
| 5522 |
+
},
|
| 5523 |
+
{
|
| 5524 |
+
"epoch": 11.39,
|
| 5525 |
+
"learning_rate": 3.327565626020793e-05,
|
| 5526 |
+
"loss": 0.2408,
|
| 5527 |
+
"step": 746000
|
| 5528 |
+
},
|
| 5529 |
+
{
|
| 5530 |
+
"epoch": 11.41,
|
| 5531 |
+
"learning_rate": 3.3103543538322455e-05,
|
| 5532 |
+
"loss": 0.2408,
|
| 5533 |
+
"step": 747000
|
| 5534 |
+
},
|
| 5535 |
+
{
|
| 5536 |
+
"epoch": 11.42,
|
| 5537 |
+
"learning_rate": 3.293194366857137e-05,
|
| 5538 |
+
"loss": 0.2407,
|
| 5539 |
+
"step": 748000
|
| 5540 |
+
},
|
| 5541 |
+
{
|
| 5542 |
+
"epoch": 11.44,
|
| 5543 |
+
"learning_rate": 3.276085852754336e-05,
|
| 5544 |
+
"loss": 0.2409,
|
| 5545 |
+
"step": 749000
|
| 5546 |
+
},
|
| 5547 |
+
{
|
| 5548 |
+
"epoch": 11.45,
|
| 5549 |
+
"learning_rate": 3.259028998619814e-05,
|
| 5550 |
+
"loss": 0.2405,
|
| 5551 |
+
"step": 750000
|
| 5552 |
+
},
|
| 5553 |
+
{
|
| 5554 |
+
"epoch": 11.45,
|
| 5555 |
+
"eval_runtime": 0.7243,
|
| 5556 |
+
"eval_samples_per_second": 1380.717,
|
| 5557 |
+
"eval_steps_per_second": 22.091,
|
| 5558 |
+
"step": 750000
|
| 5559 |
}
|
| 5560 |
],
|
| 5561 |
"max_steps": 1000000,
|
| 5562 |
"num_train_epochs": 16,
|
| 5563 |
+
"total_flos": 5.2575123495263995e+22,
|
| 5564 |
"trial_name": null,
|
| 5565 |
"trial_params": null
|
| 5566 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ca21d56bc23134f1ffa9773f5c9a5a5bec62d83d95ecc451f1563d89bf1ca8c7
|
| 3 |
size 449471589
|