Training in progress, step 390000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +2 -2
- last-checkpoint/rng_state_2.pth +2 -2
- last-checkpoint/rng_state_3.pth +2 -2
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 202194449
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d1df67f6183cc09f42fe2f0f37f8357c9b8e65f61395be8418801628a4f2e406
|
| 3 |
size 202194449
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 102501541
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a319b7f2b744152a6d0b7c7b011dcbab2f50e31847aad07fbf0452468b5c5506
|
| 3 |
size 102501541
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2b801257833dcb303ed75576841dd61289770bb2e540d74ef6ab937039253da2
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b6243d1d6e37c5feee8bc48e8b6c7c5cd2d209aa34c8943a605523b56be5d5e4
|
| 3 |
+
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1abc03aa32f692919d10687fb19b346a26a86c47bfcdc5586f5937683d9a753d
|
| 3 |
+
size 14567
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:05b257bebecd01fe3120913d4e730e44697707b0fb5a5618a441dd1149c501fa
|
| 3 |
+
size 14439
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:593b188cbaecd147825c6ab6a7428985fef9dedd306035b01be6e3046807b5ca
|
| 3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ab242969f427542b52610398ae9f7ae92f28c7f51c41a80b11d327fe5508415b
|
| 3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d23f4669a6b401e22fd0094f76b78d30e5af5448c611b04bb84563f4723f22b5
|
| 3 |
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c94a5e21ff6cb02edd9d7611a60f73f17f810262fe1570be2488fda0b577fd53
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:864647684ab3694f7aa2a258c1806e10c4abf99f67ed5e54443050e485ac9436
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 11.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -7606,11 +7606,211 @@
|
|
| 7606 |
"eval_samples_per_second": 1953.621,
|
| 7607 |
"eval_steps_per_second": 31.258,
|
| 7608 |
"step": 380000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7609 |
}
|
| 7610 |
],
|
| 7611 |
"max_steps": 500000,
|
| 7612 |
"num_train_epochs": 16,
|
| 7613 |
-
"total_flos": 1.
|
| 7614 |
"trial_name": null,
|
| 7615 |
"trial_params": null
|
| 7616 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 11.948895493121725,
|
| 5 |
+
"global_step": 390000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 7606 |
"eval_samples_per_second": 1953.621,
|
| 7607 |
"eval_steps_per_second": 31.258,
|
| 7608 |
"step": 380000
|
| 7609 |
+
},
|
| 7610 |
+
{
|
| 7611 |
+
"epoch": 11.66,
|
| 7612 |
+
"learning_rate": 5.2979369233306834e-05,
|
| 7613 |
+
"loss": 0.3201,
|
| 7614 |
+
"step": 380500
|
| 7615 |
+
},
|
| 7616 |
+
{
|
| 7617 |
+
"epoch": 11.67,
|
| 7618 |
+
"learning_rate": 5.26391878673975e-05,
|
| 7619 |
+
"loss": 0.32,
|
| 7620 |
+
"step": 381000
|
| 7621 |
+
},
|
| 7622 |
+
{
|
| 7623 |
+
"epoch": 11.67,
|
| 7624 |
+
"eval_loss": 0.7779923677444458,
|
| 7625 |
+
"eval_runtime": 0.495,
|
| 7626 |
+
"eval_samples_per_second": 2020.075,
|
| 7627 |
+
"eval_steps_per_second": 32.321,
|
| 7628 |
+
"step": 381000
|
| 7629 |
+
},
|
| 7630 |
+
{
|
| 7631 |
+
"epoch": 11.69,
|
| 7632 |
+
"learning_rate": 5.230012590292987e-05,
|
| 7633 |
+
"loss": 0.3199,
|
| 7634 |
+
"step": 381500
|
| 7635 |
+
},
|
| 7636 |
+
{
|
| 7637 |
+
"epoch": 11.7,
|
| 7638 |
+
"learning_rate": 5.1962187047831517e-05,
|
| 7639 |
+
"loss": 0.32,
|
| 7640 |
+
"step": 382000
|
| 7641 |
+
},
|
| 7642 |
+
{
|
| 7643 |
+
"epoch": 11.7,
|
| 7644 |
+
"eval_loss": 0.7783936858177185,
|
| 7645 |
+
"eval_runtime": 0.517,
|
| 7646 |
+
"eval_samples_per_second": 1934.07,
|
| 7647 |
+
"eval_steps_per_second": 30.945,
|
| 7648 |
+
"step": 382000
|
| 7649 |
+
},
|
| 7650 |
+
{
|
| 7651 |
+
"epoch": 11.72,
|
| 7652 |
+
"learning_rate": 5.162537499774743e-05,
|
| 7653 |
+
"loss": 0.3201,
|
| 7654 |
+
"step": 382500
|
| 7655 |
+
},
|
| 7656 |
+
{
|
| 7657 |
+
"epoch": 11.73,
|
| 7658 |
+
"learning_rate": 5.128969343600032e-05,
|
| 7659 |
+
"loss": 0.32,
|
| 7660 |
+
"step": 383000
|
| 7661 |
+
},
|
| 7662 |
+
{
|
| 7663 |
+
"epoch": 11.73,
|
| 7664 |
+
"eval_loss": 0.7770859599113464,
|
| 7665 |
+
"eval_runtime": 0.5158,
|
| 7666 |
+
"eval_samples_per_second": 1938.792,
|
| 7667 |
+
"eval_steps_per_second": 31.021,
|
| 7668 |
+
"step": 383000
|
| 7669 |
+
},
|
| 7670 |
+
{
|
| 7671 |
+
"epoch": 11.75,
|
| 7672 |
+
"learning_rate": 5.09551460335499e-05,
|
| 7673 |
+
"loss": 0.3199,
|
| 7674 |
+
"step": 383500
|
| 7675 |
+
},
|
| 7676 |
+
{
|
| 7677 |
+
"epoch": 11.77,
|
| 7678 |
+
"learning_rate": 5.062173644895296e-05,
|
| 7679 |
+
"loss": 0.3199,
|
| 7680 |
+
"step": 384000
|
| 7681 |
+
},
|
| 7682 |
+
{
|
| 7683 |
+
"epoch": 11.77,
|
| 7684 |
+
"eval_loss": 0.7772809863090515,
|
| 7685 |
+
"eval_runtime": 0.526,
|
| 7686 |
+
"eval_samples_per_second": 1901.052,
|
| 7687 |
+
"eval_steps_per_second": 30.417,
|
| 7688 |
+
"step": 384000
|
| 7689 |
+
},
|
| 7690 |
+
{
|
| 7691 |
+
"epoch": 11.78,
|
| 7692 |
+
"learning_rate": 5.0289468328323434e-05,
|
| 7693 |
+
"loss": 0.32,
|
| 7694 |
+
"step": 384500
|
| 7695 |
+
},
|
| 7696 |
+
{
|
| 7697 |
+
"epoch": 11.8,
|
| 7698 |
+
"learning_rate": 4.995834530529208e-05,
|
| 7699 |
+
"loss": 0.3198,
|
| 7700 |
+
"step": 385000
|
| 7701 |
+
},
|
| 7702 |
+
{
|
| 7703 |
+
"epoch": 11.8,
|
| 7704 |
+
"eval_loss": 0.7799978852272034,
|
| 7705 |
+
"eval_runtime": 0.5198,
|
| 7706 |
+
"eval_samples_per_second": 1923.762,
|
| 7707 |
+
"eval_steps_per_second": 30.78,
|
| 7708 |
+
"step": 385000
|
| 7709 |
+
},
|
| 7710 |
+
{
|
| 7711 |
+
"epoch": 11.81,
|
| 7712 |
+
"learning_rate": 4.9628371000967394e-05,
|
| 7713 |
+
"loss": 0.3198,
|
| 7714 |
+
"step": 385500
|
| 7715 |
+
},
|
| 7716 |
+
{
|
| 7717 |
+
"epoch": 11.83,
|
| 7718 |
+
"learning_rate": 4.929954902389534e-05,
|
| 7719 |
+
"loss": 0.3196,
|
| 7720 |
+
"step": 386000
|
| 7721 |
+
},
|
| 7722 |
+
{
|
| 7723 |
+
"epoch": 11.83,
|
| 7724 |
+
"eval_loss": 0.7772753834724426,
|
| 7725 |
+
"eval_runtime": 0.5182,
|
| 7726 |
+
"eval_samples_per_second": 1929.679,
|
| 7727 |
+
"eval_steps_per_second": 30.875,
|
| 7728 |
+
"step": 386000
|
| 7729 |
+
},
|
| 7730 |
+
{
|
| 7731 |
+
"epoch": 11.84,
|
| 7732 |
+
"learning_rate": 4.897188297002046e-05,
|
| 7733 |
+
"loss": 0.3194,
|
| 7734 |
+
"step": 386500
|
| 7735 |
+
},
|
| 7736 |
+
{
|
| 7737 |
+
"epoch": 11.86,
|
| 7738 |
+
"learning_rate": 4.8645376422646226e-05,
|
| 7739 |
+
"loss": 0.3194,
|
| 7740 |
+
"step": 387000
|
| 7741 |
+
},
|
| 7742 |
+
{
|
| 7743 |
+
"epoch": 11.86,
|
| 7744 |
+
"eval_loss": 0.7773513197898865,
|
| 7745 |
+
"eval_runtime": 0.5104,
|
| 7746 |
+
"eval_samples_per_second": 1959.206,
|
| 7747 |
+
"eval_steps_per_second": 31.347,
|
| 7748 |
+
"step": 387000
|
| 7749 |
+
},
|
| 7750 |
+
{
|
| 7751 |
+
"epoch": 11.87,
|
| 7752 |
+
"learning_rate": 4.832003295239591e-05,
|
| 7753 |
+
"loss": 0.3194,
|
| 7754 |
+
"step": 387500
|
| 7755 |
+
},
|
| 7756 |
+
{
|
| 7757 |
+
"epoch": 11.89,
|
| 7758 |
+
"learning_rate": 4.7995856117173624e-05,
|
| 7759 |
+
"loss": 0.3198,
|
| 7760 |
+
"step": 388000
|
| 7761 |
+
},
|
| 7762 |
+
{
|
| 7763 |
+
"epoch": 11.89,
|
| 7764 |
+
"eval_loss": 0.7708141803741455,
|
| 7765 |
+
"eval_runtime": 0.5167,
|
| 7766 |
+
"eval_samples_per_second": 1935.457,
|
| 7767 |
+
"eval_steps_per_second": 30.967,
|
| 7768 |
+
"step": 388000
|
| 7769 |
+
},
|
| 7770 |
+
{
|
| 7771 |
+
"epoch": 11.9,
|
| 7772 |
+
"learning_rate": 4.767284946212521e-05,
|
| 7773 |
+
"loss": 0.3192,
|
| 7774 |
+
"step": 388500
|
| 7775 |
+
},
|
| 7776 |
+
{
|
| 7777 |
+
"epoch": 11.92,
|
| 7778 |
+
"learning_rate": 4.735101651959977e-05,
|
| 7779 |
+
"loss": 0.3191,
|
| 7780 |
+
"step": 389000
|
| 7781 |
+
},
|
| 7782 |
+
{
|
| 7783 |
+
"epoch": 11.92,
|
| 7784 |
+
"eval_loss": 0.7765528559684753,
|
| 7785 |
+
"eval_runtime": 0.5105,
|
| 7786 |
+
"eval_samples_per_second": 1958.826,
|
| 7787 |
+
"eval_steps_per_second": 31.341,
|
| 7788 |
+
"step": 389000
|
| 7789 |
+
},
|
| 7790 |
+
{
|
| 7791 |
+
"epoch": 11.93,
|
| 7792 |
+
"learning_rate": 4.7030360809110754e-05,
|
| 7793 |
+
"loss": 0.3195,
|
| 7794 |
+
"step": 389500
|
| 7795 |
+
},
|
| 7796 |
+
{
|
| 7797 |
+
"epoch": 11.95,
|
| 7798 |
+
"learning_rate": 4.6710885837297726e-05,
|
| 7799 |
+
"loss": 0.3193,
|
| 7800 |
+
"step": 390000
|
| 7801 |
+
},
|
| 7802 |
+
{
|
| 7803 |
+
"epoch": 11.95,
|
| 7804 |
+
"eval_loss": 0.7797139883041382,
|
| 7805 |
+
"eval_runtime": 0.5113,
|
| 7806 |
+
"eval_samples_per_second": 1955.764,
|
| 7807 |
+
"eval_steps_per_second": 31.292,
|
| 7808 |
+
"step": 390000
|
| 7809 |
}
|
| 7810 |
],
|
| 7811 |
"max_steps": 500000,
|
| 7812 |
"num_train_epochs": 16,
|
| 7813 |
+
"total_flos": 1.2459910544733766e+22,
|
| 7814 |
"trial_name": null,
|
| 7815 |
"trial_params": null
|
| 7816 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 102501541
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a319b7f2b744152a6d0b7c7b011dcbab2f50e31847aad07fbf0452468b5c5506
|
| 3 |
size 102501541
|