Training in progress, epoch 9
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +846 -3
- pytorch_model.bin +1 -1
- runs/Jul30_01-06-41_user-SYS-5049A-TR/events.out.tfevents.1659110814.user-SYS-5049A-TR.3945016.0 +2 -2
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 236469913
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0eb4dcf9ed1ce73d5d32482193ed272b0ff98916f9ae6c370fad43e65a6259a2
|
| 3 |
size 236469913
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 118242180
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9af26e70d13d97dd1148f504edda0ca0d4b3e70b4d9f65a19697b44e426a3580
|
| 3 |
size 118242180
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f39204b4e0d21ca3c9794332a74eb829d80abede2633c846a34ad11056cbd2f4
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:981a20d97caa849fd69d09144a00fa71a090a40d23907501e2ed06e6c009f28d
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1c5c09b553671832cbd2235f75894fd5507dbddfab709bf1c35b62744443e806
|
| 3 |
size 14503
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 559
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:90ca4aeeda8da7670d23742adcfecc2c9d6f9e133399a23226811c91518226ab
|
| 3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:da7d64569f6b34d0d97ddba566149385f2d6d8171b6a32d36e23c2c2476fc151
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch":
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -6744,11 +6744,854 @@
|
|
| 6744 |
"eval_samples_per_second": 969.57,
|
| 6745 |
"eval_steps_per_second": 40.399,
|
| 6746 |
"step": 555784
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6747 |
}
|
| 6748 |
],
|
| 6749 |
"max_steps": 972622,
|
| 6750 |
"num_train_epochs": 14,
|
| 6751 |
-
"total_flos": 3.
|
| 6752 |
"trial_name": null,
|
| 6753 |
"trial_params": null
|
| 6754 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 9.0,
|
| 5 |
+
"global_step": 625257,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 6744 |
"eval_samples_per_second": 969.57,
|
| 6745 |
"eval_steps_per_second": 40.399,
|
| 6746 |
"step": 555784
|
| 6747 |
+
},
|
| 6748 |
+
{
|
| 6749 |
+
"epoch": 8.0,
|
| 6750 |
+
"learning_rate": 4.329360937589249e-05,
|
| 6751 |
+
"loss": 2.8652,
|
| 6752 |
+
"step": 556000
|
| 6753 |
+
},
|
| 6754 |
+
{
|
| 6755 |
+
"epoch": 8.01,
|
| 6756 |
+
"learning_rate": 4.3241682634139755e-05,
|
| 6757 |
+
"loss": 2.86,
|
| 6758 |
+
"step": 556500
|
| 6759 |
+
},
|
| 6760 |
+
{
|
| 6761 |
+
"epoch": 8.02,
|
| 6762 |
+
"learning_rate": 4.318975589238702e-05,
|
| 6763 |
+
"loss": 2.8523,
|
| 6764 |
+
"step": 557000
|
| 6765 |
+
},
|
| 6766 |
+
{
|
| 6767 |
+
"epoch": 8.02,
|
| 6768 |
+
"learning_rate": 4.3137829150634284e-05,
|
| 6769 |
+
"loss": 2.8513,
|
| 6770 |
+
"step": 557500
|
| 6771 |
+
},
|
| 6772 |
+
{
|
| 6773 |
+
"epoch": 8.03,
|
| 6774 |
+
"learning_rate": 4.308590240888155e-05,
|
| 6775 |
+
"loss": 2.8526,
|
| 6776 |
+
"step": 558000
|
| 6777 |
+
},
|
| 6778 |
+
{
|
| 6779 |
+
"epoch": 8.04,
|
| 6780 |
+
"learning_rate": 4.3033975667128814e-05,
|
| 6781 |
+
"loss": 2.8663,
|
| 6782 |
+
"step": 558500
|
| 6783 |
+
},
|
| 6784 |
+
{
|
| 6785 |
+
"epoch": 8.05,
|
| 6786 |
+
"learning_rate": 4.298204892537608e-05,
|
| 6787 |
+
"loss": 2.8554,
|
| 6788 |
+
"step": 559000
|
| 6789 |
+
},
|
| 6790 |
+
{
|
| 6791 |
+
"epoch": 8.05,
|
| 6792 |
+
"learning_rate": 4.2930226037106855e-05,
|
| 6793 |
+
"loss": 2.8556,
|
| 6794 |
+
"step": 559500
|
| 6795 |
+
},
|
| 6796 |
+
{
|
| 6797 |
+
"epoch": 8.06,
|
| 6798 |
+
"learning_rate": 4.287829929535412e-05,
|
| 6799 |
+
"loss": 2.856,
|
| 6800 |
+
"step": 560000
|
| 6801 |
+
},
|
| 6802 |
+
{
|
| 6803 |
+
"epoch": 8.07,
|
| 6804 |
+
"learning_rate": 4.282647640708489e-05,
|
| 6805 |
+
"loss": 2.8588,
|
| 6806 |
+
"step": 560500
|
| 6807 |
+
},
|
| 6808 |
+
{
|
| 6809 |
+
"epoch": 8.08,
|
| 6810 |
+
"learning_rate": 4.277454966533215e-05,
|
| 6811 |
+
"loss": 2.8524,
|
| 6812 |
+
"step": 561000
|
| 6813 |
+
},
|
| 6814 |
+
{
|
| 6815 |
+
"epoch": 8.08,
|
| 6816 |
+
"learning_rate": 4.272262292357941e-05,
|
| 6817 |
+
"loss": 2.8519,
|
| 6818 |
+
"step": 561500
|
| 6819 |
+
},
|
| 6820 |
+
{
|
| 6821 |
+
"epoch": 8.09,
|
| 6822 |
+
"learning_rate": 4.267069618182668e-05,
|
| 6823 |
+
"loss": 2.8579,
|
| 6824 |
+
"step": 562000
|
| 6825 |
+
},
|
| 6826 |
+
{
|
| 6827 |
+
"epoch": 8.1,
|
| 6828 |
+
"learning_rate": 4.261876944007395e-05,
|
| 6829 |
+
"loss": 2.853,
|
| 6830 |
+
"step": 562500
|
| 6831 |
+
},
|
| 6832 |
+
{
|
| 6833 |
+
"epoch": 8.1,
|
| 6834 |
+
"learning_rate": 4.256684269832121e-05,
|
| 6835 |
+
"loss": 2.8629,
|
| 6836 |
+
"step": 563000
|
| 6837 |
+
},
|
| 6838 |
+
{
|
| 6839 |
+
"epoch": 8.11,
|
| 6840 |
+
"learning_rate": 4.251491595656847e-05,
|
| 6841 |
+
"loss": 2.8584,
|
| 6842 |
+
"step": 563500
|
| 6843 |
+
},
|
| 6844 |
+
{
|
| 6845 |
+
"epoch": 8.12,
|
| 6846 |
+
"learning_rate": 4.246298921481574e-05,
|
| 6847 |
+
"loss": 2.853,
|
| 6848 |
+
"step": 564000
|
| 6849 |
+
},
|
| 6850 |
+
{
|
| 6851 |
+
"epoch": 8.13,
|
| 6852 |
+
"learning_rate": 4.241116632654651e-05,
|
| 6853 |
+
"loss": 2.8529,
|
| 6854 |
+
"step": 564500
|
| 6855 |
+
},
|
| 6856 |
+
{
|
| 6857 |
+
"epoch": 8.13,
|
| 6858 |
+
"learning_rate": 4.2359239584793775e-05,
|
| 6859 |
+
"loss": 2.8497,
|
| 6860 |
+
"step": 565000
|
| 6861 |
+
},
|
| 6862 |
+
{
|
| 6863 |
+
"epoch": 8.14,
|
| 6864 |
+
"learning_rate": 4.2307312843041044e-05,
|
| 6865 |
+
"loss": 2.8558,
|
| 6866 |
+
"step": 565500
|
| 6867 |
+
},
|
| 6868 |
+
{
|
| 6869 |
+
"epoch": 8.15,
|
| 6870 |
+
"learning_rate": 4.2255386101288305e-05,
|
| 6871 |
+
"loss": 2.8552,
|
| 6872 |
+
"step": 566000
|
| 6873 |
+
},
|
| 6874 |
+
{
|
| 6875 |
+
"epoch": 8.15,
|
| 6876 |
+
"learning_rate": 4.220356321301908e-05,
|
| 6877 |
+
"loss": 2.8553,
|
| 6878 |
+
"step": 566500
|
| 6879 |
+
},
|
| 6880 |
+
{
|
| 6881 |
+
"epoch": 8.16,
|
| 6882 |
+
"learning_rate": 4.215163647126634e-05,
|
| 6883 |
+
"loss": 2.8547,
|
| 6884 |
+
"step": 567000
|
| 6885 |
+
},
|
| 6886 |
+
{
|
| 6887 |
+
"epoch": 8.17,
|
| 6888 |
+
"learning_rate": 4.20997097295136e-05,
|
| 6889 |
+
"loss": 2.8513,
|
| 6890 |
+
"step": 567500
|
| 6891 |
+
},
|
| 6892 |
+
{
|
| 6893 |
+
"epoch": 8.18,
|
| 6894 |
+
"learning_rate": 4.204778298776087e-05,
|
| 6895 |
+
"loss": 2.855,
|
| 6896 |
+
"step": 568000
|
| 6897 |
+
},
|
| 6898 |
+
{
|
| 6899 |
+
"epoch": 8.18,
|
| 6900 |
+
"learning_rate": 4.199585624600814e-05,
|
| 6901 |
+
"loss": 2.8517,
|
| 6902 |
+
"step": 568500
|
| 6903 |
+
},
|
| 6904 |
+
{
|
| 6905 |
+
"epoch": 8.19,
|
| 6906 |
+
"learning_rate": 4.1944033357738904e-05,
|
| 6907 |
+
"loss": 2.8513,
|
| 6908 |
+
"step": 569000
|
| 6909 |
+
},
|
| 6910 |
+
{
|
| 6911 |
+
"epoch": 8.2,
|
| 6912 |
+
"learning_rate": 4.189210661598617e-05,
|
| 6913 |
+
"loss": 2.8525,
|
| 6914 |
+
"step": 569500
|
| 6915 |
+
},
|
| 6916 |
+
{
|
| 6917 |
+
"epoch": 8.2,
|
| 6918 |
+
"learning_rate": 4.184028372771694e-05,
|
| 6919 |
+
"loss": 2.8587,
|
| 6920 |
+
"step": 570000
|
| 6921 |
+
},
|
| 6922 |
+
{
|
| 6923 |
+
"epoch": 8.21,
|
| 6924 |
+
"learning_rate": 4.178835698596421e-05,
|
| 6925 |
+
"loss": 2.8499,
|
| 6926 |
+
"step": 570500
|
| 6927 |
+
},
|
| 6928 |
+
{
|
| 6929 |
+
"epoch": 8.22,
|
| 6930 |
+
"learning_rate": 4.173643024421147e-05,
|
| 6931 |
+
"loss": 2.8525,
|
| 6932 |
+
"step": 571000
|
| 6933 |
+
},
|
| 6934 |
+
{
|
| 6935 |
+
"epoch": 8.23,
|
| 6936 |
+
"learning_rate": 4.168450350245873e-05,
|
| 6937 |
+
"loss": 2.8523,
|
| 6938 |
+
"step": 571500
|
| 6939 |
+
},
|
| 6940 |
+
{
|
| 6941 |
+
"epoch": 8.23,
|
| 6942 |
+
"learning_rate": 4.1632576760706e-05,
|
| 6943 |
+
"loss": 2.851,
|
| 6944 |
+
"step": 572000
|
| 6945 |
+
},
|
| 6946 |
+
{
|
| 6947 |
+
"epoch": 8.24,
|
| 6948 |
+
"learning_rate": 4.1580650018953267e-05,
|
| 6949 |
+
"loss": 2.861,
|
| 6950 |
+
"step": 572500
|
| 6951 |
+
},
|
| 6952 |
+
{
|
| 6953 |
+
"epoch": 8.25,
|
| 6954 |
+
"learning_rate": 4.152872327720053e-05,
|
| 6955 |
+
"loss": 2.8567,
|
| 6956 |
+
"step": 573000
|
| 6957 |
+
},
|
| 6958 |
+
{
|
| 6959 |
+
"epoch": 8.26,
|
| 6960 |
+
"learning_rate": 4.147679653544779e-05,
|
| 6961 |
+
"loss": 2.8534,
|
| 6962 |
+
"step": 573500
|
| 6963 |
+
},
|
| 6964 |
+
{
|
| 6965 |
+
"epoch": 8.26,
|
| 6966 |
+
"learning_rate": 4.142497364717856e-05,
|
| 6967 |
+
"loss": 2.8493,
|
| 6968 |
+
"step": 574000
|
| 6969 |
+
},
|
| 6970 |
+
{
|
| 6971 |
+
"epoch": 8.27,
|
| 6972 |
+
"learning_rate": 4.1373046905425824e-05,
|
| 6973 |
+
"loss": 2.8514,
|
| 6974 |
+
"step": 574500
|
| 6975 |
+
},
|
| 6976 |
+
{
|
| 6977 |
+
"epoch": 8.28,
|
| 6978 |
+
"learning_rate": 4.132112016367309e-05,
|
| 6979 |
+
"loss": 2.8511,
|
| 6980 |
+
"step": 575000
|
| 6981 |
+
},
|
| 6982 |
+
{
|
| 6983 |
+
"epoch": 8.28,
|
| 6984 |
+
"learning_rate": 4.1269193421920354e-05,
|
| 6985 |
+
"loss": 2.8514,
|
| 6986 |
+
"step": 575500
|
| 6987 |
+
},
|
| 6988 |
+
{
|
| 6989 |
+
"epoch": 8.29,
|
| 6990 |
+
"learning_rate": 4.121726668016762e-05,
|
| 6991 |
+
"loss": 2.854,
|
| 6992 |
+
"step": 576000
|
| 6993 |
+
},
|
| 6994 |
+
{
|
| 6995 |
+
"epoch": 8.3,
|
| 6996 |
+
"learning_rate": 4.116533993841489e-05,
|
| 6997 |
+
"loss": 2.8539,
|
| 6998 |
+
"step": 576500
|
| 6999 |
+
},
|
| 7000 |
+
{
|
| 7001 |
+
"epoch": 8.31,
|
| 7002 |
+
"learning_rate": 4.111351705014566e-05,
|
| 7003 |
+
"loss": 2.8456,
|
| 7004 |
+
"step": 577000
|
| 7005 |
+
},
|
| 7006 |
+
{
|
| 7007 |
+
"epoch": 8.31,
|
| 7008 |
+
"learning_rate": 4.1061590308392925e-05,
|
| 7009 |
+
"loss": 2.8602,
|
| 7010 |
+
"step": 577500
|
| 7011 |
+
},
|
| 7012 |
+
{
|
| 7013 |
+
"epoch": 8.32,
|
| 7014 |
+
"learning_rate": 4.1009663566640187e-05,
|
| 7015 |
+
"loss": 2.8506,
|
| 7016 |
+
"step": 578000
|
| 7017 |
+
},
|
| 7018 |
+
{
|
| 7019 |
+
"epoch": 8.33,
|
| 7020 |
+
"learning_rate": 4.095773682488745e-05,
|
| 7021 |
+
"loss": 2.8524,
|
| 7022 |
+
"step": 578500
|
| 7023 |
+
},
|
| 7024 |
+
{
|
| 7025 |
+
"epoch": 8.33,
|
| 7026 |
+
"learning_rate": 4.0905810083134716e-05,
|
| 7027 |
+
"loss": 2.8504,
|
| 7028 |
+
"step": 579000
|
| 7029 |
+
},
|
| 7030 |
+
{
|
| 7031 |
+
"epoch": 8.34,
|
| 7032 |
+
"learning_rate": 4.085398719486548e-05,
|
| 7033 |
+
"loss": 2.8484,
|
| 7034 |
+
"step": 579500
|
| 7035 |
+
},
|
| 7036 |
+
{
|
| 7037 |
+
"epoch": 8.35,
|
| 7038 |
+
"learning_rate": 4.080206045311275e-05,
|
| 7039 |
+
"loss": 2.8479,
|
| 7040 |
+
"step": 580000
|
| 7041 |
+
},
|
| 7042 |
+
{
|
| 7043 |
+
"epoch": 8.36,
|
| 7044 |
+
"learning_rate": 4.075013371136002e-05,
|
| 7045 |
+
"loss": 2.8561,
|
| 7046 |
+
"step": 580500
|
| 7047 |
+
},
|
| 7048 |
+
{
|
| 7049 |
+
"epoch": 8.36,
|
| 7050 |
+
"learning_rate": 4.069820696960728e-05,
|
| 7051 |
+
"loss": 2.85,
|
| 7052 |
+
"step": 581000
|
| 7053 |
+
},
|
| 7054 |
+
{
|
| 7055 |
+
"epoch": 8.37,
|
| 7056 |
+
"learning_rate": 4.064628022785454e-05,
|
| 7057 |
+
"loss": 2.8561,
|
| 7058 |
+
"step": 581500
|
| 7059 |
+
},
|
| 7060 |
+
{
|
| 7061 |
+
"epoch": 8.38,
|
| 7062 |
+
"learning_rate": 4.059435348610181e-05,
|
| 7063 |
+
"loss": 2.8504,
|
| 7064 |
+
"step": 582000
|
| 7065 |
+
},
|
| 7066 |
+
{
|
| 7067 |
+
"epoch": 8.38,
|
| 7068 |
+
"learning_rate": 4.054242674434908e-05,
|
| 7069 |
+
"loss": 2.8555,
|
| 7070 |
+
"step": 582500
|
| 7071 |
+
},
|
| 7072 |
+
{
|
| 7073 |
+
"epoch": 8.39,
|
| 7074 |
+
"learning_rate": 4.0490603856079845e-05,
|
| 7075 |
+
"loss": 2.8438,
|
| 7076 |
+
"step": 583000
|
| 7077 |
+
},
|
| 7078 |
+
{
|
| 7079 |
+
"epoch": 8.4,
|
| 7080 |
+
"learning_rate": 4.043867711432711e-05,
|
| 7081 |
+
"loss": 2.8494,
|
| 7082 |
+
"step": 583500
|
| 7083 |
+
},
|
| 7084 |
+
{
|
| 7085 |
+
"epoch": 8.41,
|
| 7086 |
+
"learning_rate": 4.0386750372574375e-05,
|
| 7087 |
+
"loss": 2.8473,
|
| 7088 |
+
"step": 584000
|
| 7089 |
+
},
|
| 7090 |
+
{
|
| 7091 |
+
"epoch": 8.41,
|
| 7092 |
+
"learning_rate": 4.0334823630821636e-05,
|
| 7093 |
+
"loss": 2.8479,
|
| 7094 |
+
"step": 584500
|
| 7095 |
+
},
|
| 7096 |
+
{
|
| 7097 |
+
"epoch": 8.42,
|
| 7098 |
+
"learning_rate": 4.0282896889068905e-05,
|
| 7099 |
+
"loss": 2.8488,
|
| 7100 |
+
"step": 585000
|
| 7101 |
+
},
|
| 7102 |
+
{
|
| 7103 |
+
"epoch": 8.43,
|
| 7104 |
+
"learning_rate": 4.0230970147316166e-05,
|
| 7105 |
+
"loss": 2.8506,
|
| 7106 |
+
"step": 585500
|
| 7107 |
+
},
|
| 7108 |
+
{
|
| 7109 |
+
"epoch": 8.43,
|
| 7110 |
+
"learning_rate": 4.0179043405563434e-05,
|
| 7111 |
+
"loss": 2.8476,
|
| 7112 |
+
"step": 586000
|
| 7113 |
+
},
|
| 7114 |
+
{
|
| 7115 |
+
"epoch": 8.44,
|
| 7116 |
+
"learning_rate": 4.0127116663810696e-05,
|
| 7117 |
+
"loss": 2.8448,
|
| 7118 |
+
"step": 586500
|
| 7119 |
+
},
|
| 7120 |
+
{
|
| 7121 |
+
"epoch": 8.45,
|
| 7122 |
+
"learning_rate": 4.007529377554147e-05,
|
| 7123 |
+
"loss": 2.8518,
|
| 7124 |
+
"step": 587000
|
| 7125 |
+
},
|
| 7126 |
+
{
|
| 7127 |
+
"epoch": 8.46,
|
| 7128 |
+
"learning_rate": 4.002336703378873e-05,
|
| 7129 |
+
"loss": 2.8421,
|
| 7130 |
+
"step": 587500
|
| 7131 |
+
},
|
| 7132 |
+
{
|
| 7133 |
+
"epoch": 8.46,
|
| 7134 |
+
"learning_rate": 3.9971440292036e-05,
|
| 7135 |
+
"loss": 2.8544,
|
| 7136 |
+
"step": 588000
|
| 7137 |
+
},
|
| 7138 |
+
{
|
| 7139 |
+
"epoch": 8.47,
|
| 7140 |
+
"learning_rate": 3.9919617403766765e-05,
|
| 7141 |
+
"loss": 2.8506,
|
| 7142 |
+
"step": 588500
|
| 7143 |
+
},
|
| 7144 |
+
{
|
| 7145 |
+
"epoch": 8.48,
|
| 7146 |
+
"learning_rate": 3.986769066201403e-05,
|
| 7147 |
+
"loss": 2.8495,
|
| 7148 |
+
"step": 589000
|
| 7149 |
+
},
|
| 7150 |
+
{
|
| 7151 |
+
"epoch": 8.49,
|
| 7152 |
+
"learning_rate": 3.9815763920261295e-05,
|
| 7153 |
+
"loss": 2.8504,
|
| 7154 |
+
"step": 589500
|
| 7155 |
+
},
|
| 7156 |
+
{
|
| 7157 |
+
"epoch": 8.49,
|
| 7158 |
+
"learning_rate": 3.976383717850856e-05,
|
| 7159 |
+
"loss": 2.844,
|
| 7160 |
+
"step": 590000
|
| 7161 |
+
},
|
| 7162 |
+
{
|
| 7163 |
+
"epoch": 8.5,
|
| 7164 |
+
"learning_rate": 3.971191043675583e-05,
|
| 7165 |
+
"loss": 2.8472,
|
| 7166 |
+
"step": 590500
|
| 7167 |
+
},
|
| 7168 |
+
{
|
| 7169 |
+
"epoch": 8.51,
|
| 7170 |
+
"learning_rate": 3.9659983695003086e-05,
|
| 7171 |
+
"loss": 2.8443,
|
| 7172 |
+
"step": 591000
|
| 7173 |
+
},
|
| 7174 |
+
{
|
| 7175 |
+
"epoch": 8.51,
|
| 7176 |
+
"learning_rate": 3.9608056953250354e-05,
|
| 7177 |
+
"loss": 2.8546,
|
| 7178 |
+
"step": 591500
|
| 7179 |
+
},
|
| 7180 |
+
{
|
| 7181 |
+
"epoch": 8.52,
|
| 7182 |
+
"learning_rate": 3.955623406498113e-05,
|
| 7183 |
+
"loss": 2.8484,
|
| 7184 |
+
"step": 592000
|
| 7185 |
+
},
|
| 7186 |
+
{
|
| 7187 |
+
"epoch": 8.53,
|
| 7188 |
+
"learning_rate": 3.950430732322839e-05,
|
| 7189 |
+
"loss": 2.8438,
|
| 7190 |
+
"step": 592500
|
| 7191 |
+
},
|
| 7192 |
+
{
|
| 7193 |
+
"epoch": 8.54,
|
| 7194 |
+
"learning_rate": 3.945238058147566e-05,
|
| 7195 |
+
"loss": 2.8479,
|
| 7196 |
+
"step": 593000
|
| 7197 |
+
},
|
| 7198 |
+
{
|
| 7199 |
+
"epoch": 8.54,
|
| 7200 |
+
"learning_rate": 3.9400453839722926e-05,
|
| 7201 |
+
"loss": 2.8455,
|
| 7202 |
+
"step": 593500
|
| 7203 |
+
},
|
| 7204 |
+
{
|
| 7205 |
+
"epoch": 8.55,
|
| 7206 |
+
"learning_rate": 3.934852709797018e-05,
|
| 7207 |
+
"loss": 2.8422,
|
| 7208 |
+
"step": 594000
|
| 7209 |
+
},
|
| 7210 |
+
{
|
| 7211 |
+
"epoch": 8.56,
|
| 7212 |
+
"learning_rate": 3.929660035621745e-05,
|
| 7213 |
+
"loss": 2.847,
|
| 7214 |
+
"step": 594500
|
| 7215 |
+
},
|
| 7216 |
+
{
|
| 7217 |
+
"epoch": 8.56,
|
| 7218 |
+
"learning_rate": 3.924467361446472e-05,
|
| 7219 |
+
"loss": 2.8453,
|
| 7220 |
+
"step": 595000
|
| 7221 |
+
},
|
| 7222 |
+
{
|
| 7223 |
+
"epoch": 8.57,
|
| 7224 |
+
"learning_rate": 3.919274687271198e-05,
|
| 7225 |
+
"loss": 2.8533,
|
| 7226 |
+
"step": 595500
|
| 7227 |
+
},
|
| 7228 |
+
{
|
| 7229 |
+
"epoch": 8.58,
|
| 7230 |
+
"learning_rate": 3.9141027837926256e-05,
|
| 7231 |
+
"loss": 2.8511,
|
| 7232 |
+
"step": 596000
|
| 7233 |
+
},
|
| 7234 |
+
{
|
| 7235 |
+
"epoch": 8.59,
|
| 7236 |
+
"learning_rate": 3.908910109617352e-05,
|
| 7237 |
+
"loss": 2.8456,
|
| 7238 |
+
"step": 596500
|
| 7239 |
+
},
|
| 7240 |
+
{
|
| 7241 |
+
"epoch": 8.59,
|
| 7242 |
+
"learning_rate": 3.9037174354420786e-05,
|
| 7243 |
+
"loss": 2.8501,
|
| 7244 |
+
"step": 597000
|
| 7245 |
+
},
|
| 7246 |
+
{
|
| 7247 |
+
"epoch": 8.6,
|
| 7248 |
+
"learning_rate": 3.8985247612668054e-05,
|
| 7249 |
+
"loss": 2.8479,
|
| 7250 |
+
"step": 597500
|
| 7251 |
+
},
|
| 7252 |
+
{
|
| 7253 |
+
"epoch": 8.61,
|
| 7254 |
+
"learning_rate": 3.8933320870915316e-05,
|
| 7255 |
+
"loss": 2.8488,
|
| 7256 |
+
"step": 598000
|
| 7257 |
+
},
|
| 7258 |
+
{
|
| 7259 |
+
"epoch": 8.61,
|
| 7260 |
+
"learning_rate": 3.888139412916258e-05,
|
| 7261 |
+
"loss": 2.8456,
|
| 7262 |
+
"step": 598500
|
| 7263 |
+
},
|
| 7264 |
+
{
|
| 7265 |
+
"epoch": 8.62,
|
| 7266 |
+
"learning_rate": 3.8829467387409846e-05,
|
| 7267 |
+
"loss": 2.8479,
|
| 7268 |
+
"step": 599000
|
| 7269 |
+
},
|
| 7270 |
+
{
|
| 7271 |
+
"epoch": 8.63,
|
| 7272 |
+
"learning_rate": 3.877764449914061e-05,
|
| 7273 |
+
"loss": 2.8457,
|
| 7274 |
+
"step": 599500
|
| 7275 |
+
},
|
| 7276 |
+
{
|
| 7277 |
+
"epoch": 8.64,
|
| 7278 |
+
"learning_rate": 3.872571775738788e-05,
|
| 7279 |
+
"loss": 2.8439,
|
| 7280 |
+
"step": 600000
|
| 7281 |
+
},
|
| 7282 |
+
{
|
| 7283 |
+
"epoch": 8.64,
|
| 7284 |
+
"learning_rate": 3.867379101563514e-05,
|
| 7285 |
+
"loss": 2.8488,
|
| 7286 |
+
"step": 600500
|
| 7287 |
+
},
|
| 7288 |
+
{
|
| 7289 |
+
"epoch": 8.65,
|
| 7290 |
+
"learning_rate": 3.862186427388241e-05,
|
| 7291 |
+
"loss": 2.8437,
|
| 7292 |
+
"step": 601000
|
| 7293 |
+
},
|
| 7294 |
+
{
|
| 7295 |
+
"epoch": 8.66,
|
| 7296 |
+
"learning_rate": 3.856993753212967e-05,
|
| 7297 |
+
"loss": 2.8445,
|
| 7298 |
+
"step": 601500
|
| 7299 |
+
},
|
| 7300 |
+
{
|
| 7301 |
+
"epoch": 8.67,
|
| 7302 |
+
"learning_rate": 3.851801079037694e-05,
|
| 7303 |
+
"loss": 2.842,
|
| 7304 |
+
"step": 602000
|
| 7305 |
+
},
|
| 7306 |
+
{
|
| 7307 |
+
"epoch": 8.67,
|
| 7308 |
+
"learning_rate": 3.8466187902107706e-05,
|
| 7309 |
+
"loss": 2.8458,
|
| 7310 |
+
"step": 602500
|
| 7311 |
+
},
|
| 7312 |
+
{
|
| 7313 |
+
"epoch": 8.68,
|
| 7314 |
+
"learning_rate": 3.8414261160354974e-05,
|
| 7315 |
+
"loss": 2.8489,
|
| 7316 |
+
"step": 603000
|
| 7317 |
+
},
|
| 7318 |
+
{
|
| 7319 |
+
"epoch": 8.69,
|
| 7320 |
+
"learning_rate": 3.8362334418602236e-05,
|
| 7321 |
+
"loss": 2.8407,
|
| 7322 |
+
"step": 603500
|
| 7323 |
+
},
|
| 7324 |
+
{
|
| 7325 |
+
"epoch": 8.69,
|
| 7326 |
+
"learning_rate": 3.8310407676849504e-05,
|
| 7327 |
+
"loss": 2.8452,
|
| 7328 |
+
"step": 604000
|
| 7329 |
+
},
|
| 7330 |
+
{
|
| 7331 |
+
"epoch": 8.7,
|
| 7332 |
+
"learning_rate": 3.8258480935096766e-05,
|
| 7333 |
+
"loss": 2.8495,
|
| 7334 |
+
"step": 604500
|
| 7335 |
+
},
|
| 7336 |
+
{
|
| 7337 |
+
"epoch": 8.71,
|
| 7338 |
+
"learning_rate": 3.820655419334403e-05,
|
| 7339 |
+
"loss": 2.8426,
|
| 7340 |
+
"step": 605000
|
| 7341 |
+
},
|
| 7342 |
+
{
|
| 7343 |
+
"epoch": 8.72,
|
| 7344 |
+
"learning_rate": 3.8154627451591295e-05,
|
| 7345 |
+
"loss": 2.845,
|
| 7346 |
+
"step": 605500
|
| 7347 |
+
},
|
| 7348 |
+
{
|
| 7349 |
+
"epoch": 8.72,
|
| 7350 |
+
"learning_rate": 3.810280456332207e-05,
|
| 7351 |
+
"loss": 2.8437,
|
| 7352 |
+
"step": 606000
|
| 7353 |
+
},
|
| 7354 |
+
{
|
| 7355 |
+
"epoch": 8.73,
|
| 7356 |
+
"learning_rate": 3.805087782156933e-05,
|
| 7357 |
+
"loss": 2.8411,
|
| 7358 |
+
"step": 606500
|
| 7359 |
+
},
|
| 7360 |
+
{
|
| 7361 |
+
"epoch": 8.74,
|
| 7362 |
+
"learning_rate": 3.79989510798166e-05,
|
| 7363 |
+
"loss": 2.8449,
|
| 7364 |
+
"step": 607000
|
| 7365 |
+
},
|
| 7366 |
+
{
|
| 7367 |
+
"epoch": 8.74,
|
| 7368 |
+
"learning_rate": 3.7947024338063866e-05,
|
| 7369 |
+
"loss": 2.8474,
|
| 7370 |
+
"step": 607500
|
| 7371 |
+
},
|
| 7372 |
+
{
|
| 7373 |
+
"epoch": 8.75,
|
| 7374 |
+
"learning_rate": 3.789520144979463e-05,
|
| 7375 |
+
"loss": 2.8431,
|
| 7376 |
+
"step": 608000
|
| 7377 |
+
},
|
| 7378 |
+
{
|
| 7379 |
+
"epoch": 8.76,
|
| 7380 |
+
"learning_rate": 3.78432747080419e-05,
|
| 7381 |
+
"loss": 2.8437,
|
| 7382 |
+
"step": 608500
|
| 7383 |
+
},
|
| 7384 |
+
{
|
| 7385 |
+
"epoch": 8.77,
|
| 7386 |
+
"learning_rate": 3.7791347966289156e-05,
|
| 7387 |
+
"loss": 2.8445,
|
| 7388 |
+
"step": 609000
|
| 7389 |
+
},
|
| 7390 |
+
{
|
| 7391 |
+
"epoch": 8.77,
|
| 7392 |
+
"learning_rate": 3.7739421224536424e-05,
|
| 7393 |
+
"loss": 2.8438,
|
| 7394 |
+
"step": 609500
|
| 7395 |
+
},
|
| 7396 |
+
{
|
| 7397 |
+
"epoch": 8.78,
|
| 7398 |
+
"learning_rate": 3.768749448278369e-05,
|
| 7399 |
+
"loss": 2.8441,
|
| 7400 |
+
"step": 610000
|
| 7401 |
+
},
|
| 7402 |
+
{
|
| 7403 |
+
"epoch": 8.79,
|
| 7404 |
+
"learning_rate": 3.763567159451446e-05,
|
| 7405 |
+
"loss": 2.8497,
|
| 7406 |
+
"step": 610500
|
| 7407 |
+
},
|
| 7408 |
+
{
|
| 7409 |
+
"epoch": 8.79,
|
| 7410 |
+
"learning_rate": 3.758374485276173e-05,
|
| 7411 |
+
"loss": 2.8466,
|
| 7412 |
+
"step": 611000
|
| 7413 |
+
},
|
| 7414 |
+
{
|
| 7415 |
+
"epoch": 8.8,
|
| 7416 |
+
"learning_rate": 3.7531818111008995e-05,
|
| 7417 |
+
"loss": 2.8451,
|
| 7418 |
+
"step": 611500
|
| 7419 |
+
},
|
| 7420 |
+
{
|
| 7421 |
+
"epoch": 8.81,
|
| 7422 |
+
"learning_rate": 3.747989136925626e-05,
|
| 7423 |
+
"loss": 2.8407,
|
| 7424 |
+
"step": 612000
|
| 7425 |
+
},
|
| 7426 |
+
{
|
| 7427 |
+
"epoch": 8.82,
|
| 7428 |
+
"learning_rate": 3.742806848098703e-05,
|
| 7429 |
+
"loss": 2.8453,
|
| 7430 |
+
"step": 612500
|
| 7431 |
+
},
|
| 7432 |
+
{
|
| 7433 |
+
"epoch": 8.82,
|
| 7434 |
+
"learning_rate": 3.737614173923429e-05,
|
| 7435 |
+
"loss": 2.8403,
|
| 7436 |
+
"step": 613000
|
| 7437 |
+
},
|
| 7438 |
+
{
|
| 7439 |
+
"epoch": 8.83,
|
| 7440 |
+
"learning_rate": 3.732421499748155e-05,
|
| 7441 |
+
"loss": 2.8445,
|
| 7442 |
+
"step": 613500
|
| 7443 |
+
},
|
| 7444 |
+
{
|
| 7445 |
+
"epoch": 8.84,
|
| 7446 |
+
"learning_rate": 3.727228825572882e-05,
|
| 7447 |
+
"loss": 2.8422,
|
| 7448 |
+
"step": 614000
|
| 7449 |
+
},
|
| 7450 |
+
{
|
| 7451 |
+
"epoch": 8.85,
|
| 7452 |
+
"learning_rate": 3.722036151397608e-05,
|
| 7453 |
+
"loss": 2.8411,
|
| 7454 |
+
"step": 614500
|
| 7455 |
+
},
|
| 7456 |
+
{
|
| 7457 |
+
"epoch": 8.85,
|
| 7458 |
+
"learning_rate": 3.716843477222335e-05,
|
| 7459 |
+
"loss": 2.8402,
|
| 7460 |
+
"step": 615000
|
| 7461 |
+
},
|
| 7462 |
+
{
|
| 7463 |
+
"epoch": 8.86,
|
| 7464 |
+
"learning_rate": 3.711650803047061e-05,
|
| 7465 |
+
"loss": 2.8368,
|
| 7466 |
+
"step": 615500
|
| 7467 |
+
},
|
| 7468 |
+
{
|
| 7469 |
+
"epoch": 8.87,
|
| 7470 |
+
"learning_rate": 3.706458128871788e-05,
|
| 7471 |
+
"loss": 2.846,
|
| 7472 |
+
"step": 616000
|
| 7473 |
+
},
|
| 7474 |
+
{
|
| 7475 |
+
"epoch": 8.87,
|
| 7476 |
+
"learning_rate": 3.701286225393216e-05,
|
| 7477 |
+
"loss": 2.8407,
|
| 7478 |
+
"step": 616500
|
| 7479 |
+
},
|
| 7480 |
+
{
|
| 7481 |
+
"epoch": 8.88,
|
| 7482 |
+
"learning_rate": 3.696093551217942e-05,
|
| 7483 |
+
"loss": 2.8388,
|
| 7484 |
+
"step": 617000
|
| 7485 |
+
},
|
| 7486 |
+
{
|
| 7487 |
+
"epoch": 8.89,
|
| 7488 |
+
"learning_rate": 3.690900877042668e-05,
|
| 7489 |
+
"loss": 2.8414,
|
| 7490 |
+
"step": 617500
|
| 7491 |
+
},
|
| 7492 |
+
{
|
| 7493 |
+
"epoch": 8.9,
|
| 7494 |
+
"learning_rate": 3.685708202867395e-05,
|
| 7495 |
+
"loss": 2.8437,
|
| 7496 |
+
"step": 618000
|
| 7497 |
+
},
|
| 7498 |
+
{
|
| 7499 |
+
"epoch": 8.9,
|
| 7500 |
+
"learning_rate": 3.680515528692121e-05,
|
| 7501 |
+
"loss": 2.8418,
|
| 7502 |
+
"step": 618500
|
| 7503 |
+
},
|
| 7504 |
+
{
|
| 7505 |
+
"epoch": 8.91,
|
| 7506 |
+
"learning_rate": 3.675322854516848e-05,
|
| 7507 |
+
"loss": 2.8413,
|
| 7508 |
+
"step": 619000
|
| 7509 |
+
},
|
| 7510 |
+
{
|
| 7511 |
+
"epoch": 8.92,
|
| 7512 |
+
"learning_rate": 3.6701405656899246e-05,
|
| 7513 |
+
"loss": 2.8469,
|
| 7514 |
+
"step": 619500
|
| 7515 |
+
},
|
| 7516 |
+
{
|
| 7517 |
+
"epoch": 8.92,
|
| 7518 |
+
"learning_rate": 3.6649478915146514e-05,
|
| 7519 |
+
"loss": 2.8402,
|
| 7520 |
+
"step": 620000
|
| 7521 |
+
},
|
| 7522 |
+
{
|
| 7523 |
+
"epoch": 8.93,
|
| 7524 |
+
"learning_rate": 3.6597552173393776e-05,
|
| 7525 |
+
"loss": 2.842,
|
| 7526 |
+
"step": 620500
|
| 7527 |
+
},
|
| 7528 |
+
{
|
| 7529 |
+
"epoch": 8.94,
|
| 7530 |
+
"learning_rate": 3.6545625431641044e-05,
|
| 7531 |
+
"loss": 2.84,
|
| 7532 |
+
"step": 621000
|
| 7533 |
+
},
|
| 7534 |
+
{
|
| 7535 |
+
"epoch": 8.95,
|
| 7536 |
+
"learning_rate": 3.6493698689888305e-05,
|
| 7537 |
+
"loss": 2.8442,
|
| 7538 |
+
"step": 621500
|
| 7539 |
+
},
|
| 7540 |
+
{
|
| 7541 |
+
"epoch": 8.95,
|
| 7542 |
+
"learning_rate": 3.6441771948135574e-05,
|
| 7543 |
+
"loss": 2.8432,
|
| 7544 |
+
"step": 622000
|
| 7545 |
+
},
|
| 7546 |
+
{
|
| 7547 |
+
"epoch": 8.96,
|
| 7548 |
+
"learning_rate": 3.638984520638284e-05,
|
| 7549 |
+
"loss": 2.8384,
|
| 7550 |
+
"step": 622500
|
| 7551 |
+
},
|
| 7552 |
+
{
|
| 7553 |
+
"epoch": 8.97,
|
| 7554 |
+
"learning_rate": 3.63379184646301e-05,
|
| 7555 |
+
"loss": 2.8414,
|
| 7556 |
+
"step": 623000
|
| 7557 |
+
},
|
| 7558 |
+
{
|
| 7559 |
+
"epoch": 8.97,
|
| 7560 |
+
"learning_rate": 3.6286095576360876e-05,
|
| 7561 |
+
"loss": 2.8429,
|
| 7562 |
+
"step": 623500
|
| 7563 |
+
},
|
| 7564 |
+
{
|
| 7565 |
+
"epoch": 8.98,
|
| 7566 |
+
"learning_rate": 3.623416883460813e-05,
|
| 7567 |
+
"loss": 2.8397,
|
| 7568 |
+
"step": 624000
|
| 7569 |
+
},
|
| 7570 |
+
{
|
| 7571 |
+
"epoch": 8.99,
|
| 7572 |
+
"learning_rate": 3.61822420928554e-05,
|
| 7573 |
+
"loss": 2.8377,
|
| 7574 |
+
"step": 624500
|
| 7575 |
+
},
|
| 7576 |
+
{
|
| 7577 |
+
"epoch": 9.0,
|
| 7578 |
+
"learning_rate": 3.6130419204586166e-05,
|
| 7579 |
+
"loss": 2.8352,
|
| 7580 |
+
"step": 625000
|
| 7581 |
+
},
|
| 7582 |
+
{
|
| 7583 |
+
"epoch": 9.0,
|
| 7584 |
+
"eval_accuracy": 0.49907601112677125,
|
| 7585 |
+
"eval_loss": 2.6701717376708984,
|
| 7586 |
+
"eval_runtime": 557.6796,
|
| 7587 |
+
"eval_samples_per_second": 966.392,
|
| 7588 |
+
"eval_steps_per_second": 40.267,
|
| 7589 |
+
"step": 625257
|
| 7590 |
}
|
| 7591 |
],
|
| 7592 |
"max_steps": 972622,
|
| 7593 |
"num_train_epochs": 14,
|
| 7594 |
+
"total_flos": 3.845195420804317e+18,
|
| 7595 |
"trial_name": null,
|
| 7596 |
"trial_params": null
|
| 7597 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 118242180
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9af26e70d13d97dd1148f504edda0ca0d4b3e70b4d9f65a19697b44e426a3580
|
| 3 |
size 118242180
|
runs/Jul30_01-06-41_user-SYS-5049A-TR/events.out.tfevents.1659110814.user-SYS-5049A-TR.3945016.0
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:15dad8bb38ca7c3dfddd43f874a30019a7bb4e8290fe3bcc51c26b73d24ec10e
|
| 3 |
+
size 206287
|