Training in progress, epoch 11
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +846 -3
- pytorch_model.bin +1 -1
- runs/Jul30_01-06-41_user-SYS-5049A-TR/events.out.tfevents.1659110814.user-SYS-5049A-TR.3945016.0 +2 -2
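The commit message and file layout above are what the Hugging Face Trainer produces when it is configured to push checkpoints to the Hub while training. As a hedged illustration only (the training script is not part of this repo, so every name below is a placeholder and the only values taken from the run are num_train_epochs and the 500-step logging interval visible in trainer_state.json), a setup along these lines yields per-epoch "Training in progress, epoch N" commits with a last-checkpoint/ folder:

from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="my-model",            # placeholder repo/output directory name
    num_train_epochs=14,              # matches "num_train_epochs": 14 in trainer_state.json
    logging_steps=500,                # matches the 500-step spacing of the loss entries
    evaluation_strategy="epoch",      # an eval_* record is logged at the epoch 11.0 boundary
    save_strategy="epoch",            # checkpoint once per epoch
    push_to_hub=True,                 # push to the Hub during training
    hub_strategy="checkpoint",        # also mirror the newest checkpoint under last-checkpoint/
)
# Passing these arguments to Trainer(...).train() produces commits titled
# "Training in progress, epoch N" like this one.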
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:427ddde5949f77db8935be38c6362c73ffac529ca902634168abcb77846282d4
 size 236469913
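All of the binaries in this commit are tracked with Git LFS, so each diff here only shows the small text pointer (version, oid, size) being rewritten; the checkpoint bytes themselves live in LFS storage. A minimal sketch of parsing such a pointer file, assuming a clone where the pointers have not been smudged into real files (the path is just an example from this commit):

from pathlib import Path

def read_lfs_pointer(path: str) -> dict:
    """Split each 'key value' line of a Git LFS pointer file into a dict."""
    fields = {}
    for line in Path(path).read_text().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

# On an un-smudged clone this returns the oid/size shown in the "+" lines above, e.g.
# {"version": "https://git-lfs.github.com/spec/v1", "oid": "sha256:427ddd...", "size": "236469913"}
print(read_lfs_pointer("last-checkpoint/optimizer.pt"))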
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:cf9217f1422e2ba20adfa234e66d641852a206c5630b921ecc80d6c9999bdad2
 size 118242180
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:896681ae078ffd1d4e4769bf2d6e9344fd51fb18220bfe249c5a5fc74a830917
 size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:ac1e8edaf46acd0fd7646a0bb25c1e4b46b0ab04d87474b4fe175e85e479d6d4
 size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:2f35c7bd008e8e8a41d0ab633d161ba4bfa16e5435c68d6d7e739be6d290d478
 size 14503
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:e1b3125518446f5f519d5932574d9e19db8e3320097f9a530448ee7f4f0d7d71
 size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:e08fe68888b57c19aba30af213d1336bce889e7e0465c4a58939b446db8405c5
 size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch":
-  "global_step":
+  "epoch": 11.0,
+  "global_step": 764203,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -8430,11 +8430,854 @@
     "eval_samples_per_second": 969.838,
     "eval_steps_per_second": 40.41,
     "step": 694730
+    },
+    {"epoch": 10.0, "learning_rate": 2.886337554977438e-05, "loss": 2.8173, "step": 695000},
+    {"epoch": 10.01, "learning_rate": 2.8811448808021647e-05, "loss": 2.8197, "step": 695500},
+    {"epoch": 10.02, "learning_rate": 2.8759522066268908e-05, "loss": 2.8121, "step": 696000},
+    {"epoch": 10.03, "learning_rate": 2.8707595324516173e-05, "loss": 2.8132, "step": 696500},
+    {"epoch": 10.03, "learning_rate": 2.8655668582763438e-05, "loss": 2.8159, "step": 697000},
+    {"epoch": 10.04, "learning_rate": 2.8603949547977716e-05, "loss": 2.8141, "step": 697500},
+    {"epoch": 10.05, "learning_rate": 2.855202280622498e-05, "loss": 2.819, "step": 698000},
+    {"epoch": 10.05, "learning_rate": 2.8500096064472242e-05, "loss": 2.8164, "step": 698500},
+    {"epoch": 10.06, "learning_rate": 2.844816932271951e-05, "loss": 2.8188, "step": 699000},
+    {"epoch": 10.07, "learning_rate": 2.8396242580966775e-05, "loss": 2.8136, "step": 699500},
+    {"epoch": 10.08, "learning_rate": 2.8344315839214037e-05, "loss": 2.8099, "step": 700000},
+    {"epoch": 10.08, "learning_rate": 2.8292389097461302e-05, "loss": 2.8207, "step": 700500},
+    {"epoch": 10.09, "learning_rate": 2.824046235570857e-05, "loss": 2.8109, "step": 701000},
+    {"epoch": 10.1, "learning_rate": 2.8188639467439336e-05, "loss": 2.8167, "step": 701500},
+    {"epoch": 10.1, "learning_rate": 2.8136712725686605e-05, "loss": 2.8171, "step": 702000},
+    {"epoch": 10.11, "learning_rate": 2.808478598393387e-05, "loss": 2.8198, "step": 702500},
+    {"epoch": 10.12, "learning_rate": 2.803285924218113e-05, "loss": 2.8165, "step": 703000},
+    {"epoch": 10.13, "learning_rate": 2.7981036353911904e-05, "loss": 2.8162, "step": 703500},
+    {"epoch": 10.13, "learning_rate": 2.7929109612159166e-05, "loss": 2.8141, "step": 704000},
+    {"epoch": 10.14, "learning_rate": 2.787718287040643e-05, "loss": 2.8126, "step": 704500},
+    {"epoch": 10.15, "learning_rate": 2.78252561286537e-05, "loss": 2.8126, "step": 705000},
+    {"epoch": 10.16, "learning_rate": 2.777332938690096e-05, "loss": 2.8168, "step": 705500},
+    {"epoch": 10.16, "learning_rate": 2.7721402645148225e-05, "loss": 2.8162, "step": 706000},
+    {"epoch": 10.17, "learning_rate": 2.7669579756878995e-05, "loss": 2.8156, "step": 706500},
+    {"epoch": 10.18, "learning_rate": 2.761765301512626e-05, "loss": 2.8092, "step": 707000},
+    {"epoch": 10.18, "learning_rate": 2.7565726273373528e-05, "loss": 2.8151, "step": 707500},
+    {"epoch": 10.19, "learning_rate": 2.7513799531620793e-05, "loss": 2.8134, "step": 708000},
+    {"epoch": 10.2, "learning_rate": 2.7461872789868054e-05, "loss": 2.8125, "step": 708500},
+    {"epoch": 10.21, "learning_rate": 2.741015375508233e-05, "loss": 2.8139, "step": 709000},
+    {"epoch": 10.21, "learning_rate": 2.7358227013329597e-05, "loss": 2.8176, "step": 709500},
+    {"epoch": 10.22, "learning_rate": 2.7306300271576862e-05, "loss": 2.8119, "step": 710000},
+    {"epoch": 10.23, "learning_rate": 2.7254373529824124e-05, "loss": 2.8132, "step": 710500},
+    {"epoch": 10.23, "learning_rate": 2.720244678807139e-05, "loss": 2.817, "step": 711000},
+    {"epoch": 10.24, "learning_rate": 2.7150520046318657e-05, "loss": 2.8143, "step": 711500},
+    {"epoch": 10.25, "learning_rate": 2.709859330456592e-05, "loss": 2.8172, "step": 712000},
+    {"epoch": 10.26, "learning_rate": 2.7046666562813183e-05, "loss": 2.8124, "step": 712500},
+    {"epoch": 10.26, "learning_rate": 2.6994739821060448e-05, "loss": 2.814, "step": 713000},
+    {"epoch": 10.27, "learning_rate": 2.6942813079307716e-05, "loss": 2.812, "step": 713500},
+    {"epoch": 10.28, "learning_rate": 2.6890886337554978e-05, "loss": 2.8138, "step": 714000},
+    {"epoch": 10.28, "learning_rate": 2.683906344928575e-05, "loss": 2.8112, "step": 714500},
+    {"epoch": 10.29, "learning_rate": 2.6787136707533012e-05, "loss": 2.811, "step": 715000},
+    {"epoch": 10.3, "learning_rate": 2.6735209965780277e-05, "loss": 2.8142, "step": 715500},
+    {"epoch": 10.31, "learning_rate": 2.6683283224027546e-05, "loss": 2.812, "step": 716000},
+    {"epoch": 10.31, "learning_rate": 2.663135648227481e-05, "loss": 2.8149, "step": 716500},
+    {"epoch": 10.32, "learning_rate": 2.657953359400558e-05, "loss": 2.8093, "step": 717000},
+    {"epoch": 10.33, "learning_rate": 2.6527606852252845e-05, "loss": 2.8155, "step": 717500},
+    {"epoch": 10.33, "learning_rate": 2.6475680110500107e-05, "loss": 2.8115, "step": 718000},
+    {"epoch": 10.34, "learning_rate": 2.642375336874737e-05, "loss": 2.8132, "step": 718500},
+    {"epoch": 10.35, "learning_rate": 2.637182662699464e-05, "loss": 2.8131, "step": 719000},
+    {"epoch": 10.36, "learning_rate": 2.63198998852419e-05, "loss": 2.8088, "step": 719500},
+    {"epoch": 10.36, "learning_rate": 2.6268076996972674e-05, "loss": 2.8079, "step": 720000},
+    {"epoch": 10.37, "learning_rate": 2.6216150255219936e-05, "loss": 2.8035, "step": 720500},
+    {"epoch": 10.38, "learning_rate": 2.61642235134672e-05, "loss": 2.8155, "step": 721000},
+    {"epoch": 10.39, "learning_rate": 2.611229677171447e-05, "loss": 2.8114, "step": 721500},
+    {"epoch": 10.39, "learning_rate": 2.6060473883445235e-05, "loss": 2.8157, "step": 722000},
+    {"epoch": 10.4, "learning_rate": 2.6008547141692504e-05, "loss": 2.8042, "step": 722500},
+    {"epoch": 10.41, "learning_rate": 2.595662039993977e-05, "loss": 2.8137, "step": 723000},
+    {"epoch": 10.41, "learning_rate": 2.590469365818703e-05, "loss": 2.8136, "step": 723500},
+    {"epoch": 10.42, "learning_rate": 2.5852766916434295e-05, "loss": 2.8074, "step": 724000},
+    {"epoch": 10.43, "learning_rate": 2.5800944028165064e-05, "loss": 2.8052, "step": 724500},
+    {"epoch": 10.44, "learning_rate": 2.574901728641233e-05, "loss": 2.8065, "step": 725000},
+    {"epoch": 10.44, "learning_rate": 2.5697090544659598e-05, "loss": 2.814, "step": 725500},
+    {"epoch": 10.45, "learning_rate": 2.5645163802906856e-05, "loss": 2.8128, "step": 726000},
+    {"epoch": 10.46, "learning_rate": 2.5593237061154124e-05, "loss": 2.8117, "step": 726500},
+    {"epoch": 10.46, "learning_rate": 2.554131031940139e-05, "loss": 2.8092, "step": 727000},
+    {"epoch": 10.47, "learning_rate": 2.5489383577648657e-05, "loss": 2.8079, "step": 727500},
+    {"epoch": 10.48, "learning_rate": 2.543745683589592e-05, "loss": 2.8108, "step": 728000},
+    {"epoch": 10.49, "learning_rate": 2.5385633947626692e-05, "loss": 2.8113, "step": 728500},
+    {"epoch": 10.49, "learning_rate": 2.5333811059357458e-05, "loss": 2.8091, "step": 729000},
+    {"epoch": 10.5, "learning_rate": 2.5281884317604726e-05, "loss": 2.8056, "step": 729500},
+    {"epoch": 10.51, "learning_rate": 2.5229957575851988e-05, "loss": 2.8141, "step": 730000},
+    {"epoch": 10.51, "learning_rate": 2.5178030834099253e-05, "loss": 2.8058, "step": 730500},
+    {"epoch": 10.52, "learning_rate": 2.512610409234652e-05, "loss": 2.8084, "step": 731000},
+    {"epoch": 10.53, "learning_rate": 2.5074177350593786e-05, "loss": 2.8066, "step": 731500},
+    {"epoch": 10.54, "learning_rate": 2.5022250608841047e-05, "loss": 2.8056, "step": 732000},
+    {"epoch": 10.54, "learning_rate": 2.4970427720571817e-05, "loss": 2.8085, "step": 732500},
+    {"epoch": 10.55, "learning_rate": 2.4918500978819085e-05, "loss": 2.8108, "step": 733000},
+    {"epoch": 10.56, "learning_rate": 2.4866574237066347e-05, "loss": 2.8139, "step": 733500},
+    {"epoch": 10.57, "learning_rate": 2.4814647495313612e-05, "loss": 2.8046, "step": 734000},
+    {"epoch": 10.57, "learning_rate": 2.4762720753560877e-05, "loss": 2.8119, "step": 734500},
+    {"epoch": 10.58, "learning_rate": 2.471089786529165e-05, "loss": 2.8117, "step": 735000},
+    {"epoch": 10.59, "learning_rate": 2.465897112353891e-05, "loss": 2.8102, "step": 735500},
+    {"epoch": 10.59, "learning_rate": 2.4607044381786176e-05, "loss": 2.8075, "step": 736000},
+    {"epoch": 10.6, "learning_rate": 2.455511764003344e-05, "loss": 2.8127, "step": 736500},
+    {"epoch": 10.61, "learning_rate": 2.450329475176421e-05, "loss": 2.8076, "step": 737000},
+    {"epoch": 10.62, "learning_rate": 2.4451368010011476e-05, "loss": 2.8036, "step": 737500},
+    {"epoch": 10.62, "learning_rate": 2.439944126825874e-05, "loss": 2.8055, "step": 738000},
+    {"epoch": 10.63, "learning_rate": 2.434751452650601e-05, "loss": 2.8104, "step": 738500},
+    {"epoch": 10.64, "learning_rate": 2.429558778475327e-05, "loss": 2.8087, "step": 739000},
+    {"epoch": 10.64, "learning_rate": 2.4243764896484043e-05, "loss": 2.8048, "step": 739500},
+    {"epoch": 10.65, "learning_rate": 2.4191838154731305e-05, "loss": 2.8087, "step": 740000},
+    {"epoch": 10.66, "learning_rate": 2.4139911412978573e-05, "loss": 2.8019, "step": 740500},
+    {"epoch": 10.67, "learning_rate": 2.4087984671225835e-05, "loss": 2.8137, "step": 741000},
+    {"epoch": 10.67, "learning_rate": 2.40360579294731e-05, "loss": 2.808, "step": 741500},
+    {"epoch": 10.68, "learning_rate": 2.398423504120387e-05, "loss": 2.8106, "step": 742000},
+    {"epoch": 10.69, "learning_rate": 2.3932308299451137e-05, "loss": 2.8067, "step": 742500},
+    {"epoch": 10.69, "learning_rate": 2.38803815576984e-05, "loss": 2.8072, "step": 743000},
+    {"epoch": 10.7, "learning_rate": 2.3828454815945664e-05, "loss": 2.8095, "step": 743500},
+    {"epoch": 10.71, "learning_rate": 2.377652807419293e-05, "loss": 2.8015, "step": 744000},
+    {"epoch": 10.72, "learning_rate": 2.3724601332440194e-05, "loss": 2.8056, "step": 744500},
+    {"epoch": 10.72, "learning_rate": 2.367267459068746e-05, "loss": 2.8076, "step": 745000},
+    {"epoch": 10.73, "learning_rate": 2.3620747848934724e-05, "loss": 2.8052, "step": 745500},
+    {"epoch": 10.74, "learning_rate": 2.3568924960665497e-05, "loss": 2.8051, "step": 746000},
+    {"epoch": 10.75, "learning_rate": 2.3516998218912758e-05, "loss": 2.8012, "step": 746500},
+    {"epoch": 10.75, "learning_rate": 2.346517533064353e-05, "loss": 2.8097, "step": 747000},
+    {"epoch": 10.76, "learning_rate": 2.3413248588890793e-05, "loss": 2.802, "step": 747500},
+    {"epoch": 10.77, "learning_rate": 2.336132184713806e-05, "loss": 2.801, "step": 748000},
+    {"epoch": 10.77, "learning_rate": 2.3309395105385322e-05, "loss": 2.8061, "step": 748500},
+    {"epoch": 10.78, "learning_rate": 2.325746836363259e-05, "loss": 2.8079, "step": 749000},
+    {"epoch": 10.79, "learning_rate": 2.3205541621879852e-05, "loss": 2.8062, "step": 749500},
+    {"epoch": 10.8, "learning_rate": 2.3153614880127117e-05, "loss": 2.8007, "step": 750000},
+    {"epoch": 10.8, "learning_rate": 2.3101688138374382e-05, "loss": 2.8057, "step": 750500},
+    {"epoch": 10.81, "learning_rate": 2.304986525010515e-05, "loss": 2.8045, "step": 751000},
+    {"epoch": 10.82, "learning_rate": 2.2997938508352417e-05, "loss": 2.8053, "step": 751500},
+    {"epoch": 10.82, "learning_rate": 2.294601176659968e-05, "loss": 2.8026, "step": 752000},
+    {"epoch": 10.83, "learning_rate": 2.2894085024846946e-05, "loss": 2.805, "step": 752500},
+    {"epoch": 10.84, "learning_rate": 2.2842262136577716e-05, "loss": 2.8023, "step": 753000},
+    {"epoch": 10.85, "learning_rate": 2.2790335394824984e-05, "loss": 2.8072, "step": 753500},
+    {"epoch": 10.85, "learning_rate": 2.2738408653072246e-05, "loss": 2.8021, "step": 754000},
+    {"epoch": 10.86, "learning_rate": 2.2686481911319514e-05, "loss": 2.8027, "step": 754500},
+    {"epoch": 10.87, "learning_rate": 2.2634555169566776e-05, "loss": 2.8036, "step": 755000},
+    {"epoch": 10.87, "learning_rate": 2.258273228129755e-05, "loss": 2.8052, "step": 755500},
+    {"epoch": 10.88, "learning_rate": 2.253080553954481e-05, "loss": 2.8014, "step": 756000},
+    {"epoch": 10.89, "learning_rate": 2.247887879779208e-05, "loss": 2.8009, "step": 756500},
+    {"epoch": 10.9, "learning_rate": 2.242695205603934e-05, "loss": 2.804, "step": 757000},
+    {"epoch": 10.9, "learning_rate": 2.2375129167770113e-05, "loss": 2.8077, "step": 757500},
+    {"epoch": 10.91, "learning_rate": 2.2323202426017374e-05, "loss": 2.7989, "step": 758000},
+    {"epoch": 10.92, "learning_rate": 2.227127568426464e-05, "loss": 2.8051, "step": 758500},
+    {"epoch": 10.93, "learning_rate": 2.2219348942511904e-05, "loss": 2.8048, "step": 759000},
+    {"epoch": 10.93, "learning_rate": 2.216742220075917e-05, "loss": 2.7965, "step": 759500},
+    {"epoch": 10.94, "learning_rate": 2.211559931248994e-05, "loss": 2.801, "step": 760000},
+    {"epoch": 10.95, "learning_rate": 2.2063672570737204e-05, "loss": 2.8038, "step": 760500},
+    {"epoch": 10.95, "learning_rate": 2.201174582898447e-05, "loss": 2.8041, "step": 761000},
+    {"epoch": 10.96, "learning_rate": 2.1959819087231733e-05, "loss": 2.8061, "step": 761500},
+    {"epoch": 10.97, "learning_rate": 2.1907892345479002e-05, "loss": 2.8046, "step": 762000},
+    {"epoch": 10.98, "learning_rate": 2.1856069457209768e-05, "loss": 2.8002, "step": 762500},
+    {"epoch": 10.98, "learning_rate": 2.1804142715457036e-05, "loss": 2.8039, "step": 763000},
+    {"epoch": 10.99, "learning_rate": 2.1752215973704298e-05, "loss": 2.802, "step": 763500},
+    {"epoch": 11.0, "learning_rate": 2.1700289231951566e-05, "loss": 2.8026, "step": 764000},
+    {"epoch": 11.0, "eval_accuracy": 0.5046112569907905, "eval_loss": 2.62461256980896, "eval_runtime": 555.9604, "eval_samples_per_second": 969.38, "eval_steps_per_second": 40.391, "step": 764203
     }
   ],
   "max_steps": 972622,
   "num_train_epochs": 14,
-  "total_flos": 4.
+  "total_flos": 4.6996799356928e+18,
   "trial_name": null,
   "trial_params": null
 }
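The added rows in this diff are entries of trainer_state.json's log_history list: an {epoch, learning_rate, loss, step} record every 500 optimizer steps, plus an eval_* record at the end of epoch 11. A minimal sketch, assuming a local copy of last-checkpoint/trainer_state.json, of reading the loss curve back out of that structure:

import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

train_logs = [e for e in state["log_history"] if "loss" in e]       # the 500-step training rows
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]   # one row per evaluation

print("last logged loss:", train_logs[-1]["loss"], "at step", train_logs[-1]["step"])  # 2.8026 at step 764000
print("epoch 11 eval_loss:", eval_logs[-1]["eval_loss"])                               # 2.62461256980896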
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:cf9217f1422e2ba20adfa234e66d641852a206c5630b921ecc80d6c9999bdad2
 size 118242180
runs/Jul30_01-06-41_user-SYS-5049A-TR/events.out.tfevents.1659110814.user-SYS-5049A-TR.3945016.0 CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:691c58d07abd0364109c6890a1a42b4785eb7fdcf6aab10cc7829acc674dcedd
+size 251425
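The events.out.tfevents file is the TensorBoard log for the same run; its LFS pointer now records 251425 bytes. A minimal sketch, assuming the tensorboard package is installed and the runs/ directory has been pulled locally (the scalar tag names are assumptions, since they depend on the logging callback), of listing what it contains:

from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

# Point the accumulator at the directory holding the events file.
acc = EventAccumulator("runs/Jul30_01-06-41_user-SYS-5049A-TR")
acc.Reload()                                   # load all logged events into memory

scalar_tags = acc.Tags()["scalars"]            # e.g. "train/loss", "eval/loss" for a Trainer run
print(scalar_tags)
for event in acc.Scalars(scalar_tags[0]):      # each record carries wall_time, step, value
    print(event.step, event.value)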