Training in progress, step 490000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +2 -2
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +2 -2
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:abdc666b2f3669fde2f13b1cbce8537a9750ab5e917c8e2f654b514fc145c70c
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:99bd6ef8a1b85dd6a22f6aedd2dc916de7e85d96497ce03a01c5ad35aba260ef
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f5a73351b78231930c5e38a85e2db75ffb99765eca05e9e6dd123f382ddd4cb3
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7260623fa55e4f39900f0d796a360342ed8e000aa7fbed24d40632bf5f5532f3
|
| 3 |
+
size 14439
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ccfe48daf5b331e0d6c664328074c7da11a0476f84c219e54335158a88175b91
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cff816a3de440d565f73bab1c06a61b794b87400c0cb82ffdc2d9ef43530b338
|
| 3 |
+
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:74800ce917e328df8d2e651e5da6a2b131e41e32b116f92b00e5f62a5503f854
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 7.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -3558,11 +3558,85 @@
|
|
| 3558 |
"eval_samples_per_second": 966.468,
|
| 3559 |
"eval_steps_per_second": 15.463,
|
| 3560 |
"step": 480000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3561 |
}
|
| 3562 |
],
|
| 3563 |
"max_steps": 1000000,
|
| 3564 |
"num_train_epochs": 16,
|
| 3565 |
-
"total_flos": 3.
|
| 3566 |
"trial_name": null,
|
| 3567 |
"trial_params": null
|
| 3568 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 7.482401087238689,
|
| 5 |
+
"global_step": 490000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 3558 |
"eval_samples_per_second": 966.468,
|
| 3559 |
"eval_steps_per_second": 15.463,
|
| 3560 |
"step": 480000
|
| 3561 |
+
},
|
| 3562 |
+
{
|
| 3563 |
+
"epoch": 7.34,
|
| 3564 |
+
"learning_rate": 9.014947164477721e-05,
|
| 3565 |
+
"loss": 0.2663,
|
| 3566 |
+
"step": 481000
|
| 3567 |
+
},
|
| 3568 |
+
{
|
| 3569 |
+
"epoch": 7.36,
|
| 3570 |
+
"learning_rate": 8.992037695672967e-05,
|
| 3571 |
+
"loss": 0.267,
|
| 3572 |
+
"step": 482000
|
| 3573 |
+
},
|
| 3574 |
+
{
|
| 3575 |
+
"epoch": 7.38,
|
| 3576 |
+
"learning_rate": 8.969117378102912e-05,
|
| 3577 |
+
"loss": 0.2665,
|
| 3578 |
+
"step": 483000
|
| 3579 |
+
},
|
| 3580 |
+
{
|
| 3581 |
+
"epoch": 7.39,
|
| 3582 |
+
"learning_rate": 8.946186462420478e-05,
|
| 3583 |
+
"loss": 0.2662,
|
| 3584 |
+
"step": 484000
|
| 3585 |
+
},
|
| 3586 |
+
{
|
| 3587 |
+
"epoch": 7.41,
|
| 3588 |
+
"learning_rate": 8.923245199394482e-05,
|
| 3589 |
+
"loss": 0.2662,
|
| 3590 |
+
"step": 485000
|
| 3591 |
+
},
|
| 3592 |
+
{
|
| 3593 |
+
"epoch": 7.41,
|
| 3594 |
+
"eval_runtime": 1.0079,
|
| 3595 |
+
"eval_samples_per_second": 992.191,
|
| 3596 |
+
"eval_steps_per_second": 15.875,
|
| 3597 |
+
"step": 485000
|
| 3598 |
+
},
|
| 3599 |
+
{
|
| 3600 |
+
"epoch": 7.42,
|
| 3601 |
+
"learning_rate": 8.900293839906903e-05,
|
| 3602 |
+
"loss": 0.2664,
|
| 3603 |
+
"step": 486000
|
| 3604 |
+
},
|
| 3605 |
+
{
|
| 3606 |
+
"epoch": 7.44,
|
| 3607 |
+
"learning_rate": 8.87733263495013e-05,
|
| 3608 |
+
"loss": 0.2658,
|
| 3609 |
+
"step": 487000
|
| 3610 |
+
},
|
| 3611 |
+
{
|
| 3612 |
+
"epoch": 7.45,
|
| 3613 |
+
"learning_rate": 8.85436183562422e-05,
|
| 3614 |
+
"loss": 0.2659,
|
| 3615 |
+
"step": 488000
|
| 3616 |
+
},
|
| 3617 |
+
{
|
| 3618 |
+
"epoch": 7.47,
|
| 3619 |
+
"learning_rate": 8.83138169313416e-05,
|
| 3620 |
+
"loss": 0.2663,
|
| 3621 |
+
"step": 489000
|
| 3622 |
+
},
|
| 3623 |
+
{
|
| 3624 |
+
"epoch": 7.48,
|
| 3625 |
+
"learning_rate": 8.808392458787103e-05,
|
| 3626 |
+
"loss": 0.2656,
|
| 3627 |
+
"step": 490000
|
| 3628 |
+
},
|
| 3629 |
+
{
|
| 3630 |
+
"epoch": 7.48,
|
| 3631 |
+
"eval_runtime": 1.075,
|
| 3632 |
+
"eval_samples_per_second": 930.213,
|
| 3633 |
+
"eval_steps_per_second": 14.883,
|
| 3634 |
+
"step": 490000
|
| 3635 |
}
|
| 3636 |
],
|
| 3637 |
"max_steps": 1000000,
|
| 3638 |
"num_train_epochs": 16,
|
| 3639 |
+
"total_flos": 3.434908191009969e+22,
|
| 3640 |
"trial_name": null,
|
| 3641 |
"trial_params": null
|
| 3642 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:99bd6ef8a1b85dd6a22f6aedd2dc916de7e85d96497ce03a01c5ad35aba260ef
|
| 3 |
size 449471589
|