Training in progress, step 760000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +2 -2
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b007e5fd244b5729ff8cacaa57b1c785d03d898d5343c77b7ff241a5f8a33380
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4e44d1153c2511a74df34714d54a3a947742b99b6eae06df301cfffab1c1eeb6
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:120630248e0966968ea30af6ab08315c5a7b742e34d05df0eaea1327acea70ef
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9ac732455f777abd748b7734f32cb802da48d17d36fc1f892d1c7b8c265c6c8d
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3b6effa5b718ae036f8fdfe02790dba13a4eddf18e987e06715606a6733b193f
|
| 3 |
+
size 14567
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:39fd70086a62614811003e1691a71dc1a1851fe2a8de8e368309da1203a65b12
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e175165a3eac134d38f788fbf76210f5cf20ad100371de72779cc7de9da4b86f
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 11.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -5556,11 +5556,85 @@
|
|
| 5556 |
"eval_samples_per_second": 1380.717,
|
| 5557 |
"eval_steps_per_second": 22.091,
|
| 5558 |
"step": 750000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5559 |
}
|
| 5560 |
],
|
| 5561 |
"max_steps": 1000000,
|
| 5562 |
"num_train_epochs": 16,
|
| 5563 |
-
"total_flos": 5.
|
| 5564 |
"trial_name": null,
|
| 5565 |
"trial_params": null
|
| 5566 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 11.60535678837021,
|
| 5 |
+
"global_step": 760000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 5556 |
"eval_samples_per_second": 1380.717,
|
| 5557 |
"eval_steps_per_second": 22.091,
|
| 5558 |
"step": 750000
|
| 5559 |
+
},
|
| 5560 |
+
{
|
| 5561 |
+
"epoch": 11.47,
|
| 5562 |
+
"learning_rate": 3.2420239909845894e-05,
|
| 5563 |
+
"loss": 0.2403,
|
| 5564 |
+
"step": 751000
|
| 5565 |
+
},
|
| 5566 |
+
{
|
| 5567 |
+
"epoch": 11.48,
|
| 5568 |
+
"learning_rate": 3.2250710158127045e-05,
|
| 5569 |
+
"loss": 0.2402,
|
| 5570 |
+
"step": 752000
|
| 5571 |
+
},
|
| 5572 |
+
{
|
| 5573 |
+
"epoch": 11.5,
|
| 5574 |
+
"learning_rate": 3.2081702584991786e-05,
|
| 5575 |
+
"loss": 0.2398,
|
| 5576 |
+
"step": 753000
|
| 5577 |
+
},
|
| 5578 |
+
{
|
| 5579 |
+
"epoch": 11.51,
|
| 5580 |
+
"learning_rate": 3.191321903867988e-05,
|
| 5581 |
+
"loss": 0.2401,
|
| 5582 |
+
"step": 754000
|
| 5583 |
+
},
|
| 5584 |
+
{
|
| 5585 |
+
"epoch": 11.53,
|
| 5586 |
+
"learning_rate": 3.174526136170039e-05,
|
| 5587 |
+
"loss": 0.2403,
|
| 5588 |
+
"step": 755000
|
| 5589 |
+
},
|
| 5590 |
+
{
|
| 5591 |
+
"epoch": 11.53,
|
| 5592 |
+
"eval_runtime": 0.695,
|
| 5593 |
+
"eval_samples_per_second": 1438.835,
|
| 5594 |
+
"eval_steps_per_second": 23.021,
|
| 5595 |
+
"step": 755000
|
| 5596 |
+
},
|
| 5597 |
+
{
|
| 5598 |
+
"epoch": 11.54,
|
| 5599 |
+
"learning_rate": 3.157783139081155e-05,
|
| 5600 |
+
"loss": 0.24,
|
| 5601 |
+
"step": 756000
|
| 5602 |
+
},
|
| 5603 |
+
{
|
| 5604 |
+
"epoch": 11.56,
|
| 5605 |
+
"learning_rate": 3.141093095700072e-05,
|
| 5606 |
+
"loss": 0.2401,
|
| 5607 |
+
"step": 757000
|
| 5608 |
+
},
|
| 5609 |
+
{
|
| 5610 |
+
"epoch": 11.57,
|
| 5611 |
+
"learning_rate": 3.1244561885464244e-05,
|
| 5612 |
+
"loss": 0.252,
|
| 5613 |
+
"step": 758000
|
| 5614 |
+
},
|
| 5615 |
+
{
|
| 5616 |
+
"epoch": 11.59,
|
| 5617 |
+
"learning_rate": 3.107872599558769e-05,
|
| 5618 |
+
"loss": 0.24,
|
| 5619 |
+
"step": 759000
|
| 5620 |
+
},
|
| 5621 |
+
{
|
| 5622 |
+
"epoch": 11.61,
|
| 5623 |
+
"learning_rate": 3.0913425100925795e-05,
|
| 5624 |
+
"loss": 0.2396,
|
| 5625 |
+
"step": 760000
|
| 5626 |
+
},
|
| 5627 |
+
{
|
| 5628 |
+
"epoch": 11.61,
|
| 5629 |
+
"eval_runtime": 0.7192,
|
| 5630 |
+
"eval_samples_per_second": 1390.499,
|
| 5631 |
+
"eval_steps_per_second": 22.248,
|
| 5632 |
+
"step": 760000
|
| 5633 |
}
|
| 5634 |
],
|
| 5635 |
"max_steps": 1000000,
|
| 5636 |
"num_train_epochs": 16,
|
| 5637 |
+
"total_flos": 5.327612610556747e+22,
|
| 5638 |
"trial_name": null,
|
| 5639 |
"trial_params": null
|
| 5640 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4e44d1153c2511a74df34714d54a3a947742b99b6eae06df301cfffab1c1eeb6
|
| 3 |
size 449471589
|