Training in progress, step 890000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +2 -2
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:42ed7fe58b215bb0dbec2af4a7d664e938d635b1ef05290da2036605b37704a3
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aa4ab0c6834a759e55baa287a48208abf39c778ed277af3b0afe1b1b96d8b552
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f483f771a26716cfd70c3a5175a315dd25e3cc6dc8bc56308db21f3af0dc88d6
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5b2fa8647e8771b01ebc4433e88606f474979e505ff0f89ae2bd75e3418c26e7
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:70df957d931b4b06dbdec01474740c6119583917c674231d25b7b12875e1d7a9
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bbaae9451e4051ab9fe7948d1d8a9c41e70f70568605abb0409e412d5e85feaa
|
| 3 |
+
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2afa6aa14483adb7c817c2439178a198c4680dbfe427eab82def33bea1566914
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 13.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -6518,11 +6518,85 @@
|
|
| 6518 |
"eval_samples_per_second": 1164.009,
|
| 6519 |
"eval_steps_per_second": 18.624,
|
| 6520 |
"step": 880000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6521 |
}
|
| 6522 |
],
|
| 6523 |
"max_steps": 1000000,
|
| 6524 |
"num_train_epochs": 16,
|
| 6525 |
-
"total_flos": 6.
|
| 6526 |
"trial_name": null,
|
| 6527 |
"trial_params": null
|
| 6528 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 13.590483607433537,
|
| 5 |
+
"global_step": 890000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 6518 |
"eval_samples_per_second": 1164.009,
|
| 6519 |
"eval_steps_per_second": 18.624,
|
| 6520 |
"step": 880000
|
| 6521 |
+
},
|
| 6522 |
+
{
|
| 6523 |
+
"epoch": 13.45,
|
| 6524 |
+
"learning_rate": 1.535060126557028e-05,
|
| 6525 |
+
"loss": 0.2322,
|
| 6526 |
+
"step": 881000
|
| 6527 |
+
},
|
| 6528 |
+
{
|
| 6529 |
+
"epoch": 13.47,
|
| 6530 |
+
"learning_rate": 1.5262192375024284e-05,
|
| 6531 |
+
"loss": 0.232,
|
| 6532 |
+
"step": 882000
|
| 6533 |
+
},
|
| 6534 |
+
{
|
| 6535 |
+
"epoch": 13.48,
|
| 6536 |
+
"learning_rate": 1.5174491446770566e-05,
|
| 6537 |
+
"loss": 0.2314,
|
| 6538 |
+
"step": 883000
|
| 6539 |
+
},
|
| 6540 |
+
{
|
| 6541 |
+
"epoch": 13.5,
|
| 6542 |
+
"learning_rate": 1.508749943989242e-05,
|
| 6543 |
+
"loss": 0.2312,
|
| 6544 |
+
"step": 884000
|
| 6545 |
+
},
|
| 6546 |
+
{
|
| 6547 |
+
"epoch": 13.51,
|
| 6548 |
+
"learning_rate": 1.500121730572051e-05,
|
| 6549 |
+
"loss": 0.2314,
|
| 6550 |
+
"step": 885000
|
| 6551 |
+
},
|
| 6552 |
+
{
|
| 6553 |
+
"epoch": 13.51,
|
| 6554 |
+
"eval_runtime": 0.7508,
|
| 6555 |
+
"eval_samples_per_second": 1331.906,
|
| 6556 |
+
"eval_steps_per_second": 21.31,
|
| 6557 |
+
"step": 885000
|
| 6558 |
+
},
|
| 6559 |
+
{
|
| 6560 |
+
"epoch": 13.53,
|
| 6561 |
+
"learning_rate": 1.4915645987822406e-05,
|
| 6562 |
+
"loss": 0.2314,
|
| 6563 |
+
"step": 886000
|
| 6564 |
+
},
|
| 6565 |
+
{
|
| 6566 |
+
"epoch": 13.54,
|
| 6567 |
+
"learning_rate": 1.4830786421992347e-05,
|
| 6568 |
+
"loss": 0.2316,
|
| 6569 |
+
"step": 887000
|
| 6570 |
+
},
|
| 6571 |
+
{
|
| 6572 |
+
"epoch": 13.56,
|
| 6573 |
+
"learning_rate": 1.4746639536240942e-05,
|
| 6574 |
+
"loss": 0.2312,
|
| 6575 |
+
"step": 888000
|
| 6576 |
+
},
|
| 6577 |
+
{
|
| 6578 |
+
"epoch": 13.58,
|
| 6579 |
+
"learning_rate": 1.4663206250785055e-05,
|
| 6580 |
+
"loss": 0.2315,
|
| 6581 |
+
"step": 889000
|
| 6582 |
+
},
|
| 6583 |
+
{
|
| 6584 |
+
"epoch": 13.59,
|
| 6585 |
+
"learning_rate": 1.4580487478037748e-05,
|
| 6586 |
+
"loss": 0.2311,
|
| 6587 |
+
"step": 890000
|
| 6588 |
+
},
|
| 6589 |
+
{
|
| 6590 |
+
"epoch": 13.59,
|
| 6591 |
+
"eval_runtime": 0.7331,
|
| 6592 |
+
"eval_samples_per_second": 1364.004,
|
| 6593 |
+
"eval_steps_per_second": 21.824,
|
| 6594 |
+
"step": 890000
|
| 6595 |
}
|
| 6596 |
],
|
| 6597 |
"max_steps": 1000000,
|
| 6598 |
"num_train_epochs": 16,
|
| 6599 |
+
"total_flos": 6.238914689814962e+22,
|
| 6600 |
"trial_name": null,
|
| 6601 |
"trial_params": null
|
| 6602 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aa4ab0c6834a759e55baa287a48208abf39c778ed277af3b0afe1b1b96d8b552
|
| 3 |
size 449471589
|