Training in progress, step 720000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +123 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893441093
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:66a4dbe9eca38a1792482b27bee5680ed76ff9b4fd9c693743e32db4ef9e8647
|
| 3 |
size 893441093
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5c1af069a55aa141dcc761c0323c960402d5bdd923f0a54d0011b64f759ce573
|
| 3 |
size 449450757
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b66387fbbba596b80573df493f29c352b88c96c68303d4022fc8f1e14c19ee9e
|
| 3 |
size 14583
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cd40898eaf2bb8f0e6b4793242d3113eb1c145ed61a59dba0b51391b3bf76148
|
| 3 |
size 14583
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d33131db4d0dac1ab66ab66d875733bb83b9d53bdcdbb76cdb67e79eb395bffe
|
| 3 |
size 14583
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4b4ac527158b6aa910da1978f5ab170d69172ed0e22a278e864ebd1fa5d08f7e
|
| 3 |
size 14583
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9c41eb120388058e9e86dafd9920ba056d2f7de5234fa0df636e9377bd5cd58c
|
| 3 |
size 14583
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7a27da1b3b783fed75c82831476e53d38ef818a90b5db4dbe8bd2147c26f1d00
|
| 3 |
size 14583
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fc1301019ee94a4e73634cfb2e922704c945921ceecc63c3c74ee79aae0fb318
|
| 3 |
size 14583
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f15d3ae110e1b804d6f357797df2bd20ffa7af4cc5c48ef5f3805abb21eebf52
|
| 3 |
size 14583
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 627
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e2712ef8beaf45f8ea70ea16f7da83a9783b51ec4e90a9cb114e7f3a2c3044b7
|
| 3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 1.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -8526,11 +8526,131 @@
|
|
| 8526 |
"learning_rate": 3.979699361219395e-05,
|
| 8527 |
"loss": 0.2956,
|
| 8528 |
"step": 710000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8529 |
}
|
| 8530 |
],
|
| 8531 |
"max_steps": 1000000,
|
| 8532 |
"num_train_epochs": 2,
|
| 8533 |
-
"total_flos": 4.
|
| 8534 |
"trial_name": null,
|
| 8535 |
"trial_params": null
|
| 8536 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.4393148861142095,
|
| 5 |
+
"global_step": 720000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 8526 |
"learning_rate": 3.979699361219395e-05,
|
| 8527 |
"loss": 0.2956,
|
| 8528 |
"step": 710000
|
| 8529 |
+
},
|
| 8530 |
+
{
|
| 8531 |
+
"epoch": 1.42,
|
| 8532 |
+
"learning_rate": 3.9702298612493816e-05,
|
| 8533 |
+
"loss": 0.2958,
|
| 8534 |
+
"step": 710500
|
| 8535 |
+
},
|
| 8536 |
+
{
|
| 8537 |
+
"epoch": 1.42,
|
| 8538 |
+
"learning_rate": 3.960771378517049e-05,
|
| 8539 |
+
"loss": 0.296,
|
| 8540 |
+
"step": 711000
|
| 8541 |
+
},
|
| 8542 |
+
{
|
| 8543 |
+
"epoch": 1.42,
|
| 8544 |
+
"learning_rate": 3.951323938881533e-05,
|
| 8545 |
+
"loss": 0.2953,
|
| 8546 |
+
"step": 711500
|
| 8547 |
+
},
|
| 8548 |
+
{
|
| 8549 |
+
"epoch": 1.42,
|
| 8550 |
+
"learning_rate": 3.941887568171766e-05,
|
| 8551 |
+
"loss": 0.2956,
|
| 8552 |
+
"step": 712000
|
| 8553 |
+
},
|
| 8554 |
+
{
|
| 8555 |
+
"epoch": 1.42,
|
| 8556 |
+
"learning_rate": 3.9324622921864323e-05,
|
| 8557 |
+
"loss": 0.2963,
|
| 8558 |
+
"step": 712500
|
| 8559 |
+
},
|
| 8560 |
+
{
|
| 8561 |
+
"epoch": 1.43,
|
| 8562 |
+
"learning_rate": 3.923048136693873e-05,
|
| 8563 |
+
"loss": 0.2951,
|
| 8564 |
+
"step": 713000
|
| 8565 |
+
},
|
| 8566 |
+
{
|
| 8567 |
+
"epoch": 1.43,
|
| 8568 |
+
"learning_rate": 3.913645127432028e-05,
|
| 8569 |
+
"loss": 0.2957,
|
| 8570 |
+
"step": 713500
|
| 8571 |
+
},
|
| 8572 |
+
{
|
| 8573 |
+
"epoch": 1.43,
|
| 8574 |
+
"learning_rate": 3.904253290108369e-05,
|
| 8575 |
+
"loss": 0.2953,
|
| 8576 |
+
"step": 714000
|
| 8577 |
+
},
|
| 8578 |
+
{
|
| 8579 |
+
"epoch": 1.43,
|
| 8580 |
+
"learning_rate": 3.8948726503998176e-05,
|
| 8581 |
+
"loss": 0.2954,
|
| 8582 |
+
"step": 714500
|
| 8583 |
+
},
|
| 8584 |
+
{
|
| 8585 |
+
"epoch": 1.43,
|
| 8586 |
+
"learning_rate": 3.885503233952689e-05,
|
| 8587 |
+
"loss": 0.2958,
|
| 8588 |
+
"step": 715000
|
| 8589 |
+
},
|
| 8590 |
+
{
|
| 8591 |
+
"epoch": 1.43,
|
| 8592 |
+
"learning_rate": 3.876145066382606e-05,
|
| 8593 |
+
"loss": 0.2948,
|
| 8594 |
+
"step": 715500
|
| 8595 |
+
},
|
| 8596 |
+
{
|
| 8597 |
+
"epoch": 1.43,
|
| 8598 |
+
"learning_rate": 3.86679817327444e-05,
|
| 8599 |
+
"loss": 0.2953,
|
| 8600 |
+
"step": 716000
|
| 8601 |
+
},
|
| 8602 |
+
{
|
| 8603 |
+
"epoch": 1.43,
|
| 8604 |
+
"learning_rate": 3.857462580182245e-05,
|
| 8605 |
+
"loss": 0.2952,
|
| 8606 |
+
"step": 716500
|
| 8607 |
+
},
|
| 8608 |
+
{
|
| 8609 |
+
"epoch": 1.43,
|
| 8610 |
+
"learning_rate": 3.848138312629171e-05,
|
| 8611 |
+
"loss": 0.2953,
|
| 8612 |
+
"step": 717000
|
| 8613 |
+
},
|
| 8614 |
+
{
|
| 8615 |
+
"epoch": 1.43,
|
| 8616 |
+
"learning_rate": 3.838825396107415e-05,
|
| 8617 |
+
"loss": 0.2962,
|
| 8618 |
+
"step": 717500
|
| 8619 |
+
},
|
| 8620 |
+
{
|
| 8621 |
+
"epoch": 1.44,
|
| 8622 |
+
"learning_rate": 3.8295238560781317e-05,
|
| 8623 |
+
"loss": 0.2957,
|
| 8624 |
+
"step": 718000
|
| 8625 |
+
},
|
| 8626 |
+
{
|
| 8627 |
+
"epoch": 1.44,
|
| 8628 |
+
"learning_rate": 3.820233717971374e-05,
|
| 8629 |
+
"loss": 0.2955,
|
| 8630 |
+
"step": 718500
|
| 8631 |
+
},
|
| 8632 |
+
{
|
| 8633 |
+
"epoch": 1.44,
|
| 8634 |
+
"learning_rate": 3.810955007186029e-05,
|
| 8635 |
+
"loss": 0.2953,
|
| 8636 |
+
"step": 719000
|
| 8637 |
+
},
|
| 8638 |
+
{
|
| 8639 |
+
"epoch": 1.44,
|
| 8640 |
+
"learning_rate": 3.801687749089737e-05,
|
| 8641 |
+
"loss": 0.295,
|
| 8642 |
+
"step": 719500
|
| 8643 |
+
},
|
| 8644 |
+
{
|
| 8645 |
+
"epoch": 1.44,
|
| 8646 |
+
"learning_rate": 3.792431969018824e-05,
|
| 8647 |
+
"loss": 0.2951,
|
| 8648 |
+
"step": 720000
|
| 8649 |
}
|
| 8650 |
],
|
| 8651 |
"max_steps": 1000000,
|
| 8652 |
"num_train_epochs": 2,
|
| 8653 |
+
"total_flos": 4.86771334519035e+22,
|
| 8654 |
"trial_name": null,
|
| 8655 |
"trial_params": null
|
| 8656 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5c1af069a55aa141dcc761c0323c960402d5bdd923f0a54d0011b64f759ce573
|
| 3 |
size 449450757
|