Training in progress, step 290000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +2 -2
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +2 -2
- last-checkpoint/rng_state_5.pth +2 -2
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +2 -2
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 202194449
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cd4094b5cac60d73fdd6f21e7e668f3210934954f13fb86e2d4209ee938a0a5a
|
| 3 |
size 202194449
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 102501541
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d6255fd3a6ff2bf7ca5c36d99c6e77a3008adb68677e42013dd8386bb7b970a5
|
| 3 |
size 102501541
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:363babfb7b54265d790fbecb2309bf42f41b102f3bf25fe89de84147b11c7dfa
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:510c7713e00d76055c77bffa7429d1c526fc618345e8f8ea963b237765d79340
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1ae1f30b94d6bbe49a697a2558dea2baf48ccbdc3ae096616d495689477f1d7b
|
| 3 |
+
size 14439
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:815dd65ada93eb961b018854672e652fa0d47bdfa3d615278f6e0ee59635af1b
|
| 3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eddefbea3505d30a2cf1bb4dae32403e020f44a6c57a201cd9d6a10a92b68999
|
| 3 |
+
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:87c32cbcd683d4257d3e7fe41cd4c20d1f40623baf6540486e5ca371ee7890e3
|
| 3 |
+
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14439
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3f5b8ca27148787504a99d545fbc90a1c7466702929561717fa3c00574d165ef
|
| 3 |
size 14439
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a00f019162fc5f8994e9b1cb654b981eebf83af07e8dead098665a0f88f9319c
|
| 3 |
+
size 14567
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:39bc196d8aea9810b9698ff8cd04e2aeef8774f706fbd61ae0f0055bbacd0eaf
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 8.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -5606,11 +5606,211 @@
|
|
| 5606 |
"eval_samples_per_second": 1904.885,
|
| 5607 |
"eval_steps_per_second": 30.478,
|
| 5608 |
"step": 280000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5609 |
}
|
| 5610 |
],
|
| 5611 |
"max_steps": 500000,
|
| 5612 |
"num_train_epochs": 16,
|
| 5613 |
-
"total_flos":
|
| 5614 |
"trial_name": null,
|
| 5615 |
"trial_params": null
|
| 5616 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 8.885076135911026,
|
| 5 |
+
"global_step": 290000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 5606 |
"eval_samples_per_second": 1904.885,
|
| 5607 |
"eval_steps_per_second": 30.478,
|
| 5608 |
"step": 280000
|
| 5609 |
+
},
|
| 5610 |
+
{
|
| 5611 |
+
"epoch": 8.59,
|
| 5612 |
+
"learning_rate": 0.00013777852207079235,
|
| 5613 |
+
"loss": 0.3337,
|
| 5614 |
+
"step": 280500
|
| 5615 |
+
},
|
| 5616 |
+
{
|
| 5617 |
+
"epoch": 8.61,
|
| 5618 |
+
"learning_rate": 0.00013730250483349825,
|
| 5619 |
+
"loss": 0.3335,
|
| 5620 |
+
"step": 281000
|
| 5621 |
+
},
|
| 5622 |
+
{
|
| 5623 |
+
"epoch": 8.61,
|
| 5624 |
+
"eval_loss": 0.7787224054336548,
|
| 5625 |
+
"eval_runtime": 0.5159,
|
| 5626 |
+
"eval_samples_per_second": 1938.186,
|
| 5627 |
+
"eval_steps_per_second": 31.011,
|
| 5628 |
+
"step": 281000
|
| 5629 |
+
},
|
| 5630 |
+
{
|
| 5631 |
+
"epoch": 8.62,
|
| 5632 |
+
"learning_rate": 0.00013682668113317584,
|
| 5633 |
+
"loss": 0.3334,
|
| 5634 |
+
"step": 281500
|
| 5635 |
+
},
|
| 5636 |
+
{
|
| 5637 |
+
"epoch": 8.64,
|
| 5638 |
+
"learning_rate": 0.00013635105617335703,
|
| 5639 |
+
"loss": 0.3332,
|
| 5640 |
+
"step": 282000
|
| 5641 |
+
},
|
| 5642 |
+
{
|
| 5643 |
+
"epoch": 8.64,
|
| 5644 |
+
"eval_loss": 0.7804464101791382,
|
| 5645 |
+
"eval_runtime": 0.5024,
|
| 5646 |
+
"eval_samples_per_second": 1990.477,
|
| 5647 |
+
"eval_steps_per_second": 31.848,
|
| 5648 |
+
"step": 282000
|
| 5649 |
+
},
|
| 5650 |
+
{
|
| 5651 |
+
"epoch": 8.66,
|
| 5652 |
+
"learning_rate": 0.00013587563515539996,
|
| 5653 |
+
"loss": 0.3335,
|
| 5654 |
+
"step": 282500
|
| 5655 |
+
},
|
| 5656 |
+
{
|
| 5657 |
+
"epoch": 8.67,
|
| 5658 |
+
"learning_rate": 0.00013540042327843296,
|
| 5659 |
+
"loss": 0.3332,
|
| 5660 |
+
"step": 283000
|
| 5661 |
+
},
|
| 5662 |
+
{
|
| 5663 |
+
"epoch": 8.67,
|
| 5664 |
+
"eval_loss": 0.7820075750350952,
|
| 5665 |
+
"eval_runtime": 0.5246,
|
| 5666 |
+
"eval_samples_per_second": 1906.19,
|
| 5667 |
+
"eval_steps_per_second": 30.499,
|
| 5668 |
+
"step": 283000
|
| 5669 |
+
},
|
| 5670 |
+
{
|
| 5671 |
+
"epoch": 8.69,
|
| 5672 |
+
"learning_rate": 0.00013492542573929678,
|
| 5673 |
+
"loss": 0.3364,
|
| 5674 |
+
"step": 283500
|
| 5675 |
+
},
|
| 5676 |
+
{
|
| 5677 |
+
"epoch": 8.7,
|
| 5678 |
+
"learning_rate": 0.00013445064773248846,
|
| 5679 |
+
"loss": 0.3349,
|
| 5680 |
+
"step": 284000
|
| 5681 |
+
},
|
| 5682 |
+
{
|
| 5683 |
+
"epoch": 8.7,
|
| 5684 |
+
"eval_loss": 0.7795833349227905,
|
| 5685 |
+
"eval_runtime": 0.5328,
|
| 5686 |
+
"eval_samples_per_second": 1877.003,
|
| 5687 |
+
"eval_steps_per_second": 30.032,
|
| 5688 |
+
"step": 284000
|
| 5689 |
+
},
|
| 5690 |
+
{
|
| 5691 |
+
"epoch": 8.72,
|
| 5692 |
+
"learning_rate": 0.00013397609445010432,
|
| 5693 |
+
"loss": 0.3324,
|
| 5694 |
+
"step": 284500
|
| 5695 |
+
},
|
| 5696 |
+
{
|
| 5697 |
+
"epoch": 8.73,
|
| 5698 |
+
"learning_rate": 0.00013350177108178288,
|
| 5699 |
+
"loss": 0.3322,
|
| 5700 |
+
"step": 285000
|
| 5701 |
+
},
|
| 5702 |
+
{
|
| 5703 |
+
"epoch": 8.73,
|
| 5704 |
+
"eval_loss": 0.778048574924469,
|
| 5705 |
+
"eval_runtime": 0.5458,
|
| 5706 |
+
"eval_samples_per_second": 1832.202,
|
| 5707 |
+
"eval_steps_per_second": 29.315,
|
| 5708 |
+
"step": 285000
|
| 5709 |
+
},
|
| 5710 |
+
{
|
| 5711 |
+
"epoch": 8.75,
|
| 5712 |
+
"learning_rate": 0.00013302768281464863,
|
| 5713 |
+
"loss": 0.3325,
|
| 5714 |
+
"step": 285500
|
| 5715 |
+
},
|
| 5716 |
+
{
|
| 5717 |
+
"epoch": 8.76,
|
| 5718 |
+
"learning_rate": 0.0001325538348332548,
|
| 5719 |
+
"loss": 0.3328,
|
| 5720 |
+
"step": 286000
|
| 5721 |
+
},
|
| 5722 |
+
{
|
| 5723 |
+
"epoch": 8.76,
|
| 5724 |
+
"eval_loss": 0.7764204144477844,
|
| 5725 |
+
"eval_runtime": 0.5373,
|
| 5726 |
+
"eval_samples_per_second": 1861.329,
|
| 5727 |
+
"eval_steps_per_second": 29.781,
|
| 5728 |
+
"step": 286000
|
| 5729 |
+
},
|
| 5730 |
+
{
|
| 5731 |
+
"epoch": 8.78,
|
| 5732 |
+
"learning_rate": 0.00013208023231952706,
|
| 5733 |
+
"loss": 0.3322,
|
| 5734 |
+
"step": 286500
|
| 5735 |
+
},
|
| 5736 |
+
{
|
| 5737 |
+
"epoch": 8.79,
|
| 5738 |
+
"learning_rate": 0.0001316068804527066,
|
| 5739 |
+
"loss": 0.3323,
|
| 5740 |
+
"step": 287000
|
| 5741 |
+
},
|
| 5742 |
+
{
|
| 5743 |
+
"epoch": 8.79,
|
| 5744 |
+
"eval_loss": 0.7780716419219971,
|
| 5745 |
+
"eval_runtime": 0.5287,
|
| 5746 |
+
"eval_samples_per_second": 1891.289,
|
| 5747 |
+
"eval_steps_per_second": 30.261,
|
| 5748 |
+
"step": 287000
|
| 5749 |
+
},
|
| 5750 |
+
{
|
| 5751 |
+
"epoch": 8.81,
|
| 5752 |
+
"learning_rate": 0.00013113378440929353,
|
| 5753 |
+
"loss": 0.3322,
|
| 5754 |
+
"step": 287500
|
| 5755 |
+
},
|
| 5756 |
+
{
|
| 5757 |
+
"epoch": 8.82,
|
| 5758 |
+
"learning_rate": 0.00013066094936299056,
|
| 5759 |
+
"loss": 0.332,
|
| 5760 |
+
"step": 288000
|
| 5761 |
+
},
|
| 5762 |
+
{
|
| 5763 |
+
"epoch": 8.82,
|
| 5764 |
+
"eval_loss": 0.7822167873382568,
|
| 5765 |
+
"eval_runtime": 0.5284,
|
| 5766 |
+
"eval_samples_per_second": 1892.53,
|
| 5767 |
+
"eval_steps_per_second": 30.28,
|
| 5768 |
+
"step": 288000
|
| 5769 |
+
},
|
| 5770 |
+
{
|
| 5771 |
+
"epoch": 8.84,
|
| 5772 |
+
"learning_rate": 0.00013018838048464582,
|
| 5773 |
+
"loss": 0.332,
|
| 5774 |
+
"step": 288500
|
| 5775 |
+
},
|
| 5776 |
+
{
|
| 5777 |
+
"epoch": 8.85,
|
| 5778 |
+
"learning_rate": 0.00012971608294219702,
|
| 5779 |
+
"loss": 0.332,
|
| 5780 |
+
"step": 289000
|
| 5781 |
+
},
|
| 5782 |
+
{
|
| 5783 |
+
"epoch": 8.85,
|
| 5784 |
+
"eval_loss": 0.7825139760971069,
|
| 5785 |
+
"eval_runtime": 0.5164,
|
| 5786 |
+
"eval_samples_per_second": 1936.526,
|
| 5787 |
+
"eval_steps_per_second": 30.984,
|
| 5788 |
+
"step": 289000
|
| 5789 |
+
},
|
| 5790 |
+
{
|
| 5791 |
+
"epoch": 8.87,
|
| 5792 |
+
"learning_rate": 0.00012924406190061423,
|
| 5793 |
+
"loss": 0.332,
|
| 5794 |
+
"step": 289500
|
| 5795 |
+
},
|
| 5796 |
+
{
|
| 5797 |
+
"epoch": 8.89,
|
| 5798 |
+
"learning_rate": 0.0001287723225218441,
|
| 5799 |
+
"loss": 0.3323,
|
| 5800 |
+
"step": 290000
|
| 5801 |
+
},
|
| 5802 |
+
{
|
| 5803 |
+
"epoch": 8.89,
|
| 5804 |
+
"eval_loss": 0.7750741839408875,
|
| 5805 |
+
"eval_runtime": 0.5106,
|
| 5806 |
+
"eval_samples_per_second": 1958.426,
|
| 5807 |
+
"eval_steps_per_second": 31.335,
|
| 5808 |
+
"step": 290000
|
| 5809 |
}
|
| 5810 |
],
|
| 5811 |
"max_steps": 500000,
|
| 5812 |
"num_train_epochs": 16,
|
| 5813 |
+
"total_flos": 9.265063120844693e+21,
|
| 5814 |
"trial_name": null,
|
| 5815 |
"trial_params": null
|
| 5816 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 102501541
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d6255fd3a6ff2bf7ca5c36d99c6e77a3008adb68677e42013dd8386bb7b970a5
|
| 3 |
size 102501541
|