Training in progress, step 83000, checkpoint
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +353 -3
- last-checkpoint/training_args.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 304481530
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:86867d7114034c5ce9cc9d029da201dff42a389f63d4b662bb9a3aaa72d02379
|
| 3 |
size 304481530
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 402029570
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:81e0d480b36bf30d291b97e091e18788bf233399a4446897865328f68d72beb6
|
| 3 |
size 402029570
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a8d7f9758da2d3e14d7f42182479d86315138a7b7b34199b33bbe616fd250fd1
|
| 3 |
size 14960
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b91440a4692b5f169135f333404f68fc858d96847193631d33bcd1a9bc277a1e
|
| 3 |
size 14960
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:07ba5f63f4711c8ab404d662c7cb13d35ae312e00a001da6fffa61922c3b4f44
|
| 3 |
size 14960
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6e5adf39b25c897bc63e4019ed90698924cec0f2e7d40940eb00ab5a52f2cef4
|
| 3 |
size 14960
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4e6939fa0bb635077cb363f18f1780aac2e900315e1171d36449035e74f63bb8
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -28708,6 +28708,356 @@
|
|
| 28708 |
"learning_rate": 0.0004798740867909171,
|
| 28709 |
"loss": 16.2152,
|
| 28710 |
"step": 82000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28711 |
}
|
| 28712 |
],
|
| 28713 |
"logging_steps": 20,
|
|
@@ -28727,7 +29077,7 @@
|
|
| 28727 |
"attributes": {}
|
| 28728 |
}
|
| 28729 |
},
|
| 28730 |
-
"total_flos": 6.
|
| 28731 |
"train_batch_size": 48,
|
| 28732 |
"trial_name": null,
|
| 28733 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.12294911980280739,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 83000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 28708 |
"learning_rate": 0.0004798740867909171,
|
| 28709 |
"loss": 16.2152,
|
| 28710 |
"step": 82000
|
| 28711 |
+
},
|
| 28712 |
+
{
|
| 28713 |
+
"epoch": 0.12149743140031641,
|
| 28714 |
+
"grad_norm": 6.03125,
|
| 28715 |
+
"learning_rate": 0.00047986914785590955,
|
| 28716 |
+
"loss": 16.2106,
|
| 28717 |
+
"step": 82020
|
| 28718 |
+
},
|
| 28719 |
+
{
|
| 28720 |
+
"epoch": 0.1215270576942448,
|
| 28721 |
+
"grad_norm": 6.4375,
|
| 28722 |
+
"learning_rate": 0.000479864208920902,
|
| 28723 |
+
"loss": 16.2421,
|
| 28724 |
+
"step": 82040
|
| 28725 |
+
},
|
| 28726 |
+
{
|
| 28727 |
+
"epoch": 0.12155668398817318,
|
| 28728 |
+
"grad_norm": 6.6875,
|
| 28729 |
+
"learning_rate": 0.0004798592699858944,
|
| 28730 |
+
"loss": 16.2096,
|
| 28731 |
+
"step": 82060
|
| 28732 |
+
},
|
| 28733 |
+
{
|
| 28734 |
+
"epoch": 0.12158631028210157,
|
| 28735 |
+
"grad_norm": 6.40625,
|
| 28736 |
+
"learning_rate": 0.00047985433105088683,
|
| 28737 |
+
"loss": 16.2556,
|
| 28738 |
+
"step": 82080
|
| 28739 |
+
},
|
| 28740 |
+
{
|
| 28741 |
+
"epoch": 0.12161593657602995,
|
| 28742 |
+
"grad_norm": 6.75,
|
| 28743 |
+
"learning_rate": 0.0004798493921158793,
|
| 28744 |
+
"loss": 16.183,
|
| 28745 |
+
"step": 82100
|
| 28746 |
+
},
|
| 28747 |
+
{
|
| 28748 |
+
"epoch": 0.12164556286995834,
|
| 28749 |
+
"grad_norm": 6.21875,
|
| 28750 |
+
"learning_rate": 0.00047984445318087173,
|
| 28751 |
+
"loss": 16.232,
|
| 28752 |
+
"step": 82120
|
| 28753 |
+
},
|
| 28754 |
+
{
|
| 28755 |
+
"epoch": 0.12167518916388673,
|
| 28756 |
+
"grad_norm": 7.3125,
|
| 28757 |
+
"learning_rate": 0.0004798395142458641,
|
| 28758 |
+
"loss": 16.2333,
|
| 28759 |
+
"step": 82140
|
| 28760 |
+
},
|
| 28761 |
+
{
|
| 28762 |
+
"epoch": 0.12170481545781513,
|
| 28763 |
+
"grad_norm": 6.15625,
|
| 28764 |
+
"learning_rate": 0.00047983457531085657,
|
| 28765 |
+
"loss": 16.1709,
|
| 28766 |
+
"step": 82160
|
| 28767 |
+
},
|
| 28768 |
+
{
|
| 28769 |
+
"epoch": 0.12173444175174351,
|
| 28770 |
+
"grad_norm": 6.96875,
|
| 28771 |
+
"learning_rate": 0.000479829636375849,
|
| 28772 |
+
"loss": 16.243,
|
| 28773 |
+
"step": 82180
|
| 28774 |
+
},
|
| 28775 |
+
{
|
| 28776 |
+
"epoch": 0.1217640680456719,
|
| 28777 |
+
"grad_norm": 6.40625,
|
| 28778 |
+
"learning_rate": 0.00047982469744084147,
|
| 28779 |
+
"loss": 16.2207,
|
| 28780 |
+
"step": 82200
|
| 28781 |
+
},
|
| 28782 |
+
{
|
| 28783 |
+
"epoch": 0.12179369433960029,
|
| 28784 |
+
"grad_norm": 6.90625,
|
| 28785 |
+
"learning_rate": 0.00047981975850583386,
|
| 28786 |
+
"loss": 16.2444,
|
| 28787 |
+
"step": 82220
|
| 28788 |
+
},
|
| 28789 |
+
{
|
| 28790 |
+
"epoch": 0.12182332063352867,
|
| 28791 |
+
"grad_norm": 6.65625,
|
| 28792 |
+
"learning_rate": 0.0004798148195708263,
|
| 28793 |
+
"loss": 16.1667,
|
| 28794 |
+
"step": 82240
|
| 28795 |
+
},
|
| 28796 |
+
{
|
| 28797 |
+
"epoch": 0.12185294692745706,
|
| 28798 |
+
"grad_norm": 6.53125,
|
| 28799 |
+
"learning_rate": 0.00047980988063581876,
|
| 28800 |
+
"loss": 16.1988,
|
| 28801 |
+
"step": 82260
|
| 28802 |
+
},
|
| 28803 |
+
{
|
| 28804 |
+
"epoch": 0.12188257322138545,
|
| 28805 |
+
"grad_norm": 6.59375,
|
| 28806 |
+
"learning_rate": 0.0004798049417008112,
|
| 28807 |
+
"loss": 16.216,
|
| 28808 |
+
"step": 82280
|
| 28809 |
+
},
|
| 28810 |
+
{
|
| 28811 |
+
"epoch": 0.12191219951531383,
|
| 28812 |
+
"grad_norm": 7.34375,
|
| 28813 |
+
"learning_rate": 0.0004798000027658036,
|
| 28814 |
+
"loss": 16.1921,
|
| 28815 |
+
"step": 82300
|
| 28816 |
+
},
|
| 28817 |
+
{
|
| 28818 |
+
"epoch": 0.12194182580924222,
|
| 28819 |
+
"grad_norm": 6.65625,
|
| 28820 |
+
"learning_rate": 0.0004797950638307961,
|
| 28821 |
+
"loss": 16.2033,
|
| 28822 |
+
"step": 82320
|
| 28823 |
+
},
|
| 28824 |
+
{
|
| 28825 |
+
"epoch": 0.1219714521031706,
|
| 28826 |
+
"grad_norm": 5.875,
|
| 28827 |
+
"learning_rate": 0.0004797901248957885,
|
| 28828 |
+
"loss": 16.1843,
|
| 28829 |
+
"step": 82340
|
| 28830 |
+
},
|
| 28831 |
+
{
|
| 28832 |
+
"epoch": 0.12200107839709899,
|
| 28833 |
+
"grad_norm": 6.5625,
|
| 28834 |
+
"learning_rate": 0.0004797851859607809,
|
| 28835 |
+
"loss": 16.1888,
|
| 28836 |
+
"step": 82360
|
| 28837 |
+
},
|
| 28838 |
+
{
|
| 28839 |
+
"epoch": 0.12203070469102738,
|
| 28840 |
+
"grad_norm": 5.6875,
|
| 28841 |
+
"learning_rate": 0.00047978024702577334,
|
| 28842 |
+
"loss": 16.1824,
|
| 28843 |
+
"step": 82380
|
| 28844 |
+
},
|
| 28845 |
+
{
|
| 28846 |
+
"epoch": 0.12206033098495576,
|
| 28847 |
+
"grad_norm": 6.125,
|
| 28848 |
+
"learning_rate": 0.0004797753080907658,
|
| 28849 |
+
"loss": 16.1561,
|
| 28850 |
+
"step": 82400
|
| 28851 |
+
},
|
| 28852 |
+
{
|
| 28853 |
+
"epoch": 0.12208995727888415,
|
| 28854 |
+
"grad_norm": 7.25,
|
| 28855 |
+
"learning_rate": 0.00047977036915575823,
|
| 28856 |
+
"loss": 16.2548,
|
| 28857 |
+
"step": 82420
|
| 28858 |
+
},
|
| 28859 |
+
{
|
| 28860 |
+
"epoch": 0.12211958357281254,
|
| 28861 |
+
"grad_norm": 8.0625,
|
| 28862 |
+
"learning_rate": 0.0004797654302207506,
|
| 28863 |
+
"loss": 16.2019,
|
| 28864 |
+
"step": 82440
|
| 28865 |
+
},
|
| 28866 |
+
{
|
| 28867 |
+
"epoch": 0.12214920986674092,
|
| 28868 |
+
"grad_norm": 7.1875,
|
| 28869 |
+
"learning_rate": 0.00047976049128574307,
|
| 28870 |
+
"loss": 16.1525,
|
| 28871 |
+
"step": 82460
|
| 28872 |
+
},
|
| 28873 |
+
{
|
| 28874 |
+
"epoch": 0.12217883616066932,
|
| 28875 |
+
"grad_norm": 7.03125,
|
| 28876 |
+
"learning_rate": 0.0004797555523507355,
|
| 28877 |
+
"loss": 16.2534,
|
| 28878 |
+
"step": 82480
|
| 28879 |
+
},
|
| 28880 |
+
{
|
| 28881 |
+
"epoch": 0.12220846245459771,
|
| 28882 |
+
"grad_norm": 6.9375,
|
| 28883 |
+
"learning_rate": 0.00047975061341572797,
|
| 28884 |
+
"loss": 16.1858,
|
| 28885 |
+
"step": 82500
|
| 28886 |
+
},
|
| 28887 |
+
{
|
| 28888 |
+
"epoch": 0.1222380887485261,
|
| 28889 |
+
"grad_norm": 6.6875,
|
| 28890 |
+
"learning_rate": 0.00047974567448072036,
|
| 28891 |
+
"loss": 16.1439,
|
| 28892 |
+
"step": 82520
|
| 28893 |
+
},
|
| 28894 |
+
{
|
| 28895 |
+
"epoch": 0.12226771504245448,
|
| 28896 |
+
"grad_norm": 6.4375,
|
| 28897 |
+
"learning_rate": 0.0004797407355457128,
|
| 28898 |
+
"loss": 16.2245,
|
| 28899 |
+
"step": 82540
|
| 28900 |
+
},
|
| 28901 |
+
{
|
| 28902 |
+
"epoch": 0.12229734133638287,
|
| 28903 |
+
"grad_norm": 6.46875,
|
| 28904 |
+
"learning_rate": 0.00047973579661070526,
|
| 28905 |
+
"loss": 16.1173,
|
| 28906 |
+
"step": 82560
|
| 28907 |
+
},
|
| 28908 |
+
{
|
| 28909 |
+
"epoch": 0.12232696763031126,
|
| 28910 |
+
"grad_norm": 6.96875,
|
| 28911 |
+
"learning_rate": 0.0004797308576756977,
|
| 28912 |
+
"loss": 16.1917,
|
| 28913 |
+
"step": 82580
|
| 28914 |
+
},
|
| 28915 |
+
{
|
| 28916 |
+
"epoch": 0.12235659392423964,
|
| 28917 |
+
"grad_norm": 6.46875,
|
| 28918 |
+
"learning_rate": 0.0004797259187406901,
|
| 28919 |
+
"loss": 16.2421,
|
| 28920 |
+
"step": 82600
|
| 28921 |
+
},
|
| 28922 |
+
{
|
| 28923 |
+
"epoch": 0.12238622021816803,
|
| 28924 |
+
"grad_norm": 6.875,
|
| 28925 |
+
"learning_rate": 0.0004797209798056826,
|
| 28926 |
+
"loss": 16.2,
|
| 28927 |
+
"step": 82620
|
| 28928 |
+
},
|
| 28929 |
+
{
|
| 28930 |
+
"epoch": 0.12241584651209642,
|
| 28931 |
+
"grad_norm": 6.28125,
|
| 28932 |
+
"learning_rate": 0.000479716040870675,
|
| 28933 |
+
"loss": 16.2038,
|
| 28934 |
+
"step": 82640
|
| 28935 |
+
},
|
| 28936 |
+
{
|
| 28937 |
+
"epoch": 0.1224454728060248,
|
| 28938 |
+
"grad_norm": 5.96875,
|
| 28939 |
+
"learning_rate": 0.00047971110193566744,
|
| 28940 |
+
"loss": 16.2384,
|
| 28941 |
+
"step": 82660
|
| 28942 |
+
},
|
| 28943 |
+
{
|
| 28944 |
+
"epoch": 0.12247509909995319,
|
| 28945 |
+
"grad_norm": 6.1875,
|
| 28946 |
+
"learning_rate": 0.00047970616300065984,
|
| 28947 |
+
"loss": 16.2127,
|
| 28948 |
+
"step": 82680
|
| 28949 |
+
},
|
| 28950 |
+
{
|
| 28951 |
+
"epoch": 0.12250472539388158,
|
| 28952 |
+
"grad_norm": 6.28125,
|
| 28953 |
+
"learning_rate": 0.0004797012240656523,
|
| 28954 |
+
"loss": 16.235,
|
| 28955 |
+
"step": 82700
|
| 28956 |
+
},
|
| 28957 |
+
{
|
| 28958 |
+
"epoch": 0.12253435168780996,
|
| 28959 |
+
"grad_norm": 6.125,
|
| 28960 |
+
"learning_rate": 0.00047969628513064473,
|
| 28961 |
+
"loss": 16.1783,
|
| 28962 |
+
"step": 82720
|
| 28963 |
+
},
|
| 28964 |
+
{
|
| 28965 |
+
"epoch": 0.12256397798173835,
|
| 28966 |
+
"grad_norm": 7.28125,
|
| 28967 |
+
"learning_rate": 0.0004796913461956371,
|
| 28968 |
+
"loss": 16.227,
|
| 28969 |
+
"step": 82740
|
| 28970 |
+
},
|
| 28971 |
+
{
|
| 28972 |
+
"epoch": 0.12259360427566673,
|
| 28973 |
+
"grad_norm": 6.15625,
|
| 28974 |
+
"learning_rate": 0.00047968640726062957,
|
| 28975 |
+
"loss": 16.2354,
|
| 28976 |
+
"step": 82760
|
| 28977 |
+
},
|
| 28978 |
+
{
|
| 28979 |
+
"epoch": 0.12262323056959512,
|
| 28980 |
+
"grad_norm": 6.125,
|
| 28981 |
+
"learning_rate": 0.000479681468325622,
|
| 28982 |
+
"loss": 16.1881,
|
| 28983 |
+
"step": 82780
|
| 28984 |
+
},
|
| 28985 |
+
{
|
| 28986 |
+
"epoch": 0.12265285686352352,
|
| 28987 |
+
"grad_norm": 6.375,
|
| 28988 |
+
"learning_rate": 0.00047967652939061447,
|
| 28989 |
+
"loss": 16.2082,
|
| 28990 |
+
"step": 82800
|
| 28991 |
+
},
|
| 28992 |
+
{
|
| 28993 |
+
"epoch": 0.12268248315745191,
|
| 28994 |
+
"grad_norm": 6.375,
|
| 28995 |
+
"learning_rate": 0.00047967159045560686,
|
| 28996 |
+
"loss": 16.1181,
|
| 28997 |
+
"step": 82820
|
| 28998 |
+
},
|
| 28999 |
+
{
|
| 29000 |
+
"epoch": 0.1227121094513803,
|
| 29001 |
+
"grad_norm": 6.59375,
|
| 29002 |
+
"learning_rate": 0.0004796666515205993,
|
| 29003 |
+
"loss": 16.1534,
|
| 29004 |
+
"step": 82840
|
| 29005 |
+
},
|
| 29006 |
+
{
|
| 29007 |
+
"epoch": 0.12274173574530868,
|
| 29008 |
+
"grad_norm": 6.4375,
|
| 29009 |
+
"learning_rate": 0.00047966171258559176,
|
| 29010 |
+
"loss": 16.1316,
|
| 29011 |
+
"step": 82860
|
| 29012 |
+
},
|
| 29013 |
+
{
|
| 29014 |
+
"epoch": 0.12277136203923707,
|
| 29015 |
+
"grad_norm": 6.5,
|
| 29016 |
+
"learning_rate": 0.0004796567736505842,
|
| 29017 |
+
"loss": 16.1864,
|
| 29018 |
+
"step": 82880
|
| 29019 |
+
},
|
| 29020 |
+
{
|
| 29021 |
+
"epoch": 0.12280098833316545,
|
| 29022 |
+
"grad_norm": 7.15625,
|
| 29023 |
+
"learning_rate": 0.0004796518347155766,
|
| 29024 |
+
"loss": 16.1988,
|
| 29025 |
+
"step": 82900
|
| 29026 |
+
},
|
| 29027 |
+
{
|
| 29028 |
+
"epoch": 0.12283061462709384,
|
| 29029 |
+
"grad_norm": 6.5625,
|
| 29030 |
+
"learning_rate": 0.0004796468957805691,
|
| 29031 |
+
"loss": 16.2569,
|
| 29032 |
+
"step": 82920
|
| 29033 |
+
},
|
| 29034 |
+
{
|
| 29035 |
+
"epoch": 0.12286024092102223,
|
| 29036 |
+
"grad_norm": 6.40625,
|
| 29037 |
+
"learning_rate": 0.0004796419568455615,
|
| 29038 |
+
"loss": 16.1792,
|
| 29039 |
+
"step": 82940
|
| 29040 |
+
},
|
| 29041 |
+
{
|
| 29042 |
+
"epoch": 0.12288986721495061,
|
| 29043 |
+
"grad_norm": 6.53125,
|
| 29044 |
+
"learning_rate": 0.00047963701791055394,
|
| 29045 |
+
"loss": 16.2019,
|
| 29046 |
+
"step": 82960
|
| 29047 |
+
},
|
| 29048 |
+
{
|
| 29049 |
+
"epoch": 0.122919493508879,
|
| 29050 |
+
"grad_norm": 6.6875,
|
| 29051 |
+
"learning_rate": 0.00047963207897554634,
|
| 29052 |
+
"loss": 16.1815,
|
| 29053 |
+
"step": 82980
|
| 29054 |
+
},
|
| 29055 |
+
{
|
| 29056 |
+
"epoch": 0.12294911980280739,
|
| 29057 |
+
"grad_norm": 6.75,
|
| 29058 |
+
"learning_rate": 0.00047962714004053884,
|
| 29059 |
+
"loss": 16.2309,
|
| 29060 |
+
"step": 83000
|
| 29061 |
}
|
| 29062 |
],
|
| 29063 |
"logging_steps": 20,
|
|
|
|
| 29077 |
"attributes": {}
|
| 29078 |
}
|
| 29079 |
},
|
| 29080 |
+
"total_flos": 6.102608488091877e+19,
|
| 29081 |
"train_batch_size": 48,
|
| 29082 |
"trial_name": null,
|
| 29083 |
"trial_params": null
|
last-checkpoint/training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5432
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:78e73c5569e6c1326aedcb241444fa9deb29154b44bf64880d75f7e6d9e90132
|
| 3 |
size 5432
|