Training in progress, step 3200, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 891644712
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:acde9c308eddee03ae7ba07078f126ecbfbf189649125ba5e28eb98b2eb7a498
|
| 3 |
size 891644712
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1783444794
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:69a94b5c388ad02488cfa16d32d05e88a60512f1756f067232047f67b1bbc1d7
|
| 3 |
size 1783444794
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7a9ec26c805fc0805503b452ed1d7a3e08af9f21c7d994d43e4705d7fe6b69c0
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6ae1375ade70d0aa6318948d7a88aecd14c5ea3b408d7a30a7af5ef14aa83d44
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 1.
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -10767,6 +10767,454 @@
|
|
| 10767 |
"learning_rate": 1.128797253211723e-05,
|
| 10768 |
"loss": 0.3036,
|
| 10769 |
"step": 3072
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10770 |
}
|
| 10771 |
],
|
| 10772 |
"logging_steps": 2,
|
|
@@ -10786,7 +11234,7 @@
|
|
| 10786 |
"attributes": {}
|
| 10787 |
}
|
| 10788 |
},
|
| 10789 |
-
"total_flos":
|
| 10790 |
"train_batch_size": 8,
|
| 10791 |
"trial_name": null,
|
| 10792 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.7748197448696617,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 3200,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 10767 |
"learning_rate": 1.128797253211723e-05,
|
| 10768 |
"loss": 0.3036,
|
| 10769 |
"step": 3072
|
| 10770 |
+
},
|
| 10771 |
+
{
|
| 10772 |
+
"epoch": 1.7049362174154188,
|
| 10773 |
+
"grad_norm": 0.24164661765098572,
|
| 10774 |
+
"learning_rate": 1.120518952483819e-05,
|
| 10775 |
+
"loss": 0.3209,
|
| 10776 |
+
"step": 3074
|
| 10777 |
+
},
|
| 10778 |
+
{
|
| 10779 |
+
"epoch": 1.7060454797559623,
|
| 10780 |
+
"grad_norm": 0.34098076820373535,
|
| 10781 |
+
"learning_rate": 1.1122693170195164e-05,
|
| 10782 |
+
"loss": 0.446,
|
| 10783 |
+
"step": 3076
|
| 10784 |
+
},
|
| 10785 |
+
{
|
| 10786 |
+
"epoch": 1.7071547420965059,
|
| 10787 |
+
"grad_norm": 0.3181568384170532,
|
| 10788 |
+
"learning_rate": 1.1040483734507789e-05,
|
| 10789 |
+
"loss": 0.3758,
|
| 10790 |
+
"step": 3078
|
| 10791 |
+
},
|
| 10792 |
+
{
|
| 10793 |
+
"epoch": 1.7082640044370494,
|
| 10794 |
+
"grad_norm": 0.2597646415233612,
|
| 10795 |
+
"learning_rate": 1.095856148316936e-05,
|
| 10796 |
+
"loss": 0.35,
|
| 10797 |
+
"step": 3080
|
| 10798 |
+
},
|
| 10799 |
+
{
|
| 10800 |
+
"epoch": 1.709373266777593,
|
| 10801 |
+
"grad_norm": 0.27917012572288513,
|
| 10802 |
+
"learning_rate": 1.087692668064616e-05,
|
| 10803 |
+
"loss": 0.3619,
|
| 10804 |
+
"step": 3082
|
| 10805 |
+
},
|
| 10806 |
+
{
|
| 10807 |
+
"epoch": 1.7104825291181365,
|
| 10808 |
+
"grad_norm": 0.2992468774318695,
|
| 10809 |
+
"learning_rate": 1.0795579590476445e-05,
|
| 10810 |
+
"loss": 0.37,
|
| 10811 |
+
"step": 3084
|
| 10812 |
+
},
|
| 10813 |
+
{
|
| 10814 |
+
"epoch": 1.71159179145868,
|
| 10815 |
+
"grad_norm": 0.3110543191432953,
|
| 10816 |
+
"learning_rate": 1.0714520475269652e-05,
|
| 10817 |
+
"loss": 0.3318,
|
| 10818 |
+
"step": 3086
|
| 10819 |
+
},
|
| 10820 |
+
{
|
| 10821 |
+
"epoch": 1.7127010537992235,
|
| 10822 |
+
"grad_norm": 0.393775075674057,
|
| 10823 |
+
"learning_rate": 1.0633749596705645e-05,
|
| 10824 |
+
"loss": 0.4044,
|
| 10825 |
+
"step": 3088
|
| 10826 |
+
},
|
| 10827 |
+
{
|
| 10828 |
+
"epoch": 1.713810316139767,
|
| 10829 |
+
"grad_norm": 0.32126861810684204,
|
| 10830 |
+
"learning_rate": 1.055326721553368e-05,
|
| 10831 |
+
"loss": 0.4077,
|
| 10832 |
+
"step": 3090
|
| 10833 |
+
},
|
| 10834 |
+
{
|
| 10835 |
+
"epoch": 1.7149195784803106,
|
| 10836 |
+
"grad_norm": 0.316629558801651,
|
| 10837 |
+
"learning_rate": 1.0473073591571758e-05,
|
| 10838 |
+
"loss": 0.3887,
|
| 10839 |
+
"step": 3092
|
| 10840 |
+
},
|
| 10841 |
+
{
|
| 10842 |
+
"epoch": 1.7160288408208542,
|
| 10843 |
+
"grad_norm": 0.24358634650707245,
|
| 10844 |
+
"learning_rate": 1.0393168983705626e-05,
|
| 10845 |
+
"loss": 0.3439,
|
| 10846 |
+
"step": 3094
|
| 10847 |
+
},
|
| 10848 |
+
{
|
| 10849 |
+
"epoch": 1.7171381031613977,
|
| 10850 |
+
"grad_norm": 0.3309425413608551,
|
| 10851 |
+
"learning_rate": 1.0313553649888074e-05,
|
| 10852 |
+
"loss": 0.3894,
|
| 10853 |
+
"step": 3096
|
| 10854 |
+
},
|
| 10855 |
+
{
|
| 10856 |
+
"epoch": 1.7182473655019412,
|
| 10857 |
+
"grad_norm": 0.3401065468788147,
|
| 10858 |
+
"learning_rate": 1.0234227847138011e-05,
|
| 10859 |
+
"loss": 0.376,
|
| 10860 |
+
"step": 3098
|
| 10861 |
+
},
|
| 10862 |
+
{
|
| 10863 |
+
"epoch": 1.7193566278424848,
|
| 10864 |
+
"grad_norm": 0.33251863718032837,
|
| 10865 |
+
"learning_rate": 1.0155191831539645e-05,
|
| 10866 |
+
"loss": 0.4203,
|
| 10867 |
+
"step": 3100
|
| 10868 |
+
},
|
| 10869 |
+
{
|
| 10870 |
+
"epoch": 1.7204658901830283,
|
| 10871 |
+
"grad_norm": 0.3005315363407135,
|
| 10872 |
+
"learning_rate": 1.0076445858241679e-05,
|
| 10873 |
+
"loss": 0.2993,
|
| 10874 |
+
"step": 3102
|
| 10875 |
+
},
|
| 10876 |
+
{
|
| 10877 |
+
"epoch": 1.7215751525235718,
|
| 10878 |
+
"grad_norm": 0.2971371114253998,
|
| 10879 |
+
"learning_rate": 9.997990181456528e-06,
|
| 10880 |
+
"loss": 0.3881,
|
| 10881 |
+
"step": 3104
|
| 10882 |
+
},
|
| 10883 |
+
{
|
| 10884 |
+
"epoch": 1.7226844148641154,
|
| 10885 |
+
"grad_norm": 0.2904921770095825,
|
| 10886 |
+
"learning_rate": 9.919825054459442e-06,
|
| 10887 |
+
"loss": 0.3812,
|
| 10888 |
+
"step": 3106
|
| 10889 |
+
},
|
| 10890 |
+
{
|
| 10891 |
+
"epoch": 1.723793677204659,
|
| 10892 |
+
"grad_norm": 0.3357609212398529,
|
| 10893 |
+
"learning_rate": 9.841950729587668e-06,
|
| 10894 |
+
"loss": 0.4121,
|
| 10895 |
+
"step": 3108
|
| 10896 |
+
},
|
| 10897 |
+
{
|
| 10898 |
+
"epoch": 1.7249029395452025,
|
| 10899 |
+
"grad_norm": 0.2711123526096344,
|
| 10900 |
+
"learning_rate": 9.764367458239677e-06,
|
| 10901 |
+
"loss": 0.3789,
|
| 10902 |
+
"step": 3110
|
| 10903 |
+
},
|
| 10904 |
+
{
|
| 10905 |
+
"epoch": 1.726012201885746,
|
| 10906 |
+
"grad_norm": 0.24408982694149017,
|
| 10907 |
+
"learning_rate": 9.687075490874376e-06,
|
| 10908 |
+
"loss": 0.3457,
|
| 10909 |
+
"step": 3112
|
| 10910 |
+
},
|
| 10911 |
+
{
|
| 10912 |
+
"epoch": 1.7271214642262895,
|
| 10913 |
+
"grad_norm": 0.25458860397338867,
|
| 10914 |
+
"learning_rate": 9.61007507701024e-06,
|
| 10915 |
+
"loss": 0.3098,
|
| 10916 |
+
"step": 3114
|
| 10917 |
+
},
|
| 10918 |
+
{
|
| 10919 |
+
"epoch": 1.728230726566833,
|
| 10920 |
+
"grad_norm": 0.2704317569732666,
|
| 10921 |
+
"learning_rate": 9.533366465224514e-06,
|
| 10922 |
+
"loss": 0.3471,
|
| 10923 |
+
"step": 3116
|
| 10924 |
+
},
|
| 10925 |
+
{
|
| 10926 |
+
"epoch": 1.7293399889073766,
|
| 10927 |
+
"grad_norm": 0.2258918136358261,
|
| 10928 |
+
"learning_rate": 9.456949903152478e-06,
|
| 10929 |
+
"loss": 0.4087,
|
| 10930 |
+
"step": 3118
|
| 10931 |
+
},
|
| 10932 |
+
{
|
| 10933 |
+
"epoch": 1.7304492512479202,
|
| 10934 |
+
"grad_norm": 0.20709431171417236,
|
| 10935 |
+
"learning_rate": 9.38082563748659e-06,
|
| 10936 |
+
"loss": 0.3383,
|
| 10937 |
+
"step": 3120
|
| 10938 |
+
},
|
| 10939 |
+
{
|
| 10940 |
+
"epoch": 1.7315585135884637,
|
| 10941 |
+
"grad_norm": 0.24197116494178772,
|
| 10942 |
+
"learning_rate": 9.30499391397568e-06,
|
| 10943 |
+
"loss": 0.3323,
|
| 10944 |
+
"step": 3122
|
| 10945 |
+
},
|
| 10946 |
+
{
|
| 10947 |
+
"epoch": 1.7326677759290072,
|
| 10948 |
+
"grad_norm": 0.30395829677581787,
|
| 10949 |
+
"learning_rate": 9.229454977424157e-06,
|
| 10950 |
+
"loss": 0.378,
|
| 10951 |
+
"step": 3124
|
| 10952 |
+
},
|
| 10953 |
+
{
|
| 10954 |
+
"epoch": 1.7337770382695508,
|
| 10955 |
+
"grad_norm": 0.2813956141471863,
|
| 10956 |
+
"learning_rate": 9.154209071691289e-06,
|
| 10957 |
+
"loss": 0.3326,
|
| 10958 |
+
"step": 3126
|
| 10959 |
+
},
|
| 10960 |
+
{
|
| 10961 |
+
"epoch": 1.7348863006100943,
|
| 10962 |
+
"grad_norm": 0.3281961679458618,
|
| 10963 |
+
"learning_rate": 9.079256439690354e-06,
|
| 10964 |
+
"loss": 0.3518,
|
| 10965 |
+
"step": 3128
|
| 10966 |
+
},
|
| 10967 |
+
{
|
| 10968 |
+
"epoch": 1.7359955629506378,
|
| 10969 |
+
"grad_norm": 0.3628225326538086,
|
| 10970 |
+
"learning_rate": 9.004597323387798e-06,
|
| 10971 |
+
"loss": 0.4188,
|
| 10972 |
+
"step": 3130
|
| 10973 |
+
},
|
| 10974 |
+
{
|
| 10975 |
+
"epoch": 1.7371048252911814,
|
| 10976 |
+
"grad_norm": 0.3164060711860657,
|
| 10977 |
+
"learning_rate": 8.930231963802637e-06,
|
| 10978 |
+
"loss": 0.3381,
|
| 10979 |
+
"step": 3132
|
| 10980 |
+
},
|
| 10981 |
+
{
|
| 10982 |
+
"epoch": 1.738214087631725,
|
| 10983 |
+
"grad_norm": 0.27229782938957214,
|
| 10984 |
+
"learning_rate": 8.856160601005459e-06,
|
| 10985 |
+
"loss": 0.3767,
|
| 10986 |
+
"step": 3134
|
| 10987 |
+
},
|
| 10988 |
+
{
|
| 10989 |
+
"epoch": 1.7393233499722685,
|
| 10990 |
+
"grad_norm": 0.34024956822395325,
|
| 10991 |
+
"learning_rate": 8.782383474117838e-06,
|
| 10992 |
+
"loss": 0.4573,
|
| 10993 |
+
"step": 3136
|
| 10994 |
+
},
|
| 10995 |
+
{
|
| 10996 |
+
"epoch": 1.740432612312812,
|
| 10997 |
+
"grad_norm": 0.32661277055740356,
|
| 10998 |
+
"learning_rate": 8.708900821311405e-06,
|
| 10999 |
+
"loss": 0.5145,
|
| 11000 |
+
"step": 3138
|
| 11001 |
+
},
|
| 11002 |
+
{
|
| 11003 |
+
"epoch": 1.7415418746533555,
|
| 11004 |
+
"grad_norm": 0.24198585748672485,
|
| 11005 |
+
"learning_rate": 8.635712879807222e-06,
|
| 11006 |
+
"loss": 0.2969,
|
| 11007 |
+
"step": 3140
|
| 11008 |
+
},
|
| 11009 |
+
{
|
| 11010 |
+
"epoch": 1.742651136993899,
|
| 11011 |
+
"grad_norm": 0.37718066573143005,
|
| 11012 |
+
"learning_rate": 8.562819885874884e-06,
|
| 11013 |
+
"loss": 0.5287,
|
| 11014 |
+
"step": 3142
|
| 11015 |
+
},
|
| 11016 |
+
{
|
| 11017 |
+
"epoch": 1.7437603993344426,
|
| 11018 |
+
"grad_norm": 0.3092913329601288,
|
| 11019 |
+
"learning_rate": 8.490222074831845e-06,
|
| 11020 |
+
"loss": 0.3534,
|
| 11021 |
+
"step": 3144
|
| 11022 |
+
},
|
| 11023 |
+
{
|
| 11024 |
+
"epoch": 1.7448696616749861,
|
| 11025 |
+
"grad_norm": 0.2609056830406189,
|
| 11026 |
+
"learning_rate": 8.417919681042652e-06,
|
| 11027 |
+
"loss": 0.3774,
|
| 11028 |
+
"step": 3146
|
| 11029 |
+
},
|
| 11030 |
+
{
|
| 11031 |
+
"epoch": 1.7459789240155297,
|
| 11032 |
+
"grad_norm": 0.3176262080669403,
|
| 11033 |
+
"learning_rate": 8.345912937918121e-06,
|
| 11034 |
+
"loss": 0.3448,
|
| 11035 |
+
"step": 3148
|
| 11036 |
+
},
|
| 11037 |
+
{
|
| 11038 |
+
"epoch": 1.7470881863560732,
|
| 11039 |
+
"grad_norm": 0.3105904757976532,
|
| 11040 |
+
"learning_rate": 8.274202077914705e-06,
|
| 11041 |
+
"loss": 0.3949,
|
| 11042 |
+
"step": 3150
|
| 11043 |
+
},
|
| 11044 |
+
{
|
| 11045 |
+
"epoch": 1.7481974486966168,
|
| 11046 |
+
"grad_norm": 0.3904447555541992,
|
| 11047 |
+
"learning_rate": 8.20278733253359e-06,
|
| 11048 |
+
"loss": 0.45,
|
| 11049 |
+
"step": 3152
|
| 11050 |
+
},
|
| 11051 |
+
{
|
| 11052 |
+
"epoch": 1.7493067110371603,
|
| 11053 |
+
"grad_norm": 0.27570340037345886,
|
| 11054 |
+
"learning_rate": 8.13166893232008e-06,
|
| 11055 |
+
"loss": 0.4282,
|
| 11056 |
+
"step": 3154
|
| 11057 |
+
},
|
| 11058 |
+
{
|
| 11059 |
+
"epoch": 1.7504159733777038,
|
| 11060 |
+
"grad_norm": 0.2809303104877472,
|
| 11061 |
+
"learning_rate": 8.060847106862779e-06,
|
| 11062 |
+
"loss": 0.3358,
|
| 11063 |
+
"step": 3156
|
| 11064 |
+
},
|
| 11065 |
+
{
|
| 11066 |
+
"epoch": 1.7515252357182474,
|
| 11067 |
+
"grad_norm": 0.43461307883262634,
|
| 11068 |
+
"learning_rate": 7.990322084792867e-06,
|
| 11069 |
+
"loss": 0.3352,
|
| 11070 |
+
"step": 3158
|
| 11071 |
+
},
|
| 11072 |
+
{
|
| 11073 |
+
"epoch": 1.752634498058791,
|
| 11074 |
+
"grad_norm": 0.3733227550983429,
|
| 11075 |
+
"learning_rate": 7.92009409378337e-06,
|
| 11076 |
+
"loss": 0.4386,
|
| 11077 |
+
"step": 3160
|
| 11078 |
+
},
|
| 11079 |
+
{
|
| 11080 |
+
"epoch": 1.7537437603993344,
|
| 11081 |
+
"grad_norm": 0.22569668292999268,
|
| 11082 |
+
"learning_rate": 7.850163360548424e-06,
|
| 11083 |
+
"loss": 0.2785,
|
| 11084 |
+
"step": 3162
|
| 11085 |
+
},
|
| 11086 |
+
{
|
| 11087 |
+
"epoch": 1.754853022739878,
|
| 11088 |
+
"grad_norm": 0.286538690328598,
|
| 11089 |
+
"learning_rate": 7.780530110842565e-06,
|
| 11090 |
+
"loss": 0.312,
|
| 11091 |
+
"step": 3164
|
| 11092 |
+
},
|
| 11093 |
+
{
|
| 11094 |
+
"epoch": 1.7559622850804215,
|
| 11095 |
+
"grad_norm": 0.2738610804080963,
|
| 11096 |
+
"learning_rate": 7.711194569459934e-06,
|
| 11097 |
+
"loss": 0.3244,
|
| 11098 |
+
"step": 3166
|
| 11099 |
+
},
|
| 11100 |
+
{
|
| 11101 |
+
"epoch": 1.757071547420965,
|
| 11102 |
+
"grad_norm": 0.30075690150260925,
|
| 11103 |
+
"learning_rate": 7.642156960233592e-06,
|
| 11104 |
+
"loss": 0.3691,
|
| 11105 |
+
"step": 3168
|
| 11106 |
+
},
|
| 11107 |
+
{
|
| 11108 |
+
"epoch": 1.7581808097615086,
|
| 11109 |
+
"grad_norm": 0.2853529453277588,
|
| 11110 |
+
"learning_rate": 7.573417506034852e-06,
|
| 11111 |
+
"loss": 0.3259,
|
| 11112 |
+
"step": 3170
|
| 11113 |
+
},
|
| 11114 |
+
{
|
| 11115 |
+
"epoch": 1.7592900721020521,
|
| 11116 |
+
"grad_norm": 0.23462392389774323,
|
| 11117 |
+
"learning_rate": 7.504976428772437e-06,
|
| 11118 |
+
"loss": 0.3671,
|
| 11119 |
+
"step": 3172
|
| 11120 |
+
},
|
| 11121 |
+
{
|
| 11122 |
+
"epoch": 1.7603993344425957,
|
| 11123 |
+
"grad_norm": 0.365106999874115,
|
| 11124 |
+
"learning_rate": 7.436833949391853e-06,
|
| 11125 |
+
"loss": 0.3698,
|
| 11126 |
+
"step": 3174
|
| 11127 |
+
},
|
| 11128 |
+
{
|
| 11129 |
+
"epoch": 1.7615085967831392,
|
| 11130 |
+
"grad_norm": 0.2944175899028778,
|
| 11131 |
+
"learning_rate": 7.368990287874711e-06,
|
| 11132 |
+
"loss": 0.3515,
|
| 11133 |
+
"step": 3176
|
| 11134 |
+
},
|
| 11135 |
+
{
|
| 11136 |
+
"epoch": 1.7626178591236827,
|
| 11137 |
+
"grad_norm": 0.2920864224433899,
|
| 11138 |
+
"learning_rate": 7.301445663237861e-06,
|
| 11139 |
+
"loss": 0.3424,
|
| 11140 |
+
"step": 3178
|
| 11141 |
+
},
|
| 11142 |
+
{
|
| 11143 |
+
"epoch": 1.7637271214642263,
|
| 11144 |
+
"grad_norm": 0.26654571294784546,
|
| 11145 |
+
"learning_rate": 7.234200293532889e-06,
|
| 11146 |
+
"loss": 0.3553,
|
| 11147 |
+
"step": 3180
|
| 11148 |
+
},
|
| 11149 |
+
{
|
| 11150 |
+
"epoch": 1.7648363838047698,
|
| 11151 |
+
"grad_norm": 0.2544094920158386,
|
| 11152 |
+
"learning_rate": 7.167254395845202e-06,
|
| 11153 |
+
"loss": 0.3715,
|
| 11154 |
+
"step": 3182
|
| 11155 |
+
},
|
| 11156 |
+
{
|
| 11157 |
+
"epoch": 1.7659456461453134,
|
| 11158 |
+
"grad_norm": 0.2914319932460785,
|
| 11159 |
+
"learning_rate": 7.1006081862935444e-06,
|
| 11160 |
+
"loss": 0.4023,
|
| 11161 |
+
"step": 3184
|
| 11162 |
+
},
|
| 11163 |
+
{
|
| 11164 |
+
"epoch": 1.767054908485857,
|
| 11165 |
+
"grad_norm": 0.3055804371833801,
|
| 11166 |
+
"learning_rate": 7.034261880029114e-06,
|
| 11167 |
+
"loss": 0.3967,
|
| 11168 |
+
"step": 3186
|
| 11169 |
+
},
|
| 11170 |
+
{
|
| 11171 |
+
"epoch": 1.7681641708264004,
|
| 11172 |
+
"grad_norm": 0.2863101661205292,
|
| 11173 |
+
"learning_rate": 6.968215691234936e-06,
|
| 11174 |
+
"loss": 0.3853,
|
| 11175 |
+
"step": 3188
|
| 11176 |
+
},
|
| 11177 |
+
{
|
| 11178 |
+
"epoch": 1.769273433166944,
|
| 11179 |
+
"grad_norm": 0.28304606676101685,
|
| 11180 |
+
"learning_rate": 6.902469833125236e-06,
|
| 11181 |
+
"loss": 0.3937,
|
| 11182 |
+
"step": 3190
|
| 11183 |
+
},
|
| 11184 |
+
{
|
| 11185 |
+
"epoch": 1.7703826955074875,
|
| 11186 |
+
"grad_norm": 0.2828314006328583,
|
| 11187 |
+
"learning_rate": 6.837024517944657e-06,
|
| 11188 |
+
"loss": 0.3907,
|
| 11189 |
+
"step": 3192
|
| 11190 |
+
},
|
| 11191 |
+
{
|
| 11192 |
+
"epoch": 1.771491957848031,
|
| 11193 |
+
"grad_norm": 0.2963877022266388,
|
| 11194 |
+
"learning_rate": 6.77187995696763e-06,
|
| 11195 |
+
"loss": 0.3885,
|
| 11196 |
+
"step": 3194
|
| 11197 |
+
},
|
| 11198 |
+
{
|
| 11199 |
+
"epoch": 1.7726012201885746,
|
| 11200 |
+
"grad_norm": 0.24497413635253906,
|
| 11201 |
+
"learning_rate": 6.707036360497632e-06,
|
| 11202 |
+
"loss": 0.4195,
|
| 11203 |
+
"step": 3196
|
| 11204 |
+
},
|
| 11205 |
+
{
|
| 11206 |
+
"epoch": 1.7737104825291181,
|
| 11207 |
+
"grad_norm": 0.25655171275138855,
|
| 11208 |
+
"learning_rate": 6.642493937866623e-06,
|
| 11209 |
+
"loss": 0.3315,
|
| 11210 |
+
"step": 3198
|
| 11211 |
+
},
|
| 11212 |
+
{
|
| 11213 |
+
"epoch": 1.7748197448696617,
|
| 11214 |
+
"grad_norm": 0.3175029456615448,
|
| 11215 |
+
"learning_rate": 6.578252897434223e-06,
|
| 11216 |
+
"loss": 0.464,
|
| 11217 |
+
"step": 3200
|
| 11218 |
}
|
| 11219 |
],
|
| 11220 |
"logging_steps": 2,
|
|
|
|
| 11234 |
"attributes": {}
|
| 11235 |
}
|
| 11236 |
},
|
| 11237 |
+
"total_flos": 7794204280750080.0,
|
| 11238 |
"train_batch_size": 8,
|
| 11239 |
"trial_name": null,
|
| 11240 |
"trial_params": null
|