Training in progress, step 1900, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 671149168
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a97696a92cabb745ba6cadf2fcc30794ffbc03c91397a016c61944f979c42d0e
|
| 3 |
size 671149168
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 179316182
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ad721c15307c50724acd6ac4b18952a24ff74a5cf21f65080c486168ba9fcdd8
|
| 3 |
size 179316182
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:431a2334446ca149d087ee53da191415d4c95211b38709b7a8404d805dee4327
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 2080
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:536e3a5de3004cb57f6febbf7190ef605673677b3a7bfb0620dc7718281289d6
|
| 3 |
size 2080
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 100,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -12759,6 +12759,714 @@
|
|
| 12759 |
"eval_samples_per_second": 2.868,
|
| 12760 |
"eval_steps_per_second": 1.434,
|
| 12761 |
"step": 1800
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12762 |
}
|
| 12763 |
],
|
| 12764 |
"logging_steps": 1,
|
|
@@ -12778,7 +13486,7 @@
|
|
| 12778 |
"attributes": {}
|
| 12779 |
}
|
| 12780 |
},
|
| 12781 |
-
"total_flos": 3.
|
| 12782 |
"train_batch_size": 2,
|
| 12783 |
"trial_name": null,
|
| 12784 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.49517852488923636,
|
| 5 |
"eval_steps": 100,
|
| 6 |
+
"global_step": 1900,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 12759 |
"eval_samples_per_second": 2.868,
|
| 12760 |
"eval_steps_per_second": 1.434,
|
| 12761 |
"step": 1800
|
| 12762 |
+
},
|
| 12763 |
+
{
|
| 12764 |
+
"epoch": 0.4693771175397446,
|
| 12765 |
+
"grad_norm": 10.37785816192627,
|
| 12766 |
+
"learning_rate": 0.00018848585023079067,
|
| 12767 |
+
"loss": 5.1423,
|
| 12768 |
+
"step": 1801
|
| 12769 |
+
},
|
| 12770 |
+
{
|
| 12771 |
+
"epoch": 0.4696377378160021,
|
| 12772 |
+
"grad_norm": 13.957540512084961,
|
| 12773 |
+
"learning_rate": 0.00018847310275305063,
|
| 12774 |
+
"loss": 4.527,
|
| 12775 |
+
"step": 1802
|
| 12776 |
+
},
|
| 12777 |
+
{
|
| 12778 |
+
"epoch": 0.4698983580922596,
|
| 12779 |
+
"grad_norm": 16.043025970458984,
|
| 12780 |
+
"learning_rate": 0.00018846034072339535,
|
| 12781 |
+
"loss": 5.4377,
|
| 12782 |
+
"step": 1803
|
| 12783 |
+
},
|
| 12784 |
+
{
|
| 12785 |
+
"epoch": 0.47015897836851706,
|
| 12786 |
+
"grad_norm": 12.010665893554688,
|
| 12787 |
+
"learning_rate": 0.00018844757869374007,
|
| 12788 |
+
"loss": 4.7116,
|
| 12789 |
+
"step": 1804
|
| 12790 |
+
},
|
| 12791 |
+
{
|
| 12792 |
+
"epoch": 0.47041959864477456,
|
| 12793 |
+
"grad_norm": 14.77712631225586,
|
| 12794 |
+
"learning_rate": 0.0001884348166640848,
|
| 12795 |
+
"loss": 5.9034,
|
| 12796 |
+
"step": 1805
|
| 12797 |
+
},
|
| 12798 |
+
{
|
| 12799 |
+
"epoch": 0.47068021892103207,
|
| 12800 |
+
"grad_norm": 10.91681957244873,
|
| 12801 |
+
"learning_rate": 0.00018842202553059906,
|
| 12802 |
+
"loss": 5.0634,
|
| 12803 |
+
"step": 1806
|
| 12804 |
+
},
|
| 12805 |
+
{
|
| 12806 |
+
"epoch": 0.4709408391972896,
|
| 12807 |
+
"grad_norm": 12.498564720153809,
|
| 12808 |
+
"learning_rate": 0.00018840924894902855,
|
| 12809 |
+
"loss": 5.6897,
|
| 12810 |
+
"step": 1807
|
| 12811 |
+
},
|
| 12812 |
+
{
|
| 12813 |
+
"epoch": 0.471201459473547,
|
| 12814 |
+
"grad_norm": 15.501856803894043,
|
| 12815 |
+
"learning_rate": 0.00018839645781554282,
|
| 12816 |
+
"loss": 3.1788,
|
| 12817 |
+
"step": 1808
|
| 12818 |
+
},
|
| 12819 |
+
{
|
| 12820 |
+
"epoch": 0.47146207974980453,
|
| 12821 |
+
"grad_norm": 13.191068649291992,
|
| 12822 |
+
"learning_rate": 0.00018838365213014185,
|
| 12823 |
+
"loss": 4.8908,
|
| 12824 |
+
"step": 1809
|
| 12825 |
+
},
|
| 12826 |
+
{
|
| 12827 |
+
"epoch": 0.47172270002606204,
|
| 12828 |
+
"grad_norm": 18.41266441345215,
|
| 12829 |
+
"learning_rate": 0.00018837086099665612,
|
| 12830 |
+
"loss": 5.3714,
|
| 12831 |
+
"step": 1810
|
| 12832 |
+
},
|
| 12833 |
+
{
|
| 12834 |
+
"epoch": 0.47198332030231954,
|
| 12835 |
+
"grad_norm": 15.97313117980957,
|
| 12836 |
+
"learning_rate": 0.00018835805531125516,
|
| 12837 |
+
"loss": 5.7231,
|
| 12838 |
+
"step": 1811
|
| 12839 |
+
},
|
| 12840 |
+
{
|
| 12841 |
+
"epoch": 0.472243940578577,
|
| 12842 |
+
"grad_norm": 12.212700843811035,
|
| 12843 |
+
"learning_rate": 0.00018834523507393897,
|
| 12844 |
+
"loss": 5.0301,
|
| 12845 |
+
"step": 1812
|
| 12846 |
+
},
|
| 12847 |
+
{
|
| 12848 |
+
"epoch": 0.4725045608548345,
|
| 12849 |
+
"grad_norm": 218.50746154785156,
|
| 12850 |
+
"learning_rate": 0.00018833241483662277,
|
| 12851 |
+
"loss": 6.6723,
|
| 12852 |
+
"step": 1813
|
| 12853 |
+
},
|
| 12854 |
+
{
|
| 12855 |
+
"epoch": 0.472765181131092,
|
| 12856 |
+
"grad_norm": 12.388707160949707,
|
| 12857 |
+
"learning_rate": 0.00018831958004739136,
|
| 12858 |
+
"loss": 5.1241,
|
| 12859 |
+
"step": 1814
|
| 12860 |
+
},
|
| 12861 |
+
{
|
| 12862 |
+
"epoch": 0.4730258014073495,
|
| 12863 |
+
"grad_norm": 18.206995010375977,
|
| 12864 |
+
"learning_rate": 0.00018830674525815994,
|
| 12865 |
+
"loss": 4.8303,
|
| 12866 |
+
"step": 1815
|
| 12867 |
+
},
|
| 12868 |
+
{
|
| 12869 |
+
"epoch": 0.47328642168360696,
|
| 12870 |
+
"grad_norm": 72.17833709716797,
|
| 12871 |
+
"learning_rate": 0.0001882938959170133,
|
| 12872 |
+
"loss": 4.8661,
|
| 12873 |
+
"step": 1816
|
| 12874 |
+
},
|
| 12875 |
+
{
|
| 12876 |
+
"epoch": 0.47354704195986447,
|
| 12877 |
+
"grad_norm": 18.09402847290039,
|
| 12878 |
+
"learning_rate": 0.00018828104657586664,
|
| 12879 |
+
"loss": 5.473,
|
| 12880 |
+
"step": 1817
|
| 12881 |
+
},
|
| 12882 |
+
{
|
| 12883 |
+
"epoch": 0.473807662236122,
|
| 12884 |
+
"grad_norm": 241.85777282714844,
|
| 12885 |
+
"learning_rate": 0.00018826819723472,
|
| 12886 |
+
"loss": 5.6158,
|
| 12887 |
+
"step": 1818
|
| 12888 |
+
},
|
| 12889 |
+
{
|
| 12890 |
+
"epoch": 0.4740682825123795,
|
| 12891 |
+
"grad_norm": 12.830618858337402,
|
| 12892 |
+
"learning_rate": 0.00018825533334165812,
|
| 12893 |
+
"loss": 4.5027,
|
| 12894 |
+
"step": 1819
|
| 12895 |
+
},
|
| 12896 |
+
{
|
| 12897 |
+
"epoch": 0.47432890278863693,
|
| 12898 |
+
"grad_norm": 18.599620819091797,
|
| 12899 |
+
"learning_rate": 0.000188242454896681,
|
| 12900 |
+
"loss": 5.6648,
|
| 12901 |
+
"step": 1820
|
| 12902 |
+
},
|
| 12903 |
+
{
|
| 12904 |
+
"epoch": 0.47458952306489444,
|
| 12905 |
+
"grad_norm": 13.098050117492676,
|
| 12906 |
+
"learning_rate": 0.0001882295764517039,
|
| 12907 |
+
"loss": 5.0379,
|
| 12908 |
+
"step": 1821
|
| 12909 |
+
},
|
| 12910 |
+
{
|
| 12911 |
+
"epoch": 0.47485014334115194,
|
| 12912 |
+
"grad_norm": 9.492166519165039,
|
| 12913 |
+
"learning_rate": 0.0001882166980067268,
|
| 12914 |
+
"loss": 5.198,
|
| 12915 |
+
"step": 1822
|
| 12916 |
+
},
|
| 12917 |
+
{
|
| 12918 |
+
"epoch": 0.47511076361740945,
|
| 12919 |
+
"grad_norm": 10.473576545715332,
|
| 12920 |
+
"learning_rate": 0.00018820380500983447,
|
| 12921 |
+
"loss": 5.303,
|
| 12922 |
+
"step": 1823
|
| 12923 |
+
},
|
| 12924 |
+
{
|
| 12925 |
+
"epoch": 0.47537138389366695,
|
| 12926 |
+
"grad_norm": 9.241297721862793,
|
| 12927 |
+
"learning_rate": 0.00018819091201294214,
|
| 12928 |
+
"loss": 5.0623,
|
| 12929 |
+
"step": 1824
|
| 12930 |
+
},
|
| 12931 |
+
{
|
| 12932 |
+
"epoch": 0.4756320041699244,
|
| 12933 |
+
"grad_norm": 17.09585189819336,
|
| 12934 |
+
"learning_rate": 0.00018817800446413457,
|
| 12935 |
+
"loss": 4.8895,
|
| 12936 |
+
"step": 1825
|
| 12937 |
+
},
|
| 12938 |
+
{
|
| 12939 |
+
"epoch": 0.4758926244461819,
|
| 12940 |
+
"grad_norm": 10.225650787353516,
|
| 12941 |
+
"learning_rate": 0.000188165096915327,
|
| 12942 |
+
"loss": 4.9848,
|
| 12943 |
+
"step": 1826
|
| 12944 |
+
},
|
| 12945 |
+
{
|
| 12946 |
+
"epoch": 0.4761532447224394,
|
| 12947 |
+
"grad_norm": 10.421326637268066,
|
| 12948 |
+
"learning_rate": 0.00018815218936651945,
|
| 12949 |
+
"loss": 4.5205,
|
| 12950 |
+
"step": 1827
|
| 12951 |
+
},
|
| 12952 |
+
{
|
| 12953 |
+
"epoch": 0.4764138649986969,
|
| 12954 |
+
"grad_norm": 13.143168449401855,
|
| 12955 |
+
"learning_rate": 0.00018813925271388143,
|
| 12956 |
+
"loss": 5.2166,
|
| 12957 |
+
"step": 1828
|
| 12958 |
+
},
|
| 12959 |
+
{
|
| 12960 |
+
"epoch": 0.4766744852749544,
|
| 12961 |
+
"grad_norm": 12.645442962646484,
|
| 12962 |
+
"learning_rate": 0.00018812633061315864,
|
| 12963 |
+
"loss": 5.1552,
|
| 12964 |
+
"step": 1829
|
| 12965 |
+
},
|
| 12966 |
+
{
|
| 12967 |
+
"epoch": 0.4769351055512119,
|
| 12968 |
+
"grad_norm": 11.635645866394043,
|
| 12969 |
+
"learning_rate": 0.00018811339396052063,
|
| 12970 |
+
"loss": 4.7997,
|
| 12971 |
+
"step": 1830
|
| 12972 |
+
},
|
| 12973 |
+
{
|
| 12974 |
+
"epoch": 0.4771957258274694,
|
| 12975 |
+
"grad_norm": 13.22658920288086,
|
| 12976 |
+
"learning_rate": 0.0001881004573078826,
|
| 12977 |
+
"loss": 4.2591,
|
| 12978 |
+
"step": 1831
|
| 12979 |
+
},
|
| 12980 |
+
{
|
| 12981 |
+
"epoch": 0.4774563461037269,
|
| 12982 |
+
"grad_norm": 14.026827812194824,
|
| 12983 |
+
"learning_rate": 0.00018808750610332936,
|
| 12984 |
+
"loss": 5.8879,
|
| 12985 |
+
"step": 1832
|
| 12986 |
+
},
|
| 12987 |
+
{
|
| 12988 |
+
"epoch": 0.47771696637998434,
|
| 12989 |
+
"grad_norm": 12.296269416809082,
|
| 12990 |
+
"learning_rate": 0.00018807455489877611,
|
| 12991 |
+
"loss": 4.9644,
|
| 12992 |
+
"step": 1833
|
| 12993 |
+
},
|
| 12994 |
+
{
|
| 12995 |
+
"epoch": 0.47797758665624185,
|
| 12996 |
+
"grad_norm": 10.826774597167969,
|
| 12997 |
+
"learning_rate": 0.00018806158914230764,
|
| 12998 |
+
"loss": 4.0063,
|
| 12999 |
+
"step": 1834
|
| 13000 |
+
},
|
| 13001 |
+
{
|
| 13002 |
+
"epoch": 0.47823820693249935,
|
| 13003 |
+
"grad_norm": 11.467571258544922,
|
| 13004 |
+
"learning_rate": 0.00018804862338583916,
|
| 13005 |
+
"loss": 4.6543,
|
| 13006 |
+
"step": 1835
|
| 13007 |
+
},
|
| 13008 |
+
{
|
| 13009 |
+
"epoch": 0.47849882720875686,
|
| 13010 |
+
"grad_norm": 11.653413772583008,
|
| 13011 |
+
"learning_rate": 0.00018803564307745546,
|
| 13012 |
+
"loss": 5.013,
|
| 13013 |
+
"step": 1836
|
| 13014 |
+
},
|
| 13015 |
+
{
|
| 13016 |
+
"epoch": 0.4787594474850143,
|
| 13017 |
+
"grad_norm": 10.89033317565918,
|
| 13018 |
+
"learning_rate": 0.00018802266276907176,
|
| 13019 |
+
"loss": 4.8319,
|
| 13020 |
+
"step": 1837
|
| 13021 |
+
},
|
| 13022 |
+
{
|
| 13023 |
+
"epoch": 0.4790200677612718,
|
| 13024 |
+
"grad_norm": 12.89120101928711,
|
| 13025 |
+
"learning_rate": 0.00018800966790877283,
|
| 13026 |
+
"loss": 4.5967,
|
| 13027 |
+
"step": 1838
|
| 13028 |
+
},
|
| 13029 |
+
{
|
| 13030 |
+
"epoch": 0.4792806880375293,
|
| 13031 |
+
"grad_norm": 12.160778045654297,
|
| 13032 |
+
"learning_rate": 0.00018799668760038912,
|
| 13033 |
+
"loss": 5.7447,
|
| 13034 |
+
"step": 1839
|
| 13035 |
+
},
|
| 13036 |
+
{
|
| 13037 |
+
"epoch": 0.47954130831378683,
|
| 13038 |
+
"grad_norm": 13.569367408752441,
|
| 13039 |
+
"learning_rate": 0.00018798367818817496,
|
| 13040 |
+
"loss": 5.7456,
|
| 13041 |
+
"step": 1840
|
| 13042 |
+
},
|
| 13043 |
+
{
|
| 13044 |
+
"epoch": 0.4798019285900443,
|
| 13045 |
+
"grad_norm": 16.512102127075195,
|
| 13046 |
+
"learning_rate": 0.0001879706687759608,
|
| 13047 |
+
"loss": 5.2688,
|
| 13048 |
+
"step": 1841
|
| 13049 |
+
},
|
| 13050 |
+
{
|
| 13051 |
+
"epoch": 0.4800625488663018,
|
| 13052 |
+
"grad_norm": 15.978020668029785,
|
| 13053 |
+
"learning_rate": 0.00018795764481183141,
|
| 13054 |
+
"loss": 5.4303,
|
| 13055 |
+
"step": 1842
|
| 13056 |
+
},
|
| 13057 |
+
{
|
| 13058 |
+
"epoch": 0.4803231691425593,
|
| 13059 |
+
"grad_norm": 9.993230819702148,
|
| 13060 |
+
"learning_rate": 0.00018794463539961725,
|
| 13061 |
+
"loss": 5.0611,
|
| 13062 |
+
"step": 1843
|
| 13063 |
+
},
|
| 13064 |
+
{
|
| 13065 |
+
"epoch": 0.4805837894188168,
|
| 13066 |
+
"grad_norm": 16.74565315246582,
|
| 13067 |
+
"learning_rate": 0.00018793161143548787,
|
| 13068 |
+
"loss": 5.321,
|
| 13069 |
+
"step": 1844
|
| 13070 |
+
},
|
| 13071 |
+
{
|
| 13072 |
+
"epoch": 0.4808444096950743,
|
| 13073 |
+
"grad_norm": 12.556840896606445,
|
| 13074 |
+
"learning_rate": 0.00018791855836752802,
|
| 13075 |
+
"loss": 5.2619,
|
| 13076 |
+
"step": 1845
|
| 13077 |
+
},
|
| 13078 |
+
{
|
| 13079 |
+
"epoch": 0.48110502997133175,
|
| 13080 |
+
"grad_norm": 11.444070816040039,
|
| 13081 |
+
"learning_rate": 0.0001879055198514834,
|
| 13082 |
+
"loss": 5.0738,
|
| 13083 |
+
"step": 1846
|
| 13084 |
+
},
|
| 13085 |
+
{
|
| 13086 |
+
"epoch": 0.48136565024758926,
|
| 13087 |
+
"grad_norm": 23.41417121887207,
|
| 13088 |
+
"learning_rate": 0.0001878924813354388,
|
| 13089 |
+
"loss": 4.5046,
|
| 13090 |
+
"step": 1847
|
| 13091 |
+
},
|
| 13092 |
+
{
|
| 13093 |
+
"epoch": 0.48162627052384677,
|
| 13094 |
+
"grad_norm": 10.137743949890137,
|
| 13095 |
+
"learning_rate": 0.00018787942826747894,
|
| 13096 |
+
"loss": 4.5996,
|
| 13097 |
+
"step": 1848
|
| 13098 |
+
},
|
| 13099 |
+
{
|
| 13100 |
+
"epoch": 0.4818868908001043,
|
| 13101 |
+
"grad_norm": 11.530888557434082,
|
| 13102 |
+
"learning_rate": 0.00018786636064760387,
|
| 13103 |
+
"loss": 4.9895,
|
| 13104 |
+
"step": 1849
|
| 13105 |
+
},
|
| 13106 |
+
{
|
| 13107 |
+
"epoch": 0.4821475110763617,
|
| 13108 |
+
"grad_norm": 10.694371223449707,
|
| 13109 |
+
"learning_rate": 0.0001878532930277288,
|
| 13110 |
+
"loss": 4.4927,
|
| 13111 |
+
"step": 1850
|
| 13112 |
+
},
|
| 13113 |
+
{
|
| 13114 |
+
"epoch": 0.48240813135261923,
|
| 13115 |
+
"grad_norm": 11.96599292755127,
|
| 13116 |
+
"learning_rate": 0.00018784022540785372,
|
| 13117 |
+
"loss": 4.7774,
|
| 13118 |
+
"step": 1851
|
| 13119 |
+
},
|
| 13120 |
+
{
|
| 13121 |
+
"epoch": 0.48266875162887674,
|
| 13122 |
+
"grad_norm": 9.961639404296875,
|
| 13123 |
+
"learning_rate": 0.00018782714323606342,
|
| 13124 |
+
"loss": 4.7172,
|
| 13125 |
+
"step": 1852
|
| 13126 |
+
},
|
| 13127 |
+
{
|
| 13128 |
+
"epoch": 0.48292937190513424,
|
| 13129 |
+
"grad_norm": 16.102052688598633,
|
| 13130 |
+
"learning_rate": 0.0001878140465123579,
|
| 13131 |
+
"loss": 5.5745,
|
| 13132 |
+
"step": 1853
|
| 13133 |
+
},
|
| 13134 |
+
{
|
| 13135 |
+
"epoch": 0.4831899921813917,
|
| 13136 |
+
"grad_norm": 13.830510139465332,
|
| 13137 |
+
"learning_rate": 0.00018780094978865236,
|
| 13138 |
+
"loss": 5.3226,
|
| 13139 |
+
"step": 1854
|
| 13140 |
+
},
|
| 13141 |
+
{
|
| 13142 |
+
"epoch": 0.4834506124576492,
|
| 13143 |
+
"grad_norm": 13.367227554321289,
|
| 13144 |
+
"learning_rate": 0.00018778785306494683,
|
| 13145 |
+
"loss": 4.7316,
|
| 13146 |
+
"step": 1855
|
| 13147 |
+
},
|
| 13148 |
+
{
|
| 13149 |
+
"epoch": 0.4837112327339067,
|
| 13150 |
+
"grad_norm": 17.57742691040039,
|
| 13151 |
+
"learning_rate": 0.00018777474178932607,
|
| 13152 |
+
"loss": 5.3159,
|
| 13153 |
+
"step": 1856
|
| 13154 |
+
},
|
| 13155 |
+
{
|
| 13156 |
+
"epoch": 0.4839718530101642,
|
| 13157 |
+
"grad_norm": 9.331389427185059,
|
| 13158 |
+
"learning_rate": 0.0001877616305137053,
|
| 13159 |
+
"loss": 5.1851,
|
| 13160 |
+
"step": 1857
|
| 13161 |
+
},
|
| 13162 |
+
{
|
| 13163 |
+
"epoch": 0.48423247328642166,
|
| 13164 |
+
"grad_norm": 14.18066120147705,
|
| 13165 |
+
"learning_rate": 0.00018774850468616933,
|
| 13166 |
+
"loss": 4.0067,
|
| 13167 |
+
"step": 1858
|
| 13168 |
+
},
|
| 13169 |
+
{
|
| 13170 |
+
"epoch": 0.48449309356267917,
|
| 13171 |
+
"grad_norm": 16.757022857666016,
|
| 13172 |
+
"learning_rate": 0.00018773537885863334,
|
| 13173 |
+
"loss": 5.3286,
|
| 13174 |
+
"step": 1859
|
| 13175 |
+
},
|
| 13176 |
+
{
|
| 13177 |
+
"epoch": 0.4847537138389367,
|
| 13178 |
+
"grad_norm": 9.797025680541992,
|
| 13179 |
+
"learning_rate": 0.00018772225303109735,
|
| 13180 |
+
"loss": 4.3471,
|
| 13181 |
+
"step": 1860
|
| 13182 |
+
},
|
| 13183 |
+
{
|
| 13184 |
+
"epoch": 0.4850143341151942,
|
| 13185 |
+
"grad_norm": 10.527436256408691,
|
| 13186 |
+
"learning_rate": 0.00018770911265164614,
|
| 13187 |
+
"loss": 4.6839,
|
| 13188 |
+
"step": 1861
|
| 13189 |
+
},
|
| 13190 |
+
{
|
| 13191 |
+
"epoch": 0.48527495439145163,
|
| 13192 |
+
"grad_norm": 12.43220329284668,
|
| 13193 |
+
"learning_rate": 0.0001876959577202797,
|
| 13194 |
+
"loss": 4.8527,
|
| 13195 |
+
"step": 1862
|
| 13196 |
+
},
|
| 13197 |
+
{
|
| 13198 |
+
"epoch": 0.48553557466770914,
|
| 13199 |
+
"grad_norm": 28.21805763244629,
|
| 13200 |
+
"learning_rate": 0.00018768281734082848,
|
| 13201 |
+
"loss": 5.3501,
|
| 13202 |
+
"step": 1863
|
| 13203 |
+
},
|
| 13204 |
+
{
|
| 13205 |
+
"epoch": 0.48579619494396664,
|
| 13206 |
+
"grad_norm": 11.163954734802246,
|
| 13207 |
+
"learning_rate": 0.0001876696478575468,
|
| 13208 |
+
"loss": 4.8881,
|
| 13209 |
+
"step": 1864
|
| 13210 |
+
},
|
| 13211 |
+
{
|
| 13212 |
+
"epoch": 0.48605681522022415,
|
| 13213 |
+
"grad_norm": 10.474588394165039,
|
| 13214 |
+
"learning_rate": 0.00018765649292618036,
|
| 13215 |
+
"loss": 5.016,
|
| 13216 |
+
"step": 1865
|
| 13217 |
+
},
|
| 13218 |
+
{
|
| 13219 |
+
"epoch": 0.48631743549648165,
|
| 13220 |
+
"grad_norm": 9.17603588104248,
|
| 13221 |
+
"learning_rate": 0.0001876433234428987,
|
| 13222 |
+
"loss": 5.0946,
|
| 13223 |
+
"step": 1866
|
| 13224 |
+
},
|
| 13225 |
+
{
|
| 13226 |
+
"epoch": 0.4865780557727391,
|
| 13227 |
+
"grad_norm": 9.54752254486084,
|
| 13228 |
+
"learning_rate": 0.0001876301394077018,
|
| 13229 |
+
"loss": 5.6084,
|
| 13230 |
+
"step": 1867
|
| 13231 |
+
},
|
| 13232 |
+
{
|
| 13233 |
+
"epoch": 0.4868386760489966,
|
| 13234 |
+
"grad_norm": 14.107392311096191,
|
| 13235 |
+
"learning_rate": 0.0001876169553725049,
|
| 13236 |
+
"loss": 5.037,
|
| 13237 |
+
"step": 1868
|
| 13238 |
+
},
|
| 13239 |
+
{
|
| 13240 |
+
"epoch": 0.4870992963252541,
|
| 13241 |
+
"grad_norm": 10.658012390136719,
|
| 13242 |
+
"learning_rate": 0.00018760375678539276,
|
| 13243 |
+
"loss": 5.0839,
|
| 13244 |
+
"step": 1869
|
| 13245 |
+
},
|
| 13246 |
+
{
|
| 13247 |
+
"epoch": 0.4873599166015116,
|
| 13248 |
+
"grad_norm": 14.332069396972656,
|
| 13249 |
+
"learning_rate": 0.00018759055819828063,
|
| 13250 |
+
"loss": 5.2982,
|
| 13251 |
+
"step": 1870
|
| 13252 |
+
},
|
| 13253 |
+
{
|
| 13254 |
+
"epoch": 0.4876205368777691,
|
| 13255 |
+
"grad_norm": 10.385351181030273,
|
| 13256 |
+
"learning_rate": 0.00018757734505925328,
|
| 13257 |
+
"loss": 5.3114,
|
| 13258 |
+
"step": 1871
|
| 13259 |
+
},
|
| 13260 |
+
{
|
| 13261 |
+
"epoch": 0.4878811571540266,
|
| 13262 |
+
"grad_norm": 10.018989562988281,
|
| 13263 |
+
"learning_rate": 0.00018756414647214115,
|
| 13264 |
+
"loss": 5.7433,
|
| 13265 |
+
"step": 1872
|
| 13266 |
+
},
|
| 13267 |
+
{
|
| 13268 |
+
"epoch": 0.4881417774302841,
|
| 13269 |
+
"grad_norm": 14.237773895263672,
|
| 13270 |
+
"learning_rate": 0.00018755091878119856,
|
| 13271 |
+
"loss": 5.8772,
|
| 13272 |
+
"step": 1873
|
| 13273 |
+
},
|
| 13274 |
+
{
|
| 13275 |
+
"epoch": 0.4884023977065416,
|
| 13276 |
+
"grad_norm": 12.690217971801758,
|
| 13277 |
+
"learning_rate": 0.0001875377056421712,
|
| 13278 |
+
"loss": 5.134,
|
| 13279 |
+
"step": 1874
|
| 13280 |
+
},
|
| 13281 |
+
{
|
| 13282 |
+
"epoch": 0.48866301798279904,
|
| 13283 |
+
"grad_norm": 13.357400894165039,
|
| 13284 |
+
"learning_rate": 0.0001875244633993134,
|
| 13285 |
+
"loss": 5.1887,
|
| 13286 |
+
"step": 1875
|
| 13287 |
+
},
|
| 13288 |
+
{
|
| 13289 |
+
"epoch": 0.48892363825905655,
|
| 13290 |
+
"grad_norm": 13.582716941833496,
|
| 13291 |
+
"learning_rate": 0.00018751122115645558,
|
| 13292 |
+
"loss": 4.3156,
|
| 13293 |
+
"step": 1876
|
| 13294 |
+
},
|
| 13295 |
+
{
|
| 13296 |
+
"epoch": 0.48918425853531405,
|
| 13297 |
+
"grad_norm": 15.98849868774414,
|
| 13298 |
+
"learning_rate": 0.00018749797891359776,
|
| 13299 |
+
"loss": 5.4427,
|
| 13300 |
+
"step": 1877
|
| 13301 |
+
},
|
| 13302 |
+
{
|
| 13303 |
+
"epoch": 0.48944487881157156,
|
| 13304 |
+
"grad_norm": 11.24950122833252,
|
| 13305 |
+
"learning_rate": 0.00018748472211882472,
|
| 13306 |
+
"loss": 3.6912,
|
| 13307 |
+
"step": 1878
|
| 13308 |
+
},
|
| 13309 |
+
{
|
| 13310 |
+
"epoch": 0.489705499087829,
|
| 13311 |
+
"grad_norm": 12.458173751831055,
|
| 13312 |
+
"learning_rate": 0.00018747146532405168,
|
| 13313 |
+
"loss": 4.5879,
|
| 13314 |
+
"step": 1879
|
| 13315 |
+
},
|
| 13316 |
+
{
|
| 13317 |
+
"epoch": 0.4899661193640865,
|
| 13318 |
+
"grad_norm": 11.879009246826172,
|
| 13319 |
+
"learning_rate": 0.00018745820852927864,
|
| 13320 |
+
"loss": 5.0259,
|
| 13321 |
+
"step": 1880
|
| 13322 |
+
},
|
| 13323 |
+
{
|
| 13324 |
+
"epoch": 0.490226739640344,
|
| 13325 |
+
"grad_norm": 12.405200958251953,
|
| 13326 |
+
"learning_rate": 0.00018744493718259037,
|
| 13327 |
+
"loss": 4.2239,
|
| 13328 |
+
"step": 1881
|
| 13329 |
+
},
|
| 13330 |
+
{
|
| 13331 |
+
"epoch": 0.49048735991660153,
|
| 13332 |
+
"grad_norm": 16.421504974365234,
|
| 13333 |
+
"learning_rate": 0.00018743165128398687,
|
| 13334 |
+
"loss": 4.807,
|
| 13335 |
+
"step": 1882
|
| 13336 |
+
},
|
| 13337 |
+
{
|
| 13338 |
+
"epoch": 0.490747980192859,
|
| 13339 |
+
"grad_norm": 14.629226684570312,
|
| 13340 |
+
"learning_rate": 0.0001874183799372986,
|
| 13341 |
+
"loss": 5.0214,
|
| 13342 |
+
"step": 1883
|
| 13343 |
+
},
|
| 13344 |
+
{
|
| 13345 |
+
"epoch": 0.4910086004691165,
|
| 13346 |
+
"grad_norm": 14.704447746276855,
|
| 13347 |
+
"learning_rate": 0.00018740507948677987,
|
| 13348 |
+
"loss": 5.0339,
|
| 13349 |
+
"step": 1884
|
| 13350 |
+
},
|
| 13351 |
+
{
|
| 13352 |
+
"epoch": 0.491269220745374,
|
| 13353 |
+
"grad_norm": 12.018187522888184,
|
| 13354 |
+
"learning_rate": 0.00018739179358817637,
|
| 13355 |
+
"loss": 5.4619,
|
| 13356 |
+
"step": 1885
|
| 13357 |
+
},
|
| 13358 |
+
{
|
| 13359 |
+
"epoch": 0.4915298410216315,
|
| 13360 |
+
"grad_norm": 19.699617385864258,
|
| 13361 |
+
"learning_rate": 0.00018737847858574241,
|
| 13362 |
+
"loss": 5.2781,
|
| 13363 |
+
"step": 1886
|
| 13364 |
+
},
|
| 13365 |
+
{
|
| 13366 |
+
"epoch": 0.491790461297889,
|
| 13367 |
+
"grad_norm": 13.180678367614746,
|
| 13368 |
+
"learning_rate": 0.00018736516358330846,
|
| 13369 |
+
"loss": 5.0445,
|
| 13370 |
+
"step": 1887
|
| 13371 |
+
},
|
| 13372 |
+
{
|
| 13373 |
+
"epoch": 0.49205108157414645,
|
| 13374 |
+
"grad_norm": 15.855171203613281,
|
| 13375 |
+
"learning_rate": 0.0001873518485808745,
|
| 13376 |
+
"loss": 4.6027,
|
| 13377 |
+
"step": 1888
|
| 13378 |
+
},
|
| 13379 |
+
{
|
| 13380 |
+
"epoch": 0.49231170185040396,
|
| 13381 |
+
"grad_norm": 11.354763984680176,
|
| 13382 |
+
"learning_rate": 0.00018733851902652532,
|
| 13383 |
+
"loss": 5.392,
|
| 13384 |
+
"step": 1889
|
| 13385 |
+
},
|
| 13386 |
+
{
|
| 13387 |
+
"epoch": 0.49257232212666147,
|
| 13388 |
+
"grad_norm": 10.087592124938965,
|
| 13389 |
+
"learning_rate": 0.00018732520402409136,
|
| 13390 |
+
"loss": 4.3135,
|
| 13391 |
+
"step": 1890
|
| 13392 |
+
},
|
| 13393 |
+
{
|
| 13394 |
+
"epoch": 0.49283294240291897,
|
| 13395 |
+
"grad_norm": 11.550101280212402,
|
| 13396 |
+
"learning_rate": 0.00018731185991782695,
|
| 13397 |
+
"loss": 4.9187,
|
| 13398 |
+
"step": 1891
|
| 13399 |
+
},
|
| 13400 |
+
{
|
| 13401 |
+
"epoch": 0.4930935626791764,
|
| 13402 |
+
"grad_norm": 13.736188888549805,
|
| 13403 |
+
"learning_rate": 0.00018729851581156254,
|
| 13404 |
+
"loss": 5.687,
|
| 13405 |
+
"step": 1892
|
| 13406 |
+
},
|
| 13407 |
+
{
|
| 13408 |
+
"epoch": 0.49335418295543393,
|
| 13409 |
+
"grad_norm": 12.104930877685547,
|
| 13410 |
+
"learning_rate": 0.00018728517170529813,
|
| 13411 |
+
"loss": 5.2789,
|
| 13412 |
+
"step": 1893
|
| 13413 |
+
},
|
| 13414 |
+
{
|
| 13415 |
+
"epoch": 0.49361480323169143,
|
| 13416 |
+
"grad_norm": 12.98553466796875,
|
| 13417 |
+
"learning_rate": 0.00018727181304711848,
|
| 13418 |
+
"loss": 4.5616,
|
| 13419 |
+
"step": 1894
|
| 13420 |
+
},
|
| 13421 |
+
{
|
| 13422 |
+
"epoch": 0.49387542350794894,
|
| 13423 |
+
"grad_norm": 11.326464653015137,
|
| 13424 |
+
"learning_rate": 0.00018725845438893884,
|
| 13425 |
+
"loss": 4.837,
|
| 13426 |
+
"step": 1895
|
| 13427 |
+
},
|
| 13428 |
+
{
|
| 13429 |
+
"epoch": 0.4941360437842064,
|
| 13430 |
+
"grad_norm": 12.19218921661377,
|
| 13431 |
+
"learning_rate": 0.00018724508117884398,
|
| 13432 |
+
"loss": 5.0118,
|
| 13433 |
+
"step": 1896
|
| 13434 |
+
},
|
| 13435 |
+
{
|
| 13436 |
+
"epoch": 0.4943966640604639,
|
| 13437 |
+
"grad_norm": 9.611741065979004,
|
| 13438 |
+
"learning_rate": 0.0001872317079687491,
|
| 13439 |
+
"loss": 5.421,
|
| 13440 |
+
"step": 1897
|
| 13441 |
+
},
|
| 13442 |
+
{
|
| 13443 |
+
"epoch": 0.4946572843367214,
|
| 13444 |
+
"grad_norm": 20.134742736816406,
|
| 13445 |
+
"learning_rate": 0.00018721833475865424,
|
| 13446 |
+
"loss": 4.8459,
|
| 13447 |
+
"step": 1898
|
| 13448 |
+
},
|
| 13449 |
+
{
|
| 13450 |
+
"epoch": 0.4949179046129789,
|
| 13451 |
+
"grad_norm": 18.743206024169922,
|
| 13452 |
+
"learning_rate": 0.00018720494699664414,
|
| 13453 |
+
"loss": 4.4803,
|
| 13454 |
+
"step": 1899
|
| 13455 |
+
},
|
| 13456 |
+
{
|
| 13457 |
+
"epoch": 0.49517852488923636,
|
| 13458 |
+
"grad_norm": 12.631272315979004,
|
| 13459 |
+
"learning_rate": 0.0001871915446827188,
|
| 13460 |
+
"loss": 3.5087,
|
| 13461 |
+
"step": 1900
|
| 13462 |
+
},
|
| 13463 |
+
{
|
| 13464 |
+
"epoch": 0.49517852488923636,
|
| 13465 |
+
"eval_loss": 2.3786075115203857,
|
| 13466 |
+
"eval_runtime": 27.2055,
|
| 13467 |
+
"eval_samples_per_second": 2.867,
|
| 13468 |
+
"eval_steps_per_second": 1.434,
|
| 13469 |
+
"step": 1900
|
| 13470 |
}
|
| 13471 |
],
|
| 13472 |
"logging_steps": 1,
|
|
|
|
| 13486 |
"attributes": {}
|
| 13487 |
}
|
| 13488 |
},
|
| 13489 |
+
"total_flos": 3.397729721788662e+17,
|
| 13490 |
"train_batch_size": 2,
|
| 13491 |
"trial_name": null,
|
| 13492 |
"trial_params": null
|