Training in progress, step 1900, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 528526760
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d06847da158ce168fb93e55cc61bf6bea8dd7967293bd7a3c8e8402b075b8fe4
|
| 3 |
size 528526760
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 141172038
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e2dadc0a09d187a8011aebcd91d84a82f37564642f08657fd83aef7232779b1b
|
| 3 |
size 141172038
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fc0fc3759c3826ef1f53a065691416b3f84d400156909e771c89c5340e089c66
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 2080
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:69bc8f012b013c017f6532a85f380ecf6f76a1db060525a9d4f033160c892e6c
|
| 3 |
size 2080
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.5684272050857544,
|
| 3 |
"best_model_checkpoint": "miner_id_24/checkpoint-1600",
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 100,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -12759,6 +12759,714 @@
|
|
| 12759 |
"eval_samples_per_second": 3.53,
|
| 12760 |
"eval_steps_per_second": 1.773,
|
| 12761 |
"step": 1800
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12762 |
}
|
| 12763 |
],
|
| 12764 |
"logging_steps": 1,
|
|
@@ -12773,7 +13481,7 @@
|
|
| 12773 |
"early_stopping_threshold": 0.0
|
| 12774 |
},
|
| 12775 |
"attributes": {
|
| 12776 |
-
"early_stopping_patience_counter":
|
| 12777 |
}
|
| 12778 |
},
|
| 12779 |
"TrainerControl": {
|
|
@@ -12787,7 +13495,7 @@
|
|
| 12787 |
"attributes": {}
|
| 12788 |
}
|
| 12789 |
},
|
| 12790 |
-
"total_flos": 3.
|
| 12791 |
"train_batch_size": 2,
|
| 12792 |
"trial_name": null,
|
| 12793 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.5684272050857544,
|
| 3 |
"best_model_checkpoint": "miner_id_24/checkpoint-1600",
|
| 4 |
+
"epoch": 0.3565731444121235,
|
| 5 |
"eval_steps": 100,
|
| 6 |
+
"global_step": 1900,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 12759 |
"eval_samples_per_second": 3.53,
|
| 12760 |
"eval_steps_per_second": 1.773,
|
| 12761 |
"step": 1800
|
| 12762 |
+
},
|
| 12763 |
+
{
|
| 12764 |
+
"epoch": 0.3379938068874918,
|
| 12765 |
+
"grad_norm": 7.328483581542969,
|
| 12766 |
+
"learning_rate": 0.00019398047879803926,
|
| 12767 |
+
"loss": 1.9055,
|
| 12768 |
+
"step": 1801
|
| 12769 |
+
},
|
| 12770 |
+
{
|
| 12771 |
+
"epoch": 0.33818147696349815,
|
| 12772 |
+
"grad_norm": 6.055893421173096,
|
| 12773 |
+
"learning_rate": 0.00019397375581320375,
|
| 12774 |
+
"loss": 2.4346,
|
| 12775 |
+
"step": 1802
|
| 12776 |
+
},
|
| 12777 |
+
{
|
| 12778 |
+
"epoch": 0.33836914703950455,
|
| 12779 |
+
"grad_norm": 8.04631519317627,
|
| 12780 |
+
"learning_rate": 0.00019396701827645302,
|
| 12781 |
+
"loss": 1.5086,
|
| 12782 |
+
"step": 1803
|
| 12783 |
+
},
|
| 12784 |
+
{
|
| 12785 |
+
"epoch": 0.33855681711551094,
|
| 12786 |
+
"grad_norm": 6.279934406280518,
|
| 12787 |
+
"learning_rate": 0.00019396026618778706,
|
| 12788 |
+
"loss": 2.2011,
|
| 12789 |
+
"step": 1804
|
| 12790 |
+
},
|
| 12791 |
+
{
|
| 12792 |
+
"epoch": 0.3387444871915173,
|
| 12793 |
+
"grad_norm": 7.011415481567383,
|
| 12794 |
+
"learning_rate": 0.00019395352865103632,
|
| 12795 |
+
"loss": 2.5766,
|
| 12796 |
+
"step": 1805
|
| 12797 |
+
},
|
| 12798 |
+
{
|
| 12799 |
+
"epoch": 0.3389321572675237,
|
| 12800 |
+
"grad_norm": 11.007534980773926,
|
| 12801 |
+
"learning_rate": 0.0001939467911142856,
|
| 12802 |
+
"loss": 3.25,
|
| 12803 |
+
"step": 1806
|
| 12804 |
+
},
|
| 12805 |
+
{
|
| 12806 |
+
"epoch": 0.3391198273435301,
|
| 12807 |
+
"grad_norm": 4.763912200927734,
|
| 12808 |
+
"learning_rate": 0.00019394003902561963,
|
| 12809 |
+
"loss": 1.7699,
|
| 12810 |
+
"step": 1807
|
| 12811 |
+
},
|
| 12812 |
+
{
|
| 12813 |
+
"epoch": 0.3393074974195365,
|
| 12814 |
+
"grad_norm": 7.067093849182129,
|
| 12815 |
+
"learning_rate": 0.00019393328693695366,
|
| 12816 |
+
"loss": 2.0153,
|
| 12817 |
+
"step": 1808
|
| 12818 |
+
},
|
| 12819 |
+
{
|
| 12820 |
+
"epoch": 0.3394951674955428,
|
| 12821 |
+
"grad_norm": 6.018016338348389,
|
| 12822 |
+
"learning_rate": 0.0001939265348482877,
|
| 12823 |
+
"loss": 1.0305,
|
| 12824 |
+
"step": 1809
|
| 12825 |
+
},
|
| 12826 |
+
{
|
| 12827 |
+
"epoch": 0.3396828375715492,
|
| 12828 |
+
"grad_norm": 5.589052677154541,
|
| 12829 |
+
"learning_rate": 0.0001939197682077065,
|
| 12830 |
+
"loss": 1.623,
|
| 12831 |
+
"step": 1810
|
| 12832 |
+
},
|
| 12833 |
+
{
|
| 12834 |
+
"epoch": 0.3398705076475556,
|
| 12835 |
+
"grad_norm": 4.106575965881348,
|
| 12836 |
+
"learning_rate": 0.00019391300156712532,
|
| 12837 |
+
"loss": 2.3932,
|
| 12838 |
+
"step": 1811
|
| 12839 |
+
},
|
| 12840 |
+
{
|
| 12841 |
+
"epoch": 0.340058177723562,
|
| 12842 |
+
"grad_norm": 6.3381171226501465,
|
| 12843 |
+
"learning_rate": 0.00019390624947845936,
|
| 12844 |
+
"loss": 1.859,
|
| 12845 |
+
"step": 1812
|
| 12846 |
+
},
|
| 12847 |
+
{
|
| 12848 |
+
"epoch": 0.34024584779956835,
|
| 12849 |
+
"grad_norm": 7.823512554168701,
|
| 12850 |
+
"learning_rate": 0.00019389946828596294,
|
| 12851 |
+
"loss": 2.0446,
|
| 12852 |
+
"step": 1813
|
| 12853 |
+
},
|
| 12854 |
+
{
|
| 12855 |
+
"epoch": 0.34043351787557474,
|
| 12856 |
+
"grad_norm": 4.890605449676514,
|
| 12857 |
+
"learning_rate": 0.00019389268709346652,
|
| 12858 |
+
"loss": 0.8467,
|
| 12859 |
+
"step": 1814
|
| 12860 |
+
},
|
| 12861 |
+
{
|
| 12862 |
+
"epoch": 0.34062118795158114,
|
| 12863 |
+
"grad_norm": 7.945042610168457,
|
| 12864 |
+
"learning_rate": 0.00019388592045288533,
|
| 12865 |
+
"loss": 2.6165,
|
| 12866 |
+
"step": 1815
|
| 12867 |
+
},
|
| 12868 |
+
{
|
| 12869 |
+
"epoch": 0.3408088580275875,
|
| 12870 |
+
"grad_norm": 4.810164451599121,
|
| 12871 |
+
"learning_rate": 0.0001938791392603889,
|
| 12872 |
+
"loss": 1.1764,
|
| 12873 |
+
"step": 1816
|
| 12874 |
+
},
|
| 12875 |
+
{
|
| 12876 |
+
"epoch": 0.3409965281035939,
|
| 12877 |
+
"grad_norm": 6.62808895111084,
|
| 12878 |
+
"learning_rate": 0.0001938723580678925,
|
| 12879 |
+
"loss": 2.3356,
|
| 12880 |
+
"step": 1817
|
| 12881 |
+
},
|
| 12882 |
+
{
|
| 12883 |
+
"epoch": 0.3411841981796003,
|
| 12884 |
+
"grad_norm": 8.076144218444824,
|
| 12885 |
+
"learning_rate": 0.00019386556232348084,
|
| 12886 |
+
"loss": 2.2133,
|
| 12887 |
+
"step": 1818
|
| 12888 |
+
},
|
| 12889 |
+
{
|
| 12890 |
+
"epoch": 0.34137186825560667,
|
| 12891 |
+
"grad_norm": 10.573699951171875,
|
| 12892 |
+
"learning_rate": 0.0001938587665790692,
|
| 12893 |
+
"loss": 2.3251,
|
| 12894 |
+
"step": 1819
|
| 12895 |
+
},
|
| 12896 |
+
{
|
| 12897 |
+
"epoch": 0.341559538331613,
|
| 12898 |
+
"grad_norm": 5.872825622558594,
|
| 12899 |
+
"learning_rate": 0.00019385197083465755,
|
| 12900 |
+
"loss": 1.8493,
|
| 12901 |
+
"step": 1820
|
| 12902 |
+
},
|
| 12903 |
+
{
|
| 12904 |
+
"epoch": 0.3417472084076194,
|
| 12905 |
+
"grad_norm": 3.5163869857788086,
|
| 12906 |
+
"learning_rate": 0.0001938451750902459,
|
| 12907 |
+
"loss": 1.1675,
|
| 12908 |
+
"step": 1821
|
| 12909 |
+
},
|
| 12910 |
+
{
|
| 12911 |
+
"epoch": 0.3419348784836258,
|
| 12912 |
+
"grad_norm": 6.068939685821533,
|
| 12913 |
+
"learning_rate": 0.00019383836479391903,
|
| 12914 |
+
"loss": 1.7621,
|
| 12915 |
+
"step": 1822
|
| 12916 |
+
},
|
| 12917 |
+
{
|
| 12918 |
+
"epoch": 0.34212254855963214,
|
| 12919 |
+
"grad_norm": 5.792971611022949,
|
| 12920 |
+
"learning_rate": 0.00019383155449759215,
|
| 12921 |
+
"loss": 1.8668,
|
| 12922 |
+
"step": 1823
|
| 12923 |
+
},
|
| 12924 |
+
{
|
| 12925 |
+
"epoch": 0.34231021863563854,
|
| 12926 |
+
"grad_norm": 8.612492561340332,
|
| 12927 |
+
"learning_rate": 0.00019382474420126528,
|
| 12928 |
+
"loss": 2.2727,
|
| 12929 |
+
"step": 1824
|
| 12930 |
+
},
|
| 12931 |
+
{
|
| 12932 |
+
"epoch": 0.34249788871164494,
|
| 12933 |
+
"grad_norm": 5.430174827575684,
|
| 12934 |
+
"learning_rate": 0.0001938179339049384,
|
| 12935 |
+
"loss": 1.7228,
|
| 12936 |
+
"step": 1825
|
| 12937 |
+
},
|
| 12938 |
+
{
|
| 12939 |
+
"epoch": 0.34268555878765133,
|
| 12940 |
+
"grad_norm": 6.0315375328063965,
|
| 12941 |
+
"learning_rate": 0.0001938111090566963,
|
| 12942 |
+
"loss": 2.283,
|
| 12943 |
+
"step": 1826
|
| 12944 |
+
},
|
| 12945 |
+
{
|
| 12946 |
+
"epoch": 0.3428732288636577,
|
| 12947 |
+
"grad_norm": 5.934972763061523,
|
| 12948 |
+
"learning_rate": 0.00019380429876036942,
|
| 12949 |
+
"loss": 1.37,
|
| 12950 |
+
"step": 1827
|
| 12951 |
+
},
|
| 12952 |
+
{
|
| 12953 |
+
"epoch": 0.34306089893966407,
|
| 12954 |
+
"grad_norm": 8.644633293151855,
|
| 12955 |
+
"learning_rate": 0.00019379747391212732,
|
| 12956 |
+
"loss": 1.7698,
|
| 12957 |
+
"step": 1828
|
| 12958 |
+
},
|
| 12959 |
+
{
|
| 12960 |
+
"epoch": 0.34324856901567047,
|
| 12961 |
+
"grad_norm": 6.068434238433838,
|
| 12962 |
+
"learning_rate": 0.0001937906490638852,
|
| 12963 |
+
"loss": 2.4035,
|
| 12964 |
+
"step": 1829
|
| 12965 |
+
},
|
| 12966 |
+
{
|
| 12967 |
+
"epoch": 0.3434362390916768,
|
| 12968 |
+
"grad_norm": 5.352113723754883,
|
| 12969 |
+
"learning_rate": 0.00019378380966372788,
|
| 12970 |
+
"loss": 1.4043,
|
| 12971 |
+
"step": 1830
|
| 12972 |
+
},
|
| 12973 |
+
{
|
| 12974 |
+
"epoch": 0.3436239091676832,
|
| 12975 |
+
"grad_norm": 5.741805553436279,
|
| 12976 |
+
"learning_rate": 0.00019377697026357055,
|
| 12977 |
+
"loss": 2.3867,
|
| 12978 |
+
"step": 1831
|
| 12979 |
+
},
|
| 12980 |
+
{
|
| 12981 |
+
"epoch": 0.3438115792436896,
|
| 12982 |
+
"grad_norm": 5.221137523651123,
|
| 12983 |
+
"learning_rate": 0.00019377013086341321,
|
| 12984 |
+
"loss": 2.5909,
|
| 12985 |
+
"step": 1832
|
| 12986 |
+
},
|
| 12987 |
+
{
|
| 12988 |
+
"epoch": 0.343999249319696,
|
| 12989 |
+
"grad_norm": 5.222947597503662,
|
| 12990 |
+
"learning_rate": 0.00019376329146325588,
|
| 12991 |
+
"loss": 1.8876,
|
| 12992 |
+
"step": 1833
|
| 12993 |
+
},
|
| 12994 |
+
{
|
| 12995 |
+
"epoch": 0.34418691939570234,
|
| 12996 |
+
"grad_norm": 4.082976341247559,
|
| 12997 |
+
"learning_rate": 0.00019375643751118332,
|
| 12998 |
+
"loss": 1.4823,
|
| 12999 |
+
"step": 1834
|
| 13000 |
+
},
|
| 13001 |
+
{
|
| 13002 |
+
"epoch": 0.34437458947170874,
|
| 13003 |
+
"grad_norm": 4.595718860626221,
|
| 13004 |
+
"learning_rate": 0.00019374958355911076,
|
| 13005 |
+
"loss": 1.3744,
|
| 13006 |
+
"step": 1835
|
| 13007 |
+
},
|
| 13008 |
+
{
|
| 13009 |
+
"epoch": 0.34456225954771513,
|
| 13010 |
+
"grad_norm": 5.016329765319824,
|
| 13011 |
+
"learning_rate": 0.0001937427296070382,
|
| 13012 |
+
"loss": 2.3715,
|
| 13013 |
+
"step": 1836
|
| 13014 |
+
},
|
| 13015 |
+
{
|
| 13016 |
+
"epoch": 0.3447499296237215,
|
| 13017 |
+
"grad_norm": 4.979654312133789,
|
| 13018 |
+
"learning_rate": 0.00019373587565496564,
|
| 13019 |
+
"loss": 1.7598,
|
| 13020 |
+
"step": 1837
|
| 13021 |
+
},
|
| 13022 |
+
{
|
| 13023 |
+
"epoch": 0.34493759969972787,
|
| 13024 |
+
"grad_norm": 5.692010879516602,
|
| 13025 |
+
"learning_rate": 0.00019372900715097785,
|
| 13026 |
+
"loss": 1.5982,
|
| 13027 |
+
"step": 1838
|
| 13028 |
+
},
|
| 13029 |
+
{
|
| 13030 |
+
"epoch": 0.34512526977573427,
|
| 13031 |
+
"grad_norm": 6.913799285888672,
|
| 13032 |
+
"learning_rate": 0.0001937221531989053,
|
| 13033 |
+
"loss": 1.9036,
|
| 13034 |
+
"step": 1839
|
| 13035 |
+
},
|
| 13036 |
+
{
|
| 13037 |
+
"epoch": 0.34531293985174066,
|
| 13038 |
+
"grad_norm": 5.701316833496094,
|
| 13039 |
+
"learning_rate": 0.0001937152846949175,
|
| 13040 |
+
"loss": 1.9009,
|
| 13041 |
+
"step": 1840
|
| 13042 |
+
},
|
| 13043 |
+
{
|
| 13044 |
+
"epoch": 0.345500609927747,
|
| 13045 |
+
"grad_norm": 19.9186954498291,
|
| 13046 |
+
"learning_rate": 0.0001937084161909297,
|
| 13047 |
+
"loss": 2.4767,
|
| 13048 |
+
"step": 1841
|
| 13049 |
+
},
|
| 13050 |
+
{
|
| 13051 |
+
"epoch": 0.3456882800037534,
|
| 13052 |
+
"grad_norm": 5.613144874572754,
|
| 13053 |
+
"learning_rate": 0.0001937015331350267,
|
| 13054 |
+
"loss": 3.6305,
|
| 13055 |
+
"step": 1842
|
| 13056 |
+
},
|
| 13057 |
+
{
|
| 13058 |
+
"epoch": 0.3458759500797598,
|
| 13059 |
+
"grad_norm": 3.7392101287841797,
|
| 13060 |
+
"learning_rate": 0.0001936946646310389,
|
| 13061 |
+
"loss": 1.1532,
|
| 13062 |
+
"step": 1843
|
| 13063 |
+
},
|
| 13064 |
+
{
|
| 13065 |
+
"epoch": 0.34606362015576614,
|
| 13066 |
+
"grad_norm": 7.338042259216309,
|
| 13067 |
+
"learning_rate": 0.00019368776702322066,
|
| 13068 |
+
"loss": 1.95,
|
| 13069 |
+
"step": 1844
|
| 13070 |
+
},
|
| 13071 |
+
{
|
| 13072 |
+
"epoch": 0.34625129023177253,
|
| 13073 |
+
"grad_norm": 5.003443717956543,
|
| 13074 |
+
"learning_rate": 0.00019368088396731764,
|
| 13075 |
+
"loss": 1.7028,
|
| 13076 |
+
"step": 1845
|
| 13077 |
+
},
|
| 13078 |
+
{
|
| 13079 |
+
"epoch": 0.34643896030777893,
|
| 13080 |
+
"grad_norm": 6.806427001953125,
|
| 13081 |
+
"learning_rate": 0.00019367400091141462,
|
| 13082 |
+
"loss": 2.4638,
|
| 13083 |
+
"step": 1846
|
| 13084 |
+
},
|
| 13085 |
+
{
|
| 13086 |
+
"epoch": 0.34662663038378533,
|
| 13087 |
+
"grad_norm": 5.704411506652832,
|
| 13088 |
+
"learning_rate": 0.00019366710330359638,
|
| 13089 |
+
"loss": 2.7797,
|
| 13090 |
+
"step": 1847
|
| 13091 |
+
},
|
| 13092 |
+
{
|
| 13093 |
+
"epoch": 0.34681430045979167,
|
| 13094 |
+
"grad_norm": 3.765601873397827,
|
| 13095 |
+
"learning_rate": 0.00019366020569577813,
|
| 13096 |
+
"loss": 1.8054,
|
| 13097 |
+
"step": 1848
|
| 13098 |
+
},
|
| 13099 |
+
{
|
| 13100 |
+
"epoch": 0.34700197053579807,
|
| 13101 |
+
"grad_norm": 5.865424156188965,
|
| 13102 |
+
"learning_rate": 0.00019365330808795989,
|
| 13103 |
+
"loss": 2.2234,
|
| 13104 |
+
"step": 1849
|
| 13105 |
+
},
|
| 13106 |
+
{
|
| 13107 |
+
"epoch": 0.34718964061180446,
|
| 13108 |
+
"grad_norm": 9.001687049865723,
|
| 13109 |
+
"learning_rate": 0.0001936463959282264,
|
| 13110 |
+
"loss": 2.0534,
|
| 13111 |
+
"step": 1850
|
| 13112 |
+
},
|
| 13113 |
+
{
|
| 13114 |
+
"epoch": 0.3473773106878108,
|
| 13115 |
+
"grad_norm": 3.3067872524261475,
|
| 13116 |
+
"learning_rate": 0.00019363948376849294,
|
| 13117 |
+
"loss": 1.7267,
|
| 13118 |
+
"step": 1851
|
| 13119 |
+
},
|
| 13120 |
+
{
|
| 13121 |
+
"epoch": 0.3475649807638172,
|
| 13122 |
+
"grad_norm": 6.117793560028076,
|
| 13123 |
+
"learning_rate": 0.00019363257160875946,
|
| 13124 |
+
"loss": 1.721,
|
| 13125 |
+
"step": 1852
|
| 13126 |
+
},
|
| 13127 |
+
{
|
| 13128 |
+
"epoch": 0.3477526508398236,
|
| 13129 |
+
"grad_norm": 9.610057830810547,
|
| 13130 |
+
"learning_rate": 0.000193625659449026,
|
| 13131 |
+
"loss": 4.0058,
|
| 13132 |
+
"step": 1853
|
| 13133 |
+
},
|
| 13134 |
+
{
|
| 13135 |
+
"epoch": 0.34794032091583,
|
| 13136 |
+
"grad_norm": 5.996690273284912,
|
| 13137 |
+
"learning_rate": 0.00019361873273737729,
|
| 13138 |
+
"loss": 1.9939,
|
| 13139 |
+
"step": 1854
|
| 13140 |
+
},
|
| 13141 |
+
{
|
| 13142 |
+
"epoch": 0.34812799099183633,
|
| 13143 |
+
"grad_norm": 4.617966175079346,
|
| 13144 |
+
"learning_rate": 0.00019361180602572858,
|
| 13145 |
+
"loss": 1.1395,
|
| 13146 |
+
"step": 1855
|
| 13147 |
+
},
|
| 13148 |
+
{
|
| 13149 |
+
"epoch": 0.34831566106784273,
|
| 13150 |
+
"grad_norm": 6.157725811004639,
|
| 13151 |
+
"learning_rate": 0.00019360487931407988,
|
| 13152 |
+
"loss": 1.5481,
|
| 13153 |
+
"step": 1856
|
| 13154 |
+
},
|
| 13155 |
+
{
|
| 13156 |
+
"epoch": 0.3485033311438491,
|
| 13157 |
+
"grad_norm": 4.873600482940674,
|
| 13158 |
+
"learning_rate": 0.00019359795260243118,
|
| 13159 |
+
"loss": 1.944,
|
| 13160 |
+
"step": 1857
|
| 13161 |
+
},
|
| 13162 |
+
{
|
| 13163 |
+
"epoch": 0.34869100121985547,
|
| 13164 |
+
"grad_norm": 4.755199432373047,
|
| 13165 |
+
"learning_rate": 0.00019359101133886725,
|
| 13166 |
+
"loss": 1.2533,
|
| 13167 |
+
"step": 1858
|
| 13168 |
+
},
|
| 13169 |
+
{
|
| 13170 |
+
"epoch": 0.34887867129586186,
|
| 13171 |
+
"grad_norm": 6.5496392250061035,
|
| 13172 |
+
"learning_rate": 0.00019358408462721854,
|
| 13173 |
+
"loss": 2.6968,
|
| 13174 |
+
"step": 1859
|
| 13175 |
+
},
|
| 13176 |
+
{
|
| 13177 |
+
"epoch": 0.34906634137186826,
|
| 13178 |
+
"grad_norm": 8.573905944824219,
|
| 13179 |
+
"learning_rate": 0.0001935771433636546,
|
| 13180 |
+
"loss": 2.1588,
|
| 13181 |
+
"step": 1860
|
| 13182 |
+
},
|
| 13183 |
+
{
|
| 13184 |
+
"epoch": 0.34925401144787466,
|
| 13185 |
+
"grad_norm": 10.172856330871582,
|
| 13186 |
+
"learning_rate": 0.00019357018754817545,
|
| 13187 |
+
"loss": 2.2443,
|
| 13188 |
+
"step": 1861
|
| 13189 |
+
},
|
| 13190 |
+
{
|
| 13191 |
+
"epoch": 0.349441681523881,
|
| 13192 |
+
"grad_norm": 9.657448768615723,
|
| 13193 |
+
"learning_rate": 0.00019356324628461152,
|
| 13194 |
+
"loss": 2.5489,
|
| 13195 |
+
"step": 1862
|
| 13196 |
+
},
|
| 13197 |
+
{
|
| 13198 |
+
"epoch": 0.3496293515998874,
|
| 13199 |
+
"grad_norm": 4.509249210357666,
|
| 13200 |
+
"learning_rate": 0.00019355629046913236,
|
| 13201 |
+
"loss": 1.3726,
|
| 13202 |
+
"step": 1863
|
| 13203 |
+
},
|
| 13204 |
+
{
|
| 13205 |
+
"epoch": 0.3498170216758938,
|
| 13206 |
+
"grad_norm": 3.9321093559265137,
|
| 13207 |
+
"learning_rate": 0.00019354934920556843,
|
| 13208 |
+
"loss": 2.2097,
|
| 13209 |
+
"step": 1864
|
| 13210 |
+
},
|
| 13211 |
+
{
|
| 13212 |
+
"epoch": 0.35000469175190013,
|
| 13213 |
+
"grad_norm": 5.736708641052246,
|
| 13214 |
+
"learning_rate": 0.00019354237883817405,
|
| 13215 |
+
"loss": 1.9158,
|
| 13216 |
+
"step": 1865
|
| 13217 |
+
},
|
| 13218 |
+
{
|
| 13219 |
+
"epoch": 0.35019236182790653,
|
| 13220 |
+
"grad_norm": 7.1537089347839355,
|
| 13221 |
+
"learning_rate": 0.00019353542302269489,
|
| 13222 |
+
"loss": 2.3103,
|
| 13223 |
+
"step": 1866
|
| 13224 |
+
},
|
| 13225 |
+
{
|
| 13226 |
+
"epoch": 0.3503800319039129,
|
| 13227 |
+
"grad_norm": 17.57890510559082,
|
| 13228 |
+
"learning_rate": 0.00019352843810338527,
|
| 13229 |
+
"loss": 2.7007,
|
| 13230 |
+
"step": 1867
|
| 13231 |
+
},
|
| 13232 |
+
{
|
| 13233 |
+
"epoch": 0.3505677019799193,
|
| 13234 |
+
"grad_norm": 4.420182228088379,
|
| 13235 |
+
"learning_rate": 0.0001935214822879061,
|
| 13236 |
+
"loss": 1.8044,
|
| 13237 |
+
"step": 1868
|
| 13238 |
+
},
|
| 13239 |
+
{
|
| 13240 |
+
"epoch": 0.35075537205592566,
|
| 13241 |
+
"grad_norm": 5.2922234535217285,
|
| 13242 |
+
"learning_rate": 0.00019351451192051172,
|
| 13243 |
+
"loss": 1.9718,
|
| 13244 |
+
"step": 1869
|
| 13245 |
+
},
|
| 13246 |
+
{
|
| 13247 |
+
"epoch": 0.35094304213193206,
|
| 13248 |
+
"grad_norm": 6.406688690185547,
|
| 13249 |
+
"learning_rate": 0.0001935075270012021,
|
| 13250 |
+
"loss": 2.7322,
|
| 13251 |
+
"step": 1870
|
| 13252 |
+
},
|
| 13253 |
+
{
|
| 13254 |
+
"epoch": 0.35113071220793846,
|
| 13255 |
+
"grad_norm": 5.1606974601745605,
|
| 13256 |
+
"learning_rate": 0.00019350055663380772,
|
| 13257 |
+
"loss": 1.9587,
|
| 13258 |
+
"step": 1871
|
| 13259 |
+
},
|
| 13260 |
+
{
|
| 13261 |
+
"epoch": 0.35131838228394485,
|
| 13262 |
+
"grad_norm": 6.796870708465576,
|
| 13263 |
+
"learning_rate": 0.00019349355716258287,
|
| 13264 |
+
"loss": 2.0894,
|
| 13265 |
+
"step": 1872
|
| 13266 |
+
},
|
| 13267 |
+
{
|
| 13268 |
+
"epoch": 0.3515060523599512,
|
| 13269 |
+
"grad_norm": 6.20542573928833,
|
| 13270 |
+
"learning_rate": 0.00019348657224327326,
|
| 13271 |
+
"loss": 2.7715,
|
| 13272 |
+
"step": 1873
|
| 13273 |
+
},
|
| 13274 |
+
{
|
| 13275 |
+
"epoch": 0.3516937224359576,
|
| 13276 |
+
"grad_norm": 5.50722599029541,
|
| 13277 |
+
"learning_rate": 0.00019347958732396364,
|
| 13278 |
+
"loss": 1.8027,
|
| 13279 |
+
"step": 1874
|
| 13280 |
+
},
|
| 13281 |
+
{
|
| 13282 |
+
"epoch": 0.351881392511964,
|
| 13283 |
+
"grad_norm": 7.180240631103516,
|
| 13284 |
+
"learning_rate": 0.0001934725878527388,
|
| 13285 |
+
"loss": 2.0733,
|
| 13286 |
+
"step": 1875
|
| 13287 |
+
},
|
| 13288 |
+
{
|
| 13289 |
+
"epoch": 0.3520690625879703,
|
| 13290 |
+
"grad_norm": 6.353847980499268,
|
| 13291 |
+
"learning_rate": 0.00019346558838151395,
|
| 13292 |
+
"loss": 2.6648,
|
| 13293 |
+
"step": 1876
|
| 13294 |
+
},
|
| 13295 |
+
{
|
| 13296 |
+
"epoch": 0.3522567326639767,
|
| 13297 |
+
"grad_norm": 5.271063327789307,
|
| 13298 |
+
"learning_rate": 0.00019345857435837388,
|
| 13299 |
+
"loss": 1.5801,
|
| 13300 |
+
"step": 1877
|
| 13301 |
+
},
|
| 13302 |
+
{
|
| 13303 |
+
"epoch": 0.3524444027399831,
|
| 13304 |
+
"grad_norm": 5.9332380294799805,
|
| 13305 |
+
"learning_rate": 0.00019345157488714904,
|
| 13306 |
+
"loss": 2.1235,
|
| 13307 |
+
"step": 1878
|
| 13308 |
+
},
|
| 13309 |
+
{
|
| 13310 |
+
"epoch": 0.3526320728159895,
|
| 13311 |
+
"grad_norm": 8.27168083190918,
|
| 13312 |
+
"learning_rate": 0.00019344456086400896,
|
| 13313 |
+
"loss": 2.6918,
|
| 13314 |
+
"step": 1879
|
| 13315 |
+
},
|
| 13316 |
+
{
|
| 13317 |
+
"epoch": 0.35281974289199586,
|
| 13318 |
+
"grad_norm": 5.08257532119751,
|
| 13319 |
+
"learning_rate": 0.0001934375468408689,
|
| 13320 |
+
"loss": 1.2914,
|
| 13321 |
+
"step": 1880
|
| 13322 |
+
},
|
| 13323 |
+
{
|
| 13324 |
+
"epoch": 0.35300741296800225,
|
| 13325 |
+
"grad_norm": 7.454222679138184,
|
| 13326 |
+
"learning_rate": 0.00019343053281772882,
|
| 13327 |
+
"loss": 2.7986,
|
| 13328 |
+
"step": 1881
|
| 13329 |
+
},
|
| 13330 |
+
{
|
| 13331 |
+
"epoch": 0.35319508304400865,
|
| 13332 |
+
"grad_norm": 3.519970417022705,
|
| 13333 |
+
"learning_rate": 0.00019342350424267352,
|
| 13334 |
+
"loss": 1.4717,
|
| 13335 |
+
"step": 1882
|
| 13336 |
+
},
|
| 13337 |
+
{
|
| 13338 |
+
"epoch": 0.353382753120015,
|
| 13339 |
+
"grad_norm": 7.176670074462891,
|
| 13340 |
+
"learning_rate": 0.00019341649021953344,
|
| 13341 |
+
"loss": 2.6793,
|
| 13342 |
+
"step": 1883
|
| 13343 |
+
},
|
| 13344 |
+
{
|
| 13345 |
+
"epoch": 0.3535704231960214,
|
| 13346 |
+
"grad_norm": 9.557621002197266,
|
| 13347 |
+
"learning_rate": 0.00019340946164447814,
|
| 13348 |
+
"loss": 3.08,
|
| 13349 |
+
"step": 1884
|
| 13350 |
+
},
|
| 13351 |
+
{
|
| 13352 |
+
"epoch": 0.3537580932720278,
|
| 13353 |
+
"grad_norm": 6.917932510375977,
|
| 13354 |
+
"learning_rate": 0.00019340243306942284,
|
| 13355 |
+
"loss": 2.3398,
|
| 13356 |
+
"step": 1885
|
| 13357 |
+
},
|
| 13358 |
+
{
|
| 13359 |
+
"epoch": 0.3539457633480342,
|
| 13360 |
+
"grad_norm": 4.977582931518555,
|
| 13361 |
+
"learning_rate": 0.0001933953899424523,
|
| 13362 |
+
"loss": 1.6738,
|
| 13363 |
+
"step": 1886
|
| 13364 |
+
},
|
| 13365 |
+
{
|
| 13366 |
+
"epoch": 0.3541334334240405,
|
| 13367 |
+
"grad_norm": 5.958339691162109,
|
| 13368 |
+
"learning_rate": 0.00019338834681548178,
|
| 13369 |
+
"loss": 1.9964,
|
| 13370 |
+
"step": 1887
|
| 13371 |
+
},
|
| 13372 |
+
{
|
| 13373 |
+
"epoch": 0.3543211035000469,
|
| 13374 |
+
"grad_norm": 7.204160213470459,
|
| 13375 |
+
"learning_rate": 0.00019338130368851125,
|
| 13376 |
+
"loss": 2.4598,
|
| 13377 |
+
"step": 1888
|
| 13378 |
+
},
|
| 13379 |
+
{
|
| 13380 |
+
"epoch": 0.3545087735760533,
|
| 13381 |
+
"grad_norm": 7.081577777862549,
|
| 13382 |
+
"learning_rate": 0.00019337426056154072,
|
| 13383 |
+
"loss": 2.799,
|
| 13384 |
+
"step": 1889
|
| 13385 |
+
},
|
| 13386 |
+
{
|
| 13387 |
+
"epoch": 0.35469644365205966,
|
| 13388 |
+
"grad_norm": 6.637358665466309,
|
| 13389 |
+
"learning_rate": 0.0001933672174345702,
|
| 13390 |
+
"loss": 2.0836,
|
| 13391 |
+
"step": 1890
|
| 13392 |
+
},
|
| 13393 |
+
{
|
| 13394 |
+
"epoch": 0.35488411372806605,
|
| 13395 |
+
"grad_norm": 10.060577392578125,
|
| 13396 |
+
"learning_rate": 0.00019336015975568444,
|
| 13397 |
+
"loss": 1.6455,
|
| 13398 |
+
"step": 1891
|
| 13399 |
+
},
|
| 13400 |
+
{
|
| 13401 |
+
"epoch": 0.35507178380407245,
|
| 13402 |
+
"grad_norm": 6.906970024108887,
|
| 13403 |
+
"learning_rate": 0.00019335310207679868,
|
| 13404 |
+
"loss": 1.9957,
|
| 13405 |
+
"step": 1892
|
| 13406 |
+
},
|
| 13407 |
+
{
|
| 13408 |
+
"epoch": 0.35525945388007885,
|
| 13409 |
+
"grad_norm": 7.135964870452881,
|
| 13410 |
+
"learning_rate": 0.0001933460298459977,
|
| 13411 |
+
"loss": 1.2429,
|
| 13412 |
+
"step": 1893
|
| 13413 |
+
},
|
| 13414 |
+
{
|
| 13415 |
+
"epoch": 0.3554471239560852,
|
| 13416 |
+
"grad_norm": 7.937412261962891,
|
| 13417 |
+
"learning_rate": 0.00019333897216711193,
|
| 13418 |
+
"loss": 2.0331,
|
| 13419 |
+
"step": 1894
|
| 13420 |
+
},
|
| 13421 |
+
{
|
| 13422 |
+
"epoch": 0.3556347940320916,
|
| 13423 |
+
"grad_norm": 5.553816318511963,
|
| 13424 |
+
"learning_rate": 0.00019333189993631095,
|
| 13425 |
+
"loss": 1.6946,
|
| 13426 |
+
"step": 1895
|
| 13427 |
+
},
|
| 13428 |
+
{
|
| 13429 |
+
"epoch": 0.355822464108098,
|
| 13430 |
+
"grad_norm": 7.168800354003906,
|
| 13431 |
+
"learning_rate": 0.0001933248422574252,
|
| 13432 |
+
"loss": 2.1942,
|
| 13433 |
+
"step": 1896
|
| 13434 |
+
},
|
| 13435 |
+
{
|
| 13436 |
+
"epoch": 0.3560101341841043,
|
| 13437 |
+
"grad_norm": 7.036073684692383,
|
| 13438 |
+
"learning_rate": 0.00019331775547470897,
|
| 13439 |
+
"loss": 2.5773,
|
| 13440 |
+
"step": 1897
|
| 13441 |
+
},
|
| 13442 |
+
{
|
| 13443 |
+
"epoch": 0.3561978042601107,
|
| 13444 |
+
"grad_norm": 12.46209716796875,
|
| 13445 |
+
"learning_rate": 0.00019331066869199276,
|
| 13446 |
+
"loss": 2.8978,
|
| 13447 |
+
"step": 1898
|
| 13448 |
+
},
|
| 13449 |
+
{
|
| 13450 |
+
"epoch": 0.3563854743361171,
|
| 13451 |
+
"grad_norm": 6.11179780960083,
|
| 13452 |
+
"learning_rate": 0.00019330359646119177,
|
| 13453 |
+
"loss": 2.1355,
|
| 13454 |
+
"step": 1899
|
| 13455 |
+
},
|
| 13456 |
+
{
|
| 13457 |
+
"epoch": 0.3565731444121235,
|
| 13458 |
+
"grad_norm": 5.809276580810547,
|
| 13459 |
+
"learning_rate": 0.00019329650967847556,
|
| 13460 |
+
"loss": 1.7289,
|
| 13461 |
+
"step": 1900
|
| 13462 |
+
},
|
| 13463 |
+
{
|
| 13464 |
+
"epoch": 0.3565731444121235,
|
| 13465 |
+
"eval_loss": 0.5747910141944885,
|
| 13466 |
+
"eval_runtime": 60.9369,
|
| 13467 |
+
"eval_samples_per_second": 3.528,
|
| 13468 |
+
"eval_steps_per_second": 1.772,
|
| 13469 |
+
"step": 1900
|
| 13470 |
}
|
| 13471 |
],
|
| 13472 |
"logging_steps": 1,
|
|
|
|
| 13481 |
"early_stopping_threshold": 0.0
|
| 13482 |
},
|
| 13483 |
"attributes": {
|
| 13484 |
+
"early_stopping_patience_counter": 3
|
| 13485 |
}
|
| 13486 |
},
|
| 13487 |
"TrainerControl": {
|
|
|
|
| 13495 |
"attributes": {}
|
| 13496 |
}
|
| 13497 |
},
|
| 13498 |
+
"total_flos": 3.420104552257413e+17,
|
| 13499 |
"train_batch_size": 2,
|
| 13500 |
"trial_name": null,
|
| 13501 |
"trial_params": null
|