{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 8.012820512820513,
"eval_steps": 500,
"global_step": 2500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02,
"learning_rate": 4.9903846153846154e-05,
"loss": 5.8218,
"step": 6
},
{
"epoch": 0.04,
"learning_rate": 4.980769230769231e-05,
"loss": 5.2499,
"step": 12
},
{
"epoch": 0.06,
"learning_rate": 4.9711538461538465e-05,
"loss": 5.1236,
"step": 18
},
{
"epoch": 0.08,
"learning_rate": 4.961538461538462e-05,
"loss": 4.8363,
"step": 24
},
{
"epoch": 0.1,
"learning_rate": 4.9519230769230776e-05,
"loss": 4.5805,
"step": 30
},
{
"epoch": 0.12,
"learning_rate": 4.942307692307693e-05,
"loss": 4.5276,
"step": 36
},
{
"epoch": 0.13,
"learning_rate": 4.932692307692308e-05,
"loss": 4.3871,
"step": 42
},
{
"epoch": 0.15,
"learning_rate": 4.923076923076924e-05,
"loss": 4.2706,
"step": 48
},
{
"epoch": 0.17,
"learning_rate": 4.9134615384615384e-05,
"loss": 4.0906,
"step": 54
},
{
"epoch": 0.19,
"learning_rate": 4.9038461538461536e-05,
"loss": 4.1704,
"step": 60
},
{
"epoch": 0.21,
"learning_rate": 4.8942307692307695e-05,
"loss": 3.9014,
"step": 66
},
{
"epoch": 0.23,
"learning_rate": 4.884615384615385e-05,
"loss": 4.0338,
"step": 72
},
{
"epoch": 0.25,
"learning_rate": 4.875e-05,
"loss": 4.011,
"step": 78
},
{
"epoch": 0.27,
"learning_rate": 4.865384615384616e-05,
"loss": 3.8017,
"step": 84
},
{
"epoch": 0.29,
"learning_rate": 4.855769230769231e-05,
"loss": 3.819,
"step": 90
},
{
"epoch": 0.31,
"learning_rate": 4.846153846153846e-05,
"loss": 3.8157,
"step": 96
},
{
"epoch": 0.33,
"learning_rate": 4.836538461538462e-05,
"loss": 3.7675,
"step": 102
},
{
"epoch": 0.35,
"learning_rate": 4.826923076923077e-05,
"loss": 3.756,
"step": 108
},
{
"epoch": 0.37,
"learning_rate": 4.8173076923076925e-05,
"loss": 3.7439,
"step": 114
},
{
"epoch": 0.38,
"learning_rate": 4.8076923076923084e-05,
"loss": 3.761,
"step": 120
},
{
"epoch": 0.4,
"learning_rate": 4.7980769230769236e-05,
"loss": 3.7318,
"step": 126
},
{
"epoch": 0.42,
"learning_rate": 4.788461538461539e-05,
"loss": 3.745,
"step": 132
},
{
"epoch": 0.44,
"learning_rate": 4.778846153846154e-05,
"loss": 3.784,
"step": 138
},
{
"epoch": 0.46,
"learning_rate": 4.76923076923077e-05,
"loss": 3.6114,
"step": 144
},
{
"epoch": 0.48,
"learning_rate": 4.7596153846153844e-05,
"loss": 3.637,
"step": 150
},
{
"epoch": 0.5,
"learning_rate": 4.75e-05,
"loss": 3.5881,
"step": 156
},
{
"epoch": 0.52,
"learning_rate": 4.7403846153846155e-05,
"loss": 3.5336,
"step": 162
},
{
"epoch": 0.54,
"learning_rate": 4.730769230769231e-05,
"loss": 3.5313,
"step": 168
},
{
"epoch": 0.56,
"learning_rate": 4.7211538461538465e-05,
"loss": 3.6357,
"step": 174
},
{
"epoch": 0.58,
"learning_rate": 4.711538461538462e-05,
"loss": 3.5818,
"step": 180
},
{
"epoch": 0.6,
"learning_rate": 4.701923076923077e-05,
"loss": 3.5017,
"step": 186
},
{
"epoch": 0.62,
"learning_rate": 4.692307692307693e-05,
"loss": 3.5815,
"step": 192
},
{
"epoch": 0.63,
"learning_rate": 4.682692307692308e-05,
"loss": 3.611,
"step": 198
},
{
"epoch": 0.65,
"learning_rate": 4.673076923076923e-05,
"loss": 3.6096,
"step": 204
},
{
"epoch": 0.67,
"learning_rate": 4.6634615384615384e-05,
"loss": 3.5436,
"step": 210
},
{
"epoch": 0.69,
"learning_rate": 4.653846153846154e-05,
"loss": 3.5258,
"step": 216
},
{
"epoch": 0.71,
"learning_rate": 4.6442307692307695e-05,
"loss": 3.5277,
"step": 222
},
{
"epoch": 0.73,
"learning_rate": 4.634615384615385e-05,
"loss": 3.505,
"step": 228
},
{
"epoch": 0.75,
"learning_rate": 4.6250000000000006e-05,
"loss": 3.4665,
"step": 234
},
{
"epoch": 0.77,
"learning_rate": 4.615384615384616e-05,
"loss": 3.4245,
"step": 240
},
{
"epoch": 0.79,
"learning_rate": 4.605769230769231e-05,
"loss": 3.4547,
"step": 246
},
{
"epoch": 0.81,
"learning_rate": 4.596153846153846e-05,
"loss": 3.4382,
"step": 252
},
{
"epoch": 0.83,
"learning_rate": 4.5865384615384614e-05,
"loss": 3.482,
"step": 258
},
{
"epoch": 0.85,
"learning_rate": 4.576923076923077e-05,
"loss": 3.4795,
"step": 264
},
{
"epoch": 0.87,
"learning_rate": 4.5673076923076925e-05,
"loss": 3.5018,
"step": 270
},
{
"epoch": 0.88,
"learning_rate": 4.557692307692308e-05,
"loss": 3.448,
"step": 276
},
{
"epoch": 0.9,
"learning_rate": 4.548076923076923e-05,
"loss": 3.4206,
"step": 282
},
{
"epoch": 0.92,
"learning_rate": 4.538461538461539e-05,
"loss": 3.528,
"step": 288
},
{
"epoch": 0.94,
"learning_rate": 4.528846153846154e-05,
"loss": 3.475,
"step": 294
},
{
"epoch": 0.96,
"learning_rate": 4.519230769230769e-05,
"loss": 3.4142,
"step": 300
},
{
"epoch": 0.98,
"learning_rate": 4.509615384615385e-05,
"loss": 3.4156,
"step": 306
},
{
"epoch": 1.0,
"learning_rate": 4.5e-05,
"loss": 3.3274,
"step": 312
},
{
"epoch": 1.02,
"learning_rate": 4.4903846153846155e-05,
"loss": 3.3594,
"step": 318
},
{
"epoch": 1.04,
"learning_rate": 4.4807692307692314e-05,
"loss": 3.2517,
"step": 324
},
{
"epoch": 1.06,
"learning_rate": 4.4711538461538466e-05,
"loss": 3.2934,
"step": 330
},
{
"epoch": 1.08,
"learning_rate": 4.461538461538462e-05,
"loss": 3.2986,
"step": 336
},
{
"epoch": 1.1,
"learning_rate": 4.451923076923077e-05,
"loss": 3.2655,
"step": 342
},
{
"epoch": 1.12,
"learning_rate": 4.442307692307692e-05,
"loss": 3.4058,
"step": 348
},
{
"epoch": 1.13,
"learning_rate": 4.4326923076923074e-05,
"loss": 3.265,
"step": 354
},
{
"epoch": 1.15,
"learning_rate": 4.423076923076923e-05,
"loss": 3.3208,
"step": 360
},
{
"epoch": 1.17,
"learning_rate": 4.4134615384615385e-05,
"loss": 3.238,
"step": 366
},
{
"epoch": 1.19,
"learning_rate": 4.403846153846154e-05,
"loss": 3.2153,
"step": 372
},
{
"epoch": 1.21,
"learning_rate": 4.3942307692307695e-05,
"loss": 3.264,
"step": 378
},
{
"epoch": 1.23,
"learning_rate": 4.384615384615385e-05,
"loss": 3.2911,
"step": 384
},
{
"epoch": 1.25,
"learning_rate": 4.375e-05,
"loss": 3.3027,
"step": 390
},
{
"epoch": 1.27,
"learning_rate": 4.365384615384616e-05,
"loss": 3.2589,
"step": 396
},
{
"epoch": 1.29,
"learning_rate": 4.355769230769231e-05,
"loss": 3.3683,
"step": 402
},
{
"epoch": 1.31,
"learning_rate": 4.346153846153846e-05,
"loss": 3.2849,
"step": 408
},
{
"epoch": 1.33,
"learning_rate": 4.336538461538462e-05,
"loss": 3.2397,
"step": 414
},
{
"epoch": 1.35,
"learning_rate": 4.326923076923077e-05,
"loss": 3.2128,
"step": 420
},
{
"epoch": 1.37,
"learning_rate": 4.3173076923076925e-05,
"loss": 3.1944,
"step": 426
},
{
"epoch": 1.38,
"learning_rate": 4.3076923076923084e-05,
"loss": 3.1837,
"step": 432
},
{
"epoch": 1.4,
"learning_rate": 4.2980769230769236e-05,
"loss": 3.1793,
"step": 438
},
{
"epoch": 1.42,
"learning_rate": 4.288461538461538e-05,
"loss": 3.1447,
"step": 444
},
{
"epoch": 1.44,
"learning_rate": 4.278846153846154e-05,
"loss": 3.1028,
"step": 450
},
{
"epoch": 1.46,
"learning_rate": 4.269230769230769e-05,
"loss": 3.2471,
"step": 456
},
{
"epoch": 1.48,
"learning_rate": 4.2596153846153844e-05,
"loss": 3.1855,
"step": 462
},
{
"epoch": 1.5,
"learning_rate": 4.25e-05,
"loss": 3.1817,
"step": 468
},
{
"epoch": 1.52,
"learning_rate": 4.2403846153846155e-05,
"loss": 3.214,
"step": 474
},
{
"epoch": 1.54,
"learning_rate": 4.230769230769231e-05,
"loss": 2.997,
"step": 480
},
{
"epoch": 1.56,
"learning_rate": 4.2211538461538466e-05,
"loss": 3.2059,
"step": 486
},
{
"epoch": 1.58,
"learning_rate": 4.211538461538462e-05,
"loss": 3.1517,
"step": 492
},
{
"epoch": 1.6,
"learning_rate": 4.201923076923077e-05,
"loss": 3.2236,
"step": 498
},
{
"epoch": 1.62,
"learning_rate": 4.192307692307693e-05,
"loss": 3.1939,
"step": 504
},
{
"epoch": 1.63,
"learning_rate": 4.182692307692308e-05,
"loss": 3.1143,
"step": 510
},
{
"epoch": 1.65,
"learning_rate": 4.173076923076923e-05,
"loss": 3.2261,
"step": 516
},
{
"epoch": 1.67,
"learning_rate": 4.163461538461539e-05,
"loss": 3.2095,
"step": 522
},
{
"epoch": 1.69,
"learning_rate": 4.1538461538461544e-05,
"loss": 3.2498,
"step": 528
},
{
"epoch": 1.71,
"learning_rate": 4.1442307692307696e-05,
"loss": 3.1493,
"step": 534
},
{
"epoch": 1.73,
"learning_rate": 4.134615384615385e-05,
"loss": 3.0919,
"step": 540
},
{
"epoch": 1.75,
"learning_rate": 4.125e-05,
"loss": 3.2617,
"step": 546
},
{
"epoch": 1.77,
"learning_rate": 4.115384615384615e-05,
"loss": 3.1447,
"step": 552
},
{
"epoch": 1.79,
"learning_rate": 4.105769230769231e-05,
"loss": 3.2025,
"step": 558
},
{
"epoch": 1.81,
"learning_rate": 4.096153846153846e-05,
"loss": 3.1829,
"step": 564
},
{
"epoch": 1.83,
"learning_rate": 4.0865384615384615e-05,
"loss": 3.2216,
"step": 570
},
{
"epoch": 1.85,
"learning_rate": 4.0769230769230773e-05,
"loss": 3.1391,
"step": 576
},
{
"epoch": 1.87,
"learning_rate": 4.0673076923076926e-05,
"loss": 3.1344,
"step": 582
},
{
"epoch": 1.88,
"learning_rate": 4.057692307692308e-05,
"loss": 3.1699,
"step": 588
},
{
"epoch": 1.9,
"learning_rate": 4.0480769230769236e-05,
"loss": 3.0138,
"step": 594
},
{
"epoch": 1.92,
"learning_rate": 4.038461538461539e-05,
"loss": 3.1157,
"step": 600
},
{
"epoch": 1.94,
"learning_rate": 4.028846153846154e-05,
"loss": 3.1173,
"step": 606
},
{
"epoch": 1.96,
"learning_rate": 4.019230769230769e-05,
"loss": 3.2019,
"step": 612
},
{
"epoch": 1.98,
"learning_rate": 4.009615384615385e-05,
"loss": 3.0801,
"step": 618
},
{
"epoch": 2.0,
"learning_rate": 4e-05,
"loss": 3.0888,
"step": 624
},
{
"epoch": 2.02,
"learning_rate": 3.9903846153846155e-05,
"loss": 3.0332,
"step": 630
},
{
"epoch": 2.04,
"learning_rate": 3.980769230769231e-05,
"loss": 2.9418,
"step": 636
},
{
"epoch": 2.06,
"learning_rate": 3.971153846153846e-05,
"loss": 2.933,
"step": 642
},
{
"epoch": 2.08,
"learning_rate": 3.961538461538462e-05,
"loss": 3.0467,
"step": 648
},
{
"epoch": 2.1,
"learning_rate": 3.951923076923077e-05,
"loss": 3.0991,
"step": 654
},
{
"epoch": 2.12,
"learning_rate": 3.942307692307692e-05,
"loss": 2.913,
"step": 660
},
{
"epoch": 2.13,
"learning_rate": 3.932692307692308e-05,
"loss": 3.0531,
"step": 666
},
{
"epoch": 2.15,
"learning_rate": 3.923076923076923e-05,
"loss": 2.9838,
"step": 672
},
{
"epoch": 2.17,
"learning_rate": 3.9134615384615385e-05,
"loss": 2.9406,
"step": 678
},
{
"epoch": 2.19,
"learning_rate": 3.903846153846154e-05,
"loss": 3.046,
"step": 684
},
{
"epoch": 2.21,
"learning_rate": 3.8942307692307696e-05,
"loss": 3.006,
"step": 690
},
{
"epoch": 2.23,
"learning_rate": 3.884615384615385e-05,
"loss": 2.9774,
"step": 696
},
{
"epoch": 2.25,
"learning_rate": 3.875e-05,
"loss": 2.9937,
"step": 702
},
{
"epoch": 2.27,
"learning_rate": 3.865384615384616e-05,
"loss": 2.9737,
"step": 708
},
{
"epoch": 2.29,
"learning_rate": 3.855769230769231e-05,
"loss": 2.9772,
"step": 714
},
{
"epoch": 2.31,
"learning_rate": 3.846153846153846e-05,
"loss": 2.8405,
"step": 720
},
{
"epoch": 2.33,
"learning_rate": 3.836538461538462e-05,
"loss": 3.0856,
"step": 726
},
{
"epoch": 2.35,
"learning_rate": 3.826923076923077e-05,
"loss": 2.9947,
"step": 732
},
{
"epoch": 2.37,
"learning_rate": 3.8173076923076926e-05,
"loss": 2.9251,
"step": 738
},
{
"epoch": 2.38,
"learning_rate": 3.807692307692308e-05,
"loss": 2.9613,
"step": 744
},
{
"epoch": 2.4,
"learning_rate": 3.798076923076923e-05,
"loss": 2.9402,
"step": 750
},
{
"epoch": 2.42,
"learning_rate": 3.788461538461538e-05,
"loss": 2.8823,
"step": 756
},
{
"epoch": 2.44,
"learning_rate": 3.778846153846154e-05,
"loss": 2.8906,
"step": 762
},
{
"epoch": 2.46,
"learning_rate": 3.769230769230769e-05,
"loss": 2.9475,
"step": 768
},
{
"epoch": 2.48,
"learning_rate": 3.7596153846153845e-05,
"loss": 2.9999,
"step": 774
},
{
"epoch": 2.5,
"learning_rate": 3.7500000000000003e-05,
"loss": 2.9503,
"step": 780
},
{
"epoch": 2.52,
"learning_rate": 3.7403846153846156e-05,
"loss": 3.0112,
"step": 786
},
{
"epoch": 2.54,
"learning_rate": 3.730769230769231e-05,
"loss": 3.0096,
"step": 792
},
{
"epoch": 2.56,
"learning_rate": 3.7211538461538466e-05,
"loss": 2.9804,
"step": 798
},
{
"epoch": 2.58,
"learning_rate": 3.711538461538462e-05,
"loss": 2.9203,
"step": 804
},
{
"epoch": 2.6,
"learning_rate": 3.701923076923077e-05,
"loss": 2.939,
"step": 810
},
{
"epoch": 2.62,
"learning_rate": 3.692307692307693e-05,
"loss": 2.8455,
"step": 816
},
{
"epoch": 2.63,
"learning_rate": 3.682692307692308e-05,
"loss": 2.9651,
"step": 822
},
{
"epoch": 2.65,
"learning_rate": 3.673076923076923e-05,
"loss": 2.9528,
"step": 828
},
{
"epoch": 2.67,
"learning_rate": 3.6634615384615385e-05,
"loss": 2.8042,
"step": 834
},
{
"epoch": 2.69,
"learning_rate": 3.653846153846154e-05,
"loss": 2.8311,
"step": 840
},
{
"epoch": 2.71,
"learning_rate": 3.644230769230769e-05,
"loss": 2.8888,
"step": 846
},
{
"epoch": 2.73,
"learning_rate": 3.634615384615385e-05,
"loss": 2.9151,
"step": 852
},
{
"epoch": 2.75,
"learning_rate": 3.625e-05,
"loss": 2.9463,
"step": 858
},
{
"epoch": 2.77,
"learning_rate": 3.615384615384615e-05,
"loss": 2.957,
"step": 864
},
{
"epoch": 2.79,
"learning_rate": 3.605769230769231e-05,
"loss": 2.9473,
"step": 870
},
{
"epoch": 2.81,
"learning_rate": 3.596153846153846e-05,
"loss": 2.9994,
"step": 876
},
{
"epoch": 2.83,
"learning_rate": 3.5865384615384615e-05,
"loss": 3.0486,
"step": 882
},
{
"epoch": 2.85,
"learning_rate": 3.5769230769230774e-05,
"loss": 2.9487,
"step": 888
},
{
"epoch": 2.87,
"learning_rate": 3.5673076923076926e-05,
"loss": 3.0173,
"step": 894
},
{
"epoch": 2.88,
"learning_rate": 3.557692307692308e-05,
"loss": 2.8656,
"step": 900
},
{
"epoch": 2.9,
"learning_rate": 3.548076923076924e-05,
"loss": 2.8834,
"step": 906
},
{
"epoch": 2.92,
"learning_rate": 3.538461538461539e-05,
"loss": 2.9829,
"step": 912
},
{
"epoch": 2.94,
"learning_rate": 3.528846153846154e-05,
"loss": 3.001,
"step": 918
},
{
"epoch": 2.96,
"learning_rate": 3.51923076923077e-05,
"loss": 2.9618,
"step": 924
},
{
"epoch": 2.98,
"learning_rate": 3.5096153846153845e-05,
"loss": 2.7964,
"step": 930
},
{
"epoch": 3.0,
"learning_rate": 3.5e-05,
"loss": 2.89,
"step": 936
},
{
"epoch": 3.02,
"learning_rate": 3.4903846153846156e-05,
"loss": 2.7191,
"step": 942
},
{
"epoch": 3.04,
"learning_rate": 3.480769230769231e-05,
"loss": 2.7875,
"step": 948
},
{
"epoch": 3.06,
"learning_rate": 3.471153846153846e-05,
"loss": 2.8498,
"step": 954
},
{
"epoch": 3.08,
"learning_rate": 3.461538461538462e-05,
"loss": 2.7858,
"step": 960
},
{
"epoch": 3.1,
"learning_rate": 3.451923076923077e-05,
"loss": 2.7435,
"step": 966
},
{
"epoch": 3.12,
"learning_rate": 3.442307692307692e-05,
"loss": 2.8449,
"step": 972
},
{
"epoch": 3.13,
"learning_rate": 3.432692307692308e-05,
"loss": 2.7817,
"step": 978
},
{
"epoch": 3.15,
"learning_rate": 3.4230769230769234e-05,
"loss": 2.8797,
"step": 984
},
{
"epoch": 3.17,
"learning_rate": 3.4134615384615386e-05,
"loss": 2.7719,
"step": 990
},
{
"epoch": 3.19,
"learning_rate": 3.4038461538461544e-05,
"loss": 2.7919,
"step": 996
},
{
"epoch": 3.21,
"learning_rate": 3.3942307692307696e-05,
"loss": 2.8072,
"step": 1002
},
{
"epoch": 3.23,
"learning_rate": 3.384615384615385e-05,
"loss": 2.7281,
"step": 1008
},
{
"epoch": 3.25,
"learning_rate": 3.375000000000001e-05,
"loss": 2.8431,
"step": 1014
},
{
"epoch": 3.27,
"learning_rate": 3.365384615384616e-05,
"loss": 2.7154,
"step": 1020
},
{
"epoch": 3.29,
"learning_rate": 3.3557692307692304e-05,
"loss": 2.83,
"step": 1026
},
{
"epoch": 3.31,
"learning_rate": 3.346153846153846e-05,
"loss": 2.8434,
"step": 1032
},
{
"epoch": 3.33,
"learning_rate": 3.3365384615384615e-05,
"loss": 2.8399,
"step": 1038
},
{
"epoch": 3.35,
"learning_rate": 3.326923076923077e-05,
"loss": 2.7072,
"step": 1044
},
{
"epoch": 3.37,
"learning_rate": 3.3173076923076926e-05,
"loss": 2.773,
"step": 1050
},
{
"epoch": 3.38,
"learning_rate": 3.307692307692308e-05,
"loss": 2.822,
"step": 1056
},
{
"epoch": 3.4,
"learning_rate": 3.298076923076923e-05,
"loss": 2.9056,
"step": 1062
},
{
"epoch": 3.42,
"learning_rate": 3.288461538461539e-05,
"loss": 2.749,
"step": 1068
},
{
"epoch": 3.44,
"learning_rate": 3.278846153846154e-05,
"loss": 2.7579,
"step": 1074
},
{
"epoch": 3.46,
"learning_rate": 3.269230769230769e-05,
"loss": 2.7444,
"step": 1080
},
{
"epoch": 3.48,
"learning_rate": 3.2596153846153845e-05,
"loss": 2.818,
"step": 1086
},
{
"epoch": 3.5,
"learning_rate": 3.2500000000000004e-05,
"loss": 2.6285,
"step": 1092
},
{
"epoch": 3.52,
"learning_rate": 3.2403846153846156e-05,
"loss": 2.6473,
"step": 1098
},
{
"epoch": 3.54,
"learning_rate": 3.230769230769231e-05,
"loss": 2.7093,
"step": 1104
},
{
"epoch": 3.56,
"learning_rate": 3.221153846153847e-05,
"loss": 2.732,
"step": 1110
},
{
"epoch": 3.58,
"learning_rate": 3.211538461538462e-05,
"loss": 2.7064,
"step": 1116
},
{
"epoch": 3.6,
"learning_rate": 3.201923076923077e-05,
"loss": 2.9072,
"step": 1122
},
{
"epoch": 3.62,
"learning_rate": 3.192307692307692e-05,
"loss": 2.7544,
"step": 1128
},
{
"epoch": 3.63,
"learning_rate": 3.1826923076923075e-05,
"loss": 2.738,
"step": 1134
},
{
"epoch": 3.65,
"learning_rate": 3.1730769230769234e-05,
"loss": 2.6721,
"step": 1140
},
{
"epoch": 3.67,
"learning_rate": 3.1634615384615386e-05,
"loss": 2.8218,
"step": 1146
},
{
"epoch": 3.69,
"learning_rate": 3.153846153846154e-05,
"loss": 2.7553,
"step": 1152
},
{
"epoch": 3.71,
"learning_rate": 3.144230769230769e-05,
"loss": 2.8186,
"step": 1158
},
{
"epoch": 3.73,
"learning_rate": 3.134615384615385e-05,
"loss": 2.7511,
"step": 1164
},
{
"epoch": 3.75,
"learning_rate": 3.125e-05,
"loss": 2.6013,
"step": 1170
},
{
"epoch": 3.77,
"learning_rate": 3.115384615384615e-05,
"loss": 2.7191,
"step": 1176
},
{
"epoch": 3.79,
"learning_rate": 3.105769230769231e-05,
"loss": 2.7695,
"step": 1182
},
{
"epoch": 3.81,
"learning_rate": 3.0961538461538464e-05,
"loss": 2.738,
"step": 1188
},
{
"epoch": 3.83,
"learning_rate": 3.0865384615384616e-05,
"loss": 2.7202,
"step": 1194
},
{
"epoch": 3.85,
"learning_rate": 3.0769230769230774e-05,
"loss": 2.6429,
"step": 1200
},
{
"epoch": 3.87,
"learning_rate": 3.0673076923076926e-05,
"loss": 2.6742,
"step": 1206
},
{
"epoch": 3.88,
"learning_rate": 3.057692307692308e-05,
"loss": 2.7048,
"step": 1212
},
{
"epoch": 3.9,
"learning_rate": 3.0480769230769234e-05,
"loss": 2.7016,
"step": 1218
},
{
"epoch": 3.92,
"learning_rate": 3.0384615384615382e-05,
"loss": 2.7375,
"step": 1224
},
{
"epoch": 3.94,
"learning_rate": 3.0288461538461538e-05,
"loss": 2.7926,
"step": 1230
},
{
"epoch": 3.96,
"learning_rate": 3.0192307692307693e-05,
"loss": 2.7038,
"step": 1236
},
{
"epoch": 3.98,
"learning_rate": 3.0096153846153845e-05,
"loss": 2.8294,
"step": 1242
},
{
"epoch": 4.0,
"learning_rate": 3e-05,
"loss": 2.7543,
"step": 1248
},
{
"epoch": 4.02,
"learning_rate": 2.9903846153846156e-05,
"loss": 2.6022,
"step": 1254
},
{
"epoch": 4.04,
"learning_rate": 2.9807692307692308e-05,
"loss": 2.6609,
"step": 1260
},
{
"epoch": 4.06,
"learning_rate": 2.9711538461538464e-05,
"loss": 2.6091,
"step": 1266
},
{
"epoch": 4.08,
"learning_rate": 2.9615384615384616e-05,
"loss": 2.5386,
"step": 1272
},
{
"epoch": 4.1,
"learning_rate": 2.951923076923077e-05,
"loss": 2.6655,
"step": 1278
},
{
"epoch": 4.12,
"learning_rate": 2.9423076923076926e-05,
"loss": 2.6023,
"step": 1284
},
{
"epoch": 4.13,
"learning_rate": 2.932692307692308e-05,
"loss": 2.6491,
"step": 1290
},
{
"epoch": 4.15,
"learning_rate": 2.9230769230769234e-05,
"loss": 2.6548,
"step": 1296
},
{
"epoch": 4.17,
"learning_rate": 2.913461538461539e-05,
"loss": 2.584,
"step": 1302
},
{
"epoch": 4.19,
"learning_rate": 2.903846153846154e-05,
"loss": 2.5447,
"step": 1308
},
{
"epoch": 4.21,
"learning_rate": 2.8942307692307697e-05,
"loss": 2.5931,
"step": 1314
},
{
"epoch": 4.23,
"learning_rate": 2.8846153846153845e-05,
"loss": 2.503,
"step": 1320
},
{
"epoch": 4.25,
"learning_rate": 2.8749999999999997e-05,
"loss": 2.6097,
"step": 1326
},
{
"epoch": 4.27,
"learning_rate": 2.8653846153846153e-05,
"loss": 2.6926,
"step": 1332
},
{
"epoch": 4.29,
"learning_rate": 2.855769230769231e-05,
"loss": 2.5829,
"step": 1338
},
{
"epoch": 4.31,
"learning_rate": 2.846153846153846e-05,
"loss": 2.6712,
"step": 1344
},
{
"epoch": 4.33,
"learning_rate": 2.8365384615384616e-05,
"loss": 2.3988,
"step": 1350
},
{
"epoch": 4.35,
"learning_rate": 2.826923076923077e-05,
"loss": 2.5714,
"step": 1356
},
{
"epoch": 4.37,
"learning_rate": 2.8173076923076923e-05,
"loss": 2.6627,
"step": 1362
},
{
"epoch": 4.38,
"learning_rate": 2.807692307692308e-05,
"loss": 2.7195,
"step": 1368
},
{
"epoch": 4.4,
"learning_rate": 2.7980769230769234e-05,
"loss": 2.7181,
"step": 1374
},
{
"epoch": 4.42,
"learning_rate": 2.7884615384615386e-05,
"loss": 2.626,
"step": 1380
},
{
"epoch": 4.44,
"learning_rate": 2.778846153846154e-05,
"loss": 2.638,
"step": 1386
},
{
"epoch": 4.46,
"learning_rate": 2.7692307692307694e-05,
"loss": 2.6275,
"step": 1392
},
{
"epoch": 4.48,
"learning_rate": 2.759615384615385e-05,
"loss": 2.6399,
"step": 1398
},
{
"epoch": 4.5,
"learning_rate": 2.7500000000000004e-05,
"loss": 2.6894,
"step": 1404
},
{
"epoch": 4.52,
"learning_rate": 2.7403846153846156e-05,
"loss": 2.5917,
"step": 1410
},
{
"epoch": 4.54,
"learning_rate": 2.7307692307692305e-05,
"loss": 2.5564,
"step": 1416
},
{
"epoch": 4.56,
"learning_rate": 2.721153846153846e-05,
"loss": 2.6134,
"step": 1422
},
{
"epoch": 4.58,
"learning_rate": 2.7115384615384616e-05,
"loss": 2.6375,
"step": 1428
},
{
"epoch": 4.6,
"learning_rate": 2.7019230769230768e-05,
"loss": 2.6301,
"step": 1434
},
{
"epoch": 4.62,
"learning_rate": 2.6923076923076923e-05,
"loss": 2.5679,
"step": 1440
},
{
"epoch": 4.63,
"learning_rate": 2.682692307692308e-05,
"loss": 2.6704,
"step": 1446
},
{
"epoch": 4.65,
"learning_rate": 2.673076923076923e-05,
"loss": 2.6778,
"step": 1452
},
{
"epoch": 4.67,
"learning_rate": 2.6634615384615386e-05,
"loss": 2.5626,
"step": 1458
},
{
"epoch": 4.69,
"learning_rate": 2.6538461538461538e-05,
"loss": 2.5557,
"step": 1464
},
{
"epoch": 4.71,
"learning_rate": 2.6442307692307694e-05,
"loss": 2.5245,
"step": 1470
},
{
"epoch": 4.73,
"learning_rate": 2.634615384615385e-05,
"loss": 2.5189,
"step": 1476
},
{
"epoch": 4.75,
"learning_rate": 2.625e-05,
"loss": 2.4994,
"step": 1482
},
{
"epoch": 4.77,
"learning_rate": 2.6153846153846157e-05,
"loss": 2.472,
"step": 1488
},
{
"epoch": 4.79,
"learning_rate": 2.6057692307692312e-05,
"loss": 2.5451,
"step": 1494
},
{
"epoch": 4.81,
"learning_rate": 2.5961538461538464e-05,
"loss": 2.5559,
"step": 1500
},
{
"epoch": 4.83,
"learning_rate": 2.586538461538462e-05,
"loss": 2.5902,
"step": 1506
},
{
"epoch": 4.85,
"learning_rate": 2.5769230769230768e-05,
"loss": 2.6841,
"step": 1512
},
{
"epoch": 4.87,
"learning_rate": 2.5673076923076923e-05,
"loss": 2.6092,
"step": 1518
},
{
"epoch": 4.88,
"learning_rate": 2.5576923076923075e-05,
"loss": 2.5646,
"step": 1524
},
{
"epoch": 4.9,
"learning_rate": 2.548076923076923e-05,
"loss": 2.5201,
"step": 1530
},
{
"epoch": 4.92,
"learning_rate": 2.5384615384615383e-05,
"loss": 2.5622,
"step": 1536
},
{
"epoch": 4.94,
"learning_rate": 2.528846153846154e-05,
"loss": 2.6323,
"step": 1542
},
{
"epoch": 4.96,
"learning_rate": 2.5192307692307694e-05,
"loss": 2.7179,
"step": 1548
},
{
"epoch": 4.98,
"learning_rate": 2.5096153846153846e-05,
"loss": 2.6441,
"step": 1554
},
{
"epoch": 5.0,
"learning_rate": 2.5e-05,
"loss": 2.668,
"step": 1560
},
{
"epoch": 5.02,
"learning_rate": 2.4903846153846157e-05,
"loss": 2.5535,
"step": 1566
},
{
"epoch": 5.04,
"learning_rate": 2.480769230769231e-05,
"loss": 2.4569,
"step": 1572
},
{
"epoch": 5.06,
"learning_rate": 2.4711538461538464e-05,
"loss": 2.5378,
"step": 1578
},
{
"epoch": 5.08,
"learning_rate": 2.461538461538462e-05,
"loss": 2.537,
"step": 1584
},
{
"epoch": 5.1,
"learning_rate": 2.4519230769230768e-05,
"loss": 2.4913,
"step": 1590
},
{
"epoch": 5.12,
"learning_rate": 2.4423076923076924e-05,
"loss": 2.5156,
"step": 1596
},
{
"epoch": 5.13,
"learning_rate": 2.432692307692308e-05,
"loss": 2.493,
"step": 1602
},
{
"epoch": 5.15,
"learning_rate": 2.423076923076923e-05,
"loss": 2.5357,
"step": 1608
},
{
"epoch": 5.17,
"learning_rate": 2.4134615384615386e-05,
"loss": 2.4508,
"step": 1614
},
{
"epoch": 5.19,
"learning_rate": 2.4038461538461542e-05,
"loss": 2.5045,
"step": 1620
},
{
"epoch": 5.21,
"learning_rate": 2.3942307692307694e-05,
"loss": 2.4617,
"step": 1626
},
{
"epoch": 5.23,
"learning_rate": 2.384615384615385e-05,
"loss": 2.5392,
"step": 1632
},
{
"epoch": 5.25,
"learning_rate": 2.375e-05,
"loss": 2.3976,
"step": 1638
},
{
"epoch": 5.27,
"learning_rate": 2.3653846153846153e-05,
"loss": 2.5288,
"step": 1644
},
{
"epoch": 5.29,
"learning_rate": 2.355769230769231e-05,
"loss": 2.5243,
"step": 1650
},
{
"epoch": 5.31,
"learning_rate": 2.3461538461538464e-05,
"loss": 2.5018,
"step": 1656
},
{
"epoch": 5.33,
"learning_rate": 2.3365384615384616e-05,
"loss": 2.5947,
"step": 1662
},
{
"epoch": 5.35,
"learning_rate": 2.326923076923077e-05,
"loss": 2.5226,
"step": 1668
},
{
"epoch": 5.37,
"learning_rate": 2.3173076923076924e-05,
"loss": 2.4687,
"step": 1674
},
{
"epoch": 5.38,
"learning_rate": 2.307692307692308e-05,
"loss": 2.5297,
"step": 1680
},
{
"epoch": 5.4,
"learning_rate": 2.298076923076923e-05,
"loss": 2.5457,
"step": 1686
},
{
"epoch": 5.42,
"learning_rate": 2.2884615384615387e-05,
"loss": 2.4218,
"step": 1692
},
{
"epoch": 5.44,
"learning_rate": 2.278846153846154e-05,
"loss": 2.5219,
"step": 1698
},
{
"epoch": 5.46,
"learning_rate": 2.2692307692307694e-05,
"loss": 2.4858,
"step": 1704
},
{
"epoch": 5.48,
"learning_rate": 2.2596153846153846e-05,
"loss": 2.4715,
"step": 1710
},
{
"epoch": 5.5,
"learning_rate": 2.25e-05,
"loss": 2.3294,
"step": 1716
},
{
"epoch": 5.52,
"learning_rate": 2.2403846153846157e-05,
"loss": 2.5062,
"step": 1722
},
{
"epoch": 5.54,
"learning_rate": 2.230769230769231e-05,
"loss": 2.392,
"step": 1728
},
{
"epoch": 5.56,
"learning_rate": 2.221153846153846e-05,
"loss": 2.4092,
"step": 1734
},
{
"epoch": 5.58,
"learning_rate": 2.2115384615384616e-05,
"loss": 2.4257,
"step": 1740
},
{
"epoch": 5.6,
"learning_rate": 2.201923076923077e-05,
"loss": 2.4426,
"step": 1746
},
{
"epoch": 5.62,
"learning_rate": 2.1923076923076924e-05,
"loss": 2.4672,
"step": 1752
},
{
"epoch": 5.63,
"learning_rate": 2.182692307692308e-05,
"loss": 2.5468,
"step": 1758
},
{
"epoch": 5.65,
"learning_rate": 2.173076923076923e-05,
"loss": 2.3917,
"step": 1764
},
{
"epoch": 5.67,
"learning_rate": 2.1634615384615387e-05,
"loss": 2.3895,
"step": 1770
},
{
"epoch": 5.69,
"learning_rate": 2.1538461538461542e-05,
"loss": 2.5246,
"step": 1776
},
{
"epoch": 5.71,
"learning_rate": 2.144230769230769e-05,
"loss": 2.5025,
"step": 1782
},
{
"epoch": 5.73,
"learning_rate": 2.1346153846153846e-05,
"loss": 2.4898,
"step": 1788
},
{
"epoch": 5.75,
"learning_rate": 2.125e-05,
"loss": 2.5358,
"step": 1794
},
{
"epoch": 5.77,
"learning_rate": 2.1153846153846154e-05,
"loss": 2.5346,
"step": 1800
},
{
"epoch": 5.79,
"learning_rate": 2.105769230769231e-05,
"loss": 2.3057,
"step": 1806
},
{
"epoch": 5.81,
"learning_rate": 2.0961538461538464e-05,
"loss": 2.5392,
"step": 1812
},
{
"epoch": 5.83,
"learning_rate": 2.0865384615384616e-05,
"loss": 2.5815,
"step": 1818
},
{
"epoch": 5.85,
"learning_rate": 2.0769230769230772e-05,
"loss": 2.2718,
"step": 1824
},
{
"epoch": 5.87,
"learning_rate": 2.0673076923076924e-05,
"loss": 2.474,
"step": 1830
},
{
"epoch": 5.88,
"learning_rate": 2.0576923076923076e-05,
"loss": 2.4821,
"step": 1836
},
{
"epoch": 5.9,
"learning_rate": 2.048076923076923e-05,
"loss": 2.5334,
"step": 1842
},
{
"epoch": 5.92,
"learning_rate": 2.0384615384615387e-05,
"loss": 2.3877,
"step": 1848
},
{
"epoch": 5.94,
"learning_rate": 2.028846153846154e-05,
"loss": 2.3301,
"step": 1854
},
{
"epoch": 5.96,
"learning_rate": 2.0192307692307694e-05,
"loss": 2.545,
"step": 1860
},
{
"epoch": 5.98,
"learning_rate": 2.0096153846153846e-05,
"loss": 2.3865,
"step": 1866
},
{
"epoch": 6.0,
"learning_rate": 2e-05,
"loss": 2.4531,
"step": 1872
},
{
"epoch": 6.02,
"learning_rate": 1.9903846153846154e-05,
"loss": 2.4046,
"step": 1878
},
{
"epoch": 6.04,
"learning_rate": 1.980769230769231e-05,
"loss": 2.476,
"step": 1884
},
{
"epoch": 6.06,
"learning_rate": 1.971153846153846e-05,
"loss": 2.3547,
"step": 1890
},
{
"epoch": 6.08,
"learning_rate": 1.9615384615384617e-05,
"loss": 2.5396,
"step": 1896
},
{
"epoch": 6.1,
"learning_rate": 1.951923076923077e-05,
"loss": 2.3868,
"step": 1902
},
{
"epoch": 6.12,
"learning_rate": 1.9423076923076924e-05,
"loss": 2.2733,
"step": 1908
},
{
"epoch": 6.13,
"learning_rate": 1.932692307692308e-05,
"loss": 2.283,
"step": 1914
},
{
"epoch": 6.15,
"learning_rate": 1.923076923076923e-05,
"loss": 2.4231,
"step": 1920
},
{
"epoch": 6.17,
"learning_rate": 1.9134615384615383e-05,
"loss": 2.375,
"step": 1926
},
{
"epoch": 6.19,
"learning_rate": 1.903846153846154e-05,
"loss": 2.3824,
"step": 1932
},
{
"epoch": 6.21,
"learning_rate": 1.894230769230769e-05,
"loss": 2.3903,
"step": 1938
},
{
"epoch": 6.23,
"learning_rate": 1.8846153846153846e-05,
"loss": 2.2854,
"step": 1944
},
{
"epoch": 6.25,
"learning_rate": 1.8750000000000002e-05,
"loss": 2.2358,
"step": 1950
},
{
"epoch": 6.27,
"learning_rate": 1.8653846153846154e-05,
"loss": 2.4751,
"step": 1956
},
{
"epoch": 6.29,
"learning_rate": 1.855769230769231e-05,
"loss": 2.3434,
"step": 1962
},
{
"epoch": 6.31,
"learning_rate": 1.8461538461538465e-05,
"loss": 2.4088,
"step": 1968
},
{
"epoch": 6.33,
"learning_rate": 1.8365384615384617e-05,
"loss": 2.2746,
"step": 1974
},
{
"epoch": 6.35,
"learning_rate": 1.826923076923077e-05,
"loss": 2.4396,
"step": 1980
},
{
"epoch": 6.37,
"learning_rate": 1.8173076923076924e-05,
"loss": 2.3663,
"step": 1986
},
{
"epoch": 6.38,
"learning_rate": 1.8076923076923076e-05,
"loss": 2.4488,
"step": 1992
},
{
"epoch": 6.4,
"learning_rate": 1.798076923076923e-05,
"loss": 2.3495,
"step": 1998
},
{
"epoch": 6.42,
"learning_rate": 1.7884615384615387e-05,
"loss": 2.4099,
"step": 2004
},
{
"epoch": 6.44,
"learning_rate": 1.778846153846154e-05,
"loss": 2.4343,
"step": 2010
},
{
"epoch": 6.46,
"learning_rate": 1.7692307692307694e-05,
"loss": 2.3508,
"step": 2016
},
{
"epoch": 6.48,
"learning_rate": 1.759615384615385e-05,
"loss": 2.4724,
"step": 2022
},
{
"epoch": 6.5,
"learning_rate": 1.75e-05,
"loss": 2.4746,
"step": 2028
},
{
"epoch": 6.52,
"learning_rate": 1.7403846153846154e-05,
"loss": 2.3899,
"step": 2034
},
{
"epoch": 6.54,
"learning_rate": 1.730769230769231e-05,
"loss": 2.3465,
"step": 2040
},
{
"epoch": 6.56,
"learning_rate": 1.721153846153846e-05,
"loss": 2.3984,
"step": 2046
},
{
"epoch": 6.58,
"learning_rate": 1.7115384615384617e-05,
"loss": 2.3041,
"step": 2052
},
{
"epoch": 6.6,
"learning_rate": 1.7019230769230772e-05,
"loss": 2.3035,
"step": 2058
},
{
"epoch": 6.62,
"learning_rate": 1.6923076923076924e-05,
"loss": 2.2846,
"step": 2064
},
{
"epoch": 6.63,
"learning_rate": 1.682692307692308e-05,
"loss": 2.4577,
"step": 2070
},
{
"epoch": 6.65,
"learning_rate": 1.673076923076923e-05,
"loss": 2.3338,
"step": 2076
},
{
"epoch": 6.67,
"learning_rate": 1.6634615384615384e-05,
"loss": 2.3623,
"step": 2082
},
{
"epoch": 6.69,
"learning_rate": 1.653846153846154e-05,
"loss": 2.3601,
"step": 2088
},
{
"epoch": 6.71,
"learning_rate": 1.6442307692307695e-05,
"loss": 2.34,
"step": 2094
},
{
"epoch": 6.73,
"learning_rate": 1.6346153846153847e-05,
"loss": 2.3451,
"step": 2100
},
{
"epoch": 6.75,
"learning_rate": 1.6250000000000002e-05,
"loss": 2.3392,
"step": 2106
},
{
"epoch": 6.77,
"learning_rate": 1.6153846153846154e-05,
"loss": 2.4011,
"step": 2112
},
{
"epoch": 6.79,
"learning_rate": 1.605769230769231e-05,
"loss": 2.382,
"step": 2118
},
{
"epoch": 6.81,
"learning_rate": 1.596153846153846e-05,
"loss": 2.4005,
"step": 2124
},
{
"epoch": 6.83,
"learning_rate": 1.5865384615384617e-05,
"loss": 2.3398,
"step": 2130
},
{
"epoch": 6.85,
"learning_rate": 1.576923076923077e-05,
"loss": 2.3156,
"step": 2136
},
{
"epoch": 6.87,
"learning_rate": 1.5673076923076924e-05,
"loss": 2.3249,
"step": 2142
},
{
"epoch": 6.88,
"learning_rate": 1.5576923076923076e-05,
"loss": 2.2891,
"step": 2148
},
{
"epoch": 6.9,
"learning_rate": 1.5480769230769232e-05,
"loss": 2.3154,
"step": 2154
},
{
"epoch": 6.92,
"learning_rate": 1.5384615384615387e-05,
"loss": 2.3642,
"step": 2160
},
{
"epoch": 6.94,
"learning_rate": 1.528846153846154e-05,
"loss": 2.3303,
"step": 2166
},
{
"epoch": 6.96,
"learning_rate": 1.5192307692307691e-05,
"loss": 2.4364,
"step": 2172
},
{
"epoch": 6.98,
"learning_rate": 1.5096153846153847e-05,
"loss": 2.4258,
"step": 2178
},
{
"epoch": 7.0,
"learning_rate": 1.5e-05,
"loss": 2.3566,
"step": 2184
},
{
"epoch": 7.02,
"learning_rate": 1.4903846153846154e-05,
"loss": 2.2916,
"step": 2190
},
{
"epoch": 7.04,
"learning_rate": 1.4807692307692308e-05,
"loss": 2.3829,
"step": 2196
},
{
"epoch": 7.06,
"learning_rate": 1.4711538461538463e-05,
"loss": 2.242,
"step": 2202
},
{
"epoch": 7.08,
"learning_rate": 1.4615384615384617e-05,
"loss": 2.2546,
"step": 2208
},
{
"epoch": 7.1,
"learning_rate": 1.451923076923077e-05,
"loss": 2.1526,
"step": 2214
},
{
"epoch": 7.12,
"learning_rate": 1.4423076923076923e-05,
"loss": 2.2073,
"step": 2220
},
{
"epoch": 7.13,
"learning_rate": 1.4326923076923076e-05,
"loss": 2.2992,
"step": 2226
},
{
"epoch": 7.15,
"learning_rate": 1.423076923076923e-05,
"loss": 2.2544,
"step": 2232
},
{
"epoch": 7.17,
"learning_rate": 1.4134615384615386e-05,
"loss": 2.2632,
"step": 2238
},
{
"epoch": 7.19,
"learning_rate": 1.403846153846154e-05,
"loss": 2.3158,
"step": 2244
},
{
"epoch": 7.21,
"learning_rate": 1.3942307692307693e-05,
"loss": 2.2981,
"step": 2250
},
{
"epoch": 7.23,
"learning_rate": 1.3846153846153847e-05,
"loss": 2.2468,
"step": 2256
},
{
"epoch": 7.25,
"learning_rate": 1.3750000000000002e-05,
"loss": 2.4333,
"step": 2262
},
{
"epoch": 7.27,
"learning_rate": 1.3653846153846153e-05,
"loss": 2.3005,
"step": 2268
},
{
"epoch": 7.29,
"learning_rate": 1.3557692307692308e-05,
"loss": 2.2629,
"step": 2274
},
{
"epoch": 7.31,
"learning_rate": 1.3461538461538462e-05,
"loss": 2.3122,
"step": 2280
},
{
"epoch": 7.33,
"learning_rate": 1.3365384615384615e-05,
"loss": 2.3257,
"step": 2286
},
{
"epoch": 7.35,
"learning_rate": 1.3269230769230769e-05,
"loss": 2.4191,
"step": 2292
},
{
"epoch": 7.37,
"learning_rate": 1.3173076923076925e-05,
"loss": 2.2431,
"step": 2298
},
{
"epoch": 7.38,
"learning_rate": 1.3076923076923078e-05,
"loss": 2.3716,
"step": 2304
},
{
"epoch": 7.4,
"learning_rate": 1.2980769230769232e-05,
"loss": 2.1685,
"step": 2310
},
{
"epoch": 7.42,
"learning_rate": 1.2884615384615384e-05,
"loss": 2.2519,
"step": 2316
},
{
"epoch": 7.44,
"learning_rate": 1.2788461538461538e-05,
"loss": 2.2242,
"step": 2322
},
{
"epoch": 7.46,
"learning_rate": 1.2692307692307691e-05,
"loss": 2.3481,
"step": 2328
},
{
"epoch": 7.48,
"learning_rate": 1.2596153846153847e-05,
"loss": 2.3117,
"step": 2334
},
{
"epoch": 7.5,
"learning_rate": 1.25e-05,
"loss": 2.2924,
"step": 2340
},
{
"epoch": 7.52,
"learning_rate": 1.2403846153846154e-05,
"loss": 2.2933,
"step": 2346
},
{
"epoch": 7.54,
"learning_rate": 1.230769230769231e-05,
"loss": 2.298,
"step": 2352
},
{
"epoch": 7.56,
"learning_rate": 1.2211538461538462e-05,
"loss": 2.2145,
"step": 2358
},
{
"epoch": 7.58,
"learning_rate": 1.2115384615384615e-05,
"loss": 2.3118,
"step": 2364
},
{
"epoch": 7.6,
"learning_rate": 1.2019230769230771e-05,
"loss": 2.3132,
"step": 2370
},
{
"epoch": 7.62,
"learning_rate": 1.1923076923076925e-05,
"loss": 2.4653,
"step": 2376
},
{
"epoch": 7.63,
"learning_rate": 1.1826923076923077e-05,
"loss": 2.2083,
"step": 2382
},
{
"epoch": 7.65,
"learning_rate": 1.1730769230769232e-05,
"loss": 2.2876,
"step": 2388
},
{
"epoch": 7.67,
"learning_rate": 1.1634615384615386e-05,
"loss": 2.2115,
"step": 2394
},
{
"epoch": 7.69,
"learning_rate": 1.153846153846154e-05,
"loss": 2.3116,
"step": 2400
},
{
"epoch": 7.71,
"learning_rate": 1.1442307692307693e-05,
"loss": 2.2858,
"step": 2406
},
{
"epoch": 7.73,
"learning_rate": 1.1346153846153847e-05,
"loss": 2.2525,
"step": 2412
},
{
"epoch": 7.75,
"learning_rate": 1.125e-05,
"loss": 2.3215,
"step": 2418
},
{
"epoch": 7.77,
"learning_rate": 1.1153846153846154e-05,
"loss": 2.3884,
"step": 2424
},
{
"epoch": 7.79,
"learning_rate": 1.1057692307692308e-05,
"loss": 2.3767,
"step": 2430
},
{
"epoch": 7.81,
"learning_rate": 1.0961538461538462e-05,
"loss": 2.1214,
"step": 2436
},
{
"epoch": 7.83,
"learning_rate": 1.0865384615384616e-05,
"loss": 2.3003,
"step": 2442
},
{
"epoch": 7.85,
"learning_rate": 1.0769230769230771e-05,
"loss": 2.426,
"step": 2448
},
{
"epoch": 7.87,
"learning_rate": 1.0673076923076923e-05,
"loss": 2.2785,
"step": 2454
},
{
"epoch": 7.88,
"learning_rate": 1.0576923076923077e-05,
"loss": 2.3252,
"step": 2460
},
{
"epoch": 7.9,
"learning_rate": 1.0480769230769232e-05,
"loss": 2.2636,
"step": 2466
},
{
"epoch": 7.92,
"learning_rate": 1.0384615384615386e-05,
"loss": 2.2732,
"step": 2472
},
{
"epoch": 7.94,
"learning_rate": 1.0288461538461538e-05,
"loss": 2.1934,
"step": 2478
},
{
"epoch": 7.96,
"learning_rate": 1.0192307692307693e-05,
"loss": 2.2967,
"step": 2484
},
{
"epoch": 7.98,
"learning_rate": 1.0096153846153847e-05,
"loss": 2.2939,
"step": 2490
},
{
"epoch": 8.0,
"learning_rate": 1e-05,
"loss": 2.3393,
"step": 2496
}
],
"logging_steps": 6,
"max_steps": 3120,
"num_train_epochs": 10,
"save_steps": 500,
"total_flos": 1007380809216000.0,
"trial_name": null,
"trial_params": null
}