mrm8488's picture
Initial commit from mrm8488
afb5067
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"global_step": 25985,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 4.9903790648451034e-05,
"loss": 1.5568,
"step": 50
},
{
"epoch": 0.02,
"learning_rate": 4.980758129690206e-05,
"loss": 1.3161,
"step": 100
},
{
"epoch": 0.03,
"learning_rate": 4.971137194535309e-05,
"loss": 1.2764,
"step": 150
},
{
"epoch": 0.04,
"learning_rate": 4.961516259380412e-05,
"loss": 1.2186,
"step": 200
},
{
"epoch": 0.05,
"learning_rate": 4.951895324225515e-05,
"loss": 1.2072,
"step": 250
},
{
"epoch": 0.06,
"learning_rate": 4.942274389070618e-05,
"loss": 1.2057,
"step": 300
},
{
"epoch": 0.07,
"learning_rate": 4.932653453915721e-05,
"loss": 1.2048,
"step": 350
},
{
"epoch": 0.08,
"learning_rate": 4.923032518760824e-05,
"loss": 1.1741,
"step": 400
},
{
"epoch": 0.09,
"learning_rate": 4.913411583605927e-05,
"loss": 1.2347,
"step": 450
},
{
"epoch": 0.1,
"learning_rate": 4.9037906484510296e-05,
"loss": 1.1057,
"step": 500
},
{
"epoch": 0.11,
"learning_rate": 4.894169713296133e-05,
"loss": 1.2702,
"step": 550
},
{
"epoch": 0.12,
"learning_rate": 4.884548778141236e-05,
"loss": 1.1257,
"step": 600
},
{
"epoch": 0.13,
"learning_rate": 4.874927842986339e-05,
"loss": 1.093,
"step": 650
},
{
"epoch": 0.13,
"learning_rate": 4.8653069078314415e-05,
"loss": 1.1402,
"step": 700
},
{
"epoch": 0.14,
"learning_rate": 4.855685972676544e-05,
"loss": 1.1661,
"step": 750
},
{
"epoch": 0.15,
"learning_rate": 4.846065037521647e-05,
"loss": 1.1504,
"step": 800
},
{
"epoch": 0.16,
"learning_rate": 4.83644410236675e-05,
"loss": 1.1579,
"step": 850
},
{
"epoch": 0.17,
"learning_rate": 4.826823167211853e-05,
"loss": 1.1235,
"step": 900
},
{
"epoch": 0.18,
"learning_rate": 4.817202232056956e-05,
"loss": 1.1364,
"step": 950
},
{
"epoch": 0.19,
"learning_rate": 4.807581296902059e-05,
"loss": 1.1273,
"step": 1000
},
{
"epoch": 0.2,
"learning_rate": 4.797960361747162e-05,
"loss": 1.1337,
"step": 1050
},
{
"epoch": 0.21,
"learning_rate": 4.7883394265922646e-05,
"loss": 1.0788,
"step": 1100
},
{
"epoch": 0.22,
"learning_rate": 4.778718491437368e-05,
"loss": 1.1334,
"step": 1150
},
{
"epoch": 0.23,
"learning_rate": 4.769097556282471e-05,
"loss": 1.0406,
"step": 1200
},
{
"epoch": 0.24,
"learning_rate": 4.759476621127574e-05,
"loss": 1.0592,
"step": 1250
},
{
"epoch": 0.25,
"learning_rate": 4.7498556859726765e-05,
"loss": 1.1162,
"step": 1300
},
{
"epoch": 0.26,
"learning_rate": 4.7402347508177797e-05,
"loss": 1.1041,
"step": 1350
},
{
"epoch": 0.27,
"learning_rate": 4.730613815662883e-05,
"loss": 1.1012,
"step": 1400
},
{
"epoch": 0.28,
"learning_rate": 4.720992880507986e-05,
"loss": 1.0975,
"step": 1450
},
{
"epoch": 0.29,
"learning_rate": 4.7113719453530884e-05,
"loss": 1.0853,
"step": 1500
},
{
"epoch": 0.3,
"learning_rate": 4.7017510101981915e-05,
"loss": 1.1258,
"step": 1550
},
{
"epoch": 0.31,
"learning_rate": 4.692130075043295e-05,
"loss": 1.0874,
"step": 1600
},
{
"epoch": 0.32,
"learning_rate": 4.682509139888397e-05,
"loss": 1.0703,
"step": 1650
},
{
"epoch": 0.33,
"learning_rate": 4.6728882047335e-05,
"loss": 1.078,
"step": 1700
},
{
"epoch": 0.34,
"learning_rate": 4.6632672695786034e-05,
"loss": 1.0578,
"step": 1750
},
{
"epoch": 0.35,
"learning_rate": 4.6536463344237066e-05,
"loss": 1.0748,
"step": 1800
},
{
"epoch": 0.36,
"learning_rate": 4.644025399268809e-05,
"loss": 1.0652,
"step": 1850
},
{
"epoch": 0.37,
"learning_rate": 4.634404464113912e-05,
"loss": 1.0861,
"step": 1900
},
{
"epoch": 0.38,
"learning_rate": 4.624783528959015e-05,
"loss": 1.1258,
"step": 1950
},
{
"epoch": 0.38,
"learning_rate": 4.6151625938041185e-05,
"loss": 1.0661,
"step": 2000
},
{
"epoch": 0.39,
"learning_rate": 4.605541658649221e-05,
"loss": 1.0718,
"step": 2050
},
{
"epoch": 0.4,
"learning_rate": 4.595920723494324e-05,
"loss": 1.0186,
"step": 2100
},
{
"epoch": 0.41,
"learning_rate": 4.586299788339427e-05,
"loss": 1.0994,
"step": 2150
},
{
"epoch": 0.42,
"learning_rate": 4.57667885318453e-05,
"loss": 1.063,
"step": 2200
},
{
"epoch": 0.43,
"learning_rate": 4.567057918029632e-05,
"loss": 1.0748,
"step": 2250
},
{
"epoch": 0.44,
"learning_rate": 4.557436982874735e-05,
"loss": 1.0637,
"step": 2300
},
{
"epoch": 0.45,
"learning_rate": 4.5478160477198384e-05,
"loss": 1.0256,
"step": 2350
},
{
"epoch": 0.46,
"learning_rate": 4.5381951125649416e-05,
"loss": 1.0493,
"step": 2400
},
{
"epoch": 0.47,
"learning_rate": 4.528574177410044e-05,
"loss": 1.0789,
"step": 2450
},
{
"epoch": 0.48,
"learning_rate": 4.518953242255147e-05,
"loss": 1.0564,
"step": 2500
},
{
"epoch": 0.49,
"learning_rate": 4.50933230710025e-05,
"loss": 1.0308,
"step": 2550
},
{
"epoch": 0.5,
"learning_rate": 4.4997113719453535e-05,
"loss": 1.0008,
"step": 2600
},
{
"epoch": 0.51,
"learning_rate": 4.490090436790456e-05,
"loss": 0.9971,
"step": 2650
},
{
"epoch": 0.52,
"learning_rate": 4.480469501635559e-05,
"loss": 1.02,
"step": 2700
},
{
"epoch": 0.53,
"learning_rate": 4.470848566480662e-05,
"loss": 1.0672,
"step": 2750
},
{
"epoch": 0.54,
"learning_rate": 4.4612276313257654e-05,
"loss": 1.0252,
"step": 2800
},
{
"epoch": 0.55,
"learning_rate": 4.451606696170868e-05,
"loss": 1.0235,
"step": 2850
},
{
"epoch": 0.56,
"learning_rate": 4.441985761015971e-05,
"loss": 1.0952,
"step": 2900
},
{
"epoch": 0.57,
"learning_rate": 4.432364825861074e-05,
"loss": 1.0167,
"step": 2950
},
{
"epoch": 0.58,
"learning_rate": 4.4227438907061766e-05,
"loss": 1.0493,
"step": 3000
},
{
"epoch": 0.59,
"learning_rate": 4.41312295555128e-05,
"loss": 1.0299,
"step": 3050
},
{
"epoch": 0.6,
"learning_rate": 4.403502020396383e-05,
"loss": 1.0101,
"step": 3100
},
{
"epoch": 0.61,
"learning_rate": 4.393881085241486e-05,
"loss": 1.0351,
"step": 3150
},
{
"epoch": 0.62,
"learning_rate": 4.3842601500865885e-05,
"loss": 1.0602,
"step": 3200
},
{
"epoch": 0.63,
"learning_rate": 4.3746392149316916e-05,
"loss": 1.0591,
"step": 3250
},
{
"epoch": 0.63,
"learning_rate": 4.365018279776795e-05,
"loss": 1.1272,
"step": 3300
},
{
"epoch": 0.64,
"learning_rate": 4.355397344621898e-05,
"loss": 1.0016,
"step": 3350
},
{
"epoch": 0.65,
"learning_rate": 4.3457764094670004e-05,
"loss": 1.0356,
"step": 3400
},
{
"epoch": 0.66,
"learning_rate": 4.3361554743121035e-05,
"loss": 1.0675,
"step": 3450
},
{
"epoch": 0.67,
"learning_rate": 4.3265345391572066e-05,
"loss": 1.081,
"step": 3500
},
{
"epoch": 0.68,
"learning_rate": 4.31691360400231e-05,
"loss": 1.0639,
"step": 3550
},
{
"epoch": 0.69,
"learning_rate": 4.307292668847412e-05,
"loss": 1.0362,
"step": 3600
},
{
"epoch": 0.7,
"learning_rate": 4.297671733692515e-05,
"loss": 1.0322,
"step": 3650
},
{
"epoch": 0.71,
"learning_rate": 4.288050798537618e-05,
"loss": 1.047,
"step": 3700
},
{
"epoch": 0.72,
"learning_rate": 4.278429863382721e-05,
"loss": 1.0495,
"step": 3750
},
{
"epoch": 0.73,
"learning_rate": 4.2688089282278235e-05,
"loss": 0.9846,
"step": 3800
},
{
"epoch": 0.74,
"learning_rate": 4.2591879930729266e-05,
"loss": 1.0603,
"step": 3850
},
{
"epoch": 0.75,
"learning_rate": 4.24956705791803e-05,
"loss": 1.0081,
"step": 3900
},
{
"epoch": 0.76,
"learning_rate": 4.239946122763133e-05,
"loss": 1.0277,
"step": 3950
},
{
"epoch": 0.77,
"learning_rate": 4.2303251876082353e-05,
"loss": 1.0452,
"step": 4000
},
{
"epoch": 0.78,
"learning_rate": 4.2207042524533385e-05,
"loss": 1.0459,
"step": 4050
},
{
"epoch": 0.79,
"learning_rate": 4.2110833172984416e-05,
"loss": 1.0238,
"step": 4100
},
{
"epoch": 0.8,
"learning_rate": 4.201462382143545e-05,
"loss": 1.0076,
"step": 4150
},
{
"epoch": 0.81,
"learning_rate": 4.191841446988647e-05,
"loss": 1.0593,
"step": 4200
},
{
"epoch": 0.82,
"learning_rate": 4.1822205118337504e-05,
"loss": 0.9765,
"step": 4250
},
{
"epoch": 0.83,
"learning_rate": 4.1725995766788535e-05,
"loss": 1.0129,
"step": 4300
},
{
"epoch": 0.84,
"learning_rate": 4.162978641523957e-05,
"loss": 1.0691,
"step": 4350
},
{
"epoch": 0.85,
"learning_rate": 4.153357706369059e-05,
"loss": 1.0025,
"step": 4400
},
{
"epoch": 0.86,
"learning_rate": 4.143736771214162e-05,
"loss": 1.0469,
"step": 4450
},
{
"epoch": 0.87,
"learning_rate": 4.1341158360592654e-05,
"loss": 0.9761,
"step": 4500
},
{
"epoch": 0.88,
"learning_rate": 4.124494900904368e-05,
"loss": 1.0338,
"step": 4550
},
{
"epoch": 0.89,
"learning_rate": 4.114873965749471e-05,
"loss": 1.0535,
"step": 4600
},
{
"epoch": 0.89,
"learning_rate": 4.105253030594574e-05,
"loss": 1.0271,
"step": 4650
},
{
"epoch": 0.9,
"learning_rate": 4.095632095439677e-05,
"loss": 0.9932,
"step": 4700
},
{
"epoch": 0.91,
"learning_rate": 4.08601116028478e-05,
"loss": 1.046,
"step": 4750
},
{
"epoch": 0.92,
"learning_rate": 4.076390225129883e-05,
"loss": 1.0436,
"step": 4800
},
{
"epoch": 0.93,
"learning_rate": 4.066769289974986e-05,
"loss": 1.0411,
"step": 4850
},
{
"epoch": 0.94,
"learning_rate": 4.057148354820089e-05,
"loss": 1.0217,
"step": 4900
},
{
"epoch": 0.95,
"learning_rate": 4.0475274196651917e-05,
"loss": 1.0135,
"step": 4950
},
{
"epoch": 0.96,
"learning_rate": 4.037906484510295e-05,
"loss": 1.0147,
"step": 5000
},
{
"epoch": 0.97,
"learning_rate": 4.028285549355398e-05,
"loss": 1.0569,
"step": 5050
},
{
"epoch": 0.98,
"learning_rate": 4.0186646142005004e-05,
"loss": 0.9712,
"step": 5100
},
{
"epoch": 0.99,
"learning_rate": 4.009043679045603e-05,
"loss": 1.0107,
"step": 5150
},
{
"epoch": 1.0,
"learning_rate": 3.999422743890706e-05,
"loss": 0.9829,
"step": 5200
},
{
"epoch": 1.01,
"learning_rate": 3.989801808735809e-05,
"loss": 1.0263,
"step": 5250
},
{
"epoch": 1.02,
"learning_rate": 3.980180873580912e-05,
"loss": 1.0463,
"step": 5300
},
{
"epoch": 1.03,
"learning_rate": 3.970559938426015e-05,
"loss": 0.9776,
"step": 5350
},
{
"epoch": 1.04,
"learning_rate": 3.960939003271118e-05,
"loss": 0.9697,
"step": 5400
},
{
"epoch": 1.05,
"learning_rate": 3.951318068116221e-05,
"loss": 0.9636,
"step": 5450
},
{
"epoch": 1.06,
"learning_rate": 3.941697132961324e-05,
"loss": 0.9746,
"step": 5500
},
{
"epoch": 1.07,
"learning_rate": 3.9320761978064267e-05,
"loss": 0.9786,
"step": 5550
},
{
"epoch": 1.08,
"learning_rate": 3.92245526265153e-05,
"loss": 1.0174,
"step": 5600
},
{
"epoch": 1.09,
"learning_rate": 3.912834327496633e-05,
"loss": 0.9955,
"step": 5650
},
{
"epoch": 1.1,
"learning_rate": 3.903213392341736e-05,
"loss": 0.9633,
"step": 5700
},
{
"epoch": 1.11,
"learning_rate": 3.8935924571868385e-05,
"loss": 0.9661,
"step": 5750
},
{
"epoch": 1.12,
"learning_rate": 3.883971522031942e-05,
"loss": 0.9731,
"step": 5800
},
{
"epoch": 1.13,
"learning_rate": 3.874350586877045e-05,
"loss": 0.986,
"step": 5850
},
{
"epoch": 1.14,
"learning_rate": 3.864729651722147e-05,
"loss": 0.9579,
"step": 5900
},
{
"epoch": 1.14,
"learning_rate": 3.8551087165672504e-05,
"loss": 0.9643,
"step": 5950
},
{
"epoch": 1.15,
"learning_rate": 3.8454877814123536e-05,
"loss": 1.0001,
"step": 6000
},
{
"epoch": 1.16,
"learning_rate": 3.835866846257457e-05,
"loss": 0.931,
"step": 6050
},
{
"epoch": 1.17,
"learning_rate": 3.826245911102559e-05,
"loss": 0.9706,
"step": 6100
},
{
"epoch": 1.18,
"learning_rate": 3.816624975947662e-05,
"loss": 1.0086,
"step": 6150
},
{
"epoch": 1.19,
"learning_rate": 3.8070040407927655e-05,
"loss": 0.9136,
"step": 6200
},
{
"epoch": 1.2,
"learning_rate": 3.7973831056378686e-05,
"loss": 0.9657,
"step": 6250
},
{
"epoch": 1.21,
"learning_rate": 3.787762170482971e-05,
"loss": 1.0179,
"step": 6300
},
{
"epoch": 1.22,
"learning_rate": 3.778141235328074e-05,
"loss": 0.9634,
"step": 6350
},
{
"epoch": 1.23,
"learning_rate": 3.7685203001731774e-05,
"loss": 0.956,
"step": 6400
},
{
"epoch": 1.24,
"learning_rate": 3.7588993650182805e-05,
"loss": 0.9316,
"step": 6450
},
{
"epoch": 1.25,
"learning_rate": 3.749278429863382e-05,
"loss": 0.9615,
"step": 6500
},
{
"epoch": 1.26,
"learning_rate": 3.7396574947084854e-05,
"loss": 1.0051,
"step": 6550
},
{
"epoch": 1.27,
"learning_rate": 3.7300365595535886e-05,
"loss": 0.9738,
"step": 6600
},
{
"epoch": 1.28,
"learning_rate": 3.720415624398692e-05,
"loss": 0.9919,
"step": 6650
},
{
"epoch": 1.29,
"learning_rate": 3.710794689243794e-05,
"loss": 1.0258,
"step": 6700
},
{
"epoch": 1.3,
"learning_rate": 3.701173754088897e-05,
"loss": 0.95,
"step": 6750
},
{
"epoch": 1.31,
"learning_rate": 3.6915528189340005e-05,
"loss": 0.9876,
"step": 6800
},
{
"epoch": 1.32,
"learning_rate": 3.6819318837791036e-05,
"loss": 0.9641,
"step": 6850
},
{
"epoch": 1.33,
"learning_rate": 3.672310948624206e-05,
"loss": 0.9604,
"step": 6900
},
{
"epoch": 1.34,
"learning_rate": 3.662690013469309e-05,
"loss": 0.9775,
"step": 6950
},
{
"epoch": 1.35,
"learning_rate": 3.6530690783144124e-05,
"loss": 0.9484,
"step": 7000
},
{
"epoch": 1.36,
"learning_rate": 3.6434481431595155e-05,
"loss": 0.9288,
"step": 7050
},
{
"epoch": 1.37,
"learning_rate": 3.633827208004618e-05,
"loss": 0.9688,
"step": 7100
},
{
"epoch": 1.38,
"learning_rate": 3.624206272849721e-05,
"loss": 0.9436,
"step": 7150
},
{
"epoch": 1.39,
"learning_rate": 3.614585337694824e-05,
"loss": 0.9607,
"step": 7200
},
{
"epoch": 1.4,
"learning_rate": 3.6049644025399274e-05,
"loss": 0.9548,
"step": 7250
},
{
"epoch": 1.4,
"learning_rate": 3.59534346738503e-05,
"loss": 0.9638,
"step": 7300
},
{
"epoch": 1.41,
"learning_rate": 3.585722532230133e-05,
"loss": 0.9608,
"step": 7350
},
{
"epoch": 1.42,
"learning_rate": 3.576101597075236e-05,
"loss": 0.9739,
"step": 7400
},
{
"epoch": 1.43,
"learning_rate": 3.5664806619203386e-05,
"loss": 0.9444,
"step": 7450
},
{
"epoch": 1.44,
"learning_rate": 3.556859726765442e-05,
"loss": 1.0029,
"step": 7500
},
{
"epoch": 1.45,
"learning_rate": 3.547238791610545e-05,
"loss": 0.9284,
"step": 7550
},
{
"epoch": 1.46,
"learning_rate": 3.537617856455648e-05,
"loss": 0.9946,
"step": 7600
},
{
"epoch": 1.47,
"learning_rate": 3.5279969213007505e-05,
"loss": 0.9267,
"step": 7650
},
{
"epoch": 1.48,
"learning_rate": 3.5183759861458536e-05,
"loss": 0.9609,
"step": 7700
},
{
"epoch": 1.49,
"learning_rate": 3.508755050990957e-05,
"loss": 1.0015,
"step": 7750
},
{
"epoch": 1.5,
"learning_rate": 3.49913411583606e-05,
"loss": 0.9598,
"step": 7800
},
{
"epoch": 1.51,
"learning_rate": 3.4895131806811624e-05,
"loss": 1.0063,
"step": 7850
},
{
"epoch": 1.52,
"learning_rate": 3.4798922455262655e-05,
"loss": 0.9482,
"step": 7900
},
{
"epoch": 1.53,
"learning_rate": 3.470271310371368e-05,
"loss": 0.9251,
"step": 7950
},
{
"epoch": 1.54,
"learning_rate": 3.460650375216471e-05,
"loss": 1.0041,
"step": 8000
},
{
"epoch": 1.55,
"learning_rate": 3.4510294400615736e-05,
"loss": 0.9382,
"step": 8050
},
{
"epoch": 1.56,
"learning_rate": 3.441408504906677e-05,
"loss": 0.9844,
"step": 8100
},
{
"epoch": 1.57,
"learning_rate": 3.43178756975178e-05,
"loss": 0.9732,
"step": 8150
},
{
"epoch": 1.58,
"learning_rate": 3.422166634596883e-05,
"loss": 0.9647,
"step": 8200
},
{
"epoch": 1.59,
"learning_rate": 3.4125456994419855e-05,
"loss": 0.9859,
"step": 8250
},
{
"epoch": 1.6,
"learning_rate": 3.4029247642870886e-05,
"loss": 0.9316,
"step": 8300
},
{
"epoch": 1.61,
"learning_rate": 3.393303829132192e-05,
"loss": 0.9852,
"step": 8350
},
{
"epoch": 1.62,
"learning_rate": 3.383682893977295e-05,
"loss": 0.9839,
"step": 8400
},
{
"epoch": 1.63,
"learning_rate": 3.3740619588223974e-05,
"loss": 0.958,
"step": 8450
},
{
"epoch": 1.64,
"learning_rate": 3.3644410236675005e-05,
"loss": 0.9693,
"step": 8500
},
{
"epoch": 1.65,
"learning_rate": 3.354820088512604e-05,
"loss": 0.99,
"step": 8550
},
{
"epoch": 1.65,
"learning_rate": 3.345199153357707e-05,
"loss": 0.9832,
"step": 8600
},
{
"epoch": 1.66,
"learning_rate": 3.335578218202809e-05,
"loss": 0.9417,
"step": 8650
},
{
"epoch": 1.67,
"learning_rate": 3.3259572830479124e-05,
"loss": 0.9372,
"step": 8700
},
{
"epoch": 1.68,
"learning_rate": 3.3163363478930156e-05,
"loss": 0.9493,
"step": 8750
},
{
"epoch": 1.69,
"learning_rate": 3.306715412738119e-05,
"loss": 0.9727,
"step": 8800
},
{
"epoch": 1.7,
"learning_rate": 3.297094477583221e-05,
"loss": 0.9129,
"step": 8850
},
{
"epoch": 1.71,
"learning_rate": 3.287473542428324e-05,
"loss": 0.9653,
"step": 8900
},
{
"epoch": 1.72,
"learning_rate": 3.2778526072734274e-05,
"loss": 0.9745,
"step": 8950
},
{
"epoch": 1.73,
"learning_rate": 3.26823167211853e-05,
"loss": 0.9281,
"step": 9000
},
{
"epoch": 1.74,
"learning_rate": 3.258610736963633e-05,
"loss": 0.9438,
"step": 9050
},
{
"epoch": 1.75,
"learning_rate": 3.248989801808736e-05,
"loss": 0.9526,
"step": 9100
},
{
"epoch": 1.76,
"learning_rate": 3.239368866653839e-05,
"loss": 0.9878,
"step": 9150
},
{
"epoch": 1.77,
"learning_rate": 3.229747931498942e-05,
"loss": 0.9627,
"step": 9200
},
{
"epoch": 1.78,
"learning_rate": 3.220126996344045e-05,
"loss": 0.9809,
"step": 9250
},
{
"epoch": 1.79,
"learning_rate": 3.210506061189148e-05,
"loss": 0.9427,
"step": 9300
},
{
"epoch": 1.8,
"learning_rate": 3.200885126034251e-05,
"loss": 0.907,
"step": 9350
},
{
"epoch": 1.81,
"learning_rate": 3.191264190879354e-05,
"loss": 0.9627,
"step": 9400
},
{
"epoch": 1.82,
"learning_rate": 3.181643255724456e-05,
"loss": 0.9693,
"step": 9450
},
{
"epoch": 1.83,
"learning_rate": 3.172022320569559e-05,
"loss": 0.9561,
"step": 9500
},
{
"epoch": 1.84,
"learning_rate": 3.1624013854146624e-05,
"loss": 0.9346,
"step": 9550
},
{
"epoch": 1.85,
"learning_rate": 3.152780450259765e-05,
"loss": 0.9597,
"step": 9600
},
{
"epoch": 1.86,
"learning_rate": 3.143159515104868e-05,
"loss": 0.9479,
"step": 9650
},
{
"epoch": 1.87,
"learning_rate": 3.133538579949971e-05,
"loss": 0.9935,
"step": 9700
},
{
"epoch": 1.88,
"learning_rate": 3.123917644795074e-05,
"loss": 0.9036,
"step": 9750
},
{
"epoch": 1.89,
"learning_rate": 3.114296709640177e-05,
"loss": 0.9218,
"step": 9800
},
{
"epoch": 1.9,
"learning_rate": 3.10467577448528e-05,
"loss": 0.9123,
"step": 9850
},
{
"epoch": 1.9,
"learning_rate": 3.095054839330383e-05,
"loss": 0.9674,
"step": 9900
},
{
"epoch": 1.91,
"learning_rate": 3.085433904175486e-05,
"loss": 0.9119,
"step": 9950
},
{
"epoch": 1.92,
"learning_rate": 3.075812969020589e-05,
"loss": 0.931,
"step": 10000
},
{
"epoch": 1.93,
"learning_rate": 3.066192033865692e-05,
"loss": 0.9491,
"step": 10050
},
{
"epoch": 1.94,
"learning_rate": 3.056571098710795e-05,
"loss": 0.9285,
"step": 10100
},
{
"epoch": 1.95,
"learning_rate": 3.0469501635558978e-05,
"loss": 0.9483,
"step": 10150
},
{
"epoch": 1.96,
"learning_rate": 3.037329228401001e-05,
"loss": 0.9874,
"step": 10200
},
{
"epoch": 1.97,
"learning_rate": 3.0277082932461037e-05,
"loss": 0.9781,
"step": 10250
},
{
"epoch": 1.98,
"learning_rate": 3.0180873580912065e-05,
"loss": 0.9851,
"step": 10300
},
{
"epoch": 1.99,
"learning_rate": 3.0084664229363097e-05,
"loss": 0.9219,
"step": 10350
},
{
"epoch": 2.0,
"learning_rate": 2.9988454877814125e-05,
"loss": 0.9572,
"step": 10400
},
{
"epoch": 2.01,
"learning_rate": 2.9892245526265156e-05,
"loss": 0.9187,
"step": 10450
},
{
"epoch": 2.02,
"learning_rate": 2.9796036174716184e-05,
"loss": 0.905,
"step": 10500
},
{
"epoch": 2.03,
"learning_rate": 2.9699826823167216e-05,
"loss": 0.9498,
"step": 10550
},
{
"epoch": 2.04,
"learning_rate": 2.9603617471618244e-05,
"loss": 0.9586,
"step": 10600
},
{
"epoch": 2.05,
"learning_rate": 2.9507408120069275e-05,
"loss": 0.9464,
"step": 10650
},
{
"epoch": 2.06,
"learning_rate": 2.9411198768520303e-05,
"loss": 0.9205,
"step": 10700
},
{
"epoch": 2.07,
"learning_rate": 2.9314989416971334e-05,
"loss": 0.9331,
"step": 10750
},
{
"epoch": 2.08,
"learning_rate": 2.9218780065422362e-05,
"loss": 0.943,
"step": 10800
},
{
"epoch": 2.09,
"learning_rate": 2.9122570713873387e-05,
"loss": 0.9544,
"step": 10850
},
{
"epoch": 2.1,
"learning_rate": 2.9026361362324415e-05,
"loss": 0.9148,
"step": 10900
},
{
"epoch": 2.11,
"learning_rate": 2.8930152010775447e-05,
"loss": 0.9569,
"step": 10950
},
{
"epoch": 2.12,
"learning_rate": 2.8833942659226475e-05,
"loss": 0.8757,
"step": 11000
},
{
"epoch": 2.13,
"learning_rate": 2.8737733307677506e-05,
"loss": 0.9618,
"step": 11050
},
{
"epoch": 2.14,
"learning_rate": 2.8641523956128534e-05,
"loss": 0.9181,
"step": 11100
},
{
"epoch": 2.15,
"learning_rate": 2.8545314604579565e-05,
"loss": 0.9251,
"step": 11150
},
{
"epoch": 2.16,
"learning_rate": 2.8449105253030594e-05,
"loss": 0.9052,
"step": 11200
},
{
"epoch": 2.16,
"learning_rate": 2.8352895901481625e-05,
"loss": 0.9723,
"step": 11250
},
{
"epoch": 2.17,
"learning_rate": 2.8256686549932653e-05,
"loss": 0.9938,
"step": 11300
},
{
"epoch": 2.18,
"learning_rate": 2.8160477198383684e-05,
"loss": 0.9029,
"step": 11350
},
{
"epoch": 2.19,
"learning_rate": 2.8064267846834712e-05,
"loss": 0.952,
"step": 11400
},
{
"epoch": 2.2,
"learning_rate": 2.7968058495285744e-05,
"loss": 0.9147,
"step": 11450
},
{
"epoch": 2.21,
"learning_rate": 2.7871849143736772e-05,
"loss": 0.9595,
"step": 11500
},
{
"epoch": 2.22,
"learning_rate": 2.7775639792187803e-05,
"loss": 0.8966,
"step": 11550
},
{
"epoch": 2.23,
"learning_rate": 2.767943044063883e-05,
"loss": 0.9401,
"step": 11600
},
{
"epoch": 2.24,
"learning_rate": 2.7583221089089863e-05,
"loss": 0.9376,
"step": 11650
},
{
"epoch": 2.25,
"learning_rate": 2.748701173754089e-05,
"loss": 0.901,
"step": 11700
},
{
"epoch": 2.26,
"learning_rate": 2.739080238599192e-05,
"loss": 0.9362,
"step": 11750
},
{
"epoch": 2.27,
"learning_rate": 2.729459303444295e-05,
"loss": 0.9836,
"step": 11800
},
{
"epoch": 2.28,
"learning_rate": 2.7198383682893978e-05,
"loss": 0.963,
"step": 11850
},
{
"epoch": 2.29,
"learning_rate": 2.710217433134501e-05,
"loss": 0.8923,
"step": 11900
},
{
"epoch": 2.3,
"learning_rate": 2.7005964979796038e-05,
"loss": 0.9313,
"step": 11950
},
{
"epoch": 2.31,
"learning_rate": 2.690975562824707e-05,
"loss": 0.9261,
"step": 12000
},
{
"epoch": 2.32,
"learning_rate": 2.6813546276698097e-05,
"loss": 0.9517,
"step": 12050
},
{
"epoch": 2.33,
"learning_rate": 2.671733692514913e-05,
"loss": 0.9302,
"step": 12100
},
{
"epoch": 2.34,
"learning_rate": 2.6621127573600157e-05,
"loss": 0.9249,
"step": 12150
},
{
"epoch": 2.35,
"learning_rate": 2.6524918222051188e-05,
"loss": 0.8492,
"step": 12200
},
{
"epoch": 2.36,
"learning_rate": 2.6428708870502216e-05,
"loss": 0.9433,
"step": 12250
},
{
"epoch": 2.37,
"learning_rate": 2.633249951895324e-05,
"loss": 0.9526,
"step": 12300
},
{
"epoch": 2.38,
"learning_rate": 2.6236290167404272e-05,
"loss": 0.9241,
"step": 12350
},
{
"epoch": 2.39,
"learning_rate": 2.61400808158553e-05,
"loss": 0.9073,
"step": 12400
},
{
"epoch": 2.4,
"learning_rate": 2.6043871464306328e-05,
"loss": 0.9214,
"step": 12450
},
{
"epoch": 2.41,
"learning_rate": 2.594766211275736e-05,
"loss": 0.9242,
"step": 12500
},
{
"epoch": 2.41,
"learning_rate": 2.5851452761208388e-05,
"loss": 0.9421,
"step": 12550
},
{
"epoch": 2.42,
"learning_rate": 2.575524340965942e-05,
"loss": 0.9111,
"step": 12600
},
{
"epoch": 2.43,
"learning_rate": 2.5659034058110447e-05,
"loss": 0.9089,
"step": 12650
},
{
"epoch": 2.44,
"learning_rate": 2.556282470656148e-05,
"loss": 0.8959,
"step": 12700
},
{
"epoch": 2.45,
"learning_rate": 2.5466615355012507e-05,
"loss": 0.8655,
"step": 12750
},
{
"epoch": 2.46,
"learning_rate": 2.5370406003463538e-05,
"loss": 0.9657,
"step": 12800
},
{
"epoch": 2.47,
"learning_rate": 2.5274196651914566e-05,
"loss": 0.9018,
"step": 12850
},
{
"epoch": 2.48,
"learning_rate": 2.5177987300365597e-05,
"loss": 0.9141,
"step": 12900
},
{
"epoch": 2.49,
"learning_rate": 2.5081777948816626e-05,
"loss": 0.9265,
"step": 12950
},
{
"epoch": 2.5,
"learning_rate": 2.4985568597267657e-05,
"loss": 0.9464,
"step": 13000
},
{
"epoch": 2.51,
"learning_rate": 2.4889359245718685e-05,
"loss": 0.9249,
"step": 13050
},
{
"epoch": 2.52,
"learning_rate": 2.4793149894169716e-05,
"loss": 0.9138,
"step": 13100
},
{
"epoch": 2.53,
"learning_rate": 2.4696940542620744e-05,
"loss": 0.9607,
"step": 13150
},
{
"epoch": 2.54,
"learning_rate": 2.4600731191071776e-05,
"loss": 0.9267,
"step": 13200
},
{
"epoch": 2.55,
"learning_rate": 2.4504521839522804e-05,
"loss": 0.945,
"step": 13250
},
{
"epoch": 2.56,
"learning_rate": 2.4408312487973832e-05,
"loss": 0.9202,
"step": 13300
},
{
"epoch": 2.57,
"learning_rate": 2.4312103136424863e-05,
"loss": 0.9134,
"step": 13350
},
{
"epoch": 2.58,
"learning_rate": 2.421589378487589e-05,
"loss": 0.9387,
"step": 13400
},
{
"epoch": 2.59,
"learning_rate": 2.411968443332692e-05,
"loss": 0.9385,
"step": 13450
},
{
"epoch": 2.6,
"learning_rate": 2.402347508177795e-05,
"loss": 0.9397,
"step": 13500
},
{
"epoch": 2.61,
"learning_rate": 2.392726573022898e-05,
"loss": 0.9437,
"step": 13550
},
{
"epoch": 2.62,
"learning_rate": 2.3831056378680007e-05,
"loss": 0.9482,
"step": 13600
},
{
"epoch": 2.63,
"learning_rate": 2.3734847027131038e-05,
"loss": 0.8986,
"step": 13650
},
{
"epoch": 2.64,
"learning_rate": 2.3638637675582066e-05,
"loss": 0.8702,
"step": 13700
},
{
"epoch": 2.65,
"learning_rate": 2.3542428324033098e-05,
"loss": 0.9375,
"step": 13750
},
{
"epoch": 2.66,
"learning_rate": 2.3446218972484126e-05,
"loss": 0.9304,
"step": 13800
},
{
"epoch": 2.66,
"learning_rate": 2.3350009620935157e-05,
"loss": 0.9104,
"step": 13850
},
{
"epoch": 2.67,
"learning_rate": 2.3253800269386185e-05,
"loss": 0.9192,
"step": 13900
},
{
"epoch": 2.68,
"learning_rate": 2.3157590917837217e-05,
"loss": 0.9053,
"step": 13950
},
{
"epoch": 2.69,
"learning_rate": 2.3061381566288245e-05,
"loss": 0.891,
"step": 14000
},
{
"epoch": 2.7,
"learning_rate": 2.2965172214739276e-05,
"loss": 0.89,
"step": 14050
},
{
"epoch": 2.71,
"learning_rate": 2.28689628631903e-05,
"loss": 0.9109,
"step": 14100
},
{
"epoch": 2.72,
"learning_rate": 2.2772753511641332e-05,
"loss": 0.9166,
"step": 14150
},
{
"epoch": 2.73,
"learning_rate": 2.267654416009236e-05,
"loss": 0.8821,
"step": 14200
},
{
"epoch": 2.74,
"learning_rate": 2.258033480854339e-05,
"loss": 0.9186,
"step": 14250
},
{
"epoch": 2.75,
"learning_rate": 2.248412545699442e-05,
"loss": 0.9296,
"step": 14300
},
{
"epoch": 2.76,
"learning_rate": 2.238791610544545e-05,
"loss": 0.8647,
"step": 14350
},
{
"epoch": 2.77,
"learning_rate": 2.229170675389648e-05,
"loss": 0.9332,
"step": 14400
},
{
"epoch": 2.78,
"learning_rate": 2.219549740234751e-05,
"loss": 0.9078,
"step": 14450
},
{
"epoch": 2.79,
"learning_rate": 2.209928805079854e-05,
"loss": 0.9714,
"step": 14500
},
{
"epoch": 2.8,
"learning_rate": 2.200307869924957e-05,
"loss": 0.891,
"step": 14550
},
{
"epoch": 2.81,
"learning_rate": 2.1906869347700598e-05,
"loss": 0.9242,
"step": 14600
},
{
"epoch": 2.82,
"learning_rate": 2.181065999615163e-05,
"loss": 0.8897,
"step": 14650
},
{
"epoch": 2.83,
"learning_rate": 2.1714450644602657e-05,
"loss": 0.9339,
"step": 14700
},
{
"epoch": 2.84,
"learning_rate": 2.1618241293053686e-05,
"loss": 0.9459,
"step": 14750
},
{
"epoch": 2.85,
"learning_rate": 2.1522031941504714e-05,
"loss": 0.9447,
"step": 14800
},
{
"epoch": 2.86,
"learning_rate": 2.1425822589955745e-05,
"loss": 0.8597,
"step": 14850
},
{
"epoch": 2.87,
"learning_rate": 2.1329613238406773e-05,
"loss": 0.9117,
"step": 14900
},
{
"epoch": 2.88,
"learning_rate": 2.1233403886857804e-05,
"loss": 0.9621,
"step": 14950
},
{
"epoch": 2.89,
"learning_rate": 2.1137194535308832e-05,
"loss": 0.8957,
"step": 15000
},
{
"epoch": 2.9,
"learning_rate": 2.104098518375986e-05,
"loss": 0.9096,
"step": 15050
},
{
"epoch": 2.91,
"learning_rate": 2.0944775832210892e-05,
"loss": 0.8775,
"step": 15100
},
{
"epoch": 2.92,
"learning_rate": 2.084856648066192e-05,
"loss": 0.9468,
"step": 15150
},
{
"epoch": 2.92,
"learning_rate": 2.075235712911295e-05,
"loss": 0.9181,
"step": 15200
},
{
"epoch": 2.93,
"learning_rate": 2.065614777756398e-05,
"loss": 0.9437,
"step": 15250
},
{
"epoch": 2.94,
"learning_rate": 2.055993842601501e-05,
"loss": 0.9625,
"step": 15300
},
{
"epoch": 2.95,
"learning_rate": 2.046372907446604e-05,
"loss": 0.9204,
"step": 15350
},
{
"epoch": 2.96,
"learning_rate": 2.036751972291707e-05,
"loss": 0.916,
"step": 15400
},
{
"epoch": 2.97,
"learning_rate": 2.02713103713681e-05,
"loss": 0.8747,
"step": 15450
},
{
"epoch": 2.98,
"learning_rate": 2.017510101981913e-05,
"loss": 0.9374,
"step": 15500
},
{
"epoch": 2.99,
"learning_rate": 2.0078891668270154e-05,
"loss": 0.8864,
"step": 15550
},
{
"epoch": 3.0,
"learning_rate": 1.9982682316721186e-05,
"loss": 0.923,
"step": 15600
},
{
"epoch": 3.01,
"learning_rate": 1.9886472965172214e-05,
"loss": 0.8445,
"step": 15650
},
{
"epoch": 3.02,
"learning_rate": 1.9790263613623245e-05,
"loss": 0.8908,
"step": 15700
},
{
"epoch": 3.03,
"learning_rate": 1.9694054262074273e-05,
"loss": 0.8816,
"step": 15750
},
{
"epoch": 3.04,
"learning_rate": 1.9597844910525305e-05,
"loss": 0.8911,
"step": 15800
},
{
"epoch": 3.05,
"learning_rate": 1.9501635558976333e-05,
"loss": 0.9215,
"step": 15850
},
{
"epoch": 3.06,
"learning_rate": 1.9405426207427364e-05,
"loss": 0.8848,
"step": 15900
},
{
"epoch": 3.07,
"learning_rate": 1.9309216855878392e-05,
"loss": 0.8707,
"step": 15950
},
{
"epoch": 3.08,
"learning_rate": 1.9213007504329424e-05,
"loss": 0.8616,
"step": 16000
},
{
"epoch": 3.09,
"learning_rate": 1.911679815278045e-05,
"loss": 0.8581,
"step": 16050
},
{
"epoch": 3.1,
"learning_rate": 1.9020588801231483e-05,
"loss": 0.8828,
"step": 16100
},
{
"epoch": 3.11,
"learning_rate": 1.892437944968251e-05,
"loss": 0.9461,
"step": 16150
},
{
"epoch": 3.12,
"learning_rate": 1.882817009813354e-05,
"loss": 0.9004,
"step": 16200
},
{
"epoch": 3.13,
"learning_rate": 1.8731960746584567e-05,
"loss": 0.8839,
"step": 16250
},
{
"epoch": 3.14,
"learning_rate": 1.86357513950356e-05,
"loss": 0.8692,
"step": 16300
},
{
"epoch": 3.15,
"learning_rate": 1.8539542043486627e-05,
"loss": 0.918,
"step": 16350
},
{
"epoch": 3.16,
"learning_rate": 1.8443332691937658e-05,
"loss": 0.8885,
"step": 16400
},
{
"epoch": 3.17,
"learning_rate": 1.8347123340388686e-05,
"loss": 0.8852,
"step": 16450
},
{
"epoch": 3.17,
"learning_rate": 1.8250913988839714e-05,
"loss": 0.9233,
"step": 16500
},
{
"epoch": 3.18,
"learning_rate": 1.8154704637290746e-05,
"loss": 0.9075,
"step": 16550
},
{
"epoch": 3.19,
"learning_rate": 1.8058495285741774e-05,
"loss": 0.919,
"step": 16600
},
{
"epoch": 3.2,
"learning_rate": 1.7962285934192805e-05,
"loss": 0.9143,
"step": 16650
},
{
"epoch": 3.21,
"learning_rate": 1.7866076582643833e-05,
"loss": 0.9093,
"step": 16700
},
{
"epoch": 3.22,
"learning_rate": 1.7769867231094864e-05,
"loss": 0.8987,
"step": 16750
},
{
"epoch": 3.23,
"learning_rate": 1.7673657879545892e-05,
"loss": 0.9162,
"step": 16800
},
{
"epoch": 3.24,
"learning_rate": 1.7577448527996924e-05,
"loss": 0.8884,
"step": 16850
},
{
"epoch": 3.25,
"learning_rate": 1.7481239176447952e-05,
"loss": 0.8765,
"step": 16900
},
{
"epoch": 3.26,
"learning_rate": 1.7385029824898983e-05,
"loss": 0.8825,
"step": 16950
},
{
"epoch": 3.27,
"learning_rate": 1.7288820473350008e-05,
"loss": 0.8595,
"step": 17000
},
{
"epoch": 3.28,
"learning_rate": 1.719261112180104e-05,
"loss": 0.8704,
"step": 17050
},
{
"epoch": 3.29,
"learning_rate": 1.7096401770252067e-05,
"loss": 0.8655,
"step": 17100
},
{
"epoch": 3.3,
"learning_rate": 1.70001924187031e-05,
"loss": 0.8501,
"step": 17150
},
{
"epoch": 3.31,
"learning_rate": 1.6903983067154127e-05,
"loss": 0.8723,
"step": 17200
},
{
"epoch": 3.32,
"learning_rate": 1.680777371560516e-05,
"loss": 0.9043,
"step": 17250
},
{
"epoch": 3.33,
"learning_rate": 1.6711564364056186e-05,
"loss": 0.9083,
"step": 17300
},
{
"epoch": 3.34,
"learning_rate": 1.6615355012507218e-05,
"loss": 0.8813,
"step": 17350
},
{
"epoch": 3.35,
"learning_rate": 1.6519145660958246e-05,
"loss": 0.901,
"step": 17400
},
{
"epoch": 3.36,
"learning_rate": 1.6422936309409277e-05,
"loss": 0.8891,
"step": 17450
},
{
"epoch": 3.37,
"learning_rate": 1.6326726957860305e-05,
"loss": 0.8703,
"step": 17500
},
{
"epoch": 3.38,
"learning_rate": 1.6230517606311337e-05,
"loss": 0.8793,
"step": 17550
},
{
"epoch": 3.39,
"learning_rate": 1.6134308254762365e-05,
"loss": 0.9044,
"step": 17600
},
{
"epoch": 3.4,
"learning_rate": 1.6038098903213393e-05,
"loss": 0.8915,
"step": 17650
},
{
"epoch": 3.41,
"learning_rate": 1.594188955166442e-05,
"loss": 0.8458,
"step": 17700
},
{
"epoch": 3.42,
"learning_rate": 1.5845680200115452e-05,
"loss": 0.9001,
"step": 17750
},
{
"epoch": 3.43,
"learning_rate": 1.574947084856648e-05,
"loss": 0.8719,
"step": 17800
},
{
"epoch": 3.43,
"learning_rate": 1.565326149701751e-05,
"loss": 0.8884,
"step": 17850
},
{
"epoch": 3.44,
"learning_rate": 1.555705214546854e-05,
"loss": 0.9189,
"step": 17900
},
{
"epoch": 3.45,
"learning_rate": 1.5460842793919568e-05,
"loss": 0.9295,
"step": 17950
},
{
"epoch": 3.46,
"learning_rate": 1.53646334423706e-05,
"loss": 0.8898,
"step": 18000
},
{
"epoch": 3.47,
"learning_rate": 1.5268424090821627e-05,
"loss": 0.8434,
"step": 18050
},
{
"epoch": 3.48,
"learning_rate": 1.5172214739272659e-05,
"loss": 0.8689,
"step": 18100
},
{
"epoch": 3.49,
"learning_rate": 1.5076005387723688e-05,
"loss": 0.8968,
"step": 18150
},
{
"epoch": 3.5,
"learning_rate": 1.4979796036174718e-05,
"loss": 0.9064,
"step": 18200
},
{
"epoch": 3.51,
"learning_rate": 1.4883586684625748e-05,
"loss": 0.8808,
"step": 18250
},
{
"epoch": 3.52,
"learning_rate": 1.4787377333076778e-05,
"loss": 0.8748,
"step": 18300
},
{
"epoch": 3.53,
"learning_rate": 1.4691167981527806e-05,
"loss": 0.8964,
"step": 18350
},
{
"epoch": 3.54,
"learning_rate": 1.4594958629978835e-05,
"loss": 0.9299,
"step": 18400
},
{
"epoch": 3.55,
"learning_rate": 1.4498749278429863e-05,
"loss": 0.8759,
"step": 18450
},
{
"epoch": 3.56,
"learning_rate": 1.4402539926880893e-05,
"loss": 0.8822,
"step": 18500
},
{
"epoch": 3.57,
"learning_rate": 1.4306330575331923e-05,
"loss": 0.8792,
"step": 18550
},
{
"epoch": 3.58,
"learning_rate": 1.4210121223782952e-05,
"loss": 0.8443,
"step": 18600
},
{
"epoch": 3.59,
"learning_rate": 1.411391187223398e-05,
"loss": 0.8951,
"step": 18650
},
{
"epoch": 3.6,
"learning_rate": 1.401770252068501e-05,
"loss": 0.9153,
"step": 18700
},
{
"epoch": 3.61,
"learning_rate": 1.392149316913604e-05,
"loss": 0.8899,
"step": 18750
},
{
"epoch": 3.62,
"learning_rate": 1.382528381758707e-05,
"loss": 0.9196,
"step": 18800
},
{
"epoch": 3.63,
"learning_rate": 1.37290744660381e-05,
"loss": 0.8981,
"step": 18850
},
{
"epoch": 3.64,
"learning_rate": 1.363286511448913e-05,
"loss": 0.8634,
"step": 18900
},
{
"epoch": 3.65,
"learning_rate": 1.3536655762940159e-05,
"loss": 0.8741,
"step": 18950
},
{
"epoch": 3.66,
"learning_rate": 1.3440446411391189e-05,
"loss": 0.8729,
"step": 19000
},
{
"epoch": 3.67,
"learning_rate": 1.3344237059842218e-05,
"loss": 0.9247,
"step": 19050
},
{
"epoch": 3.68,
"learning_rate": 1.3248027708293248e-05,
"loss": 0.9436,
"step": 19100
},
{
"epoch": 3.68,
"learning_rate": 1.3151818356744274e-05,
"loss": 0.8898,
"step": 19150
},
{
"epoch": 3.69,
"learning_rate": 1.3055609005195304e-05,
"loss": 0.9404,
"step": 19200
},
{
"epoch": 3.7,
"learning_rate": 1.2959399653646334e-05,
"loss": 0.8733,
"step": 19250
},
{
"epoch": 3.71,
"learning_rate": 1.2863190302097364e-05,
"loss": 0.8954,
"step": 19300
},
{
"epoch": 3.72,
"learning_rate": 1.2766980950548393e-05,
"loss": 0.8925,
"step": 19350
},
{
"epoch": 3.73,
"learning_rate": 1.2670771598999423e-05,
"loss": 0.8607,
"step": 19400
},
{
"epoch": 3.74,
"learning_rate": 1.2574562247450453e-05,
"loss": 0.8658,
"step": 19450
},
{
"epoch": 3.75,
"learning_rate": 1.2478352895901482e-05,
"loss": 0.9051,
"step": 19500
},
{
"epoch": 3.76,
"learning_rate": 1.2382143544352512e-05,
"loss": 0.8882,
"step": 19550
},
{
"epoch": 3.77,
"learning_rate": 1.2285934192803542e-05,
"loss": 0.8899,
"step": 19600
},
{
"epoch": 3.78,
"learning_rate": 1.2189724841254572e-05,
"loss": 0.9144,
"step": 19650
},
{
"epoch": 3.79,
"learning_rate": 1.20935154897056e-05,
"loss": 0.8804,
"step": 19700
},
{
"epoch": 3.8,
"learning_rate": 1.199730613815663e-05,
"loss": 0.8631,
"step": 19750
},
{
"epoch": 3.81,
"learning_rate": 1.190109678660766e-05,
"loss": 0.8948,
"step": 19800
},
{
"epoch": 3.82,
"learning_rate": 1.1804887435058689e-05,
"loss": 0.8623,
"step": 19850
},
{
"epoch": 3.83,
"learning_rate": 1.1708678083509719e-05,
"loss": 0.8337,
"step": 19900
},
{
"epoch": 3.84,
"learning_rate": 1.1612468731960748e-05,
"loss": 0.8464,
"step": 19950
},
{
"epoch": 3.85,
"learning_rate": 1.1516259380411776e-05,
"loss": 0.8713,
"step": 20000
},
{
"epoch": 3.86,
"learning_rate": 1.1420050028862806e-05,
"loss": 0.8843,
"step": 20050
},
{
"epoch": 3.87,
"learning_rate": 1.1323840677313836e-05,
"loss": 0.8483,
"step": 20100
},
{
"epoch": 3.88,
"learning_rate": 1.1227631325764864e-05,
"loss": 0.8579,
"step": 20150
},
{
"epoch": 3.89,
"learning_rate": 1.1131421974215894e-05,
"loss": 0.8882,
"step": 20200
},
{
"epoch": 3.9,
"learning_rate": 1.1035212622666923e-05,
"loss": 0.8448,
"step": 20250
},
{
"epoch": 3.91,
"learning_rate": 1.0939003271117953e-05,
"loss": 0.8808,
"step": 20300
},
{
"epoch": 3.92,
"learning_rate": 1.0842793919568983e-05,
"loss": 0.8838,
"step": 20350
},
{
"epoch": 3.93,
"learning_rate": 1.074658456802001e-05,
"loss": 0.8926,
"step": 20400
},
{
"epoch": 3.93,
"learning_rate": 1.065037521647104e-05,
"loss": 0.8919,
"step": 20450
},
{
"epoch": 3.94,
"learning_rate": 1.055416586492207e-05,
"loss": 0.8706,
"step": 20500
},
{
"epoch": 3.95,
"learning_rate": 1.04579565133731e-05,
"loss": 0.8564,
"step": 20550
},
{
"epoch": 3.96,
"learning_rate": 1.036174716182413e-05,
"loss": 0.8579,
"step": 20600
},
{
"epoch": 3.97,
"learning_rate": 1.026553781027516e-05,
"loss": 0.9184,
"step": 20650
},
{
"epoch": 3.98,
"learning_rate": 1.016932845872619e-05,
"loss": 0.8813,
"step": 20700
},
{
"epoch": 3.99,
"learning_rate": 1.0073119107177219e-05,
"loss": 0.8257,
"step": 20750
},
{
"epoch": 4.0,
"learning_rate": 9.976909755628247e-06,
"loss": 0.8352,
"step": 20800
},
{
"epoch": 4.01,
"learning_rate": 9.880700404079277e-06,
"loss": 0.8739,
"step": 20850
},
{
"epoch": 4.02,
"learning_rate": 9.784491052530306e-06,
"loss": 0.8762,
"step": 20900
},
{
"epoch": 4.03,
"learning_rate": 9.688281700981336e-06,
"loss": 0.8652,
"step": 20950
},
{
"epoch": 4.04,
"learning_rate": 9.592072349432366e-06,
"loss": 0.8875,
"step": 21000
},
{
"epoch": 4.05,
"learning_rate": 9.495862997883396e-06,
"loss": 0.8277,
"step": 21050
},
{
"epoch": 4.06,
"learning_rate": 9.399653646334425e-06,
"loss": 0.8568,
"step": 21100
},
{
"epoch": 4.07,
"learning_rate": 9.303444294785453e-06,
"loss": 0.8651,
"step": 21150
},
{
"epoch": 4.08,
"learning_rate": 9.207234943236483e-06,
"loss": 0.8883,
"step": 21200
},
{
"epoch": 4.09,
"learning_rate": 9.111025591687513e-06,
"loss": 0.9303,
"step": 21250
},
{
"epoch": 4.1,
"learning_rate": 9.014816240138543e-06,
"loss": 0.8416,
"step": 21300
},
{
"epoch": 4.11,
"learning_rate": 8.918606888589572e-06,
"loss": 0.8409,
"step": 21350
},
{
"epoch": 4.12,
"learning_rate": 8.822397537040602e-06,
"loss": 0.9157,
"step": 21400
},
{
"epoch": 4.13,
"learning_rate": 8.72618818549163e-06,
"loss": 0.8638,
"step": 21450
},
{
"epoch": 4.14,
"learning_rate": 8.62997883394266e-06,
"loss": 0.8443,
"step": 21500
},
{
"epoch": 4.15,
"learning_rate": 8.53376948239369e-06,
"loss": 0.8533,
"step": 21550
},
{
"epoch": 4.16,
"learning_rate": 8.437560130844717e-06,
"loss": 0.8888,
"step": 21600
},
{
"epoch": 4.17,
"learning_rate": 8.341350779295747e-06,
"loss": 0.8741,
"step": 21650
},
{
"epoch": 4.18,
"learning_rate": 8.245141427746777e-06,
"loss": 0.876,
"step": 21700
},
{
"epoch": 4.19,
"learning_rate": 8.148932076197807e-06,
"loss": 0.9204,
"step": 21750
},
{
"epoch": 4.19,
"learning_rate": 8.052722724648836e-06,
"loss": 0.9072,
"step": 21800
},
{
"epoch": 4.2,
"learning_rate": 7.956513373099864e-06,
"loss": 0.8982,
"step": 21850
},
{
"epoch": 4.21,
"learning_rate": 7.860304021550894e-06,
"loss": 0.881,
"step": 21900
},
{
"epoch": 4.22,
"learning_rate": 7.764094670001924e-06,
"loss": 0.8485,
"step": 21950
},
{
"epoch": 4.23,
"learning_rate": 7.667885318452954e-06,
"loss": 0.8694,
"step": 22000
},
{
"epoch": 4.24,
"learning_rate": 7.571675966903983e-06,
"loss": 0.8448,
"step": 22050
},
{
"epoch": 4.25,
"learning_rate": 7.475466615355013e-06,
"loss": 0.8995,
"step": 22100
},
{
"epoch": 4.26,
"learning_rate": 7.379257263806043e-06,
"loss": 0.9009,
"step": 22150
},
{
"epoch": 4.27,
"learning_rate": 7.283047912257071e-06,
"loss": 0.8966,
"step": 22200
},
{
"epoch": 4.28,
"learning_rate": 7.1868385607081006e-06,
"loss": 0.872,
"step": 22250
},
{
"epoch": 4.29,
"learning_rate": 7.09062920915913e-06,
"loss": 0.8402,
"step": 22300
},
{
"epoch": 4.3,
"learning_rate": 6.99441985761016e-06,
"loss": 0.8175,
"step": 22350
},
{
"epoch": 4.31,
"learning_rate": 6.89821050606119e-06,
"loss": 0.8563,
"step": 22400
},
{
"epoch": 4.32,
"learning_rate": 6.8020011545122195e-06,
"loss": 0.8987,
"step": 22450
},
{
"epoch": 4.33,
"learning_rate": 6.705791802963249e-06,
"loss": 0.8586,
"step": 22500
},
{
"epoch": 4.34,
"learning_rate": 6.609582451414278e-06,
"loss": 0.8356,
"step": 22550
},
{
"epoch": 4.35,
"learning_rate": 6.513373099865307e-06,
"loss": 0.883,
"step": 22600
},
{
"epoch": 4.36,
"learning_rate": 6.417163748316337e-06,
"loss": 0.8474,
"step": 22650
},
{
"epoch": 4.37,
"learning_rate": 6.3209543967673656e-06,
"loss": 0.8856,
"step": 22700
},
{
"epoch": 4.38,
"learning_rate": 6.224745045218395e-06,
"loss": 0.8719,
"step": 22750
},
{
"epoch": 4.39,
"learning_rate": 6.128535693669425e-06,
"loss": 0.8827,
"step": 22800
},
{
"epoch": 4.4,
"learning_rate": 6.032326342120454e-06,
"loss": 0.8108,
"step": 22850
},
{
"epoch": 4.41,
"learning_rate": 5.936116990571484e-06,
"loss": 0.8395,
"step": 22900
},
{
"epoch": 4.42,
"learning_rate": 5.839907639022513e-06,
"loss": 0.8933,
"step": 22950
},
{
"epoch": 4.43,
"learning_rate": 5.743698287473543e-06,
"loss": 0.8987,
"step": 23000
},
{
"epoch": 4.44,
"learning_rate": 5.647488935924572e-06,
"loss": 0.8256,
"step": 23050
},
{
"epoch": 4.44,
"learning_rate": 5.551279584375602e-06,
"loss": 0.827,
"step": 23100
},
{
"epoch": 4.45,
"learning_rate": 5.455070232826631e-06,
"loss": 0.867,
"step": 23150
},
{
"epoch": 4.46,
"learning_rate": 5.35886088127766e-06,
"loss": 0.8714,
"step": 23200
},
{
"epoch": 4.47,
"learning_rate": 5.26265152972869e-06,
"loss": 0.8998,
"step": 23250
},
{
"epoch": 4.48,
"learning_rate": 5.16644217817972e-06,
"loss": 0.8851,
"step": 23300
},
{
"epoch": 4.49,
"learning_rate": 5.070232826630749e-06,
"loss": 0.8945,
"step": 23350
},
{
"epoch": 4.5,
"learning_rate": 4.974023475081778e-06,
"loss": 0.8409,
"step": 23400
},
{
"epoch": 4.51,
"learning_rate": 4.877814123532807e-06,
"loss": 0.8124,
"step": 23450
},
{
"epoch": 4.52,
"learning_rate": 4.781604771983837e-06,
"loss": 0.8847,
"step": 23500
},
{
"epoch": 4.53,
"learning_rate": 4.685395420434866e-06,
"loss": 0.8554,
"step": 23550
},
{
"epoch": 4.54,
"learning_rate": 4.5891860688858956e-06,
"loss": 0.8627,
"step": 23600
},
{
"epoch": 4.55,
"learning_rate": 4.492976717336925e-06,
"loss": 0.8093,
"step": 23650
},
{
"epoch": 4.56,
"learning_rate": 4.396767365787955e-06,
"loss": 0.8283,
"step": 23700
},
{
"epoch": 4.57,
"learning_rate": 4.300558014238984e-06,
"loss": 0.8821,
"step": 23750
},
{
"epoch": 4.58,
"learning_rate": 4.204348662690014e-06,
"loss": 0.8918,
"step": 23800
},
{
"epoch": 4.59,
"learning_rate": 4.108139311141043e-06,
"loss": 0.8876,
"step": 23850
},
{
"epoch": 4.6,
"learning_rate": 4.011929959592073e-06,
"loss": 0.8835,
"step": 23900
},
{
"epoch": 4.61,
"learning_rate": 3.915720608043102e-06,
"loss": 0.8492,
"step": 23950
},
{
"epoch": 4.62,
"learning_rate": 3.819511256494132e-06,
"loss": 0.8681,
"step": 24000
},
{
"epoch": 4.63,
"learning_rate": 3.723301904945161e-06,
"loss": 0.825,
"step": 24050
},
{
"epoch": 4.64,
"learning_rate": 3.62709255339619e-06,
"loss": 0.9186,
"step": 24100
},
{
"epoch": 4.65,
"learning_rate": 3.5308832018472196e-06,
"loss": 0.8303,
"step": 24150
},
{
"epoch": 4.66,
"learning_rate": 3.4346738502982493e-06,
"loss": 0.8478,
"step": 24200
},
{
"epoch": 4.67,
"learning_rate": 3.338464498749279e-06,
"loss": 0.9059,
"step": 24250
},
{
"epoch": 4.68,
"learning_rate": 3.242255147200308e-06,
"loss": 0.8887,
"step": 24300
},
{
"epoch": 4.69,
"learning_rate": 3.1460457956513372e-06,
"loss": 0.8936,
"step": 24350
},
{
"epoch": 4.7,
"learning_rate": 3.049836444102367e-06,
"loss": 0.8862,
"step": 24400
},
{
"epoch": 4.7,
"learning_rate": 2.9536270925533963e-06,
"loss": 0.8715,
"step": 24450
},
{
"epoch": 4.71,
"learning_rate": 2.8574177410044256e-06,
"loss": 0.8568,
"step": 24500
},
{
"epoch": 4.72,
"learning_rate": 2.7612083894554553e-06,
"loss": 0.8931,
"step": 24550
},
{
"epoch": 4.73,
"learning_rate": 2.6649990379064846e-06,
"loss": 0.901,
"step": 24600
},
{
"epoch": 4.74,
"learning_rate": 2.5687896863575143e-06,
"loss": 0.8796,
"step": 24650
},
{
"epoch": 4.75,
"learning_rate": 2.4725803348085436e-06,
"loss": 0.8116,
"step": 24700
},
{
"epoch": 4.76,
"learning_rate": 2.376370983259573e-06,
"loss": 0.8417,
"step": 24750
},
{
"epoch": 4.77,
"learning_rate": 2.2801616317106022e-06,
"loss": 0.8734,
"step": 24800
},
{
"epoch": 4.78,
"learning_rate": 2.183952280161632e-06,
"loss": 0.8653,
"step": 24850
},
{
"epoch": 4.79,
"learning_rate": 2.0877429286126613e-06,
"loss": 0.8758,
"step": 24900
},
{
"epoch": 4.8,
"learning_rate": 1.9915335770636906e-06,
"loss": 0.9223,
"step": 24950
},
{
"epoch": 4.81,
"learning_rate": 1.89532422551472e-06,
"loss": 0.8926,
"step": 25000
},
{
"epoch": 4.82,
"learning_rate": 1.7991148739657494e-06,
"loss": 0.9047,
"step": 25050
},
{
"epoch": 4.83,
"learning_rate": 1.7029055224167791e-06,
"loss": 0.853,
"step": 25100
},
{
"epoch": 4.84,
"learning_rate": 1.6066961708678084e-06,
"loss": 0.8797,
"step": 25150
},
{
"epoch": 4.85,
"learning_rate": 1.5104868193188377e-06,
"loss": 0.8212,
"step": 25200
},
{
"epoch": 4.86,
"learning_rate": 1.4142774677698672e-06,
"loss": 0.8769,
"step": 25250
},
{
"epoch": 4.87,
"learning_rate": 1.3180681162208968e-06,
"loss": 0.8531,
"step": 25300
},
{
"epoch": 4.88,
"learning_rate": 1.221858764671926e-06,
"loss": 0.8877,
"step": 25350
},
{
"epoch": 4.89,
"learning_rate": 1.1256494131229556e-06,
"loss": 0.8349,
"step": 25400
},
{
"epoch": 4.9,
"learning_rate": 1.029440061573985e-06,
"loss": 0.867,
"step": 25450
},
{
"epoch": 4.91,
"learning_rate": 9.332307100250145e-07,
"loss": 0.8496,
"step": 25500
},
{
"epoch": 4.92,
"learning_rate": 8.37021358476044e-07,
"loss": 0.8753,
"step": 25550
},
{
"epoch": 4.93,
"learning_rate": 7.408120069270733e-07,
"loss": 0.8526,
"step": 25600
},
{
"epoch": 4.94,
"learning_rate": 6.446026553781028e-07,
"loss": 0.8247,
"step": 25650
},
{
"epoch": 4.95,
"learning_rate": 5.483933038291322e-07,
"loss": 0.8471,
"step": 25700
},
{
"epoch": 4.95,
"learning_rate": 4.521839522801616e-07,
"loss": 0.8819,
"step": 25750
},
{
"epoch": 4.96,
"learning_rate": 3.5597460073119107e-07,
"loss": 0.8805,
"step": 25800
},
{
"epoch": 4.97,
"learning_rate": 2.5976524918222053e-07,
"loss": 0.8706,
"step": 25850
},
{
"epoch": 4.98,
"learning_rate": 1.6355589763324997e-07,
"loss": 0.9151,
"step": 25900
},
{
"epoch": 4.99,
"learning_rate": 6.734654608427939e-08,
"loss": 0.8774,
"step": 25950
},
{
"epoch": 5.0,
"step": 25985,
"train_runtime": 11347.9361,
"train_samples_per_second": 2.29
}
],
"max_steps": 25985,
"num_train_epochs": 5,
"total_flos": 77899961943982080,
"trial_name": null,
"trial_params": null
}