ruadapt_llama2_7b_v0.1 / trainer_state.json
RefalMachine's picture
load
28bf242
{
"best_metric": 2.7569446563720703,
"best_model_checkpoint": "outputs/llama2_7b_darulm_unigram_tie_2e_16_11_23/checkpoint-260000",
"epoch": 1.9999927672239521,
"eval_steps": 1000,
"global_step": 276518,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.9994503030566046e-05,
"loss": 10.303,
"step": 100
},
{
"epoch": 0.0,
"learning_rate": 1.9987342504592864e-05,
"loss": 8.0353,
"step": 200
},
{
"epoch": 0.0,
"learning_rate": 1.99801096500745e-05,
"loss": 7.283,
"step": 300
},
{
"epoch": 0.0,
"learning_rate": 1.9972876795556137e-05,
"loss": 6.7724,
"step": 400
},
{
"epoch": 0.0,
"learning_rate": 1.996564394103777e-05,
"loss": 6.3252,
"step": 500
},
{
"epoch": 0.0,
"learning_rate": 1.995848341506459e-05,
"loss": 5.9418,
"step": 600
},
{
"epoch": 0.01,
"learning_rate": 1.9951250560546228e-05,
"loss": 5.6204,
"step": 700
},
{
"epoch": 0.01,
"learning_rate": 1.9944017706027864e-05,
"loss": 5.3454,
"step": 800
},
{
"epoch": 0.01,
"learning_rate": 1.9936784851509498e-05,
"loss": 5.1143,
"step": 900
},
{
"epoch": 0.01,
"learning_rate": 1.9929551996991134e-05,
"loss": 4.9167,
"step": 1000
},
{
"epoch": 0.01,
"eval_accuracy": 0.2686138407237027,
"eval_loss": 4.864671230316162,
"eval_runtime": 28.3398,
"eval_samples_per_second": 228.759,
"eval_steps_per_second": 2.399,
"step": 1000
},
{
"epoch": 0.01,
"learning_rate": 1.992231914247277e-05,
"loss": 4.7595,
"step": 1100
},
{
"epoch": 0.01,
"learning_rate": 1.9915086287954407e-05,
"loss": 4.6176,
"step": 1200
},
{
"epoch": 0.01,
"learning_rate": 1.990785343343604e-05,
"loss": 4.492,
"step": 1300
},
{
"epoch": 0.01,
"learning_rate": 1.9900620578917677e-05,
"loss": 4.3949,
"step": 1400
},
{
"epoch": 0.01,
"learning_rate": 1.9893387724399313e-05,
"loss": 4.3078,
"step": 1500
},
{
"epoch": 0.01,
"learning_rate": 1.988615486988095e-05,
"loss": 4.2192,
"step": 1600
},
{
"epoch": 0.01,
"learning_rate": 1.9878922015362586e-05,
"loss": 4.1453,
"step": 1700
},
{
"epoch": 0.01,
"learning_rate": 1.987168916084422e-05,
"loss": 4.0878,
"step": 1800
},
{
"epoch": 0.01,
"learning_rate": 1.9864456306325856e-05,
"loss": 4.0281,
"step": 1900
},
{
"epoch": 0.01,
"learning_rate": 1.9857223451807492e-05,
"loss": 3.9697,
"step": 2000
},
{
"epoch": 0.01,
"eval_accuracy": 0.34088309394404065,
"eval_loss": 3.9705445766448975,
"eval_runtime": 29.5273,
"eval_samples_per_second": 219.56,
"eval_steps_per_second": 2.303,
"step": 2000
},
{
"epoch": 0.02,
"learning_rate": 1.984999059728913e-05,
"loss": 3.9334,
"step": 2100
},
{
"epoch": 0.02,
"learning_rate": 1.9842757742770765e-05,
"loss": 3.8789,
"step": 2200
},
{
"epoch": 0.02,
"learning_rate": 1.98355248882524e-05,
"loss": 3.8488,
"step": 2300
},
{
"epoch": 0.02,
"learning_rate": 1.9828292033734035e-05,
"loss": 3.8073,
"step": 2400
},
{
"epoch": 0.02,
"learning_rate": 1.982105917921567e-05,
"loss": 3.7793,
"step": 2500
},
{
"epoch": 0.02,
"learning_rate": 1.9813826324697308e-05,
"loss": 3.7395,
"step": 2600
},
{
"epoch": 0.02,
"learning_rate": 1.9806593470178945e-05,
"loss": 3.7196,
"step": 2700
},
{
"epoch": 0.02,
"learning_rate": 1.9799360615660578e-05,
"loss": 3.6915,
"step": 2800
},
{
"epoch": 0.02,
"learning_rate": 1.9792127761142214e-05,
"loss": 3.6569,
"step": 2900
},
{
"epoch": 0.02,
"learning_rate": 1.978489490662385e-05,
"loss": 3.6398,
"step": 3000
},
{
"epoch": 0.02,
"eval_accuracy": 0.3693563558386489,
"eval_loss": 3.6475651264190674,
"eval_runtime": 28.718,
"eval_samples_per_second": 225.747,
"eval_steps_per_second": 2.368,
"step": 3000
},
{
"epoch": 0.02,
"learning_rate": 1.9777662052105487e-05,
"loss": 3.6159,
"step": 3100
},
{
"epoch": 0.02,
"learning_rate": 1.977042919758712e-05,
"loss": 3.5994,
"step": 3200
},
{
"epoch": 0.02,
"learning_rate": 1.9763196343068757e-05,
"loss": 3.5769,
"step": 3300
},
{
"epoch": 0.02,
"learning_rate": 1.9755963488550393e-05,
"loss": 3.5605,
"step": 3400
},
{
"epoch": 0.03,
"learning_rate": 1.974873063403203e-05,
"loss": 3.5432,
"step": 3500
},
{
"epoch": 0.03,
"learning_rate": 1.9741497779513667e-05,
"loss": 3.5282,
"step": 3600
},
{
"epoch": 0.03,
"learning_rate": 1.97342649249953e-05,
"loss": 3.508,
"step": 3700
},
{
"epoch": 0.03,
"learning_rate": 1.9727032070476936e-05,
"loss": 3.4947,
"step": 3800
},
{
"epoch": 0.03,
"learning_rate": 1.9719799215958573e-05,
"loss": 3.4862,
"step": 3900
},
{
"epoch": 0.03,
"learning_rate": 1.971256636144021e-05,
"loss": 3.468,
"step": 4000
},
{
"epoch": 0.03,
"eval_accuracy": 0.38501903923685776,
"eval_loss": 3.4784200191497803,
"eval_runtime": 29.0726,
"eval_samples_per_second": 222.994,
"eval_steps_per_second": 2.339,
"step": 4000
},
{
"epoch": 0.03,
"learning_rate": 1.9705333506921846e-05,
"loss": 3.454,
"step": 4100
},
{
"epoch": 0.03,
"learning_rate": 1.969810065240348e-05,
"loss": 3.4428,
"step": 4200
},
{
"epoch": 0.03,
"learning_rate": 1.9690867797885115e-05,
"loss": 3.4316,
"step": 4300
},
{
"epoch": 0.03,
"learning_rate": 1.9683634943366752e-05,
"loss": 3.4171,
"step": 4400
},
{
"epoch": 0.03,
"learning_rate": 1.967640208884839e-05,
"loss": 3.4073,
"step": 4500
},
{
"epoch": 0.03,
"learning_rate": 1.9669241562875206e-05,
"loss": 3.4011,
"step": 4600
},
{
"epoch": 0.03,
"learning_rate": 1.966200870835684e-05,
"loss": 3.389,
"step": 4700
},
{
"epoch": 0.03,
"learning_rate": 1.9654775853838476e-05,
"loss": 3.3863,
"step": 4800
},
{
"epoch": 0.04,
"learning_rate": 1.9647542999320113e-05,
"loss": 3.3624,
"step": 4900
},
{
"epoch": 0.04,
"learning_rate": 1.964031014480175e-05,
"loss": 3.3567,
"step": 5000
},
{
"epoch": 0.04,
"eval_accuracy": 0.39525576696820947,
"eval_loss": 3.3732998371124268,
"eval_runtime": 28.1701,
"eval_samples_per_second": 230.137,
"eval_steps_per_second": 2.414,
"step": 5000
},
{
"epoch": 0.04,
"learning_rate": 1.9633077290283386e-05,
"loss": 3.3488,
"step": 5100
},
{
"epoch": 0.04,
"learning_rate": 1.962584443576502e-05,
"loss": 3.3415,
"step": 5200
},
{
"epoch": 0.04,
"learning_rate": 1.9618611581246655e-05,
"loss": 3.3334,
"step": 5300
},
{
"epoch": 0.04,
"learning_rate": 1.9611378726728292e-05,
"loss": 3.3279,
"step": 5400
},
{
"epoch": 0.04,
"learning_rate": 1.9604145872209928e-05,
"loss": 3.3107,
"step": 5500
},
{
"epoch": 0.04,
"learning_rate": 1.9596913017691565e-05,
"loss": 3.3096,
"step": 5600
},
{
"epoch": 0.04,
"learning_rate": 1.9589752491718383e-05,
"loss": 3.301,
"step": 5700
},
{
"epoch": 0.04,
"learning_rate": 1.958251963720002e-05,
"loss": 3.3033,
"step": 5800
},
{
"epoch": 0.04,
"learning_rate": 1.9575286782681656e-05,
"loss": 3.2967,
"step": 5900
},
{
"epoch": 0.04,
"learning_rate": 1.9568053928163292e-05,
"loss": 3.2828,
"step": 6000
},
{
"epoch": 0.04,
"eval_accuracy": 0.40260530558050767,
"eval_loss": 3.299899101257324,
"eval_runtime": 28.3308,
"eval_samples_per_second": 228.832,
"eval_steps_per_second": 2.4,
"step": 6000
},
{
"epoch": 0.04,
"learning_rate": 1.9560821073644925e-05,
"loss": 3.2859,
"step": 6100
},
{
"epoch": 0.04,
"learning_rate": 1.9553588219126562e-05,
"loss": 3.2764,
"step": 6200
},
{
"epoch": 0.05,
"learning_rate": 1.9546427693153383e-05,
"loss": 3.2684,
"step": 6300
},
{
"epoch": 0.05,
"learning_rate": 1.953919483863502e-05,
"loss": 3.2635,
"step": 6400
},
{
"epoch": 0.05,
"learning_rate": 1.9531961984116653e-05,
"loss": 3.2525,
"step": 6500
},
{
"epoch": 0.05,
"learning_rate": 1.952472912959829e-05,
"loss": 3.2538,
"step": 6600
},
{
"epoch": 0.05,
"learning_rate": 1.9517496275079926e-05,
"loss": 3.2469,
"step": 6700
},
{
"epoch": 0.05,
"learning_rate": 1.9510263420561563e-05,
"loss": 3.2353,
"step": 6800
},
{
"epoch": 0.05,
"learning_rate": 1.9503030566043196e-05,
"loss": 3.2268,
"step": 6900
},
{
"epoch": 0.05,
"learning_rate": 1.9495797711524832e-05,
"loss": 3.2235,
"step": 7000
},
{
"epoch": 0.05,
"eval_accuracy": 0.4080512229571761,
"eval_loss": 3.245333433151245,
"eval_runtime": 30.054,
"eval_samples_per_second": 215.712,
"eval_steps_per_second": 2.263,
"step": 7000
},
{
"epoch": 0.05,
"learning_rate": 1.9488564857006465e-05,
"loss": 3.2305,
"step": 7100
},
{
"epoch": 0.05,
"learning_rate": 1.9481332002488102e-05,
"loss": 3.2131,
"step": 7200
},
{
"epoch": 0.05,
"learning_rate": 1.947409914796974e-05,
"loss": 3.2181,
"step": 7300
},
{
"epoch": 0.05,
"learning_rate": 1.9466866293451375e-05,
"loss": 3.213,
"step": 7400
},
{
"epoch": 0.05,
"learning_rate": 1.945963343893301e-05,
"loss": 3.206,
"step": 7500
},
{
"epoch": 0.05,
"learning_rate": 1.9452400584414645e-05,
"loss": 3.1944,
"step": 7600
},
{
"epoch": 0.06,
"learning_rate": 1.944516772989628e-05,
"loss": 3.194,
"step": 7700
},
{
"epoch": 0.06,
"learning_rate": 1.9437934875377918e-05,
"loss": 3.19,
"step": 7800
},
{
"epoch": 0.06,
"learning_rate": 1.9430702020859554e-05,
"loss": 3.1975,
"step": 7900
},
{
"epoch": 0.06,
"learning_rate": 1.942346916634119e-05,
"loss": 3.1898,
"step": 8000
},
{
"epoch": 0.06,
"eval_accuracy": 0.41249965974358277,
"eval_loss": 3.20282244682312,
"eval_runtime": 31.4355,
"eval_samples_per_second": 206.232,
"eval_steps_per_second": 2.163,
"step": 8000
},
{
"epoch": 0.06,
"learning_rate": 1.9416236311822824e-05,
"loss": 3.1871,
"step": 8100
},
{
"epoch": 0.06,
"learning_rate": 1.9409075785849645e-05,
"loss": 3.1755,
"step": 8200
},
{
"epoch": 0.06,
"learning_rate": 1.940184293133128e-05,
"loss": 3.1774,
"step": 8300
},
{
"epoch": 0.06,
"learning_rate": 1.9394610076812915e-05,
"loss": 3.1726,
"step": 8400
},
{
"epoch": 0.06,
"learning_rate": 1.938737722229455e-05,
"loss": 3.173,
"step": 8500
},
{
"epoch": 0.06,
"learning_rate": 1.9380144367776188e-05,
"loss": 3.1738,
"step": 8600
},
{
"epoch": 0.06,
"learning_rate": 1.9372911513257824e-05,
"loss": 3.1624,
"step": 8700
},
{
"epoch": 0.06,
"learning_rate": 1.936567865873946e-05,
"loss": 3.1588,
"step": 8800
},
{
"epoch": 0.06,
"learning_rate": 1.9358445804221094e-05,
"loss": 3.1592,
"step": 8900
},
{
"epoch": 0.07,
"learning_rate": 1.935121294970273e-05,
"loss": 3.1552,
"step": 9000
},
{
"epoch": 0.07,
"eval_accuracy": 0.4160389313831348,
"eval_loss": 3.1682770252227783,
"eval_runtime": 28.3625,
"eval_samples_per_second": 228.576,
"eval_steps_per_second": 2.398,
"step": 9000
},
{
"epoch": 0.07,
"learning_rate": 1.9343980095184367e-05,
"loss": 3.1564,
"step": 9100
},
{
"epoch": 0.07,
"learning_rate": 1.9336747240666004e-05,
"loss": 3.1467,
"step": 9200
},
{
"epoch": 0.07,
"learning_rate": 1.932951438614764e-05,
"loss": 3.1519,
"step": 9300
},
{
"epoch": 0.07,
"learning_rate": 1.9322281531629273e-05,
"loss": 3.1442,
"step": 9400
},
{
"epoch": 0.07,
"learning_rate": 1.931504867711091e-05,
"loss": 3.1372,
"step": 9500
},
{
"epoch": 0.07,
"learning_rate": 1.9307815822592546e-05,
"loss": 3.1327,
"step": 9600
},
{
"epoch": 0.07,
"learning_rate": 1.9300582968074183e-05,
"loss": 3.1327,
"step": 9700
},
{
"epoch": 0.07,
"learning_rate": 1.929335011355582e-05,
"loss": 3.1293,
"step": 9800
},
{
"epoch": 0.07,
"learning_rate": 1.9286117259037452e-05,
"loss": 3.1369,
"step": 9900
},
{
"epoch": 0.07,
"learning_rate": 1.927888440451909e-05,
"loss": 3.1068,
"step": 10000
},
{
"epoch": 0.07,
"eval_accuracy": 0.41901262124470334,
"eval_loss": 3.139714002609253,
"eval_runtime": 28.3677,
"eval_samples_per_second": 228.535,
"eval_steps_per_second": 2.397,
"step": 10000
},
{
"epoch": 0.07,
"learning_rate": 1.9271651550000725e-05,
"loss": 3.1264,
"step": 10100
},
{
"epoch": 0.07,
"learning_rate": 1.9264418695482362e-05,
"loss": 3.1248,
"step": 10200
},
{
"epoch": 0.07,
"learning_rate": 1.9257185840963995e-05,
"loss": 3.1291,
"step": 10300
},
{
"epoch": 0.08,
"learning_rate": 1.924995298644563e-05,
"loss": 3.1076,
"step": 10400
},
{
"epoch": 0.08,
"learning_rate": 1.9242792460472453e-05,
"loss": 3.107,
"step": 10500
},
{
"epoch": 0.08,
"learning_rate": 1.923555960595409e-05,
"loss": 3.1117,
"step": 10600
},
{
"epoch": 0.08,
"learning_rate": 1.9228326751435723e-05,
"loss": 3.1053,
"step": 10700
},
{
"epoch": 0.08,
"learning_rate": 1.922109389691736e-05,
"loss": 3.1084,
"step": 10800
},
{
"epoch": 0.08,
"learning_rate": 1.9213861042398996e-05,
"loss": 3.1009,
"step": 10900
},
{
"epoch": 0.08,
"learning_rate": 1.9206628187880632e-05,
"loss": 3.1019,
"step": 11000
},
{
"epoch": 0.08,
"eval_accuracy": 0.4217092667701046,
"eval_loss": 3.115234136581421,
"eval_runtime": 28.1789,
"eval_samples_per_second": 230.066,
"eval_steps_per_second": 2.413,
"step": 11000
},
{
"epoch": 0.08,
"learning_rate": 1.919939533336227e-05,
"loss": 3.1015,
"step": 11100
},
{
"epoch": 0.08,
"learning_rate": 1.9192162478843902e-05,
"loss": 3.1001,
"step": 11200
},
{
"epoch": 0.08,
"learning_rate": 1.9184929624325538e-05,
"loss": 3.0938,
"step": 11300
},
{
"epoch": 0.08,
"learning_rate": 1.9177696769807175e-05,
"loss": 3.1016,
"step": 11400
},
{
"epoch": 0.08,
"learning_rate": 1.9170536243833993e-05,
"loss": 3.0905,
"step": 11500
},
{
"epoch": 0.08,
"learning_rate": 1.916330338931563e-05,
"loss": 3.0883,
"step": 11600
},
{
"epoch": 0.08,
"learning_rate": 1.9156070534797266e-05,
"loss": 3.0899,
"step": 11700
},
{
"epoch": 0.09,
"learning_rate": 1.91488376802789e-05,
"loss": 3.0803,
"step": 11800
},
{
"epoch": 0.09,
"learning_rate": 1.9141604825760535e-05,
"loss": 3.0806,
"step": 11900
},
{
"epoch": 0.09,
"learning_rate": 1.9134371971242172e-05,
"loss": 3.0849,
"step": 12000
},
{
"epoch": 0.09,
"eval_accuracy": 0.423874809834469,
"eval_loss": 3.094170331954956,
"eval_runtime": 28.7606,
"eval_samples_per_second": 225.413,
"eval_steps_per_second": 2.364,
"step": 12000
},
{
"epoch": 0.09,
"learning_rate": 1.912713911672381e-05,
"loss": 3.0781,
"step": 12100
},
{
"epoch": 0.09,
"learning_rate": 1.911990626220544e-05,
"loss": 3.0784,
"step": 12200
},
{
"epoch": 0.09,
"learning_rate": 1.9112673407687078e-05,
"loss": 3.0682,
"step": 12300
},
{
"epoch": 0.09,
"learning_rate": 1.9105440553168715e-05,
"loss": 3.0766,
"step": 12400
},
{
"epoch": 0.09,
"learning_rate": 1.909820769865035e-05,
"loss": 3.0708,
"step": 12500
},
{
"epoch": 0.09,
"learning_rate": 1.9090974844131988e-05,
"loss": 3.06,
"step": 12600
},
{
"epoch": 0.09,
"learning_rate": 1.908374198961362e-05,
"loss": 3.0599,
"step": 12700
},
{
"epoch": 0.09,
"learning_rate": 1.9076509135095257e-05,
"loss": 3.0593,
"step": 12800
},
{
"epoch": 0.09,
"learning_rate": 1.9069276280576894e-05,
"loss": 3.0675,
"step": 12900
},
{
"epoch": 0.09,
"learning_rate": 1.906204342605853e-05,
"loss": 3.0561,
"step": 13000
},
{
"epoch": 0.09,
"eval_accuracy": 0.42562236679339327,
"eval_loss": 3.076077938079834,
"eval_runtime": 28.8743,
"eval_samples_per_second": 224.525,
"eval_steps_per_second": 2.355,
"step": 13000
},
{
"epoch": 0.09,
"learning_rate": 1.9054810571540167e-05,
"loss": 3.0535,
"step": 13100
},
{
"epoch": 0.1,
"learning_rate": 1.90475777170218e-05,
"loss": 3.0541,
"step": 13200
},
{
"epoch": 0.1,
"learning_rate": 1.9040344862503437e-05,
"loss": 3.0628,
"step": 13300
},
{
"epoch": 0.1,
"learning_rate": 1.9033112007985073e-05,
"loss": 3.0601,
"step": 13400
},
{
"epoch": 0.1,
"learning_rate": 1.902587915346671e-05,
"loss": 3.0525,
"step": 13500
},
{
"epoch": 0.1,
"learning_rate": 1.9018718627493528e-05,
"loss": 3.0553,
"step": 13600
},
{
"epoch": 0.1,
"learning_rate": 1.9011485772975164e-05,
"loss": 3.0457,
"step": 13700
},
{
"epoch": 0.1,
"learning_rate": 1.90042529184568e-05,
"loss": 3.0466,
"step": 13800
},
{
"epoch": 0.1,
"learning_rate": 1.8997020063938437e-05,
"loss": 3.0506,
"step": 13900
},
{
"epoch": 0.1,
"learning_rate": 1.898978720942007e-05,
"loss": 3.0429,
"step": 14000
},
{
"epoch": 0.1,
"eval_accuracy": 0.42772923452891876,
"eval_loss": 3.0595009326934814,
"eval_runtime": 28.2676,
"eval_samples_per_second": 229.344,
"eval_steps_per_second": 2.406,
"step": 14000
},
{
"epoch": 0.1,
"learning_rate": 1.8982554354901707e-05,
"loss": 3.046,
"step": 14100
},
{
"epoch": 0.1,
"learning_rate": 1.8975321500383343e-05,
"loss": 3.0338,
"step": 14200
},
{
"epoch": 0.1,
"learning_rate": 1.896808864586498e-05,
"loss": 3.0346,
"step": 14300
},
{
"epoch": 0.1,
"learning_rate": 1.8960855791346616e-05,
"loss": 3.0413,
"step": 14400
},
{
"epoch": 0.1,
"learning_rate": 1.895362293682825e-05,
"loss": 3.0441,
"step": 14500
},
{
"epoch": 0.11,
"learning_rate": 1.8946390082309886e-05,
"loss": 3.0364,
"step": 14600
},
{
"epoch": 0.11,
"learning_rate": 1.8939157227791522e-05,
"loss": 3.0317,
"step": 14700
},
{
"epoch": 0.11,
"learning_rate": 1.893192437327316e-05,
"loss": 3.0346,
"step": 14800
},
{
"epoch": 0.11,
"learning_rate": 1.8924691518754795e-05,
"loss": 3.0259,
"step": 14900
},
{
"epoch": 0.11,
"learning_rate": 1.891745866423643e-05,
"loss": 3.035,
"step": 15000
},
{
"epoch": 0.11,
"eval_accuracy": 0.4292789890906231,
"eval_loss": 3.045100450515747,
"eval_runtime": 37.5227,
"eval_samples_per_second": 172.775,
"eval_steps_per_second": 1.812,
"step": 15000
},
{
"epoch": 0.11,
"learning_rate": 1.8910298138263247e-05,
"loss": 3.0315,
"step": 15100
},
{
"epoch": 0.11,
"learning_rate": 1.8903065283744883e-05,
"loss": 3.0256,
"step": 15200
},
{
"epoch": 0.11,
"learning_rate": 1.889583242922652e-05,
"loss": 3.0319,
"step": 15300
},
{
"epoch": 0.11,
"learning_rate": 1.8888599574708156e-05,
"loss": 3.0208,
"step": 15400
},
{
"epoch": 0.11,
"learning_rate": 1.888136672018979e-05,
"loss": 3.0232,
"step": 15500
},
{
"epoch": 0.11,
"learning_rate": 1.887420619421661e-05,
"loss": 3.0272,
"step": 15600
},
{
"epoch": 0.11,
"learning_rate": 1.8866973339698247e-05,
"loss": 3.019,
"step": 15700
},
{
"epoch": 0.11,
"learning_rate": 1.8859740485179884e-05,
"loss": 3.0133,
"step": 15800
},
{
"epoch": 0.12,
"learning_rate": 1.8852507630661517e-05,
"loss": 3.0151,
"step": 15900
},
{
"epoch": 0.12,
"learning_rate": 1.8845274776143153e-05,
"loss": 3.0077,
"step": 16000
},
{
"epoch": 0.12,
"eval_accuracy": 0.4305855737328095,
"eval_loss": 3.0321857929229736,
"eval_runtime": 28.0011,
"eval_samples_per_second": 231.526,
"eval_steps_per_second": 2.428,
"step": 16000
},
{
"epoch": 0.12,
"learning_rate": 1.883804192162479e-05,
"loss": 3.0229,
"step": 16100
},
{
"epoch": 0.12,
"learning_rate": 1.8830809067106426e-05,
"loss": 3.0126,
"step": 16200
},
{
"epoch": 0.12,
"learning_rate": 1.8823576212588063e-05,
"loss": 3.0175,
"step": 16300
},
{
"epoch": 0.12,
"learning_rate": 1.8816343358069696e-05,
"loss": 3.0128,
"step": 16400
},
{
"epoch": 0.12,
"learning_rate": 1.8809110503551333e-05,
"loss": 3.0058,
"step": 16500
},
{
"epoch": 0.12,
"learning_rate": 1.880187764903297e-05,
"loss": 3.0173,
"step": 16600
},
{
"epoch": 0.12,
"learning_rate": 1.8794644794514606e-05,
"loss": 3.0078,
"step": 16700
},
{
"epoch": 0.12,
"learning_rate": 1.8787411939996242e-05,
"loss": 2.9971,
"step": 16800
},
{
"epoch": 0.12,
"learning_rate": 1.8780179085477875e-05,
"loss": 3.0027,
"step": 16900
},
{
"epoch": 0.12,
"learning_rate": 1.8772946230959512e-05,
"loss": 3.0008,
"step": 17000
},
{
"epoch": 0.12,
"eval_accuracy": 0.4320149531353495,
"eval_loss": 3.0199708938598633,
"eval_runtime": 27.8606,
"eval_samples_per_second": 232.694,
"eval_steps_per_second": 2.441,
"step": 17000
},
{
"epoch": 0.12,
"learning_rate": 1.8765713376441148e-05,
"loss": 3.0051,
"step": 17100
},
{
"epoch": 0.12,
"learning_rate": 1.8758480521922785e-05,
"loss": 3.0007,
"step": 17200
},
{
"epoch": 0.13,
"learning_rate": 1.8751319995949603e-05,
"loss": 3.0039,
"step": 17300
},
{
"epoch": 0.13,
"learning_rate": 1.874408714143124e-05,
"loss": 3.0022,
"step": 17400
},
{
"epoch": 0.13,
"learning_rate": 1.8736854286912876e-05,
"loss": 3.0052,
"step": 17500
},
{
"epoch": 0.13,
"learning_rate": 1.8729621432394512e-05,
"loss": 3.0004,
"step": 17600
},
{
"epoch": 0.13,
"learning_rate": 1.8722388577876146e-05,
"loss": 2.9994,
"step": 17700
},
{
"epoch": 0.13,
"learning_rate": 1.8715155723357782e-05,
"loss": 2.9951,
"step": 17800
},
{
"epoch": 0.13,
"learning_rate": 1.870792286883942e-05,
"loss": 2.9933,
"step": 17900
},
{
"epoch": 0.13,
"learning_rate": 1.8700690014321055e-05,
"loss": 2.9952,
"step": 18000
},
{
"epoch": 0.13,
"eval_accuracy": 0.4330487277434497,
"eval_loss": 3.0093255043029785,
"eval_runtime": 28.2096,
"eval_samples_per_second": 229.815,
"eval_steps_per_second": 2.411,
"step": 18000
},
{
"epoch": 0.13,
"learning_rate": 1.8693457159802688e-05,
"loss": 2.993,
"step": 18100
},
{
"epoch": 0.13,
"learning_rate": 1.8686224305284325e-05,
"loss": 2.9969,
"step": 18200
},
{
"epoch": 0.13,
"learning_rate": 1.867899145076596e-05,
"loss": 2.995,
"step": 18300
},
{
"epoch": 0.13,
"learning_rate": 1.8671758596247594e-05,
"loss": 2.9944,
"step": 18400
},
{
"epoch": 0.13,
"learning_rate": 1.866452574172923e-05,
"loss": 2.9843,
"step": 18500
},
{
"epoch": 0.13,
"learning_rate": 1.8657292887210867e-05,
"loss": 2.993,
"step": 18600
},
{
"epoch": 0.14,
"learning_rate": 1.8650060032692504e-05,
"loss": 2.9936,
"step": 18700
},
{
"epoch": 0.14,
"learning_rate": 1.864282717817414e-05,
"loss": 2.9957,
"step": 18800
},
{
"epoch": 0.14,
"learning_rate": 1.8635594323655774e-05,
"loss": 2.9793,
"step": 18900
},
{
"epoch": 0.14,
"learning_rate": 1.862836146913741e-05,
"loss": 2.9825,
"step": 19000
},
{
"epoch": 0.14,
"eval_accuracy": 0.4340897611551176,
"eval_loss": 2.999600410461426,
"eval_runtime": 28.9603,
"eval_samples_per_second": 223.858,
"eval_steps_per_second": 2.348,
"step": 19000
},
{
"epoch": 0.14,
"learning_rate": 1.8621128614619047e-05,
"loss": 2.9911,
"step": 19100
},
{
"epoch": 0.14,
"learning_rate": 1.8613895760100683e-05,
"loss": 2.9847,
"step": 19200
},
{
"epoch": 0.14,
"learning_rate": 1.8606662905582316e-05,
"loss": 2.979,
"step": 19300
},
{
"epoch": 0.14,
"learning_rate": 1.8599430051063953e-05,
"loss": 2.9858,
"step": 19400
},
{
"epoch": 0.14,
"learning_rate": 1.859219719654559e-05,
"loss": 2.9766,
"step": 19500
},
{
"epoch": 0.14,
"learning_rate": 1.8584964342027226e-05,
"loss": 2.9735,
"step": 19600
},
{
"epoch": 0.14,
"learning_rate": 1.8577731487508862e-05,
"loss": 2.9777,
"step": 19700
},
{
"epoch": 0.14,
"learning_rate": 1.8570498632990495e-05,
"loss": 2.9714,
"step": 19800
},
{
"epoch": 0.14,
"learning_rate": 1.8563265778472132e-05,
"loss": 2.9762,
"step": 19900
},
{
"epoch": 0.14,
"learning_rate": 1.855603292395377e-05,
"loss": 2.9781,
"step": 20000
},
{
"epoch": 0.14,
"eval_accuracy": 0.4350987348510282,
"eval_loss": 2.990344285964966,
"eval_runtime": 28.2496,
"eval_samples_per_second": 229.49,
"eval_steps_per_second": 2.407,
"step": 20000
},
{
"epoch": 0.15,
"learning_rate": 1.8548800069435405e-05,
"loss": 2.9686,
"step": 20100
},
{
"epoch": 0.15,
"learning_rate": 1.854156721491704e-05,
"loss": 2.9724,
"step": 20200
},
{
"epoch": 0.15,
"learning_rate": 1.8534334360398675e-05,
"loss": 2.9771,
"step": 20300
},
{
"epoch": 0.15,
"learning_rate": 1.852710150588031e-05,
"loss": 2.9698,
"step": 20400
},
{
"epoch": 0.15,
"learning_rate": 1.8519868651361948e-05,
"loss": 2.9693,
"step": 20500
},
{
"epoch": 0.15,
"learning_rate": 1.8512635796843584e-05,
"loss": 2.9712,
"step": 20600
},
{
"epoch": 0.15,
"learning_rate": 1.850540294232522e-05,
"loss": 2.9724,
"step": 20700
},
{
"epoch": 0.15,
"learning_rate": 1.849824241635204e-05,
"loss": 2.9726,
"step": 20800
},
{
"epoch": 0.15,
"learning_rate": 1.8491009561833675e-05,
"loss": 2.9656,
"step": 20900
},
{
"epoch": 0.15,
"learning_rate": 1.848377670731531e-05,
"loss": 2.957,
"step": 21000
},
{
"epoch": 0.15,
"eval_accuracy": 0.435996406892234,
"eval_loss": 2.9821181297302246,
"eval_runtime": 27.9904,
"eval_samples_per_second": 231.615,
"eval_steps_per_second": 2.429,
"step": 21000
},
{
"epoch": 0.15,
"learning_rate": 1.8476543852796948e-05,
"loss": 2.9718,
"step": 21100
},
{
"epoch": 0.15,
"learning_rate": 1.846931099827858e-05,
"loss": 2.9685,
"step": 21200
},
{
"epoch": 0.15,
"learning_rate": 1.8462078143760218e-05,
"loss": 2.9582,
"step": 21300
},
{
"epoch": 0.15,
"learning_rate": 1.8454845289241854e-05,
"loss": 2.9644,
"step": 21400
},
{
"epoch": 0.16,
"learning_rate": 1.844761243472349e-05,
"loss": 2.9678,
"step": 21500
},
{
"epoch": 0.16,
"learning_rate": 1.8440379580205124e-05,
"loss": 2.9632,
"step": 21600
},
{
"epoch": 0.16,
"learning_rate": 1.843314672568676e-05,
"loss": 2.9642,
"step": 21700
},
{
"epoch": 0.16,
"learning_rate": 1.8425913871168397e-05,
"loss": 2.9633,
"step": 21800
},
{
"epoch": 0.16,
"learning_rate": 1.841875334519522e-05,
"loss": 2.9495,
"step": 21900
},
{
"epoch": 0.16,
"learning_rate": 1.841152049067685e-05,
"loss": 2.9676,
"step": 22000
},
{
"epoch": 0.16,
"eval_accuracy": 0.4368305644022224,
"eval_loss": 2.973825454711914,
"eval_runtime": 30.0107,
"eval_samples_per_second": 216.023,
"eval_steps_per_second": 2.266,
"step": 22000
},
{
"epoch": 0.16,
"learning_rate": 1.8404287636158488e-05,
"loss": 2.9593,
"step": 22100
},
{
"epoch": 0.16,
"learning_rate": 1.8397054781640125e-05,
"loss": 2.9548,
"step": 22200
},
{
"epoch": 0.16,
"learning_rate": 1.838982192712176e-05,
"loss": 2.9656,
"step": 22300
},
{
"epoch": 0.16,
"learning_rate": 1.8382589072603398e-05,
"loss": 2.957,
"step": 22400
},
{
"epoch": 0.16,
"learning_rate": 1.837535621808503e-05,
"loss": 2.9557,
"step": 22500
},
{
"epoch": 0.16,
"learning_rate": 1.8368123363566667e-05,
"loss": 2.9577,
"step": 22600
},
{
"epoch": 0.16,
"learning_rate": 1.8360962837593485e-05,
"loss": 2.9519,
"step": 22700
},
{
"epoch": 0.16,
"learning_rate": 1.8353729983075122e-05,
"loss": 2.955,
"step": 22800
},
{
"epoch": 0.17,
"learning_rate": 1.834649712855676e-05,
"loss": 2.9548,
"step": 22900
},
{
"epoch": 0.17,
"learning_rate": 1.8339336602583576e-05,
"loss": 2.9513,
"step": 23000
},
{
"epoch": 0.17,
"eval_accuracy": 0.43764657490329156,
"eval_loss": 2.9663443565368652,
"eval_runtime": 28.3956,
"eval_samples_per_second": 228.31,
"eval_steps_per_second": 2.395,
"step": 23000
},
{
"epoch": 0.17,
"learning_rate": 1.8332103748065213e-05,
"loss": 2.9514,
"step": 23100
},
{
"epoch": 0.17,
"learning_rate": 1.832487089354685e-05,
"loss": 2.9546,
"step": 23200
},
{
"epoch": 0.17,
"learning_rate": 1.8317638039028486e-05,
"loss": 2.9449,
"step": 23300
},
{
"epoch": 0.17,
"learning_rate": 1.831040518451012e-05,
"loss": 2.9515,
"step": 23400
},
{
"epoch": 0.17,
"learning_rate": 1.8303172329991756e-05,
"loss": 2.946,
"step": 23500
},
{
"epoch": 0.17,
"learning_rate": 1.8295939475473392e-05,
"loss": 2.9443,
"step": 23600
},
{
"epoch": 0.17,
"learning_rate": 1.828870662095503e-05,
"loss": 2.9437,
"step": 23700
},
{
"epoch": 0.17,
"learning_rate": 1.8281473766436665e-05,
"loss": 2.9395,
"step": 23800
},
{
"epoch": 0.17,
"learning_rate": 1.8274240911918298e-05,
"loss": 2.9422,
"step": 23900
},
{
"epoch": 0.17,
"learning_rate": 1.8267008057399935e-05,
"loss": 2.9475,
"step": 24000
},
{
"epoch": 0.17,
"eval_accuracy": 0.4384904108180369,
"eval_loss": 2.959416389465332,
"eval_runtime": 30.8733,
"eval_samples_per_second": 209.988,
"eval_steps_per_second": 2.203,
"step": 24000
},
{
"epoch": 0.17,
"learning_rate": 1.825977520288157e-05,
"loss": 2.9432,
"step": 24100
},
{
"epoch": 0.18,
"learning_rate": 1.8252542348363208e-05,
"loss": 2.9413,
"step": 24200
},
{
"epoch": 0.18,
"learning_rate": 1.8245309493844844e-05,
"loss": 2.9378,
"step": 24300
},
{
"epoch": 0.18,
"learning_rate": 1.8238076639326477e-05,
"loss": 2.9429,
"step": 24400
},
{
"epoch": 0.18,
"learning_rate": 1.8230843784808114e-05,
"loss": 2.9398,
"step": 24500
},
{
"epoch": 0.18,
"learning_rate": 1.822361093028975e-05,
"loss": 2.9462,
"step": 24600
},
{
"epoch": 0.18,
"learning_rate": 1.8216378075771387e-05,
"loss": 2.9334,
"step": 24700
},
{
"epoch": 0.18,
"learning_rate": 1.820914522125302e-05,
"loss": 2.944,
"step": 24800
},
{
"epoch": 0.18,
"learning_rate": 1.8201912366734657e-05,
"loss": 2.9331,
"step": 24900
},
{
"epoch": 0.18,
"learning_rate": 1.8194679512216293e-05,
"loss": 2.9406,
"step": 25000
},
{
"epoch": 0.18,
"eval_accuracy": 0.43914672764061663,
"eval_loss": 2.953129291534424,
"eval_runtime": 29.0217,
"eval_samples_per_second": 223.385,
"eval_steps_per_second": 2.343,
"step": 25000
},
{
"epoch": 0.18,
"learning_rate": 1.818744665769793e-05,
"loss": 2.9408,
"step": 25100
},
{
"epoch": 0.18,
"learning_rate": 1.8180213803179566e-05,
"loss": 2.9313,
"step": 25200
},
{
"epoch": 0.18,
"learning_rate": 1.81729809486612e-05,
"loss": 2.9436,
"step": 25300
},
{
"epoch": 0.18,
"learning_rate": 1.8165748094142836e-05,
"loss": 2.9374,
"step": 25400
},
{
"epoch": 0.18,
"learning_rate": 1.8158515239624472e-05,
"loss": 2.9326,
"step": 25500
},
{
"epoch": 0.19,
"learning_rate": 1.815128238510611e-05,
"loss": 2.942,
"step": 25600
},
{
"epoch": 0.19,
"learning_rate": 1.8144049530587745e-05,
"loss": 2.9281,
"step": 25700
},
{
"epoch": 0.19,
"learning_rate": 1.813681667606938e-05,
"loss": 2.9341,
"step": 25800
},
{
"epoch": 0.19,
"learning_rate": 1.8129583821551015e-05,
"loss": 2.9369,
"step": 25900
},
{
"epoch": 0.19,
"learning_rate": 1.812235096703265e-05,
"loss": 2.9387,
"step": 26000
},
{
"epoch": 0.19,
"eval_accuracy": 0.4397600965420875,
"eval_loss": 2.9472999572753906,
"eval_runtime": 28.7579,
"eval_samples_per_second": 225.434,
"eval_steps_per_second": 2.365,
"step": 26000
},
{
"epoch": 0.19,
"learning_rate": 1.8115118112514288e-05,
"loss": 2.9313,
"step": 26100
},
{
"epoch": 0.19,
"learning_rate": 1.8107957586541106e-05,
"loss": 2.9328,
"step": 26200
},
{
"epoch": 0.19,
"learning_rate": 1.810072473202274e-05,
"loss": 2.9346,
"step": 26300
},
{
"epoch": 0.19,
"learning_rate": 1.8093491877504376e-05,
"loss": 2.9297,
"step": 26400
},
{
"epoch": 0.19,
"learning_rate": 1.8086259022986012e-05,
"loss": 2.9283,
"step": 26500
},
{
"epoch": 0.19,
"learning_rate": 1.807902616846765e-05,
"loss": 2.9278,
"step": 26600
},
{
"epoch": 0.19,
"learning_rate": 1.8071793313949285e-05,
"loss": 2.9337,
"step": 26700
},
{
"epoch": 0.19,
"learning_rate": 1.806456045943092e-05,
"loss": 2.9267,
"step": 26800
},
{
"epoch": 0.19,
"learning_rate": 1.8057327604912555e-05,
"loss": 2.9238,
"step": 26900
},
{
"epoch": 0.2,
"learning_rate": 1.8050167078939376e-05,
"loss": 2.9353,
"step": 27000
},
{
"epoch": 0.2,
"eval_accuracy": 0.4403087411117463,
"eval_loss": 2.941570520401001,
"eval_runtime": 29.8384,
"eval_samples_per_second": 217.271,
"eval_steps_per_second": 2.279,
"step": 27000
},
{
"epoch": 0.2,
"learning_rate": 1.8042934224421013e-05,
"loss": 2.9234,
"step": 27100
},
{
"epoch": 0.2,
"learning_rate": 1.8035701369902646e-05,
"loss": 2.9276,
"step": 27200
},
{
"epoch": 0.2,
"learning_rate": 1.8028468515384282e-05,
"loss": 2.9249,
"step": 27300
},
{
"epoch": 0.2,
"learning_rate": 1.802123566086592e-05,
"loss": 2.9259,
"step": 27400
},
{
"epoch": 0.2,
"learning_rate": 1.8014002806347555e-05,
"loss": 2.9235,
"step": 27500
},
{
"epoch": 0.2,
"learning_rate": 1.8006769951829192e-05,
"loss": 2.9182,
"step": 27600
},
{
"epoch": 0.2,
"learning_rate": 1.7999537097310825e-05,
"loss": 2.9273,
"step": 27700
},
{
"epoch": 0.2,
"learning_rate": 1.799230424279246e-05,
"loss": 2.925,
"step": 27800
},
{
"epoch": 0.2,
"learning_rate": 1.7985071388274098e-05,
"loss": 2.9196,
"step": 27900
},
{
"epoch": 0.2,
"learning_rate": 1.7977838533755735e-05,
"loss": 2.9208,
"step": 28000
},
{
"epoch": 0.2,
"eval_accuracy": 0.44107756938962533,
"eval_loss": 2.936343193054199,
"eval_runtime": 29.069,
"eval_samples_per_second": 223.021,
"eval_steps_per_second": 2.339,
"step": 28000
},
{
"epoch": 0.2,
"learning_rate": 1.797060567923737e-05,
"loss": 2.907,
"step": 28100
},
{
"epoch": 0.2,
"learning_rate": 1.7963372824719004e-05,
"loss": 2.9215,
"step": 28200
},
{
"epoch": 0.2,
"learning_rate": 1.795613997020064e-05,
"loss": 2.9223,
"step": 28300
},
{
"epoch": 0.21,
"learning_rate": 1.7948907115682277e-05,
"loss": 2.9122,
"step": 28400
},
{
"epoch": 0.21,
"learning_rate": 1.7941674261163914e-05,
"loss": 2.9166,
"step": 28500
},
{
"epoch": 0.21,
"learning_rate": 1.7934441406645547e-05,
"loss": 2.9289,
"step": 28600
},
{
"epoch": 0.21,
"learning_rate": 1.7927208552127183e-05,
"loss": 2.914,
"step": 28700
},
{
"epoch": 0.21,
"learning_rate": 1.791997569760882e-05,
"loss": 2.9175,
"step": 28800
},
{
"epoch": 0.21,
"learning_rate": 1.7912742843090456e-05,
"loss": 2.9142,
"step": 28900
},
{
"epoch": 0.21,
"learning_rate": 1.7905509988572093e-05,
"loss": 2.9142,
"step": 29000
},
{
"epoch": 0.21,
"eval_accuracy": 0.4415330593134987,
"eval_loss": 2.9310333728790283,
"eval_runtime": 30.502,
"eval_samples_per_second": 212.543,
"eval_steps_per_second": 2.229,
"step": 29000
},
{
"epoch": 0.21,
"learning_rate": 1.789834946259891e-05,
"loss": 2.9196,
"step": 29100
},
{
"epoch": 0.21,
"learning_rate": 1.7891116608080548e-05,
"loss": 2.9166,
"step": 29200
},
{
"epoch": 0.21,
"learning_rate": 1.788388375356218e-05,
"loss": 2.917,
"step": 29300
},
{
"epoch": 0.21,
"learning_rate": 1.7876650899043817e-05,
"loss": 2.9178,
"step": 29400
},
{
"epoch": 0.21,
"learning_rate": 1.7869418044525454e-05,
"loss": 2.9148,
"step": 29500
},
{
"epoch": 0.21,
"learning_rate": 1.786218519000709e-05,
"loss": 2.9086,
"step": 29600
},
{
"epoch": 0.21,
"learning_rate": 1.7855024664033908e-05,
"loss": 2.9154,
"step": 29700
},
{
"epoch": 0.22,
"learning_rate": 1.7847791809515545e-05,
"loss": 2.9156,
"step": 29800
},
{
"epoch": 0.22,
"learning_rate": 1.784055895499718e-05,
"loss": 2.9178,
"step": 29900
},
{
"epoch": 0.22,
"learning_rate": 1.7833326100478814e-05,
"loss": 2.9167,
"step": 30000
},
{
"epoch": 0.22,
"eval_accuracy": 0.44185305157077487,
"eval_loss": 2.9265494346618652,
"eval_runtime": 28.0744,
"eval_samples_per_second": 230.922,
"eval_steps_per_second": 2.422,
"step": 30000
},
{
"epoch": 0.22,
"learning_rate": 1.782609324596045e-05,
"loss": 2.905,
"step": 30100
},
{
"epoch": 0.22,
"learning_rate": 1.7818860391442087e-05,
"loss": 2.9161,
"step": 30200
},
{
"epoch": 0.22,
"learning_rate": 1.7811627536923724e-05,
"loss": 2.9094,
"step": 30300
},
{
"epoch": 0.22,
"learning_rate": 1.780439468240536e-05,
"loss": 2.9145,
"step": 30400
},
{
"epoch": 0.22,
"learning_rate": 1.7797161827886994e-05,
"loss": 2.9115,
"step": 30500
},
{
"epoch": 0.22,
"learning_rate": 1.778992897336863e-05,
"loss": 2.9093,
"step": 30600
},
{
"epoch": 0.22,
"learning_rate": 1.7782696118850267e-05,
"loss": 2.9111,
"step": 30700
},
{
"epoch": 0.22,
"learning_rate": 1.7775463264331903e-05,
"loss": 2.9052,
"step": 30800
},
{
"epoch": 0.22,
"learning_rate": 1.776823040981354e-05,
"loss": 2.9072,
"step": 30900
},
{
"epoch": 0.22,
"learning_rate": 1.7760997555295173e-05,
"loss": 2.9069,
"step": 31000
},
{
"epoch": 0.22,
"eval_accuracy": 0.4425190467981115,
"eval_loss": 2.921447992324829,
"eval_runtime": 27.8624,
"eval_samples_per_second": 232.679,
"eval_steps_per_second": 2.441,
"step": 31000
},
{
"epoch": 0.22,
"learning_rate": 1.7753837029321994e-05,
"loss": 2.9063,
"step": 31100
},
{
"epoch": 0.23,
"learning_rate": 1.7746676503348816e-05,
"loss": 2.9101,
"step": 31200
},
{
"epoch": 0.23,
"learning_rate": 1.773944364883045e-05,
"loss": 2.8999,
"step": 31300
},
{
"epoch": 0.23,
"learning_rate": 1.7732210794312085e-05,
"loss": 2.9026,
"step": 31400
},
{
"epoch": 0.23,
"learning_rate": 1.772497793979372e-05,
"loss": 2.9066,
"step": 31500
},
{
"epoch": 0.23,
"learning_rate": 1.7717745085275355e-05,
"loss": 2.9032,
"step": 31600
},
{
"epoch": 0.23,
"learning_rate": 1.771051223075699e-05,
"loss": 2.9003,
"step": 31700
},
{
"epoch": 0.23,
"learning_rate": 1.7703279376238628e-05,
"loss": 2.9049,
"step": 31800
},
{
"epoch": 0.23,
"learning_rate": 1.769604652172026e-05,
"loss": 2.8992,
"step": 31900
},
{
"epoch": 0.23,
"learning_rate": 1.7688813667201898e-05,
"loss": 2.9067,
"step": 32000
},
{
"epoch": 0.23,
"eval_accuracy": 0.44299268373090406,
"eval_loss": 2.9168407917022705,
"eval_runtime": 29.0945,
"eval_samples_per_second": 222.825,
"eval_steps_per_second": 2.337,
"step": 32000
},
{
"epoch": 0.23,
"learning_rate": 1.7681580812683534e-05,
"loss": 2.9007,
"step": 32100
},
{
"epoch": 0.23,
"learning_rate": 1.767434795816517e-05,
"loss": 2.8995,
"step": 32200
},
{
"epoch": 0.23,
"learning_rate": 1.7667115103646807e-05,
"loss": 2.9044,
"step": 32300
},
{
"epoch": 0.23,
"learning_rate": 1.765988224912844e-05,
"loss": 2.8931,
"step": 32400
},
{
"epoch": 0.24,
"learning_rate": 1.7652649394610077e-05,
"loss": 2.902,
"step": 32500
},
{
"epoch": 0.24,
"learning_rate": 1.7645416540091713e-05,
"loss": 2.9019,
"step": 32600
},
{
"epoch": 0.24,
"learning_rate": 1.763818368557335e-05,
"loss": 2.9064,
"step": 32700
},
{
"epoch": 0.24,
"learning_rate": 1.7630950831054986e-05,
"loss": 2.9004,
"step": 32800
},
{
"epoch": 0.24,
"learning_rate": 1.7623790305081804e-05,
"loss": 2.8917,
"step": 32900
},
{
"epoch": 0.24,
"learning_rate": 1.761655745056344e-05,
"loss": 2.8978,
"step": 33000
},
{
"epoch": 0.24,
"eval_accuracy": 0.4434288168452635,
"eval_loss": 2.91280198097229,
"eval_runtime": 27.8037,
"eval_samples_per_second": 233.17,
"eval_steps_per_second": 2.446,
"step": 33000
},
{
"epoch": 0.24,
"learning_rate": 1.7609324596045077e-05,
"loss": 2.9001,
"step": 33100
},
{
"epoch": 0.24,
"learning_rate": 1.7602091741526714e-05,
"loss": 2.898,
"step": 33200
},
{
"epoch": 0.24,
"learning_rate": 1.7594858887008347e-05,
"loss": 2.8938,
"step": 33300
},
{
"epoch": 0.24,
"learning_rate": 1.7587626032489984e-05,
"loss": 2.8943,
"step": 33400
},
{
"epoch": 0.24,
"learning_rate": 1.758039317797162e-05,
"loss": 2.8966,
"step": 33500
},
{
"epoch": 0.24,
"learning_rate": 1.7573160323453257e-05,
"loss": 2.9033,
"step": 33600
},
{
"epoch": 0.24,
"learning_rate": 1.756592746893489e-05,
"loss": 2.8971,
"step": 33700
},
{
"epoch": 0.24,
"learning_rate": 1.7558694614416526e-05,
"loss": 2.8919,
"step": 33800
},
{
"epoch": 0.25,
"learning_rate": 1.7551461759898163e-05,
"loss": 2.8973,
"step": 33900
},
{
"epoch": 0.25,
"learning_rate": 1.75442289053798e-05,
"loss": 2.8982,
"step": 34000
},
{
"epoch": 0.25,
"eval_accuracy": 0.44378510312037817,
"eval_loss": 2.9087538719177246,
"eval_runtime": 30.238,
"eval_samples_per_second": 214.399,
"eval_steps_per_second": 2.249,
"step": 34000
},
{
"epoch": 0.25,
"learning_rate": 1.7536996050861436e-05,
"loss": 2.8893,
"step": 34100
},
{
"epoch": 0.25,
"learning_rate": 1.752976319634307e-05,
"loss": 2.8867,
"step": 34200
},
{
"epoch": 0.25,
"learning_rate": 1.7522530341824705e-05,
"loss": 2.8991,
"step": 34300
},
{
"epoch": 0.25,
"learning_rate": 1.7515297487306342e-05,
"loss": 2.8888,
"step": 34400
},
{
"epoch": 0.25,
"learning_rate": 1.750806463278798e-05,
"loss": 2.895,
"step": 34500
},
{
"epoch": 0.25,
"learning_rate": 1.7500831778269615e-05,
"loss": 2.8962,
"step": 34600
},
{
"epoch": 0.25,
"learning_rate": 1.7493598923751248e-05,
"loss": 2.8883,
"step": 34700
},
{
"epoch": 0.25,
"learning_rate": 1.7486366069232885e-05,
"loss": 2.8861,
"step": 34800
},
{
"epoch": 0.25,
"learning_rate": 1.747913321471452e-05,
"loss": 2.8939,
"step": 34900
},
{
"epoch": 0.25,
"learning_rate": 1.7471900360196158e-05,
"loss": 2.8856,
"step": 35000
},
{
"epoch": 0.25,
"eval_accuracy": 0.44437488091025396,
"eval_loss": 2.904993772506714,
"eval_runtime": 29.3345,
"eval_samples_per_second": 221.002,
"eval_steps_per_second": 2.318,
"step": 35000
},
{
"epoch": 0.25,
"learning_rate": 1.7464667505677794e-05,
"loss": 2.8966,
"step": 35100
},
{
"epoch": 0.25,
"learning_rate": 1.7457434651159427e-05,
"loss": 2.8854,
"step": 35200
},
{
"epoch": 0.26,
"learning_rate": 1.7450201796641064e-05,
"loss": 2.8906,
"step": 35300
},
{
"epoch": 0.26,
"learning_rate": 1.7443041270667885e-05,
"loss": 2.8887,
"step": 35400
},
{
"epoch": 0.26,
"learning_rate": 1.743580841614952e-05,
"loss": 2.8843,
"step": 35500
},
{
"epoch": 0.26,
"learning_rate": 1.7428575561631155e-05,
"loss": 2.8908,
"step": 35600
},
{
"epoch": 0.26,
"learning_rate": 1.742134270711279e-05,
"loss": 2.8883,
"step": 35700
},
{
"epoch": 0.26,
"learning_rate": 1.7414109852594428e-05,
"loss": 2.8848,
"step": 35800
},
{
"epoch": 0.26,
"learning_rate": 1.740687699807606e-05,
"loss": 2.876,
"step": 35900
},
{
"epoch": 0.26,
"learning_rate": 1.7399644143557697e-05,
"loss": 2.8981,
"step": 36000
},
{
"epoch": 0.26,
"eval_accuracy": 0.44447287475841796,
"eval_loss": 2.901261806488037,
"eval_runtime": 28.6761,
"eval_samples_per_second": 226.077,
"eval_steps_per_second": 2.371,
"step": 36000
},
{
"epoch": 0.26,
"learning_rate": 1.7392411289039334e-05,
"loss": 2.8881,
"step": 36100
},
{
"epoch": 0.26,
"learning_rate": 1.7385178434520967e-05,
"loss": 2.8854,
"step": 36200
},
{
"epoch": 0.26,
"learning_rate": 1.7377945580002604e-05,
"loss": 2.8843,
"step": 36300
},
{
"epoch": 0.26,
"learning_rate": 1.7370785054029425e-05,
"loss": 2.8885,
"step": 36400
},
{
"epoch": 0.26,
"learning_rate": 1.736355219951106e-05,
"loss": 2.8869,
"step": 36500
},
{
"epoch": 0.26,
"learning_rate": 1.7356319344992695e-05,
"loss": 2.8813,
"step": 36600
},
{
"epoch": 0.27,
"learning_rate": 1.734908649047433e-05,
"loss": 2.884,
"step": 36700
},
{
"epoch": 0.27,
"learning_rate": 1.7341853635955968e-05,
"loss": 2.8834,
"step": 36800
},
{
"epoch": 0.27,
"learning_rate": 1.7334620781437604e-05,
"loss": 2.8773,
"step": 36900
},
{
"epoch": 0.27,
"learning_rate": 1.732738792691924e-05,
"loss": 2.8813,
"step": 37000
},
{
"epoch": 0.27,
"eval_accuracy": 0.44499187921350863,
"eval_loss": 2.8976523876190186,
"eval_runtime": 27.9421,
"eval_samples_per_second": 232.015,
"eval_steps_per_second": 2.434,
"step": 37000
},
{
"epoch": 0.27,
"learning_rate": 1.7320155072400874e-05,
"loss": 2.8865,
"step": 37100
},
{
"epoch": 0.27,
"learning_rate": 1.731292221788251e-05,
"loss": 2.881,
"step": 37200
},
{
"epoch": 0.27,
"learning_rate": 1.7305689363364147e-05,
"loss": 2.8781,
"step": 37300
},
{
"epoch": 0.27,
"learning_rate": 1.7298456508845783e-05,
"loss": 2.884,
"step": 37400
},
{
"epoch": 0.27,
"learning_rate": 1.7291223654327417e-05,
"loss": 2.8802,
"step": 37500
},
{
"epoch": 0.27,
"learning_rate": 1.7283990799809053e-05,
"loss": 2.8802,
"step": 37600
},
{
"epoch": 0.27,
"learning_rate": 1.727675794529069e-05,
"loss": 2.8685,
"step": 37700
},
{
"epoch": 0.27,
"learning_rate": 1.7269525090772326e-05,
"loss": 2.8745,
"step": 37800
},
{
"epoch": 0.27,
"learning_rate": 1.7262292236253963e-05,
"loss": 2.8799,
"step": 37900
},
{
"epoch": 0.27,
"learning_rate": 1.7255059381735596e-05,
"loss": 2.8765,
"step": 38000
},
{
"epoch": 0.27,
"eval_accuracy": 0.44532699397821757,
"eval_loss": 2.8943746089935303,
"eval_runtime": 28.0195,
"eval_samples_per_second": 231.375,
"eval_steps_per_second": 2.427,
"step": 38000
},
{
"epoch": 0.28,
"learning_rate": 1.7247826527217232e-05,
"loss": 2.8834,
"step": 38100
},
{
"epoch": 0.28,
"learning_rate": 1.724059367269887e-05,
"loss": 2.8782,
"step": 38200
},
{
"epoch": 0.28,
"learning_rate": 1.7233360818180505e-05,
"loss": 2.8855,
"step": 38300
},
{
"epoch": 0.28,
"learning_rate": 1.7226127963662142e-05,
"loss": 2.8681,
"step": 38400
},
{
"epoch": 0.28,
"learning_rate": 1.7218895109143775e-05,
"loss": 2.8832,
"step": 38500
},
{
"epoch": 0.28,
"learning_rate": 1.721166225462541e-05,
"loss": 2.8764,
"step": 38600
},
{
"epoch": 0.28,
"learning_rate": 1.7204429400107048e-05,
"loss": 2.8771,
"step": 38700
},
{
"epoch": 0.28,
"learning_rate": 1.7197196545588684e-05,
"loss": 2.869,
"step": 38800
},
{
"epoch": 0.28,
"learning_rate": 1.7190036019615502e-05,
"loss": 2.8749,
"step": 38900
},
{
"epoch": 0.28,
"learning_rate": 1.718280316509714e-05,
"loss": 2.879,
"step": 39000
},
{
"epoch": 0.28,
"eval_accuracy": 0.4457794594006043,
"eval_loss": 2.891030788421631,
"eval_runtime": 33.3451,
"eval_samples_per_second": 194.421,
"eval_steps_per_second": 2.039,
"step": 39000
},
{
"epoch": 0.28,
"learning_rate": 1.7175570310578776e-05,
"loss": 2.8757,
"step": 39100
},
{
"epoch": 0.28,
"learning_rate": 1.7168409784605594e-05,
"loss": 2.8721,
"step": 39200
},
{
"epoch": 0.28,
"learning_rate": 1.716117693008723e-05,
"loss": 2.8751,
"step": 39300
},
{
"epoch": 0.28,
"learning_rate": 1.7153944075568863e-05,
"loss": 2.8813,
"step": 39400
},
{
"epoch": 0.29,
"learning_rate": 1.71467112210505e-05,
"loss": 2.8738,
"step": 39500
},
{
"epoch": 0.29,
"learning_rate": 1.7139478366532136e-05,
"loss": 2.8761,
"step": 39600
},
{
"epoch": 0.29,
"learning_rate": 1.7132245512013773e-05,
"loss": 2.877,
"step": 39700
},
{
"epoch": 0.29,
"learning_rate": 1.712501265749541e-05,
"loss": 2.8673,
"step": 39800
},
{
"epoch": 0.29,
"learning_rate": 1.7117779802977042e-05,
"loss": 2.8789,
"step": 39900
},
{
"epoch": 0.29,
"learning_rate": 1.711054694845868e-05,
"loss": 2.8738,
"step": 40000
},
{
"epoch": 0.29,
"eval_accuracy": 0.4462434179286399,
"eval_loss": 2.8878371715545654,
"eval_runtime": 30.9298,
"eval_samples_per_second": 209.604,
"eval_steps_per_second": 2.199,
"step": 40000
},
{
"epoch": 0.29,
"learning_rate": 1.7103314093940315e-05,
"loss": 2.8666,
"step": 40100
},
{
"epoch": 0.29,
"learning_rate": 1.7096081239421952e-05,
"loss": 2.8741,
"step": 40200
},
{
"epoch": 0.29,
"learning_rate": 1.708884838490359e-05,
"loss": 2.8752,
"step": 40300
},
{
"epoch": 0.29,
"learning_rate": 1.708161553038522e-05,
"loss": 2.8725,
"step": 40400
},
{
"epoch": 0.29,
"learning_rate": 1.7074382675866858e-05,
"loss": 2.8711,
"step": 40500
},
{
"epoch": 0.29,
"learning_rate": 1.7067149821348495e-05,
"loss": 2.8709,
"step": 40600
},
{
"epoch": 0.29,
"learning_rate": 1.705991696683013e-05,
"loss": 2.8677,
"step": 40700
},
{
"epoch": 0.3,
"learning_rate": 1.7052684112311764e-05,
"loss": 2.8678,
"step": 40800
},
{
"epoch": 0.3,
"learning_rate": 1.70454512577934e-05,
"loss": 2.8671,
"step": 40900
},
{
"epoch": 0.3,
"learning_rate": 1.7038218403275037e-05,
"loss": 2.8671,
"step": 41000
},
{
"epoch": 0.3,
"eval_accuracy": 0.4465404239746184,
"eval_loss": 2.8851165771484375,
"eval_runtime": 28.3107,
"eval_samples_per_second": 228.994,
"eval_steps_per_second": 2.402,
"step": 41000
},
{
"epoch": 0.3,
"learning_rate": 1.7030985548756674e-05,
"loss": 2.8682,
"step": 41100
},
{
"epoch": 0.3,
"learning_rate": 1.7023825022783492e-05,
"loss": 2.8793,
"step": 41200
},
{
"epoch": 0.3,
"learning_rate": 1.701659216826513e-05,
"loss": 2.864,
"step": 41300
},
{
"epoch": 0.3,
"learning_rate": 1.7009359313746765e-05,
"loss": 2.8711,
"step": 41400
},
{
"epoch": 0.3,
"learning_rate": 1.70021264592284e-05,
"loss": 2.8738,
"step": 41500
},
{
"epoch": 0.3,
"learning_rate": 1.6994893604710038e-05,
"loss": 2.862,
"step": 41600
},
{
"epoch": 0.3,
"learning_rate": 1.698766075019167e-05,
"loss": 2.8652,
"step": 41700
},
{
"epoch": 0.3,
"learning_rate": 1.6980427895673307e-05,
"loss": 2.8758,
"step": 41800
},
{
"epoch": 0.3,
"learning_rate": 1.6973195041154944e-05,
"loss": 2.8695,
"step": 41900
},
{
"epoch": 0.3,
"learning_rate": 1.696596218663658e-05,
"loss": 2.866,
"step": 42000
},
{
"epoch": 0.3,
"eval_accuracy": 0.44681746831078567,
"eval_loss": 2.882030963897705,
"eval_runtime": 28.8345,
"eval_samples_per_second": 224.835,
"eval_steps_per_second": 2.358,
"step": 42000
},
{
"epoch": 0.3,
"learning_rate": 1.6958729332118217e-05,
"loss": 2.8696,
"step": 42100
},
{
"epoch": 0.31,
"learning_rate": 1.695149647759985e-05,
"loss": 2.8766,
"step": 42200
},
{
"epoch": 0.31,
"learning_rate": 1.6944263623081487e-05,
"loss": 2.8706,
"step": 42300
},
{
"epoch": 0.31,
"learning_rate": 1.6937030768563123e-05,
"loss": 2.8673,
"step": 42400
},
{
"epoch": 0.31,
"learning_rate": 1.692979791404476e-05,
"loss": 2.864,
"step": 42500
},
{
"epoch": 0.31,
"learning_rate": 1.6922565059526396e-05,
"loss": 2.8708,
"step": 42600
},
{
"epoch": 0.31,
"learning_rate": 1.691533220500803e-05,
"loss": 2.8642,
"step": 42700
},
{
"epoch": 0.31,
"learning_rate": 1.6908099350489666e-05,
"loss": 2.8601,
"step": 42800
},
{
"epoch": 0.31,
"learning_rate": 1.6900938824516484e-05,
"loss": 2.8662,
"step": 42900
},
{
"epoch": 0.31,
"learning_rate": 1.689370596999812e-05,
"loss": 2.8561,
"step": 43000
},
{
"epoch": 0.31,
"eval_accuracy": 0.4472874758417944,
"eval_loss": 2.879122018814087,
"eval_runtime": 29.1494,
"eval_samples_per_second": 222.406,
"eval_steps_per_second": 2.333,
"step": 43000
},
{
"epoch": 0.31,
"learning_rate": 1.6886473115479757e-05,
"loss": 2.8746,
"step": 43100
},
{
"epoch": 0.31,
"learning_rate": 1.687924026096139e-05,
"loss": 2.8657,
"step": 43200
},
{
"epoch": 0.31,
"learning_rate": 1.6872007406443027e-05,
"loss": 2.8655,
"step": 43300
},
{
"epoch": 0.31,
"learning_rate": 1.6864774551924663e-05,
"loss": 2.8716,
"step": 43400
},
{
"epoch": 0.31,
"learning_rate": 1.68575416974063e-05,
"loss": 2.8701,
"step": 43500
},
{
"epoch": 0.32,
"learning_rate": 1.6850308842887936e-05,
"loss": 2.8669,
"step": 43600
},
{
"epoch": 0.32,
"learning_rate": 1.684307598836957e-05,
"loss": 2.8615,
"step": 43700
},
{
"epoch": 0.32,
"learning_rate": 1.6835843133851206e-05,
"loss": 2.8664,
"step": 43800
},
{
"epoch": 0.32,
"learning_rate": 1.6828610279332842e-05,
"loss": 2.8594,
"step": 43900
},
{
"epoch": 0.32,
"learning_rate": 1.6821449753359664e-05,
"loss": 2.8601,
"step": 44000
},
{
"epoch": 0.32,
"eval_accuracy": 0.4477096962493157,
"eval_loss": 2.8765430450439453,
"eval_runtime": 29.4041,
"eval_samples_per_second": 220.48,
"eval_steps_per_second": 2.313,
"step": 44000
},
{
"epoch": 0.32,
"learning_rate": 1.6814216898841297e-05,
"loss": 2.8605,
"step": 44100
},
{
"epoch": 0.32,
"learning_rate": 1.6806984044322933e-05,
"loss": 2.8665,
"step": 44200
},
{
"epoch": 0.32,
"learning_rate": 1.679975118980457e-05,
"loss": 2.8647,
"step": 44300
},
{
"epoch": 0.32,
"learning_rate": 1.6792518335286206e-05,
"loss": 2.8585,
"step": 44400
},
{
"epoch": 0.32,
"learning_rate": 1.678528548076784e-05,
"loss": 2.8644,
"step": 44500
},
{
"epoch": 0.32,
"learning_rate": 1.6778052626249476e-05,
"loss": 2.8634,
"step": 44600
},
{
"epoch": 0.32,
"learning_rate": 1.6770819771731113e-05,
"loss": 2.8644,
"step": 44700
},
{
"epoch": 0.32,
"learning_rate": 1.676358691721275e-05,
"loss": 2.8555,
"step": 44800
},
{
"epoch": 0.32,
"learning_rate": 1.6756354062694386e-05,
"loss": 2.8591,
"step": 44900
},
{
"epoch": 0.33,
"learning_rate": 1.6749193536721204e-05,
"loss": 2.8518,
"step": 45000
},
{
"epoch": 0.33,
"eval_accuracy": 0.44793955836229293,
"eval_loss": 2.8740601539611816,
"eval_runtime": 29.2958,
"eval_samples_per_second": 221.294,
"eval_steps_per_second": 2.321,
"step": 45000
},
{
"epoch": 0.33,
"learning_rate": 1.674196068220284e-05,
"loss": 2.8613,
"step": 45100
},
{
"epoch": 0.33,
"learning_rate": 1.6734727827684477e-05,
"loss": 2.8672,
"step": 45200
},
{
"epoch": 0.33,
"learning_rate": 1.6727494973166113e-05,
"loss": 2.8603,
"step": 45300
},
{
"epoch": 0.33,
"learning_rate": 1.6720262118647746e-05,
"loss": 2.8581,
"step": 45400
},
{
"epoch": 0.33,
"learning_rate": 1.6713029264129383e-05,
"loss": 2.8484,
"step": 45500
},
{
"epoch": 0.33,
"learning_rate": 1.670579640961102e-05,
"loss": 2.8483,
"step": 45600
},
{
"epoch": 0.33,
"learning_rate": 1.6698563555092656e-05,
"loss": 2.8518,
"step": 45700
},
{
"epoch": 0.33,
"learning_rate": 1.6691330700574292e-05,
"loss": 2.8528,
"step": 45800
},
{
"epoch": 0.33,
"learning_rate": 1.6684097846055925e-05,
"loss": 2.8618,
"step": 45900
},
{
"epoch": 0.33,
"learning_rate": 1.6676864991537562e-05,
"loss": 2.8577,
"step": 46000
},
{
"epoch": 0.33,
"eval_accuracy": 0.4482964495377049,
"eval_loss": 2.8713486194610596,
"eval_runtime": 27.9428,
"eval_samples_per_second": 232.01,
"eval_steps_per_second": 2.434,
"step": 46000
},
{
"epoch": 0.33,
"learning_rate": 1.66696321370192e-05,
"loss": 2.8578,
"step": 46100
},
{
"epoch": 0.33,
"learning_rate": 1.6662399282500835e-05,
"loss": 2.8581,
"step": 46200
},
{
"epoch": 0.33,
"learning_rate": 1.665516642798247e-05,
"loss": 2.8532,
"step": 46300
},
{
"epoch": 0.34,
"learning_rate": 1.6647933573464105e-05,
"loss": 2.8543,
"step": 46400
},
{
"epoch": 0.34,
"learning_rate": 1.664070071894574e-05,
"loss": 2.865,
"step": 46500
},
{
"epoch": 0.34,
"learning_rate": 1.6633467864427378e-05,
"loss": 2.8621,
"step": 46600
},
{
"epoch": 0.34,
"learning_rate": 1.6626235009909014e-05,
"loss": 2.8531,
"step": 46700
},
{
"epoch": 0.34,
"learning_rate": 1.6619002155390647e-05,
"loss": 2.8576,
"step": 46800
},
{
"epoch": 0.34,
"learning_rate": 1.6611769300872284e-05,
"loss": 2.8607,
"step": 46900
},
{
"epoch": 0.34,
"learning_rate": 1.660453644635392e-05,
"loss": 2.8588,
"step": 47000
},
{
"epoch": 0.34,
"eval_accuracy": 0.44838053067903083,
"eval_loss": 2.869096040725708,
"eval_runtime": 27.7995,
"eval_samples_per_second": 233.206,
"eval_steps_per_second": 2.446,
"step": 47000
},
{
"epoch": 0.34,
"learning_rate": 1.6597448248925923e-05,
"loss": 2.8548,
"step": 47100
},
{
"epoch": 0.34,
"learning_rate": 1.659021539440756e-05,
"loss": 2.8561,
"step": 47200
},
{
"epoch": 0.34,
"learning_rate": 1.6582982539889193e-05,
"loss": 2.8538,
"step": 47300
},
{
"epoch": 0.34,
"learning_rate": 1.657574968537083e-05,
"loss": 2.851,
"step": 47400
},
{
"epoch": 0.34,
"learning_rate": 1.6568516830852466e-05,
"loss": 2.8456,
"step": 47500
},
{
"epoch": 0.34,
"learning_rate": 1.6561283976334102e-05,
"loss": 2.8511,
"step": 47600
},
{
"epoch": 0.35,
"learning_rate": 1.655405112181574e-05,
"loss": 2.8498,
"step": 47700
},
{
"epoch": 0.35,
"learning_rate": 1.6546818267297372e-05,
"loss": 2.8539,
"step": 47800
},
{
"epoch": 0.35,
"learning_rate": 1.653958541277901e-05,
"loss": 2.8587,
"step": 47900
},
{
"epoch": 0.35,
"learning_rate": 1.6532352558260645e-05,
"loss": 2.8584,
"step": 48000
},
{
"epoch": 0.35,
"eval_accuracy": 0.44867330242292813,
"eval_loss": 2.8666162490844727,
"eval_runtime": 29.6889,
"eval_samples_per_second": 218.364,
"eval_steps_per_second": 2.29,
"step": 48000
},
{
"epoch": 0.35,
"learning_rate": 1.652511970374228e-05,
"loss": 2.8576,
"step": 48100
},
{
"epoch": 0.35,
"learning_rate": 1.6517886849223915e-05,
"loss": 2.8559,
"step": 48200
},
{
"epoch": 0.35,
"learning_rate": 1.651065399470555e-05,
"loss": 2.8476,
"step": 48300
},
{
"epoch": 0.35,
"learning_rate": 1.6503421140187188e-05,
"loss": 2.8528,
"step": 48400
},
{
"epoch": 0.35,
"learning_rate": 1.6496188285668824e-05,
"loss": 2.8514,
"step": 48500
},
{
"epoch": 0.35,
"learning_rate": 1.648895543115046e-05,
"loss": 2.8468,
"step": 48600
},
{
"epoch": 0.35,
"learning_rate": 1.6481722576632094e-05,
"loss": 2.8479,
"step": 48700
},
{
"epoch": 0.35,
"learning_rate": 1.647448972211373e-05,
"loss": 2.858,
"step": 48800
},
{
"epoch": 0.35,
"learning_rate": 1.6467256867595367e-05,
"loss": 2.8567,
"step": 48900
},
{
"epoch": 0.35,
"learning_rate": 1.6460024013077003e-05,
"loss": 2.8527,
"step": 49000
},
{
"epoch": 0.35,
"eval_accuracy": 0.448782789376741,
"eval_loss": 2.864643096923828,
"eval_runtime": 27.8655,
"eval_samples_per_second": 232.653,
"eval_steps_per_second": 2.44,
"step": 49000
},
{
"epoch": 0.36,
"learning_rate": 1.645279115855864e-05,
"loss": 2.8534,
"step": 49100
},
{
"epoch": 0.36,
"learning_rate": 1.6445558304040273e-05,
"loss": 2.8563,
"step": 49200
},
{
"epoch": 0.36,
"learning_rate": 1.643832544952191e-05,
"loss": 2.8536,
"step": 49300
},
{
"epoch": 0.36,
"learning_rate": 1.6431092595003546e-05,
"loss": 2.8378,
"step": 49400
},
{
"epoch": 0.36,
"learning_rate": 1.6423932069030364e-05,
"loss": 2.8455,
"step": 49500
},
{
"epoch": 0.36,
"learning_rate": 1.6416699214512e-05,
"loss": 2.8519,
"step": 49600
},
{
"epoch": 0.36,
"learning_rate": 1.6409466359993634e-05,
"loss": 2.8505,
"step": 49700
},
{
"epoch": 0.36,
"learning_rate": 1.640223350547527e-05,
"loss": 2.8437,
"step": 49800
},
{
"epoch": 0.36,
"learning_rate": 1.6395000650956907e-05,
"loss": 2.8428,
"step": 49900
},
{
"epoch": 0.36,
"learning_rate": 1.6387767796438543e-05,
"loss": 2.8425,
"step": 50000
},
{
"epoch": 0.36,
"eval_accuracy": 0.44904108180369173,
"eval_loss": 2.8624136447906494,
"eval_runtime": 32.7107,
"eval_samples_per_second": 198.192,
"eval_steps_per_second": 2.079,
"step": 50000
},
{
"epoch": 0.36,
"learning_rate": 1.638053494192018e-05,
"loss": 2.8406,
"step": 50100
},
{
"epoch": 0.36,
"learning_rate": 1.6373302087401813e-05,
"loss": 2.8552,
"step": 50200
},
{
"epoch": 0.36,
"learning_rate": 1.636606923288345e-05,
"loss": 2.8473,
"step": 50300
},
{
"epoch": 0.36,
"learning_rate": 1.6358836378365086e-05,
"loss": 2.8458,
"step": 50400
},
{
"epoch": 0.37,
"learning_rate": 1.6351603523846723e-05,
"loss": 2.8471,
"step": 50500
},
{
"epoch": 0.37,
"learning_rate": 1.634437066932836e-05,
"loss": 2.8518,
"step": 50600
},
{
"epoch": 0.37,
"learning_rate": 1.6337137814809992e-05,
"loss": 2.848,
"step": 50700
},
{
"epoch": 0.37,
"learning_rate": 1.632990496029163e-05,
"loss": 2.8491,
"step": 50800
},
{
"epoch": 0.37,
"learning_rate": 1.6322672105773265e-05,
"loss": 2.8433,
"step": 50900
},
{
"epoch": 0.37,
"learning_rate": 1.6315439251254902e-05,
"loss": 2.8457,
"step": 51000
},
{
"epoch": 0.37,
"eval_accuracy": 0.4494481797037803,
"eval_loss": 2.8601181507110596,
"eval_runtime": 31.3807,
"eval_samples_per_second": 206.592,
"eval_steps_per_second": 2.167,
"step": 51000
},
{
"epoch": 0.37,
"learning_rate": 1.6308206396736538e-05,
"loss": 2.8436,
"step": 51100
},
{
"epoch": 0.37,
"learning_rate": 1.630097354221817e-05,
"loss": 2.8474,
"step": 51200
},
{
"epoch": 0.37,
"learning_rate": 1.6293740687699808e-05,
"loss": 2.8361,
"step": 51300
},
{
"epoch": 0.37,
"learning_rate": 1.6286507833181444e-05,
"loss": 2.842,
"step": 51400
},
{
"epoch": 0.37,
"learning_rate": 1.627927497866308e-05,
"loss": 2.8527,
"step": 51500
},
{
"epoch": 0.37,
"learning_rate": 1.62721144526899e-05,
"loss": 2.8376,
"step": 51600
},
{
"epoch": 0.37,
"learning_rate": 1.6264881598171535e-05,
"loss": 2.8461,
"step": 51700
},
{
"epoch": 0.37,
"learning_rate": 1.6257721072198357e-05,
"loss": 2.836,
"step": 51800
},
{
"epoch": 0.38,
"learning_rate": 1.625048821767999e-05,
"loss": 2.8444,
"step": 51900
},
{
"epoch": 0.38,
"learning_rate": 1.6243255363161627e-05,
"loss": 2.849,
"step": 52000
},
{
"epoch": 0.38,
"eval_accuracy": 0.4496465870012975,
"eval_loss": 2.8579957485198975,
"eval_runtime": 30.949,
"eval_samples_per_second": 209.474,
"eval_steps_per_second": 2.197,
"step": 52000
},
{
"epoch": 0.38,
"learning_rate": 1.6236022508643263e-05,
"loss": 2.8402,
"step": 52100
},
{
"epoch": 0.38,
"learning_rate": 1.62287896541249e-05,
"loss": 2.8411,
"step": 52200
},
{
"epoch": 0.38,
"learning_rate": 1.6221556799606536e-05,
"loss": 2.8423,
"step": 52300
},
{
"epoch": 0.38,
"learning_rate": 1.621432394508817e-05,
"loss": 2.838,
"step": 52400
},
{
"epoch": 0.38,
"learning_rate": 1.6207091090569806e-05,
"loss": 2.8415,
"step": 52500
},
{
"epoch": 0.38,
"learning_rate": 1.6199858236051442e-05,
"loss": 2.8418,
"step": 52600
},
{
"epoch": 0.38,
"learning_rate": 1.619262538153308e-05,
"loss": 2.8366,
"step": 52700
},
{
"epoch": 0.38,
"learning_rate": 1.6185392527014715e-05,
"loss": 2.8443,
"step": 52800
},
{
"epoch": 0.38,
"learning_rate": 1.617815967249635e-05,
"loss": 2.8395,
"step": 52900
},
{
"epoch": 0.38,
"learning_rate": 1.6170926817977985e-05,
"loss": 2.8431,
"step": 53000
},
{
"epoch": 0.38,
"eval_accuracy": 0.44991455783300516,
"eval_loss": 2.856027841567993,
"eval_runtime": 31.4773,
"eval_samples_per_second": 205.958,
"eval_steps_per_second": 2.16,
"step": 53000
},
{
"epoch": 0.38,
"learning_rate": 1.616369396345962e-05,
"loss": 2.8451,
"step": 53100
},
{
"epoch": 0.38,
"learning_rate": 1.6156461108941258e-05,
"loss": 2.8367,
"step": 53200
},
{
"epoch": 0.39,
"learning_rate": 1.6149300582968076e-05,
"loss": 2.8403,
"step": 53300
},
{
"epoch": 0.39,
"learning_rate": 1.614206772844971e-05,
"loss": 2.8385,
"step": 53400
},
{
"epoch": 0.39,
"learning_rate": 1.6134834873931346e-05,
"loss": 2.8323,
"step": 53500
},
{
"epoch": 0.39,
"learning_rate": 1.6127602019412982e-05,
"loss": 2.8408,
"step": 53600
},
{
"epoch": 0.39,
"learning_rate": 1.612036916489462e-05,
"loss": 2.8449,
"step": 53700
},
{
"epoch": 0.39,
"learning_rate": 1.6113136310376255e-05,
"loss": 2.8424,
"step": 53800
},
{
"epoch": 0.39,
"learning_rate": 1.6105903455857888e-05,
"loss": 2.8384,
"step": 53900
},
{
"epoch": 0.39,
"learning_rate": 1.609874292988471e-05,
"loss": 2.8463,
"step": 54000
},
{
"epoch": 0.39,
"eval_accuracy": 0.45014865424806355,
"eval_loss": 2.8539552688598633,
"eval_runtime": 31.4595,
"eval_samples_per_second": 206.075,
"eval_steps_per_second": 2.162,
"step": 54000
},
{
"epoch": 0.39,
"learning_rate": 1.6091510075366346e-05,
"loss": 2.8336,
"step": 54100
},
{
"epoch": 0.39,
"learning_rate": 1.6084277220847983e-05,
"loss": 2.8433,
"step": 54200
},
{
"epoch": 0.39,
"learning_rate": 1.6077044366329616e-05,
"loss": 2.8454,
"step": 54300
},
{
"epoch": 0.39,
"learning_rate": 1.6069811511811252e-05,
"loss": 2.846,
"step": 54400
},
{
"epoch": 0.39,
"learning_rate": 1.606257865729289e-05,
"loss": 2.8422,
"step": 54500
},
{
"epoch": 0.39,
"learning_rate": 1.6055345802774525e-05,
"loss": 2.8489,
"step": 54600
},
{
"epoch": 0.4,
"learning_rate": 1.6048112948256162e-05,
"loss": 2.8397,
"step": 54700
},
{
"epoch": 0.4,
"learning_rate": 1.6040880093737795e-05,
"loss": 2.8381,
"step": 54800
},
{
"epoch": 0.4,
"learning_rate": 1.603364723921943e-05,
"loss": 2.8319,
"step": 54900
},
{
"epoch": 0.4,
"learning_rate": 1.6026414384701068e-05,
"loss": 2.8437,
"step": 55000
},
{
"epoch": 0.4,
"eval_accuracy": 0.45043900639077167,
"eval_loss": 2.8520514965057373,
"eval_runtime": 28.1189,
"eval_samples_per_second": 230.557,
"eval_steps_per_second": 2.418,
"step": 55000
},
{
"epoch": 0.4,
"learning_rate": 1.6019181530182705e-05,
"loss": 2.8415,
"step": 55100
},
{
"epoch": 0.4,
"learning_rate": 1.601194867566434e-05,
"loss": 2.8417,
"step": 55200
},
{
"epoch": 0.4,
"learning_rate": 1.6004715821145974e-05,
"loss": 2.8385,
"step": 55300
},
{
"epoch": 0.4,
"learning_rate": 1.599748296662761e-05,
"loss": 2.8361,
"step": 55400
},
{
"epoch": 0.4,
"learning_rate": 1.5990250112109247e-05,
"loss": 2.8356,
"step": 55500
},
{
"epoch": 0.4,
"learning_rate": 1.5983017257590884e-05,
"loss": 2.8419,
"step": 55600
},
{
"epoch": 0.4,
"learning_rate": 1.5975784403072517e-05,
"loss": 2.8403,
"step": 55700
},
{
"epoch": 0.4,
"learning_rate": 1.5968551548554153e-05,
"loss": 2.8414,
"step": 55800
},
{
"epoch": 0.4,
"learning_rate": 1.596131869403579e-05,
"loss": 2.8342,
"step": 55900
},
{
"epoch": 0.41,
"learning_rate": 1.5954085839517426e-05,
"loss": 2.845,
"step": 56000
},
{
"epoch": 0.41,
"eval_accuracy": 0.45049465721812404,
"eval_loss": 2.850494146347046,
"eval_runtime": 27.8769,
"eval_samples_per_second": 232.558,
"eval_steps_per_second": 2.439,
"step": 56000
},
{
"epoch": 0.41,
"learning_rate": 1.5946852984999063e-05,
"loss": 2.8348,
"step": 56100
},
{
"epoch": 0.41,
"learning_rate": 1.5939620130480696e-05,
"loss": 2.8362,
"step": 56200
},
{
"epoch": 0.41,
"learning_rate": 1.5932387275962333e-05,
"loss": 2.8346,
"step": 56300
},
{
"epoch": 0.41,
"learning_rate": 1.592515442144397e-05,
"loss": 2.8384,
"step": 56400
},
{
"epoch": 0.41,
"learning_rate": 1.5917921566925606e-05,
"loss": 2.8455,
"step": 56500
},
{
"epoch": 0.41,
"learning_rate": 1.5910761040952424e-05,
"loss": 2.8325,
"step": 56600
},
{
"epoch": 0.41,
"learning_rate": 1.590352818643406e-05,
"loss": 2.8399,
"step": 56700
},
{
"epoch": 0.41,
"learning_rate": 1.5896295331915693e-05,
"loss": 2.8294,
"step": 56800
},
{
"epoch": 0.41,
"learning_rate": 1.588906247739733e-05,
"loss": 2.8307,
"step": 56900
},
{
"epoch": 0.41,
"learning_rate": 1.5881829622878966e-05,
"loss": 2.8218,
"step": 57000
},
{
"epoch": 0.41,
"eval_accuracy": 0.4507807750587509,
"eval_loss": 2.848620653152466,
"eval_runtime": 29.8126,
"eval_samples_per_second": 217.458,
"eval_steps_per_second": 2.281,
"step": 57000
},
{
"epoch": 0.41,
"learning_rate": 1.5874596768360603e-05,
"loss": 2.8282,
"step": 57100
},
{
"epoch": 0.41,
"learning_rate": 1.5867363913842236e-05,
"loss": 2.8378,
"step": 57200
},
{
"epoch": 0.41,
"learning_rate": 1.5860131059323872e-05,
"loss": 2.8355,
"step": 57300
},
{
"epoch": 0.42,
"learning_rate": 1.585289820480551e-05,
"loss": 2.8298,
"step": 57400
},
{
"epoch": 0.42,
"learning_rate": 1.5845665350287145e-05,
"loss": 2.8378,
"step": 57500
},
{
"epoch": 0.42,
"learning_rate": 1.5838432495768782e-05,
"loss": 2.8199,
"step": 57600
},
{
"epoch": 0.42,
"learning_rate": 1.58312719697956e-05,
"loss": 2.8338,
"step": 57700
},
{
"epoch": 0.42,
"learning_rate": 1.5824039115277237e-05,
"loss": 2.8349,
"step": 57800
},
{
"epoch": 0.42,
"learning_rate": 1.5816806260758873e-05,
"loss": 2.829,
"step": 57900
},
{
"epoch": 0.42,
"learning_rate": 1.580957340624051e-05,
"loss": 2.8366,
"step": 58000
},
{
"epoch": 0.42,
"eval_accuracy": 0.4509053845199965,
"eval_loss": 2.8470458984375,
"eval_runtime": 29.7851,
"eval_samples_per_second": 217.659,
"eval_steps_per_second": 2.283,
"step": 58000
},
{
"epoch": 0.42,
"learning_rate": 1.5802340551722143e-05,
"loss": 2.8331,
"step": 58100
},
{
"epoch": 0.42,
"learning_rate": 1.579510769720378e-05,
"loss": 2.8348,
"step": 58200
},
{
"epoch": 0.42,
"learning_rate": 1.5787874842685416e-05,
"loss": 2.8353,
"step": 58300
},
{
"epoch": 0.42,
"learning_rate": 1.5780641988167052e-05,
"loss": 2.8369,
"step": 58400
},
{
"epoch": 0.42,
"learning_rate": 1.577340913364869e-05,
"loss": 2.8315,
"step": 58500
},
{
"epoch": 0.42,
"learning_rate": 1.5766176279130322e-05,
"loss": 2.8344,
"step": 58600
},
{
"epoch": 0.42,
"learning_rate": 1.5759015753157143e-05,
"loss": 2.8307,
"step": 58700
},
{
"epoch": 0.43,
"learning_rate": 1.575178289863878e-05,
"loss": 2.8218,
"step": 58800
},
{
"epoch": 0.43,
"learning_rate": 1.5744550044120413e-05,
"loss": 2.8299,
"step": 58900
},
{
"epoch": 0.43,
"learning_rate": 1.573731718960205e-05,
"loss": 2.8339,
"step": 59000
},
{
"epoch": 0.43,
"eval_accuracy": 0.45116549164783915,
"eval_loss": 2.8453407287597656,
"eval_runtime": 29.6273,
"eval_samples_per_second": 218.818,
"eval_steps_per_second": 2.295,
"step": 59000
},
{
"epoch": 0.43,
"learning_rate": 1.5730084335083686e-05,
"loss": 2.8327,
"step": 59100
},
{
"epoch": 0.43,
"learning_rate": 1.5722851480565323e-05,
"loss": 2.8305,
"step": 59200
},
{
"epoch": 0.43,
"learning_rate": 1.571561862604696e-05,
"loss": 2.8327,
"step": 59300
},
{
"epoch": 0.43,
"learning_rate": 1.5708385771528592e-05,
"loss": 2.8264,
"step": 59400
},
{
"epoch": 0.43,
"learning_rate": 1.570115291701023e-05,
"loss": 2.8368,
"step": 59500
},
{
"epoch": 0.43,
"learning_rate": 1.5693920062491865e-05,
"loss": 2.8295,
"step": 59600
},
{
"epoch": 0.43,
"learning_rate": 1.5686687207973502e-05,
"loss": 2.8248,
"step": 59700
},
{
"epoch": 0.43,
"learning_rate": 1.5679454353455138e-05,
"loss": 2.826,
"step": 59800
},
{
"epoch": 0.43,
"learning_rate": 1.567222149893677e-05,
"loss": 2.8306,
"step": 59900
},
{
"epoch": 0.43,
"learning_rate": 1.5664988644418408e-05,
"loss": 2.8338,
"step": 60000
},
{
"epoch": 0.43,
"eval_accuracy": 0.4511189143249464,
"eval_loss": 2.843701124191284,
"eval_runtime": 30.181,
"eval_samples_per_second": 214.804,
"eval_steps_per_second": 2.253,
"step": 60000
},
{
"epoch": 0.43,
"learning_rate": 1.5657755789900044e-05,
"loss": 2.8309,
"step": 60100
},
{
"epoch": 0.44,
"learning_rate": 1.5650595263926862e-05,
"loss": 2.8288,
"step": 60200
},
{
"epoch": 0.44,
"learning_rate": 1.56433624094085e-05,
"loss": 2.8238,
"step": 60300
},
{
"epoch": 0.44,
"learning_rate": 1.5636129554890132e-05,
"loss": 2.8257,
"step": 60400
},
{
"epoch": 0.44,
"learning_rate": 1.562889670037177e-05,
"loss": 2.8334,
"step": 60500
},
{
"epoch": 0.44,
"learning_rate": 1.5621663845853405e-05,
"loss": 2.8232,
"step": 60600
},
{
"epoch": 0.44,
"learning_rate": 1.561443099133504e-05,
"loss": 2.8356,
"step": 60700
},
{
"epoch": 0.44,
"learning_rate": 1.5607198136816678e-05,
"loss": 2.8199,
"step": 60800
},
{
"epoch": 0.44,
"learning_rate": 1.559996528229831e-05,
"loss": 2.8215,
"step": 60900
},
{
"epoch": 0.44,
"learning_rate": 1.5592732427779948e-05,
"loss": 2.8237,
"step": 61000
},
{
"epoch": 0.44,
"eval_accuracy": 0.4512713491998681,
"eval_loss": 2.842045783996582,
"eval_runtime": 33.0993,
"eval_samples_per_second": 195.865,
"eval_steps_per_second": 2.054,
"step": 61000
},
{
"epoch": 0.44,
"learning_rate": 1.5585499573261584e-05,
"loss": 2.8305,
"step": 61100
},
{
"epoch": 0.44,
"learning_rate": 1.5578339047288406e-05,
"loss": 2.823,
"step": 61200
},
{
"epoch": 0.44,
"learning_rate": 1.557110619277004e-05,
"loss": 2.839,
"step": 61300
},
{
"epoch": 0.44,
"learning_rate": 1.5563873338251675e-05,
"loss": 2.8265,
"step": 61400
},
{
"epoch": 0.44,
"learning_rate": 1.5556640483733312e-05,
"loss": 2.8256,
"step": 61500
},
{
"epoch": 0.45,
"learning_rate": 1.554940762921495e-05,
"loss": 2.8295,
"step": 61600
},
{
"epoch": 0.45,
"learning_rate": 1.5542174774696585e-05,
"loss": 2.8207,
"step": 61700
},
{
"epoch": 0.45,
"learning_rate": 1.5534941920178218e-05,
"loss": 2.8201,
"step": 61800
},
{
"epoch": 0.45,
"learning_rate": 1.5527709065659855e-05,
"loss": 2.8283,
"step": 61900
},
{
"epoch": 0.45,
"learning_rate": 1.552047621114149e-05,
"loss": 2.8334,
"step": 62000
},
{
"epoch": 0.45,
"eval_accuracy": 0.45149939661195343,
"eval_loss": 2.840452194213867,
"eval_runtime": 29.9094,
"eval_samples_per_second": 216.755,
"eval_steps_per_second": 2.274,
"step": 62000
},
{
"epoch": 0.45,
"learning_rate": 1.5513243356623128e-05,
"loss": 2.8248,
"step": 62100
},
{
"epoch": 0.45,
"learning_rate": 1.5506010502104764e-05,
"loss": 2.8386,
"step": 62200
},
{
"epoch": 0.45,
"learning_rate": 1.5498777647586397e-05,
"loss": 2.8242,
"step": 62300
},
{
"epoch": 0.45,
"learning_rate": 1.5491544793068034e-05,
"loss": 2.8268,
"step": 62400
},
{
"epoch": 0.45,
"learning_rate": 1.548431193854967e-05,
"loss": 2.8286,
"step": 62500
},
{
"epoch": 0.45,
"learning_rate": 1.5477079084031307e-05,
"loss": 2.8274,
"step": 62600
},
{
"epoch": 0.45,
"learning_rate": 1.546984622951294e-05,
"loss": 2.8292,
"step": 62700
},
{
"epoch": 0.45,
"learning_rate": 1.5462613374994576e-05,
"loss": 2.8183,
"step": 62800
},
{
"epoch": 0.45,
"learning_rate": 1.5455380520476213e-05,
"loss": 2.8279,
"step": 62900
},
{
"epoch": 0.46,
"learning_rate": 1.544814766595785e-05,
"loss": 2.8229,
"step": 63000
},
{
"epoch": 0.46,
"eval_accuracy": 0.4518381407784462,
"eval_loss": 2.8387556076049805,
"eval_runtime": 29.7722,
"eval_samples_per_second": 217.753,
"eval_steps_per_second": 2.284,
"step": 63000
},
{
"epoch": 0.46,
"learning_rate": 1.5440914811439486e-05,
"loss": 2.8227,
"step": 63100
},
{
"epoch": 0.46,
"learning_rate": 1.543368195692112e-05,
"loss": 2.8249,
"step": 63200
},
{
"epoch": 0.46,
"learning_rate": 1.5426449102402756e-05,
"loss": 2.8246,
"step": 63300
},
{
"epoch": 0.46,
"learning_rate": 1.5419288576429574e-05,
"loss": 2.8265,
"step": 63400
},
{
"epoch": 0.46,
"learning_rate": 1.541205572191121e-05,
"loss": 2.824,
"step": 63500
},
{
"epoch": 0.46,
"learning_rate": 1.5404822867392847e-05,
"loss": 2.8188,
"step": 63600
},
{
"epoch": 0.46,
"learning_rate": 1.5397590012874483e-05,
"loss": 2.8234,
"step": 63700
},
{
"epoch": 0.46,
"learning_rate": 1.5390357158356116e-05,
"loss": 2.8216,
"step": 63800
},
{
"epoch": 0.46,
"learning_rate": 1.5383124303837753e-05,
"loss": 2.8256,
"step": 63900
},
{
"epoch": 0.46,
"learning_rate": 1.537589144931939e-05,
"loss": 2.8214,
"step": 64000
},
{
"epoch": 0.46,
"eval_accuracy": 0.45194218362958327,
"eval_loss": 2.837294340133667,
"eval_runtime": 33.0659,
"eval_samples_per_second": 196.063,
"eval_steps_per_second": 2.057,
"step": 64000
},
{
"epoch": 0.46,
"learning_rate": 1.5368658594801026e-05,
"loss": 2.8143,
"step": 64100
},
{
"epoch": 0.46,
"learning_rate": 1.536142574028266e-05,
"loss": 2.8199,
"step": 64200
},
{
"epoch": 0.47,
"learning_rate": 1.5354192885764295e-05,
"loss": 2.8217,
"step": 64300
},
{
"epoch": 0.47,
"learning_rate": 1.5346960031245932e-05,
"loss": 2.8202,
"step": 64400
},
{
"epoch": 0.47,
"learning_rate": 1.533972717672757e-05,
"loss": 2.8211,
"step": 64500
},
{
"epoch": 0.47,
"learning_rate": 1.5332566650754386e-05,
"loss": 2.8251,
"step": 64600
},
{
"epoch": 0.47,
"learning_rate": 1.5325333796236023e-05,
"loss": 2.8261,
"step": 64700
},
{
"epoch": 0.47,
"learning_rate": 1.531810094171766e-05,
"loss": 2.8217,
"step": 64800
},
{
"epoch": 0.47,
"learning_rate": 1.5310868087199296e-05,
"loss": 2.8227,
"step": 64900
},
{
"epoch": 0.47,
"learning_rate": 1.5303635232680933e-05,
"loss": 2.8245,
"step": 65000
},
{
"epoch": 0.47,
"eval_accuracy": 0.45223193087199404,
"eval_loss": 2.835636854171753,
"eval_runtime": 29.5456,
"eval_samples_per_second": 219.424,
"eval_steps_per_second": 2.302,
"step": 65000
},
{
"epoch": 0.47,
"learning_rate": 1.5296402378162566e-05,
"loss": 2.8268,
"step": 65100
},
{
"epoch": 0.47,
"learning_rate": 1.5289169523644202e-05,
"loss": 2.8265,
"step": 65200
},
{
"epoch": 0.47,
"learning_rate": 1.528193666912584e-05,
"loss": 2.8221,
"step": 65300
},
{
"epoch": 0.47,
"learning_rate": 1.5274703814607475e-05,
"loss": 2.8193,
"step": 65400
},
{
"epoch": 0.47,
"learning_rate": 1.5267470960089112e-05,
"loss": 2.8281,
"step": 65500
},
{
"epoch": 0.47,
"learning_rate": 1.526031043411593e-05,
"loss": 2.8179,
"step": 65600
},
{
"epoch": 0.48,
"learning_rate": 1.5253077579597566e-05,
"loss": 2.8206,
"step": 65700
},
{
"epoch": 0.48,
"learning_rate": 1.5245844725079201e-05,
"loss": 2.8187,
"step": 65800
},
{
"epoch": 0.48,
"learning_rate": 1.5238611870560838e-05,
"loss": 2.8146,
"step": 65900
},
{
"epoch": 0.48,
"learning_rate": 1.5231379016042474e-05,
"loss": 2.822,
"step": 66000
},
{
"epoch": 0.48,
"eval_accuracy": 0.4523559354329423,
"eval_loss": 2.834322929382324,
"eval_runtime": 27.8576,
"eval_samples_per_second": 232.719,
"eval_steps_per_second": 2.441,
"step": 66000
},
{
"epoch": 0.48,
"learning_rate": 1.5224146161524109e-05,
"loss": 2.8221,
"step": 66100
},
{
"epoch": 0.48,
"learning_rate": 1.5216913307005745e-05,
"loss": 2.8157,
"step": 66200
},
{
"epoch": 0.48,
"learning_rate": 1.520968045248738e-05,
"loss": 2.8233,
"step": 66300
},
{
"epoch": 0.48,
"learning_rate": 1.5202447597969017e-05,
"loss": 2.8273,
"step": 66400
},
{
"epoch": 0.48,
"learning_rate": 1.5195214743450653e-05,
"loss": 2.8149,
"step": 66500
},
{
"epoch": 0.48,
"learning_rate": 1.5187981888932288e-05,
"loss": 2.8215,
"step": 66600
},
{
"epoch": 0.48,
"learning_rate": 1.5180821362959106e-05,
"loss": 2.822,
"step": 66700
},
{
"epoch": 0.48,
"learning_rate": 1.5173588508440743e-05,
"loss": 2.8222,
"step": 66800
},
{
"epoch": 0.48,
"learning_rate": 1.5166355653922378e-05,
"loss": 2.8245,
"step": 66900
},
{
"epoch": 0.48,
"learning_rate": 1.5159122799404014e-05,
"loss": 2.8139,
"step": 67000
},
{
"epoch": 0.48,
"eval_accuracy": 0.45257007013818945,
"eval_loss": 2.833111524581909,
"eval_runtime": 29.3281,
"eval_samples_per_second": 221.051,
"eval_steps_per_second": 2.319,
"step": 67000
},
{
"epoch": 0.49,
"learning_rate": 1.5151889944885649e-05,
"loss": 2.8134,
"step": 67100
},
{
"epoch": 0.49,
"learning_rate": 1.5144657090367285e-05,
"loss": 2.812,
"step": 67200
},
{
"epoch": 0.49,
"learning_rate": 1.513742423584892e-05,
"loss": 2.8225,
"step": 67300
},
{
"epoch": 0.49,
"learning_rate": 1.5130191381330557e-05,
"loss": 2.8177,
"step": 67400
},
{
"epoch": 0.49,
"learning_rate": 1.5122958526812193e-05,
"loss": 2.8167,
"step": 67500
},
{
"epoch": 0.49,
"learning_rate": 1.5115725672293828e-05,
"loss": 2.8189,
"step": 67600
},
{
"epoch": 0.49,
"learning_rate": 1.5108565146320648e-05,
"loss": 2.8239,
"step": 67700
},
{
"epoch": 0.49,
"learning_rate": 1.5101332291802284e-05,
"loss": 2.8216,
"step": 67800
},
{
"epoch": 0.49,
"learning_rate": 1.509409943728392e-05,
"loss": 2.8193,
"step": 67900
},
{
"epoch": 0.49,
"learning_rate": 1.5086866582765556e-05,
"loss": 2.8201,
"step": 68000
},
{
"epoch": 0.49,
"eval_accuracy": 0.45263963367237997,
"eval_loss": 2.8317487239837646,
"eval_runtime": 30.249,
"eval_samples_per_second": 214.321,
"eval_steps_per_second": 2.248,
"step": 68000
},
{
"epoch": 0.49,
"learning_rate": 1.5079633728247192e-05,
"loss": 2.8189,
"step": 68100
},
{
"epoch": 0.49,
"learning_rate": 1.5072400873728827e-05,
"loss": 2.8271,
"step": 68200
},
{
"epoch": 0.49,
"learning_rate": 1.5065168019210463e-05,
"loss": 2.8229,
"step": 68300
},
{
"epoch": 0.49,
"learning_rate": 1.5057935164692098e-05,
"loss": 2.8144,
"step": 68400
},
{
"epoch": 0.5,
"learning_rate": 1.5050702310173735e-05,
"loss": 2.8177,
"step": 68500
},
{
"epoch": 0.5,
"learning_rate": 1.5043469455655371e-05,
"loss": 2.8075,
"step": 68600
},
{
"epoch": 0.5,
"learning_rate": 1.5036236601137006e-05,
"loss": 2.818,
"step": 68700
},
{
"epoch": 0.5,
"learning_rate": 1.5029003746618643e-05,
"loss": 2.8183,
"step": 68800
},
{
"epoch": 0.5,
"learning_rate": 1.5021915549190644e-05,
"loss": 2.8202,
"step": 68900
},
{
"epoch": 0.5,
"learning_rate": 1.501468269467228e-05,
"loss": 2.8132,
"step": 69000
},
{
"epoch": 0.5,
"eval_accuracy": 0.4527346030190574,
"eval_loss": 2.830460548400879,
"eval_runtime": 31.7655,
"eval_samples_per_second": 204.09,
"eval_steps_per_second": 2.141,
"step": 69000
},
{
"epoch": 0.5,
"learning_rate": 1.5007449840153915e-05,
"loss": 2.8269,
"step": 69100
},
{
"epoch": 0.5,
"learning_rate": 1.5000216985635552e-05,
"loss": 2.8226,
"step": 69200
},
{
"epoch": 0.5,
"learning_rate": 1.4992984131117188e-05,
"loss": 2.8165,
"step": 69300
},
{
"epoch": 0.5,
"learning_rate": 1.4985751276598823e-05,
"loss": 2.8167,
"step": 69400
},
{
"epoch": 0.5,
"learning_rate": 1.497851842208046e-05,
"loss": 2.8201,
"step": 69500
},
{
"epoch": 0.5,
"learning_rate": 1.4971285567562094e-05,
"loss": 2.814,
"step": 69600
},
{
"epoch": 0.5,
"learning_rate": 1.4964052713043731e-05,
"loss": 2.811,
"step": 69700
},
{
"epoch": 0.5,
"learning_rate": 1.4956819858525366e-05,
"loss": 2.8127,
"step": 69800
},
{
"epoch": 0.51,
"learning_rate": 1.4949587004007002e-05,
"loss": 2.8146,
"step": 69900
},
{
"epoch": 0.51,
"learning_rate": 1.4942354149488639e-05,
"loss": 2.8138,
"step": 70000
},
{
"epoch": 0.51,
"eval_accuracy": 0.45299531504719737,
"eval_loss": 2.8289763927459717,
"eval_runtime": 32.3608,
"eval_samples_per_second": 200.335,
"eval_steps_per_second": 2.101,
"step": 70000
},
{
"epoch": 0.51,
"learning_rate": 1.4935121294970274e-05,
"loss": 2.8153,
"step": 70100
},
{
"epoch": 0.51,
"learning_rate": 1.492788844045191e-05,
"loss": 2.8128,
"step": 70200
},
{
"epoch": 0.51,
"learning_rate": 1.4920655585933545e-05,
"loss": 2.8177,
"step": 70300
},
{
"epoch": 0.51,
"learning_rate": 1.4913422731415181e-05,
"loss": 2.8185,
"step": 70400
},
{
"epoch": 0.51,
"learning_rate": 1.4906189876896818e-05,
"loss": 2.8144,
"step": 70500
},
{
"epoch": 0.51,
"learning_rate": 1.4898957022378453e-05,
"loss": 2.8157,
"step": 70600
},
{
"epoch": 0.51,
"learning_rate": 1.489172416786009e-05,
"loss": 2.8074,
"step": 70700
},
{
"epoch": 0.51,
"learning_rate": 1.4884491313341724e-05,
"loss": 2.8124,
"step": 70800
},
{
"epoch": 0.51,
"learning_rate": 1.487725845882336e-05,
"loss": 2.8143,
"step": 70900
},
{
"epoch": 0.51,
"learning_rate": 1.4870025604304995e-05,
"loss": 2.8171,
"step": 71000
},
{
"epoch": 0.51,
"eval_accuracy": 0.45300983265433276,
"eval_loss": 2.8278744220733643,
"eval_runtime": 29.4032,
"eval_samples_per_second": 220.486,
"eval_steps_per_second": 2.313,
"step": 71000
},
{
"epoch": 0.51,
"learning_rate": 1.4862792749786632e-05,
"loss": 2.8229,
"step": 71100
},
{
"epoch": 0.51,
"learning_rate": 1.4855559895268268e-05,
"loss": 2.8153,
"step": 71200
},
{
"epoch": 0.52,
"learning_rate": 1.4848327040749903e-05,
"loss": 2.8163,
"step": 71300
},
{
"epoch": 0.52,
"learning_rate": 1.484109418623154e-05,
"loss": 2.8162,
"step": 71400
},
{
"epoch": 0.52,
"learning_rate": 1.4833861331713175e-05,
"loss": 2.8081,
"step": 71500
},
{
"epoch": 0.52,
"learning_rate": 1.4826628477194811e-05,
"loss": 2.8158,
"step": 71600
},
{
"epoch": 0.52,
"learning_rate": 1.4819467951221631e-05,
"loss": 2.8137,
"step": 71700
},
{
"epoch": 0.52,
"learning_rate": 1.4812235096703267e-05,
"loss": 2.8065,
"step": 71800
},
{
"epoch": 0.52,
"learning_rate": 1.4805002242184902e-05,
"loss": 2.8151,
"step": 71900
},
{
"epoch": 0.52,
"learning_rate": 1.4797769387666539e-05,
"loss": 2.8123,
"step": 72000
},
{
"epoch": 0.52,
"eval_accuracy": 0.4531562185262814,
"eval_loss": 2.8267478942871094,
"eval_runtime": 28.1731,
"eval_samples_per_second": 230.113,
"eval_steps_per_second": 2.414,
"step": 72000
},
{
"epoch": 0.52,
"learning_rate": 1.4790536533148174e-05,
"loss": 2.8118,
"step": 72100
},
{
"epoch": 0.52,
"learning_rate": 1.478330367862981e-05,
"loss": 2.8177,
"step": 72200
},
{
"epoch": 0.52,
"learning_rate": 1.4776070824111447e-05,
"loss": 2.8059,
"step": 72300
},
{
"epoch": 0.52,
"learning_rate": 1.4768837969593081e-05,
"loss": 2.8214,
"step": 72400
},
{
"epoch": 0.52,
"learning_rate": 1.4761605115074718e-05,
"loss": 2.8068,
"step": 72500
},
{
"epoch": 0.53,
"learning_rate": 1.4754372260556353e-05,
"loss": 2.8163,
"step": 72600
},
{
"epoch": 0.53,
"learning_rate": 1.474713940603799e-05,
"loss": 2.8139,
"step": 72700
},
{
"epoch": 0.53,
"learning_rate": 1.4739978880064807e-05,
"loss": 2.8139,
"step": 72800
},
{
"epoch": 0.53,
"learning_rate": 1.4732746025546442e-05,
"loss": 2.8151,
"step": 72900
},
{
"epoch": 0.53,
"learning_rate": 1.4725513171028079e-05,
"loss": 2.8118,
"step": 73000
},
{
"epoch": 0.53,
"eval_accuracy": 0.45336127972706897,
"eval_loss": 2.825540781021118,
"eval_runtime": 30.4285,
"eval_samples_per_second": 213.057,
"eval_steps_per_second": 2.235,
"step": 73000
},
{
"epoch": 0.53,
"learning_rate": 1.4718280316509715e-05,
"loss": 2.8174,
"step": 73100
},
{
"epoch": 0.53,
"learning_rate": 1.471104746199135e-05,
"loss": 2.8049,
"step": 73200
},
{
"epoch": 0.53,
"learning_rate": 1.4703814607472986e-05,
"loss": 2.8129,
"step": 73300
},
{
"epoch": 0.53,
"learning_rate": 1.4696581752954621e-05,
"loss": 2.8127,
"step": 73400
},
{
"epoch": 0.53,
"learning_rate": 1.4689348898436258e-05,
"loss": 2.8125,
"step": 73500
},
{
"epoch": 0.53,
"learning_rate": 1.4682116043917893e-05,
"loss": 2.8173,
"step": 73600
},
{
"epoch": 0.53,
"learning_rate": 1.4674883189399529e-05,
"loss": 2.8136,
"step": 73700
},
{
"epoch": 0.53,
"learning_rate": 1.4667722663426349e-05,
"loss": 2.8116,
"step": 73800
},
{
"epoch": 0.53,
"learning_rate": 1.4660489808907985e-05,
"loss": 2.804,
"step": 73900
},
{
"epoch": 0.54,
"learning_rate": 1.465325695438962e-05,
"loss": 2.8183,
"step": 74000
},
{
"epoch": 0.54,
"eval_accuracy": 0.45355484782220773,
"eval_loss": 2.8243465423583984,
"eval_runtime": 30.6984,
"eval_samples_per_second": 211.184,
"eval_steps_per_second": 2.215,
"step": 74000
},
{
"epoch": 0.54,
"learning_rate": 1.4646024099871257e-05,
"loss": 2.8043,
"step": 74100
},
{
"epoch": 0.54,
"learning_rate": 1.4638791245352893e-05,
"loss": 2.8099,
"step": 74200
},
{
"epoch": 0.54,
"learning_rate": 1.4631558390834528e-05,
"loss": 2.8101,
"step": 74300
},
{
"epoch": 0.54,
"learning_rate": 1.4624325536316165e-05,
"loss": 2.8051,
"step": 74400
},
{
"epoch": 0.54,
"learning_rate": 1.46170926817978e-05,
"loss": 2.8176,
"step": 74500
},
{
"epoch": 0.54,
"learning_rate": 1.4609859827279436e-05,
"loss": 2.826,
"step": 74600
},
{
"epoch": 0.54,
"learning_rate": 1.460262697276107e-05,
"loss": 2.7999,
"step": 74700
},
{
"epoch": 0.54,
"learning_rate": 1.4595394118242707e-05,
"loss": 2.8013,
"step": 74800
},
{
"epoch": 0.54,
"learning_rate": 1.4588161263724344e-05,
"loss": 2.8069,
"step": 74900
},
{
"epoch": 0.54,
"learning_rate": 1.4580928409205979e-05,
"loss": 2.8052,
"step": 75000
},
{
"epoch": 0.54,
"eval_accuracy": 0.4536056594471816,
"eval_loss": 2.8232624530792236,
"eval_runtime": 33.2026,
"eval_samples_per_second": 195.256,
"eval_steps_per_second": 2.048,
"step": 75000
},
{
"epoch": 0.54,
"learning_rate": 1.4573695554687615e-05,
"loss": 2.8113,
"step": 75100
},
{
"epoch": 0.54,
"learning_rate": 1.456646270016925e-05,
"loss": 2.809,
"step": 75200
},
{
"epoch": 0.54,
"learning_rate": 1.4559229845650886e-05,
"loss": 2.8048,
"step": 75300
},
{
"epoch": 0.55,
"learning_rate": 1.4551996991132521e-05,
"loss": 2.8071,
"step": 75400
},
{
"epoch": 0.55,
"learning_rate": 1.454483646515934e-05,
"loss": 2.8094,
"step": 75500
},
{
"epoch": 0.55,
"learning_rate": 1.4537603610640976e-05,
"loss": 2.8038,
"step": 75600
},
{
"epoch": 0.55,
"learning_rate": 1.4530370756122612e-05,
"loss": 2.815,
"step": 75700
},
{
"epoch": 0.55,
"learning_rate": 1.4523137901604247e-05,
"loss": 2.8076,
"step": 75800
},
{
"epoch": 0.55,
"learning_rate": 1.4515905047085884e-05,
"loss": 2.8066,
"step": 75900
},
{
"epoch": 0.55,
"learning_rate": 1.4508672192567518e-05,
"loss": 2.8101,
"step": 76000
},
{
"epoch": 0.55,
"eval_accuracy": 0.4537810805334011,
"eval_loss": 2.8219878673553467,
"eval_runtime": 29.3462,
"eval_samples_per_second": 220.914,
"eval_steps_per_second": 2.317,
"step": 76000
},
{
"epoch": 0.55,
"learning_rate": 1.4501439338049155e-05,
"loss": 2.8143,
"step": 76100
},
{
"epoch": 0.55,
"learning_rate": 1.449420648353079e-05,
"loss": 2.8062,
"step": 76200
},
{
"epoch": 0.55,
"learning_rate": 1.4486973629012426e-05,
"loss": 2.8041,
"step": 76300
},
{
"epoch": 0.55,
"learning_rate": 1.4479740774494063e-05,
"loss": 2.8018,
"step": 76400
},
{
"epoch": 0.55,
"learning_rate": 1.4472507919975698e-05,
"loss": 2.8006,
"step": 76500
},
{
"epoch": 0.55,
"learning_rate": 1.4465275065457334e-05,
"loss": 2.7967,
"step": 76600
},
{
"epoch": 0.55,
"learning_rate": 1.4458042210938969e-05,
"loss": 2.8028,
"step": 76700
},
{
"epoch": 0.56,
"learning_rate": 1.4450809356420605e-05,
"loss": 2.8051,
"step": 76800
},
{
"epoch": 0.56,
"learning_rate": 1.444357650190224e-05,
"loss": 2.8104,
"step": 76900
},
{
"epoch": 0.56,
"learning_rate": 1.4436343647383877e-05,
"loss": 2.8021,
"step": 77000
},
{
"epoch": 0.56,
"eval_accuracy": 0.45396497022378285,
"eval_loss": 2.8208632469177246,
"eval_runtime": 29.8785,
"eval_samples_per_second": 216.979,
"eval_steps_per_second": 2.276,
"step": 77000
},
{
"epoch": 0.56,
"learning_rate": 1.4429110792865513e-05,
"loss": 2.7955,
"step": 77100
},
{
"epoch": 0.56,
"learning_rate": 1.4421877938347148e-05,
"loss": 2.7983,
"step": 77200
},
{
"epoch": 0.56,
"learning_rate": 1.4414645083828785e-05,
"loss": 2.8088,
"step": 77300
},
{
"epoch": 0.56,
"learning_rate": 1.440741222931042e-05,
"loss": 2.8086,
"step": 77400
},
{
"epoch": 0.56,
"learning_rate": 1.4400179374792056e-05,
"loss": 2.8189,
"step": 77500
},
{
"epoch": 0.56,
"learning_rate": 1.4393018848818876e-05,
"loss": 2.8047,
"step": 77600
},
{
"epoch": 0.56,
"learning_rate": 1.4385785994300512e-05,
"loss": 2.8058,
"step": 77700
},
{
"epoch": 0.56,
"learning_rate": 1.4378553139782147e-05,
"loss": 2.8104,
"step": 77800
},
{
"epoch": 0.56,
"learning_rate": 1.4371320285263784e-05,
"loss": 2.8071,
"step": 77900
},
{
"epoch": 0.56,
"learning_rate": 1.4364159759290603e-05,
"loss": 2.8076,
"step": 78000
},
{
"epoch": 0.56,
"eval_accuracy": 0.4540236455526218,
"eval_loss": 2.819603204727173,
"eval_runtime": 28.0095,
"eval_samples_per_second": 231.457,
"eval_steps_per_second": 2.428,
"step": 78000
},
{
"epoch": 0.56,
"learning_rate": 1.435692690477224e-05,
"loss": 2.7967,
"step": 78100
},
{
"epoch": 0.57,
"learning_rate": 1.4349694050253875e-05,
"loss": 2.8074,
"step": 78200
},
{
"epoch": 0.57,
"learning_rate": 1.4342461195735511e-05,
"loss": 2.8124,
"step": 78300
},
{
"epoch": 0.57,
"learning_rate": 1.4335228341217146e-05,
"loss": 2.8052,
"step": 78400
},
{
"epoch": 0.57,
"learning_rate": 1.4327995486698782e-05,
"loss": 2.8116,
"step": 78500
},
{
"epoch": 0.57,
"learning_rate": 1.4320762632180419e-05,
"loss": 2.8076,
"step": 78600
},
{
"epoch": 0.57,
"learning_rate": 1.4313529777662054e-05,
"loss": 2.8153,
"step": 78700
},
{
"epoch": 0.57,
"learning_rate": 1.430629692314369e-05,
"loss": 2.7979,
"step": 78800
},
{
"epoch": 0.57,
"learning_rate": 1.4299064068625325e-05,
"loss": 2.8034,
"step": 78900
},
{
"epoch": 0.57,
"learning_rate": 1.4291903542652143e-05,
"loss": 2.7937,
"step": 79000
},
{
"epoch": 0.57,
"eval_accuracy": 0.45417608042754354,
"eval_loss": 2.8189663887023926,
"eval_runtime": 29.6646,
"eval_samples_per_second": 218.543,
"eval_steps_per_second": 2.292,
"step": 79000
},
{
"epoch": 0.57,
"learning_rate": 1.428467068813378e-05,
"loss": 2.7999,
"step": 79100
},
{
"epoch": 0.57,
"learning_rate": 1.4277437833615415e-05,
"loss": 2.7967,
"step": 79200
},
{
"epoch": 0.57,
"learning_rate": 1.4270204979097051e-05,
"loss": 2.8032,
"step": 79300
},
{
"epoch": 0.57,
"learning_rate": 1.4262972124578688e-05,
"loss": 2.8101,
"step": 79400
},
{
"epoch": 0.58,
"learning_rate": 1.4255739270060322e-05,
"loss": 2.8052,
"step": 79500
},
{
"epoch": 0.58,
"learning_rate": 1.4248506415541959e-05,
"loss": 2.7995,
"step": 79600
},
{
"epoch": 0.58,
"learning_rate": 1.4241273561023594e-05,
"loss": 2.8048,
"step": 79700
},
{
"epoch": 0.58,
"learning_rate": 1.423404070650523e-05,
"loss": 2.8033,
"step": 79800
},
{
"epoch": 0.58,
"learning_rate": 1.4226807851986865e-05,
"loss": 2.8033,
"step": 79900
},
{
"epoch": 0.58,
"learning_rate": 1.4219574997468502e-05,
"loss": 2.8057,
"step": 80000
},
{
"epoch": 0.58,
"eval_accuracy": 0.4541452305123808,
"eval_loss": 2.8179192543029785,
"eval_runtime": 27.9926,
"eval_samples_per_second": 231.597,
"eval_steps_per_second": 2.429,
"step": 80000
},
{
"epoch": 0.58,
"learning_rate": 1.4212342142950138e-05,
"loss": 2.8101,
"step": 80100
},
{
"epoch": 0.58,
"learning_rate": 1.4205109288431773e-05,
"loss": 2.8088,
"step": 80200
},
{
"epoch": 0.58,
"learning_rate": 1.419787643391341e-05,
"loss": 2.8033,
"step": 80300
},
{
"epoch": 0.58,
"learning_rate": 1.4190643579395044e-05,
"loss": 2.8072,
"step": 80400
},
{
"epoch": 0.58,
"learning_rate": 1.418341072487668e-05,
"loss": 2.8053,
"step": 80500
},
{
"epoch": 0.58,
"learning_rate": 1.4176177870358316e-05,
"loss": 2.8044,
"step": 80600
},
{
"epoch": 0.58,
"learning_rate": 1.4168945015839952e-05,
"loss": 2.8059,
"step": 80700
},
{
"epoch": 0.58,
"learning_rate": 1.4161712161321589e-05,
"loss": 2.7949,
"step": 80800
},
{
"epoch": 0.59,
"learning_rate": 1.4154479306803223e-05,
"loss": 2.8008,
"step": 80900
},
{
"epoch": 0.59,
"learning_rate": 1.414724645228486e-05,
"loss": 2.8082,
"step": 81000
},
{
"epoch": 0.59,
"eval_accuracy": 0.45446219826817047,
"eval_loss": 2.8168437480926514,
"eval_runtime": 30.4701,
"eval_samples_per_second": 212.766,
"eval_steps_per_second": 2.232,
"step": 81000
},
{
"epoch": 0.59,
"learning_rate": 1.4140013597766495e-05,
"loss": 2.803,
"step": 81100
},
{
"epoch": 0.59,
"learning_rate": 1.4132780743248131e-05,
"loss": 2.806,
"step": 81200
},
{
"epoch": 0.59,
"learning_rate": 1.4125620217274951e-05,
"loss": 2.8034,
"step": 81300
},
{
"epoch": 0.59,
"learning_rate": 1.4118387362756588e-05,
"loss": 2.7982,
"step": 81400
},
{
"epoch": 0.59,
"learning_rate": 1.4111154508238222e-05,
"loss": 2.7955,
"step": 81500
},
{
"epoch": 0.59,
"learning_rate": 1.4103921653719859e-05,
"loss": 2.8114,
"step": 81600
},
{
"epoch": 0.59,
"learning_rate": 1.4096688799201494e-05,
"loss": 2.8103,
"step": 81700
},
{
"epoch": 0.59,
"learning_rate": 1.408945594468313e-05,
"loss": 2.7982,
"step": 81800
},
{
"epoch": 0.59,
"learning_rate": 1.4082223090164767e-05,
"loss": 2.8026,
"step": 81900
},
{
"epoch": 0.59,
"learning_rate": 1.4074990235646402e-05,
"loss": 2.7986,
"step": 82000
},
{
"epoch": 0.59,
"eval_accuracy": 0.45461100374130836,
"eval_loss": 2.815699577331543,
"eval_runtime": 31.1592,
"eval_samples_per_second": 208.061,
"eval_steps_per_second": 2.182,
"step": 82000
},
{
"epoch": 0.59,
"learning_rate": 1.4067757381128038e-05,
"loss": 2.8006,
"step": 82100
},
{
"epoch": 0.59,
"learning_rate": 1.4060524526609673e-05,
"loss": 2.8056,
"step": 82200
},
{
"epoch": 0.6,
"learning_rate": 1.405329167209131e-05,
"loss": 2.7992,
"step": 82300
},
{
"epoch": 0.6,
"learning_rate": 1.4046058817572946e-05,
"loss": 2.8123,
"step": 82400
},
{
"epoch": 0.6,
"learning_rate": 1.403882596305458e-05,
"loss": 2.7977,
"step": 82500
},
{
"epoch": 0.6,
"learning_rate": 1.4031593108536217e-05,
"loss": 2.808,
"step": 82600
},
{
"epoch": 0.6,
"learning_rate": 1.4024360254017852e-05,
"loss": 2.7964,
"step": 82700
},
{
"epoch": 0.6,
"learning_rate": 1.4017127399499489e-05,
"loss": 2.7978,
"step": 82800
},
{
"epoch": 0.6,
"learning_rate": 1.4009894544981123e-05,
"loss": 2.7935,
"step": 82900
},
{
"epoch": 0.6,
"learning_rate": 1.400266169046276e-05,
"loss": 2.8062,
"step": 83000
},
{
"epoch": 0.6,
"eval_accuracy": 0.45453962550622595,
"eval_loss": 2.814990520477295,
"eval_runtime": 29.5244,
"eval_samples_per_second": 219.581,
"eval_steps_per_second": 2.303,
"step": 83000
},
{
"epoch": 0.6,
"learning_rate": 1.3995428835944396e-05,
"loss": 2.7966,
"step": 83100
},
{
"epoch": 0.6,
"learning_rate": 1.3988195981426031e-05,
"loss": 2.803,
"step": 83200
},
{
"epoch": 0.6,
"learning_rate": 1.3981107783998034e-05,
"loss": 2.799,
"step": 83300
},
{
"epoch": 0.6,
"learning_rate": 1.3973874929479669e-05,
"loss": 2.801,
"step": 83400
},
{
"epoch": 0.6,
"learning_rate": 1.3966642074961306e-05,
"loss": 2.7902,
"step": 83500
},
{
"epoch": 0.6,
"learning_rate": 1.395940922044294e-05,
"loss": 2.7999,
"step": 83600
},
{
"epoch": 0.61,
"learning_rate": 1.3952176365924577e-05,
"loss": 2.8011,
"step": 83700
},
{
"epoch": 0.61,
"learning_rate": 1.3944943511406213e-05,
"loss": 2.8004,
"step": 83800
},
{
"epoch": 0.61,
"learning_rate": 1.3937710656887848e-05,
"loss": 2.7973,
"step": 83900
},
{
"epoch": 0.61,
"learning_rate": 1.3930477802369485e-05,
"loss": 2.7981,
"step": 84000
},
{
"epoch": 0.61,
"eval_accuracy": 0.4545880175300106,
"eval_loss": 2.813809871673584,
"eval_runtime": 28.1508,
"eval_samples_per_second": 230.295,
"eval_steps_per_second": 2.416,
"step": 84000
},
{
"epoch": 0.61,
"learning_rate": 1.392324494785112e-05,
"loss": 2.7926,
"step": 84100
},
{
"epoch": 0.61,
"learning_rate": 1.3916012093332756e-05,
"loss": 2.7982,
"step": 84200
},
{
"epoch": 0.61,
"learning_rate": 1.390877923881439e-05,
"loss": 2.8091,
"step": 84300
},
{
"epoch": 0.61,
"learning_rate": 1.3901618712841212e-05,
"loss": 2.8055,
"step": 84400
},
{
"epoch": 0.61,
"learning_rate": 1.389445818686803e-05,
"loss": 2.7951,
"step": 84500
},
{
"epoch": 0.61,
"learning_rate": 1.3887225332349665e-05,
"loss": 2.8024,
"step": 84600
},
{
"epoch": 0.61,
"learning_rate": 1.3879992477831302e-05,
"loss": 2.7985,
"step": 84700
},
{
"epoch": 0.61,
"learning_rate": 1.3872759623312936e-05,
"loss": 2.7949,
"step": 84800
},
{
"epoch": 0.61,
"learning_rate": 1.3865526768794573e-05,
"loss": 2.7992,
"step": 84900
},
{
"epoch": 0.61,
"learning_rate": 1.3858293914276208e-05,
"loss": 2.8041,
"step": 85000
},
{
"epoch": 0.61,
"eval_accuracy": 0.4546454830582549,
"eval_loss": 2.8130455017089844,
"eval_runtime": 29.7879,
"eval_samples_per_second": 217.639,
"eval_steps_per_second": 2.283,
"step": 85000
},
{
"epoch": 0.62,
"learning_rate": 1.3851061059757844e-05,
"loss": 2.8013,
"step": 85100
},
{
"epoch": 0.62,
"learning_rate": 1.384382820523948e-05,
"loss": 2.8052,
"step": 85200
},
{
"epoch": 0.62,
"learning_rate": 1.3836595350721116e-05,
"loss": 2.801,
"step": 85300
},
{
"epoch": 0.62,
"learning_rate": 1.3829362496202752e-05,
"loss": 2.8045,
"step": 85400
},
{
"epoch": 0.62,
"learning_rate": 1.3822129641684387e-05,
"loss": 2.8031,
"step": 85500
},
{
"epoch": 0.62,
"learning_rate": 1.3814896787166023e-05,
"loss": 2.7966,
"step": 85600
},
{
"epoch": 0.62,
"learning_rate": 1.3807663932647658e-05,
"loss": 2.7934,
"step": 85700
},
{
"epoch": 0.62,
"learning_rate": 1.3800431078129295e-05,
"loss": 2.7978,
"step": 85800
},
{
"epoch": 0.62,
"learning_rate": 1.3793198223610931e-05,
"loss": 2.7973,
"step": 85900
},
{
"epoch": 0.62,
"learning_rate": 1.3785965369092566e-05,
"loss": 2.7978,
"step": 86000
},
{
"epoch": 0.62,
"eval_accuracy": 0.4548626422649887,
"eval_loss": 2.8118443489074707,
"eval_runtime": 29.2336,
"eval_samples_per_second": 221.765,
"eval_steps_per_second": 2.326,
"step": 86000
},
{
"epoch": 0.62,
"learning_rate": 1.3778732514574203e-05,
"loss": 2.798,
"step": 86100
},
{
"epoch": 0.62,
"learning_rate": 1.3771499660055837e-05,
"loss": 2.7967,
"step": 86200
},
{
"epoch": 0.62,
"learning_rate": 1.3764266805537474e-05,
"loss": 2.8004,
"step": 86300
},
{
"epoch": 0.62,
"learning_rate": 1.375703395101911e-05,
"loss": 2.7957,
"step": 86400
},
{
"epoch": 0.63,
"learning_rate": 1.3749801096500745e-05,
"loss": 2.7932,
"step": 86500
},
{
"epoch": 0.63,
"learning_rate": 1.3742568241982382e-05,
"loss": 2.8002,
"step": 86600
},
{
"epoch": 0.63,
"learning_rate": 1.3735335387464017e-05,
"loss": 2.8027,
"step": 86700
},
{
"epoch": 0.63,
"learning_rate": 1.3728102532945653e-05,
"loss": 2.8001,
"step": 86800
},
{
"epoch": 0.63,
"learning_rate": 1.3720869678427288e-05,
"loss": 2.7988,
"step": 86900
},
{
"epoch": 0.63,
"learning_rate": 1.3713636823908925e-05,
"loss": 2.8016,
"step": 87000
},
{
"epoch": 0.63,
"eval_accuracy": 0.45493644010126033,
"eval_loss": 2.8109002113342285,
"eval_runtime": 29.8642,
"eval_samples_per_second": 217.082,
"eval_steps_per_second": 2.277,
"step": 87000
},
{
"epoch": 0.63,
"learning_rate": 1.3706476297935744e-05,
"loss": 2.7946,
"step": 87100
},
{
"epoch": 0.63,
"learning_rate": 1.369924344341738e-05,
"loss": 2.794,
"step": 87200
},
{
"epoch": 0.63,
"learning_rate": 1.3692010588899016e-05,
"loss": 2.8011,
"step": 87300
},
{
"epoch": 0.63,
"learning_rate": 1.3684777734380652e-05,
"loss": 2.7973,
"step": 87400
},
{
"epoch": 0.63,
"learning_rate": 1.3677544879862289e-05,
"loss": 2.8076,
"step": 87500
},
{
"epoch": 0.63,
"learning_rate": 1.3670312025343923e-05,
"loss": 2.8012,
"step": 87600
},
{
"epoch": 0.63,
"learning_rate": 1.366307917082556e-05,
"loss": 2.7916,
"step": 87700
},
{
"epoch": 0.64,
"learning_rate": 1.3655846316307195e-05,
"loss": 2.798,
"step": 87800
},
{
"epoch": 0.64,
"learning_rate": 1.3648613461788831e-05,
"loss": 2.7971,
"step": 87900
},
{
"epoch": 0.64,
"learning_rate": 1.3641380607270466e-05,
"loss": 2.7901,
"step": 88000
},
{
"epoch": 0.64,
"eval_accuracy": 0.4551348473987775,
"eval_loss": 2.809919834136963,
"eval_runtime": 30.2675,
"eval_samples_per_second": 214.19,
"eval_steps_per_second": 2.247,
"step": 88000
},
{
"epoch": 0.64,
"learning_rate": 1.3634220081297288e-05,
"loss": 2.8022,
"step": 88100
},
{
"epoch": 0.64,
"learning_rate": 1.3626987226778922e-05,
"loss": 2.7927,
"step": 88200
},
{
"epoch": 0.64,
"learning_rate": 1.3619754372260559e-05,
"loss": 2.7951,
"step": 88300
},
{
"epoch": 0.64,
"learning_rate": 1.3612521517742194e-05,
"loss": 2.7995,
"step": 88400
},
{
"epoch": 0.64,
"learning_rate": 1.360528866322383e-05,
"loss": 2.7966,
"step": 88500
},
{
"epoch": 0.64,
"learning_rate": 1.3598055808705463e-05,
"loss": 2.7882,
"step": 88600
},
{
"epoch": 0.64,
"learning_rate": 1.35908229541871e-05,
"loss": 2.7933,
"step": 88700
},
{
"epoch": 0.64,
"learning_rate": 1.3583590099668735e-05,
"loss": 2.7965,
"step": 88800
},
{
"epoch": 0.64,
"learning_rate": 1.3576357245150371e-05,
"loss": 2.7948,
"step": 88900
},
{
"epoch": 0.64,
"learning_rate": 1.3569124390632008e-05,
"loss": 2.8075,
"step": 89000
},
{
"epoch": 0.64,
"eval_accuracy": 0.4553132929864835,
"eval_loss": 2.809250593185425,
"eval_runtime": 29.5022,
"eval_samples_per_second": 219.746,
"eval_steps_per_second": 2.305,
"step": 89000
},
{
"epoch": 0.64,
"learning_rate": 1.3561891536113643e-05,
"loss": 2.7911,
"step": 89100
},
{
"epoch": 0.65,
"learning_rate": 1.3554658681595279e-05,
"loss": 2.7966,
"step": 89200
},
{
"epoch": 0.65,
"learning_rate": 1.3547425827076914e-05,
"loss": 2.7951,
"step": 89300
},
{
"epoch": 0.65,
"learning_rate": 1.354019297255855e-05,
"loss": 2.7992,
"step": 89400
},
{
"epoch": 0.65,
"learning_rate": 1.3532960118040185e-05,
"loss": 2.7913,
"step": 89500
},
{
"epoch": 0.65,
"learning_rate": 1.3525727263521822e-05,
"loss": 2.7985,
"step": 89600
},
{
"epoch": 0.65,
"learning_rate": 1.3518494409003458e-05,
"loss": 2.7968,
"step": 89700
},
{
"epoch": 0.65,
"learning_rate": 1.3511261554485093e-05,
"loss": 2.7985,
"step": 89800
},
{
"epoch": 0.65,
"learning_rate": 1.350402869996673e-05,
"loss": 2.7937,
"step": 89900
},
{
"epoch": 0.65,
"learning_rate": 1.3496795845448364e-05,
"loss": 2.7915,
"step": 90000
},
{
"epoch": 0.65,
"eval_accuracy": 0.4552001766308868,
"eval_loss": 2.808422327041626,
"eval_runtime": 29.3832,
"eval_samples_per_second": 220.636,
"eval_steps_per_second": 2.314,
"step": 90000
},
{
"epoch": 0.65,
"learning_rate": 1.3489562990930001e-05,
"loss": 2.7934,
"step": 90100
},
{
"epoch": 0.65,
"learning_rate": 1.348240246495682e-05,
"loss": 2.7959,
"step": 90200
},
{
"epoch": 0.65,
"learning_rate": 1.3475169610438457e-05,
"loss": 2.8034,
"step": 90300
},
{
"epoch": 0.65,
"learning_rate": 1.3467936755920092e-05,
"loss": 2.7908,
"step": 90400
},
{
"epoch": 0.65,
"learning_rate": 1.3460703901401728e-05,
"loss": 2.7904,
"step": 90500
},
{
"epoch": 0.66,
"learning_rate": 1.3453543375428548e-05,
"loss": 2.789,
"step": 90600
},
{
"epoch": 0.66,
"learning_rate": 1.3446310520910185e-05,
"loss": 2.7961,
"step": 90700
},
{
"epoch": 0.66,
"learning_rate": 1.343907766639182e-05,
"loss": 2.7939,
"step": 90800
},
{
"epoch": 0.66,
"learning_rate": 1.3431844811873456e-05,
"loss": 2.7922,
"step": 90900
},
{
"epoch": 0.66,
"learning_rate": 1.3424611957355091e-05,
"loss": 2.7916,
"step": 91000
},
{
"epoch": 0.66,
"eval_accuracy": 0.4554560494566483,
"eval_loss": 2.807447910308838,
"eval_runtime": 30.8057,
"eval_samples_per_second": 210.448,
"eval_steps_per_second": 2.207,
"step": 91000
},
{
"epoch": 0.66,
"learning_rate": 1.3417379102836727e-05,
"loss": 2.7855,
"step": 91100
},
{
"epoch": 0.66,
"learning_rate": 1.3410146248318364e-05,
"loss": 2.8014,
"step": 91200
},
{
"epoch": 0.66,
"learning_rate": 1.3402913393799999e-05,
"loss": 2.7801,
"step": 91300
},
{
"epoch": 0.66,
"learning_rate": 1.3395680539281635e-05,
"loss": 2.7898,
"step": 91400
},
{
"epoch": 0.66,
"learning_rate": 1.338844768476327e-05,
"loss": 2.7983,
"step": 91500
},
{
"epoch": 0.66,
"learning_rate": 1.3381214830244907e-05,
"loss": 2.7945,
"step": 91600
},
{
"epoch": 0.66,
"learning_rate": 1.3373981975726541e-05,
"loss": 2.7854,
"step": 91700
},
{
"epoch": 0.66,
"learning_rate": 1.3366749121208178e-05,
"loss": 2.7905,
"step": 91800
},
{
"epoch": 0.66,
"learning_rate": 1.3359516266689814e-05,
"loss": 2.7875,
"step": 91900
},
{
"epoch": 0.67,
"learning_rate": 1.335228341217145e-05,
"loss": 2.7751,
"step": 92000
},
{
"epoch": 0.67,
"eval_accuracy": 0.4554330632453506,
"eval_loss": 2.806763172149658,
"eval_runtime": 32.0762,
"eval_samples_per_second": 202.113,
"eval_steps_per_second": 2.12,
"step": 92000
},
{
"epoch": 0.67,
"learning_rate": 1.3345050557653086e-05,
"loss": 2.7943,
"step": 92100
},
{
"epoch": 0.67,
"learning_rate": 1.333781770313472e-05,
"loss": 2.7919,
"step": 92200
},
{
"epoch": 0.67,
"learning_rate": 1.3330584848616357e-05,
"loss": 2.7911,
"step": 92300
},
{
"epoch": 0.67,
"learning_rate": 1.3323351994097994e-05,
"loss": 2.789,
"step": 92400
},
{
"epoch": 0.67,
"learning_rate": 1.3316119139579628e-05,
"loss": 2.7876,
"step": 92500
},
{
"epoch": 0.67,
"learning_rate": 1.3308886285061265e-05,
"loss": 2.7861,
"step": 92600
},
{
"epoch": 0.67,
"learning_rate": 1.33016534305429e-05,
"loss": 2.7807,
"step": 92700
},
{
"epoch": 0.67,
"learning_rate": 1.3294420576024536e-05,
"loss": 2.8013,
"step": 92800
},
{
"epoch": 0.67,
"learning_rate": 1.3287260050051354e-05,
"loss": 2.7933,
"step": 92900
},
{
"epoch": 0.67,
"learning_rate": 1.3280027195532989e-05,
"loss": 2.7896,
"step": 93000
},
{
"epoch": 0.67,
"eval_accuracy": 0.45561574313513775,
"eval_loss": 2.8058676719665527,
"eval_runtime": 29.3079,
"eval_samples_per_second": 221.203,
"eval_steps_per_second": 2.32,
"step": 93000
},
{
"epoch": 0.67,
"learning_rate": 1.3272794341014626e-05,
"loss": 2.7916,
"step": 93100
},
{
"epoch": 0.67,
"learning_rate": 1.326556148649626e-05,
"loss": 2.7959,
"step": 93200
},
{
"epoch": 0.67,
"learning_rate": 1.3258328631977897e-05,
"loss": 2.7946,
"step": 93300
},
{
"epoch": 0.68,
"learning_rate": 1.3251095777459533e-05,
"loss": 2.789,
"step": 93400
},
{
"epoch": 0.68,
"learning_rate": 1.3243862922941168e-05,
"loss": 2.7914,
"step": 93500
},
{
"epoch": 0.68,
"learning_rate": 1.3236630068422805e-05,
"loss": 2.7956,
"step": 93600
},
{
"epoch": 0.68,
"learning_rate": 1.322939721390444e-05,
"loss": 2.7945,
"step": 93700
},
{
"epoch": 0.68,
"learning_rate": 1.3222164359386076e-05,
"loss": 2.7877,
"step": 93800
},
{
"epoch": 0.68,
"learning_rate": 1.3214931504867713e-05,
"loss": 2.7865,
"step": 93900
},
{
"epoch": 0.68,
"learning_rate": 1.3207698650349347e-05,
"loss": 2.7886,
"step": 94000
},
{
"epoch": 0.68,
"eval_accuracy": 0.45565748125565203,
"eval_loss": 2.8051185607910156,
"eval_runtime": 29.7995,
"eval_samples_per_second": 217.554,
"eval_steps_per_second": 2.282,
"step": 94000
},
{
"epoch": 0.68,
"learning_rate": 1.3200538124376167e-05,
"loss": 2.7942,
"step": 94100
},
{
"epoch": 0.68,
"learning_rate": 1.3193305269857804e-05,
"loss": 2.7838,
"step": 94200
},
{
"epoch": 0.68,
"learning_rate": 1.3186072415339439e-05,
"loss": 2.7905,
"step": 94300
},
{
"epoch": 0.68,
"learning_rate": 1.3178839560821075e-05,
"loss": 2.7868,
"step": 94400
},
{
"epoch": 0.68,
"learning_rate": 1.3171606706302712e-05,
"loss": 2.7851,
"step": 94500
},
{
"epoch": 0.68,
"learning_rate": 1.3164373851784346e-05,
"loss": 2.7934,
"step": 94600
},
{
"epoch": 0.68,
"learning_rate": 1.3157140997265983e-05,
"loss": 2.789,
"step": 94700
},
{
"epoch": 0.69,
"learning_rate": 1.3149908142747618e-05,
"loss": 2.7892,
"step": 94800
},
{
"epoch": 0.69,
"learning_rate": 1.3142675288229254e-05,
"loss": 2.7828,
"step": 94900
},
{
"epoch": 0.69,
"learning_rate": 1.3135442433710889e-05,
"loss": 2.7909,
"step": 95000
},
{
"epoch": 0.69,
"eval_accuracy": 0.45570647817973403,
"eval_loss": 2.804401397705078,
"eval_runtime": 30.1768,
"eval_samples_per_second": 214.834,
"eval_steps_per_second": 2.253,
"step": 95000
},
{
"epoch": 0.69,
"learning_rate": 1.3128209579192526e-05,
"loss": 2.7872,
"step": 95100
},
{
"epoch": 0.69,
"learning_rate": 1.3121049053219344e-05,
"loss": 2.7878,
"step": 95200
},
{
"epoch": 0.69,
"learning_rate": 1.3113888527246163e-05,
"loss": 2.7852,
"step": 95300
},
{
"epoch": 0.69,
"learning_rate": 1.31066556727278e-05,
"loss": 2.7896,
"step": 95400
},
{
"epoch": 0.69,
"learning_rate": 1.3099422818209435e-05,
"loss": 2.7886,
"step": 95500
},
{
"epoch": 0.69,
"learning_rate": 1.3092189963691071e-05,
"loss": 2.7876,
"step": 95600
},
{
"epoch": 0.69,
"learning_rate": 1.3084957109172706e-05,
"loss": 2.7866,
"step": 95700
},
{
"epoch": 0.69,
"learning_rate": 1.3077724254654343e-05,
"loss": 2.791,
"step": 95800
},
{
"epoch": 0.69,
"learning_rate": 1.3070491400135979e-05,
"loss": 2.7915,
"step": 95900
},
{
"epoch": 0.69,
"learning_rate": 1.3063258545617614e-05,
"loss": 2.7926,
"step": 96000
},
{
"epoch": 0.69,
"eval_accuracy": 0.45580084262611414,
"eval_loss": 2.803481101989746,
"eval_runtime": 30.9308,
"eval_samples_per_second": 209.597,
"eval_steps_per_second": 2.198,
"step": 96000
},
{
"epoch": 0.7,
"learning_rate": 1.305602569109925e-05,
"loss": 2.7844,
"step": 96100
},
{
"epoch": 0.7,
"learning_rate": 1.3048792836580885e-05,
"loss": 2.7857,
"step": 96200
},
{
"epoch": 0.7,
"learning_rate": 1.3041559982062522e-05,
"loss": 2.7882,
"step": 96300
},
{
"epoch": 0.7,
"learning_rate": 1.3034327127544158e-05,
"loss": 2.7929,
"step": 96400
},
{
"epoch": 0.7,
"learning_rate": 1.3027094273025793e-05,
"loss": 2.7979,
"step": 96500
},
{
"epoch": 0.7,
"learning_rate": 1.301986141850743e-05,
"loss": 2.798,
"step": 96600
},
{
"epoch": 0.7,
"learning_rate": 1.3012628563989064e-05,
"loss": 2.7847,
"step": 96700
},
{
"epoch": 0.7,
"learning_rate": 1.3005395709470701e-05,
"loss": 2.7885,
"step": 96800
},
{
"epoch": 0.7,
"learning_rate": 1.2998162854952336e-05,
"loss": 2.7781,
"step": 96900
},
{
"epoch": 0.7,
"learning_rate": 1.2990930000433972e-05,
"loss": 2.7931,
"step": 97000
},
{
"epoch": 0.7,
"eval_accuracy": 0.45598533721679324,
"eval_loss": 2.802797317504883,
"eval_runtime": 30.7179,
"eval_samples_per_second": 211.05,
"eval_steps_per_second": 2.214,
"step": 97000
},
{
"epoch": 0.7,
"learning_rate": 1.2983769474460792e-05,
"loss": 2.7851,
"step": 97100
},
{
"epoch": 0.7,
"learning_rate": 1.2976536619942428e-05,
"loss": 2.7835,
"step": 97200
},
{
"epoch": 0.7,
"learning_rate": 1.2969303765424063e-05,
"loss": 2.7948,
"step": 97300
},
{
"epoch": 0.7,
"learning_rate": 1.29620709109057e-05,
"loss": 2.7872,
"step": 97400
},
{
"epoch": 0.71,
"learning_rate": 1.2954838056387336e-05,
"loss": 2.7882,
"step": 97500
},
{
"epoch": 0.71,
"learning_rate": 1.2947605201868971e-05,
"loss": 2.792,
"step": 97600
},
{
"epoch": 0.71,
"learning_rate": 1.2940372347350608e-05,
"loss": 2.7907,
"step": 97700
},
{
"epoch": 0.71,
"learning_rate": 1.2933139492832242e-05,
"loss": 2.7916,
"step": 97800
},
{
"epoch": 0.71,
"learning_rate": 1.2925906638313879e-05,
"loss": 2.7805,
"step": 97900
},
{
"epoch": 0.71,
"learning_rate": 1.2918673783795514e-05,
"loss": 2.7838,
"step": 98000
},
{
"epoch": 0.71,
"eval_accuracy": 0.4561528945991477,
"eval_loss": 2.802030563354492,
"eval_runtime": 29.961,
"eval_samples_per_second": 216.381,
"eval_steps_per_second": 2.27,
"step": 98000
},
{
"epoch": 0.71,
"learning_rate": 1.291144092927715e-05,
"loss": 2.7927,
"step": 98100
},
{
"epoch": 0.71,
"learning_rate": 1.2904208074758787e-05,
"loss": 2.7861,
"step": 98200
},
{
"epoch": 0.71,
"learning_rate": 1.2896975220240422e-05,
"loss": 2.7933,
"step": 98300
},
{
"epoch": 0.71,
"learning_rate": 1.2889742365722058e-05,
"loss": 2.785,
"step": 98400
},
{
"epoch": 0.71,
"learning_rate": 1.2882509511203693e-05,
"loss": 2.7827,
"step": 98500
},
{
"epoch": 0.71,
"learning_rate": 1.287527665668533e-05,
"loss": 2.7861,
"step": 98600
},
{
"epoch": 0.71,
"learning_rate": 1.2868043802166964e-05,
"loss": 2.7901,
"step": 98700
},
{
"epoch": 0.71,
"learning_rate": 1.28608109476486e-05,
"loss": 2.7939,
"step": 98800
},
{
"epoch": 0.72,
"learning_rate": 1.2853578093130237e-05,
"loss": 2.7834,
"step": 98900
},
{
"epoch": 0.72,
"learning_rate": 1.2846489895702239e-05,
"loss": 2.779,
"step": 99000
},
{
"epoch": 0.72,
"eval_accuracy": 0.45607909676287606,
"eval_loss": 2.8013815879821777,
"eval_runtime": 29.8613,
"eval_samples_per_second": 217.104,
"eval_steps_per_second": 2.277,
"step": 99000
},
{
"epoch": 0.72,
"learning_rate": 1.2839257041183875e-05,
"loss": 2.7808,
"step": 99100
},
{
"epoch": 0.72,
"learning_rate": 1.283202418666551e-05,
"loss": 2.7838,
"step": 99200
},
{
"epoch": 0.72,
"learning_rate": 1.2824791332147146e-05,
"loss": 2.7794,
"step": 99300
},
{
"epoch": 0.72,
"learning_rate": 1.2817558477628781e-05,
"loss": 2.7863,
"step": 99400
},
{
"epoch": 0.72,
"learning_rate": 1.2810325623110418e-05,
"loss": 2.7833,
"step": 99500
},
{
"epoch": 0.72,
"learning_rate": 1.2803092768592054e-05,
"loss": 2.7833,
"step": 99600
},
{
"epoch": 0.72,
"learning_rate": 1.2795859914073689e-05,
"loss": 2.7824,
"step": 99700
},
{
"epoch": 0.72,
"learning_rate": 1.2788627059555326e-05,
"loss": 2.792,
"step": 99800
},
{
"epoch": 0.72,
"learning_rate": 1.278139420503696e-05,
"loss": 2.7919,
"step": 99900
},
{
"epoch": 0.72,
"learning_rate": 1.2774161350518597e-05,
"loss": 2.7922,
"step": 100000
},
{
"epoch": 0.72,
"eval_accuracy": 0.4561583387018235,
"eval_loss": 2.8006463050842285,
"eval_runtime": 28.183,
"eval_samples_per_second": 230.032,
"eval_steps_per_second": 2.413,
"step": 100000
},
{
"epoch": 0.72,
"learning_rate": 1.2766928496000233e-05,
"loss": 2.7826,
"step": 100100
},
{
"epoch": 0.72,
"learning_rate": 1.2759695641481868e-05,
"loss": 2.788,
"step": 100200
},
{
"epoch": 0.73,
"learning_rate": 1.2752462786963505e-05,
"loss": 2.7929,
"step": 100300
},
{
"epoch": 0.73,
"learning_rate": 1.274522993244514e-05,
"loss": 2.7889,
"step": 100400
},
{
"epoch": 0.73,
"learning_rate": 1.2737997077926776e-05,
"loss": 2.7805,
"step": 100500
},
{
"epoch": 0.73,
"learning_rate": 1.2730764223408411e-05,
"loss": 2.7857,
"step": 100600
},
{
"epoch": 0.73,
"learning_rate": 1.2723531368890047e-05,
"loss": 2.7963,
"step": 100700
},
{
"epoch": 0.73,
"learning_rate": 1.2716298514371684e-05,
"loss": 2.7759,
"step": 100800
},
{
"epoch": 0.73,
"learning_rate": 1.2709065659853319e-05,
"loss": 2.784,
"step": 100900
},
{
"epoch": 0.73,
"learning_rate": 1.2701832805334955e-05,
"loss": 2.7786,
"step": 101000
},
{
"epoch": 0.73,
"eval_accuracy": 0.4561758808104454,
"eval_loss": 2.7999138832092285,
"eval_runtime": 29.8581,
"eval_samples_per_second": 217.127,
"eval_steps_per_second": 2.277,
"step": 101000
},
{
"epoch": 0.73,
"learning_rate": 1.269459995081659e-05,
"loss": 2.7861,
"step": 101100
},
{
"epoch": 0.73,
"learning_rate": 1.2687367096298227e-05,
"loss": 2.7875,
"step": 101200
},
{
"epoch": 0.73,
"learning_rate": 1.2680134241779861e-05,
"loss": 2.7815,
"step": 101300
},
{
"epoch": 0.73,
"learning_rate": 1.2672901387261498e-05,
"loss": 2.7838,
"step": 101400
},
{
"epoch": 0.73,
"learning_rate": 1.2665668532743135e-05,
"loss": 2.7861,
"step": 101500
},
{
"epoch": 0.73,
"learning_rate": 1.265843567822477e-05,
"loss": 2.7867,
"step": 101600
},
{
"epoch": 0.74,
"learning_rate": 1.2651202823706406e-05,
"loss": 2.7878,
"step": 101700
},
{
"epoch": 0.74,
"learning_rate": 1.264396996918804e-05,
"loss": 2.7861,
"step": 101800
},
{
"epoch": 0.74,
"learning_rate": 1.2636809443214859e-05,
"loss": 2.7844,
"step": 101900
},
{
"epoch": 0.74,
"learning_rate": 1.2629576588696495e-05,
"loss": 2.7791,
"step": 102000
},
{
"epoch": 0.74,
"eval_accuracy": 0.45630411967347484,
"eval_loss": 2.7991721630096436,
"eval_runtime": 29.6469,
"eval_samples_per_second": 218.674,
"eval_steps_per_second": 2.294,
"step": 102000
},
{
"epoch": 0.74,
"learning_rate": 1.262234373417813e-05,
"loss": 2.7853,
"step": 102100
},
{
"epoch": 0.74,
"learning_rate": 1.2615110879659767e-05,
"loss": 2.7833,
"step": 102200
},
{
"epoch": 0.74,
"learning_rate": 1.2607878025141403e-05,
"loss": 2.7832,
"step": 102300
},
{
"epoch": 0.74,
"learning_rate": 1.2600645170623038e-05,
"loss": 2.778,
"step": 102400
},
{
"epoch": 0.74,
"learning_rate": 1.2593412316104674e-05,
"loss": 2.7849,
"step": 102500
},
{
"epoch": 0.74,
"learning_rate": 1.258617946158631e-05,
"loss": 2.7798,
"step": 102600
},
{
"epoch": 0.74,
"learning_rate": 1.2578946607067946e-05,
"loss": 2.7834,
"step": 102700
},
{
"epoch": 0.74,
"learning_rate": 1.257171375254958e-05,
"loss": 2.7792,
"step": 102800
},
{
"epoch": 0.74,
"learning_rate": 1.2564480898031217e-05,
"loss": 2.7889,
"step": 102900
},
{
"epoch": 0.74,
"learning_rate": 1.2557248043512854e-05,
"loss": 2.7908,
"step": 103000
},
{
"epoch": 0.74,
"eval_accuracy": 0.4565454748921009,
"eval_loss": 2.798401117324829,
"eval_runtime": 29.5641,
"eval_samples_per_second": 219.287,
"eval_steps_per_second": 2.3,
"step": 103000
},
{
"epoch": 0.75,
"learning_rate": 1.2550087517539673e-05,
"loss": 2.7769,
"step": 103100
},
{
"epoch": 0.75,
"learning_rate": 1.2542854663021308e-05,
"loss": 2.7866,
"step": 103200
},
{
"epoch": 0.75,
"learning_rate": 1.2535621808502945e-05,
"loss": 2.7823,
"step": 103300
},
{
"epoch": 0.75,
"learning_rate": 1.2528388953984581e-05,
"loss": 2.785,
"step": 103400
},
{
"epoch": 0.75,
"learning_rate": 1.2521156099466216e-05,
"loss": 2.7782,
"step": 103500
},
{
"epoch": 0.75,
"learning_rate": 1.2513923244947853e-05,
"loss": 2.7795,
"step": 103600
},
{
"epoch": 0.75,
"learning_rate": 1.2506690390429487e-05,
"loss": 2.7857,
"step": 103700
},
{
"epoch": 0.75,
"learning_rate": 1.2499457535911124e-05,
"loss": 2.7851,
"step": 103800
},
{
"epoch": 0.75,
"learning_rate": 1.2492224681392759e-05,
"loss": 2.7816,
"step": 103900
},
{
"epoch": 0.75,
"learning_rate": 1.2484991826874395e-05,
"loss": 2.7872,
"step": 104000
},
{
"epoch": 0.75,
"eval_accuracy": 0.45662834623283216,
"eval_loss": 2.7977957725524902,
"eval_runtime": 28.0059,
"eval_samples_per_second": 231.487,
"eval_steps_per_second": 2.428,
"step": 104000
},
{
"epoch": 0.75,
"learning_rate": 1.2477758972356032e-05,
"loss": 2.7802,
"step": 104100
},
{
"epoch": 0.75,
"learning_rate": 1.2470526117837667e-05,
"loss": 2.7833,
"step": 104200
},
{
"epoch": 0.75,
"learning_rate": 1.2463365591864486e-05,
"loss": 2.779,
"step": 104300
},
{
"epoch": 0.76,
"learning_rate": 1.2456132737346123e-05,
"loss": 2.7811,
"step": 104400
},
{
"epoch": 0.76,
"learning_rate": 1.244889988282776e-05,
"loss": 2.7838,
"step": 104500
},
{
"epoch": 0.76,
"learning_rate": 1.2441667028309394e-05,
"loss": 2.7762,
"step": 104600
},
{
"epoch": 0.76,
"learning_rate": 1.243443417379103e-05,
"loss": 2.781,
"step": 104700
},
{
"epoch": 0.76,
"learning_rate": 1.2427201319272665e-05,
"loss": 2.7898,
"step": 104800
},
{
"epoch": 0.76,
"learning_rate": 1.2419968464754302e-05,
"loss": 2.7823,
"step": 104900
},
{
"epoch": 0.76,
"learning_rate": 1.2412735610235937e-05,
"loss": 2.7763,
"step": 105000
},
{
"epoch": 0.76,
"eval_accuracy": 0.45665980104829224,
"eval_loss": 2.7971575260162354,
"eval_runtime": 29.269,
"eval_samples_per_second": 221.497,
"eval_steps_per_second": 2.323,
"step": 105000
},
{
"epoch": 0.76,
"learning_rate": 1.2405502755717573e-05,
"loss": 2.7825,
"step": 105100
},
{
"epoch": 0.76,
"learning_rate": 1.239826990119921e-05,
"loss": 2.7826,
"step": 105200
},
{
"epoch": 0.76,
"learning_rate": 1.2391037046680845e-05,
"loss": 2.7811,
"step": 105300
},
{
"epoch": 0.76,
"learning_rate": 1.2383876520707663e-05,
"loss": 2.7769,
"step": 105400
},
{
"epoch": 0.76,
"learning_rate": 1.23766436661893e-05,
"loss": 2.7875,
"step": 105500
},
{
"epoch": 0.76,
"learning_rate": 1.2369410811670934e-05,
"loss": 2.7801,
"step": 105600
},
{
"epoch": 0.76,
"learning_rate": 1.236217795715257e-05,
"loss": 2.7875,
"step": 105700
},
{
"epoch": 0.77,
"learning_rate": 1.2354945102634205e-05,
"loss": 2.7838,
"step": 105800
},
{
"epoch": 0.77,
"learning_rate": 1.2347712248115842e-05,
"loss": 2.7865,
"step": 105900
},
{
"epoch": 0.77,
"learning_rate": 1.2340479393597478e-05,
"loss": 2.7785,
"step": 106000
},
{
"epoch": 0.77,
"eval_accuracy": 0.4568334074336198,
"eval_loss": 2.7966232299804688,
"eval_runtime": 29.6697,
"eval_samples_per_second": 218.506,
"eval_steps_per_second": 2.292,
"step": 106000
},
{
"epoch": 0.77,
"learning_rate": 1.2333246539079113e-05,
"loss": 2.7809,
"step": 106100
},
{
"epoch": 0.77,
"learning_rate": 1.232601368456075e-05,
"loss": 2.7837,
"step": 106200
},
{
"epoch": 0.77,
"learning_rate": 1.2318780830042385e-05,
"loss": 2.7868,
"step": 106300
},
{
"epoch": 0.77,
"learning_rate": 1.2311547975524021e-05,
"loss": 2.781,
"step": 106400
},
{
"epoch": 0.77,
"learning_rate": 1.2304315121005656e-05,
"loss": 2.781,
"step": 106500
},
{
"epoch": 0.77,
"learning_rate": 1.2297082266487292e-05,
"loss": 2.7859,
"step": 106600
},
{
"epoch": 0.77,
"learning_rate": 1.2289849411968929e-05,
"loss": 2.7813,
"step": 106700
},
{
"epoch": 0.77,
"learning_rate": 1.2282616557450564e-05,
"loss": 2.7857,
"step": 106800
},
{
"epoch": 0.77,
"learning_rate": 1.22753837029322e-05,
"loss": 2.7835,
"step": 106900
},
{
"epoch": 0.77,
"learning_rate": 1.2268150848413835e-05,
"loss": 2.7861,
"step": 107000
},
{
"epoch": 0.77,
"eval_accuracy": 0.4568231241285655,
"eval_loss": 2.795985698699951,
"eval_runtime": 29.6433,
"eval_samples_per_second": 218.7,
"eval_steps_per_second": 2.294,
"step": 107000
},
{
"epoch": 0.77,
"learning_rate": 1.2260917993895472e-05,
"loss": 2.7802,
"step": 107100
},
{
"epoch": 0.78,
"learning_rate": 1.2253685139377108e-05,
"loss": 2.7901,
"step": 107200
},
{
"epoch": 0.78,
"learning_rate": 1.2246452284858743e-05,
"loss": 2.7804,
"step": 107300
},
{
"epoch": 0.78,
"learning_rate": 1.223921943034038e-05,
"loss": 2.7784,
"step": 107400
},
{
"epoch": 0.78,
"learning_rate": 1.2231986575822014e-05,
"loss": 2.7824,
"step": 107500
},
{
"epoch": 0.78,
"learning_rate": 1.2224826049848834e-05,
"loss": 2.7756,
"step": 107600
},
{
"epoch": 0.78,
"learning_rate": 1.221759319533047e-05,
"loss": 2.7849,
"step": 107700
},
{
"epoch": 0.78,
"learning_rate": 1.2210360340812107e-05,
"loss": 2.7808,
"step": 107800
},
{
"epoch": 0.78,
"learning_rate": 1.2203127486293742e-05,
"loss": 2.7741,
"step": 107900
},
{
"epoch": 0.78,
"learning_rate": 1.2195894631775378e-05,
"loss": 2.784,
"step": 108000
},
{
"epoch": 0.78,
"eval_accuracy": 0.4569779786046765,
"eval_loss": 2.795300245285034,
"eval_runtime": 29.2976,
"eval_samples_per_second": 221.281,
"eval_steps_per_second": 2.321,
"step": 108000
},
{
"epoch": 0.78,
"learning_rate": 1.2188661777257013e-05,
"loss": 2.781,
"step": 108100
},
{
"epoch": 0.78,
"learning_rate": 1.218142892273865e-05,
"loss": 2.7924,
"step": 108200
},
{
"epoch": 0.78,
"learning_rate": 1.2174196068220286e-05,
"loss": 2.7781,
"step": 108300
},
{
"epoch": 0.78,
"learning_rate": 1.2166963213701921e-05,
"loss": 2.7808,
"step": 108400
},
{
"epoch": 0.78,
"learning_rate": 1.2159730359183557e-05,
"loss": 2.7725,
"step": 108500
},
{
"epoch": 0.79,
"learning_rate": 1.2152497504665192e-05,
"loss": 2.7791,
"step": 108600
},
{
"epoch": 0.79,
"learning_rate": 1.2145264650146829e-05,
"loss": 2.79,
"step": 108700
},
{
"epoch": 0.79,
"learning_rate": 1.2138031795628464e-05,
"loss": 2.7776,
"step": 108800
},
{
"epoch": 0.79,
"learning_rate": 1.21307989411101e-05,
"loss": 2.7821,
"step": 108900
},
{
"epoch": 0.79,
"learning_rate": 1.2123638415136918e-05,
"loss": 2.7804,
"step": 109000
},
{
"epoch": 0.79,
"eval_accuracy": 0.457063269546597,
"eval_loss": 2.794382333755493,
"eval_runtime": 29.8656,
"eval_samples_per_second": 217.073,
"eval_steps_per_second": 2.277,
"step": 109000
},
{
"epoch": 0.79,
"learning_rate": 1.2116405560618553e-05,
"loss": 2.7838,
"step": 109100
},
{
"epoch": 0.79,
"learning_rate": 1.210917270610019e-05,
"loss": 2.7806,
"step": 109200
},
{
"epoch": 0.79,
"learning_rate": 1.2101939851581826e-05,
"loss": 2.7839,
"step": 109300
},
{
"epoch": 0.79,
"learning_rate": 1.2094706997063461e-05,
"loss": 2.7805,
"step": 109400
},
{
"epoch": 0.79,
"learning_rate": 1.2087474142545097e-05,
"loss": 2.7782,
"step": 109500
},
{
"epoch": 0.79,
"learning_rate": 1.2080241288026732e-05,
"loss": 2.7788,
"step": 109600
},
{
"epoch": 0.79,
"learning_rate": 1.2073008433508369e-05,
"loss": 2.7809,
"step": 109700
},
{
"epoch": 0.79,
"learning_rate": 1.2065775578990005e-05,
"loss": 2.7753,
"step": 109800
},
{
"epoch": 0.79,
"learning_rate": 1.205854272447164e-05,
"loss": 2.7894,
"step": 109900
},
{
"epoch": 0.8,
"learning_rate": 1.2051309869953277e-05,
"loss": 2.7828,
"step": 110000
},
{
"epoch": 0.8,
"eval_accuracy": 0.45704633233827235,
"eval_loss": 2.793990135192871,
"eval_runtime": 30.316,
"eval_samples_per_second": 213.848,
"eval_steps_per_second": 2.243,
"step": 110000
},
{
"epoch": 0.8,
"learning_rate": 1.2044077015434911e-05,
"loss": 2.7702,
"step": 110100
},
{
"epoch": 0.8,
"learning_rate": 1.2036844160916548e-05,
"loss": 2.7875,
"step": 110200
},
{
"epoch": 0.8,
"learning_rate": 1.2029611306398183e-05,
"loss": 2.7813,
"step": 110300
},
{
"epoch": 0.8,
"learning_rate": 1.2022450780425004e-05,
"loss": 2.7843,
"step": 110400
},
{
"epoch": 0.8,
"learning_rate": 1.2015217925906639e-05,
"loss": 2.7784,
"step": 110500
},
{
"epoch": 0.8,
"learning_rate": 1.2007985071388275e-05,
"loss": 2.7802,
"step": 110600
},
{
"epoch": 0.8,
"learning_rate": 1.200075221686991e-05,
"loss": 2.7725,
"step": 110700
},
{
"epoch": 0.8,
"learning_rate": 1.1993519362351547e-05,
"loss": 2.7745,
"step": 110800
},
{
"epoch": 0.8,
"learning_rate": 1.1986286507833183e-05,
"loss": 2.7734,
"step": 110900
},
{
"epoch": 0.8,
"learning_rate": 1.1979125981860003e-05,
"loss": 2.7761,
"step": 111000
},
{
"epoch": 0.8,
"eval_accuracy": 0.4570904900599759,
"eval_loss": 2.793330192565918,
"eval_runtime": 30.099,
"eval_samples_per_second": 215.389,
"eval_steps_per_second": 2.259,
"step": 111000
},
{
"epoch": 0.8,
"learning_rate": 1.1971893127341638e-05,
"loss": 2.7721,
"step": 111100
},
{
"epoch": 0.8,
"learning_rate": 1.1964660272823274e-05,
"loss": 2.7846,
"step": 111200
},
{
"epoch": 0.81,
"learning_rate": 1.195742741830491e-05,
"loss": 2.7753,
"step": 111300
},
{
"epoch": 0.81,
"learning_rate": 1.1950194563786546e-05,
"loss": 2.767,
"step": 111400
},
{
"epoch": 0.81,
"learning_rate": 1.1942961709268182e-05,
"loss": 2.7864,
"step": 111500
},
{
"epoch": 0.81,
"learning_rate": 1.1935728854749817e-05,
"loss": 2.7781,
"step": 111600
},
{
"epoch": 0.81,
"learning_rate": 1.1928496000231454e-05,
"loss": 2.7768,
"step": 111700
},
{
"epoch": 0.81,
"learning_rate": 1.1921263145713088e-05,
"loss": 2.7838,
"step": 111800
},
{
"epoch": 0.81,
"learning_rate": 1.1914030291194725e-05,
"loss": 2.7771,
"step": 111900
},
{
"epoch": 0.81,
"learning_rate": 1.1906797436676361e-05,
"loss": 2.7797,
"step": 112000
},
{
"epoch": 0.81,
"eval_accuracy": 0.45710379786651667,
"eval_loss": 2.7928030490875244,
"eval_runtime": 30.824,
"eval_samples_per_second": 210.323,
"eval_steps_per_second": 2.206,
"step": 112000
},
{
"epoch": 0.81,
"learning_rate": 1.1899564582157996e-05,
"loss": 2.7739,
"step": 112100
},
{
"epoch": 0.81,
"learning_rate": 1.1892331727639633e-05,
"loss": 2.7837,
"step": 112200
},
{
"epoch": 0.81,
"learning_rate": 1.1885098873121268e-05,
"loss": 2.7712,
"step": 112300
},
{
"epoch": 0.81,
"learning_rate": 1.1877866018602904e-05,
"loss": 2.7802,
"step": 112400
},
{
"epoch": 0.81,
"learning_rate": 1.1870633164084539e-05,
"loss": 2.7717,
"step": 112500
},
{
"epoch": 0.81,
"learning_rate": 1.1863400309566175e-05,
"loss": 2.7827,
"step": 112600
},
{
"epoch": 0.82,
"learning_rate": 1.1856167455047812e-05,
"loss": 2.7758,
"step": 112700
},
{
"epoch": 0.82,
"learning_rate": 1.1848934600529447e-05,
"loss": 2.7769,
"step": 112800
},
{
"epoch": 0.82,
"learning_rate": 1.1841701746011083e-05,
"loss": 2.7712,
"step": 112900
},
{
"epoch": 0.82,
"learning_rate": 1.1834468891492716e-05,
"loss": 2.7792,
"step": 113000
},
{
"epoch": 0.82,
"eval_accuracy": 0.4573149080702773,
"eval_loss": 2.792189598083496,
"eval_runtime": 27.9605,
"eval_samples_per_second": 231.863,
"eval_steps_per_second": 2.432,
"step": 113000
},
{
"epoch": 0.82,
"learning_rate": 1.1827236036974353e-05,
"loss": 2.7799,
"step": 113100
},
{
"epoch": 0.82,
"learning_rate": 1.1820075511001173e-05,
"loss": 2.7849,
"step": 113200
},
{
"epoch": 0.82,
"learning_rate": 1.1812842656482807e-05,
"loss": 2.7789,
"step": 113300
},
{
"epoch": 0.82,
"learning_rate": 1.1805609801964444e-05,
"loss": 2.7732,
"step": 113400
},
{
"epoch": 0.82,
"learning_rate": 1.179837694744608e-05,
"loss": 2.7791,
"step": 113500
},
{
"epoch": 0.82,
"learning_rate": 1.1791144092927715e-05,
"loss": 2.7844,
"step": 113600
},
{
"epoch": 0.82,
"learning_rate": 1.1783911238409352e-05,
"loss": 2.7713,
"step": 113700
},
{
"epoch": 0.82,
"learning_rate": 1.1776678383890987e-05,
"loss": 2.7754,
"step": 113800
},
{
"epoch": 0.82,
"learning_rate": 1.1769445529372623e-05,
"loss": 2.7714,
"step": 113900
},
{
"epoch": 0.82,
"learning_rate": 1.1762212674854258e-05,
"loss": 2.7819,
"step": 114000
},
{
"epoch": 0.82,
"eval_accuracy": 0.4572997855628446,
"eval_loss": 2.791494846343994,
"eval_runtime": 28.1324,
"eval_samples_per_second": 230.446,
"eval_steps_per_second": 2.417,
"step": 114000
},
{
"epoch": 0.83,
"learning_rate": 1.1754979820335894e-05,
"loss": 2.7814,
"step": 114100
},
{
"epoch": 0.83,
"learning_rate": 1.1747746965817531e-05,
"loss": 2.7755,
"step": 114200
},
{
"epoch": 0.83,
"learning_rate": 1.1740514111299166e-05,
"loss": 2.7741,
"step": 114300
},
{
"epoch": 0.83,
"learning_rate": 1.1733281256780802e-05,
"loss": 2.772,
"step": 114400
},
{
"epoch": 0.83,
"learning_rate": 1.1726120730807622e-05,
"loss": 2.7795,
"step": 114500
},
{
"epoch": 0.83,
"learning_rate": 1.1718887876289257e-05,
"loss": 2.7728,
"step": 114600
},
{
"epoch": 0.83,
"learning_rate": 1.1711655021770893e-05,
"loss": 2.7714,
"step": 114700
},
{
"epoch": 0.83,
"learning_rate": 1.170442216725253e-05,
"loss": 2.7771,
"step": 114800
},
{
"epoch": 0.83,
"learning_rate": 1.1697189312734165e-05,
"loss": 2.779,
"step": 114900
},
{
"epoch": 0.83,
"learning_rate": 1.1689956458215801e-05,
"loss": 2.7837,
"step": 115000
},
{
"epoch": 0.83,
"eval_accuracy": 0.4572991806625473,
"eval_loss": 2.7909815311431885,
"eval_runtime": 29.3759,
"eval_samples_per_second": 220.691,
"eval_steps_per_second": 2.315,
"step": 115000
},
{
"epoch": 0.83,
"learning_rate": 1.168279593224262e-05,
"loss": 2.7764,
"step": 115100
},
{
"epoch": 0.83,
"learning_rate": 1.1675563077724254e-05,
"loss": 2.7767,
"step": 115200
},
{
"epoch": 0.83,
"learning_rate": 1.166833022320589e-05,
"loss": 2.7741,
"step": 115300
},
{
"epoch": 0.83,
"learning_rate": 1.1661097368687525e-05,
"loss": 2.7706,
"step": 115400
},
{
"epoch": 0.84,
"learning_rate": 1.1653864514169162e-05,
"loss": 2.7744,
"step": 115500
},
{
"epoch": 0.84,
"learning_rate": 1.1646631659650798e-05,
"loss": 2.7759,
"step": 115600
},
{
"epoch": 0.84,
"learning_rate": 1.1639398805132433e-05,
"loss": 2.77,
"step": 115700
},
{
"epoch": 0.84,
"learning_rate": 1.163216595061407e-05,
"loss": 2.7758,
"step": 115800
},
{
"epoch": 0.84,
"learning_rate": 1.1624933096095705e-05,
"loss": 2.7732,
"step": 115900
},
{
"epoch": 0.84,
"learning_rate": 1.1617700241577341e-05,
"loss": 2.781,
"step": 116000
},
{
"epoch": 0.84,
"eval_accuracy": 0.45746008414163136,
"eval_loss": 2.790616989135742,
"eval_runtime": 30.033,
"eval_samples_per_second": 215.862,
"eval_steps_per_second": 2.264,
"step": 116000
},
{
"epoch": 0.84,
"learning_rate": 1.1610467387058976e-05,
"loss": 2.776,
"step": 116100
},
{
"epoch": 0.84,
"learning_rate": 1.1603234532540612e-05,
"loss": 2.7762,
"step": 116200
},
{
"epoch": 0.84,
"learning_rate": 1.1596001678022249e-05,
"loss": 2.7787,
"step": 116300
},
{
"epoch": 0.84,
"learning_rate": 1.1588768823503884e-05,
"loss": 2.7725,
"step": 116400
},
{
"epoch": 0.84,
"learning_rate": 1.1581608297530704e-05,
"loss": 2.7759,
"step": 116500
},
{
"epoch": 0.84,
"learning_rate": 1.157437544301234e-05,
"loss": 2.774,
"step": 116600
},
{
"epoch": 0.84,
"learning_rate": 1.1567142588493977e-05,
"loss": 2.7737,
"step": 116700
},
{
"epoch": 0.84,
"learning_rate": 1.1559909733975611e-05,
"loss": 2.7801,
"step": 116800
},
{
"epoch": 0.85,
"learning_rate": 1.1552676879457248e-05,
"loss": 2.7774,
"step": 116900
},
{
"epoch": 0.85,
"learning_rate": 1.1545444024938883e-05,
"loss": 2.7765,
"step": 117000
},
{
"epoch": 0.85,
"eval_accuracy": 0.45765970123974314,
"eval_loss": 2.7898108959198,
"eval_runtime": 30.1629,
"eval_samples_per_second": 214.933,
"eval_steps_per_second": 2.254,
"step": 117000
},
{
"epoch": 0.85,
"learning_rate": 1.1538283498965704e-05,
"loss": 2.7819,
"step": 117100
},
{
"epoch": 0.85,
"learning_rate": 1.1531050644447339e-05,
"loss": 2.7817,
"step": 117200
},
{
"epoch": 0.85,
"learning_rate": 1.1523817789928975e-05,
"loss": 2.7694,
"step": 117300
},
{
"epoch": 0.85,
"learning_rate": 1.151658493541061e-05,
"loss": 2.7779,
"step": 117400
},
{
"epoch": 0.85,
"learning_rate": 1.1509352080892247e-05,
"loss": 2.7796,
"step": 117500
},
{
"epoch": 0.85,
"learning_rate": 1.1502119226373882e-05,
"loss": 2.7798,
"step": 117600
},
{
"epoch": 0.85,
"learning_rate": 1.1494886371855518e-05,
"loss": 2.7729,
"step": 117700
},
{
"epoch": 0.85,
"learning_rate": 1.1487653517337155e-05,
"loss": 2.7779,
"step": 117800
},
{
"epoch": 0.85,
"learning_rate": 1.148042066281879e-05,
"loss": 2.7795,
"step": 117900
},
{
"epoch": 0.85,
"learning_rate": 1.1473187808300426e-05,
"loss": 2.7778,
"step": 118000
},
{
"epoch": 0.85,
"eval_accuracy": 0.4575302525761191,
"eval_loss": 2.789475202560425,
"eval_runtime": 31.2717,
"eval_samples_per_second": 207.312,
"eval_steps_per_second": 2.174,
"step": 118000
},
{
"epoch": 0.85,
"learning_rate": 1.146595495378206e-05,
"loss": 2.7761,
"step": 118100
},
{
"epoch": 0.85,
"learning_rate": 1.1458722099263697e-05,
"loss": 2.7809,
"step": 118200
},
{
"epoch": 0.86,
"learning_rate": 1.1451489244745332e-05,
"loss": 2.7705,
"step": 118300
},
{
"epoch": 0.86,
"learning_rate": 1.1444256390226969e-05,
"loss": 2.773,
"step": 118400
},
{
"epoch": 0.86,
"learning_rate": 1.1437023535708605e-05,
"loss": 2.7688,
"step": 118500
},
{
"epoch": 0.86,
"learning_rate": 1.142979068119024e-05,
"loss": 2.7681,
"step": 118600
},
{
"epoch": 0.86,
"learning_rate": 1.1422557826671877e-05,
"loss": 2.7745,
"step": 118700
},
{
"epoch": 0.86,
"learning_rate": 1.1415324972153511e-05,
"loss": 2.7753,
"step": 118800
},
{
"epoch": 0.86,
"learning_rate": 1.1408092117635148e-05,
"loss": 2.7714,
"step": 118900
},
{
"epoch": 0.86,
"learning_rate": 1.1400859263116784e-05,
"loss": 2.776,
"step": 119000
},
{
"epoch": 0.86,
"eval_accuracy": 0.4576566767382566,
"eval_loss": 2.7886581420898438,
"eval_runtime": 29.9514,
"eval_samples_per_second": 216.451,
"eval_steps_per_second": 2.27,
"step": 119000
},
{
"epoch": 0.86,
"learning_rate": 1.139362640859842e-05,
"loss": 2.7766,
"step": 119100
},
{
"epoch": 0.86,
"learning_rate": 1.1386465882625237e-05,
"loss": 2.7718,
"step": 119200
},
{
"epoch": 0.86,
"learning_rate": 1.1379233028106874e-05,
"loss": 2.779,
"step": 119300
},
{
"epoch": 0.86,
"learning_rate": 1.1372000173588509e-05,
"loss": 2.7753,
"step": 119400
},
{
"epoch": 0.86,
"learning_rate": 1.1364767319070145e-05,
"loss": 2.7763,
"step": 119500
},
{
"epoch": 0.87,
"learning_rate": 1.135753446455178e-05,
"loss": 2.7798,
"step": 119600
},
{
"epoch": 0.87,
"learning_rate": 1.1350301610033416e-05,
"loss": 2.775,
"step": 119700
},
{
"epoch": 0.87,
"learning_rate": 1.1343068755515051e-05,
"loss": 2.7736,
"step": 119800
},
{
"epoch": 0.87,
"learning_rate": 1.1335908229541873e-05,
"loss": 2.7801,
"step": 119900
},
{
"epoch": 0.87,
"learning_rate": 1.1328675375023507e-05,
"loss": 2.7719,
"step": 120000
},
{
"epoch": 0.87,
"eval_accuracy": 0.45784359093012494,
"eval_loss": 2.788266658782959,
"eval_runtime": 31.0222,
"eval_samples_per_second": 208.979,
"eval_steps_per_second": 2.192,
"step": 120000
},
{
"epoch": 0.87,
"learning_rate": 1.1321442520505144e-05,
"loss": 2.771,
"step": 120100
},
{
"epoch": 0.87,
"learning_rate": 1.1314209665986779e-05,
"loss": 2.7762,
"step": 120200
},
{
"epoch": 0.87,
"learning_rate": 1.1306976811468415e-05,
"loss": 2.7741,
"step": 120300
},
{
"epoch": 0.87,
"learning_rate": 1.1299743956950052e-05,
"loss": 2.773,
"step": 120400
},
{
"epoch": 0.87,
"learning_rate": 1.1292511102431687e-05,
"loss": 2.7681,
"step": 120500
},
{
"epoch": 0.87,
"learning_rate": 1.1285278247913323e-05,
"loss": 2.7713,
"step": 120600
},
{
"epoch": 0.87,
"learning_rate": 1.1278045393394958e-05,
"loss": 2.7704,
"step": 120700
},
{
"epoch": 0.87,
"learning_rate": 1.1270812538876595e-05,
"loss": 2.7758,
"step": 120800
},
{
"epoch": 0.87,
"learning_rate": 1.126357968435823e-05,
"loss": 2.7696,
"step": 120900
},
{
"epoch": 0.88,
"learning_rate": 1.1256346829839866e-05,
"loss": 2.7759,
"step": 121000
},
{
"epoch": 0.88,
"eval_accuracy": 0.45785326933488185,
"eval_loss": 2.787775993347168,
"eval_runtime": 29.7592,
"eval_samples_per_second": 217.849,
"eval_steps_per_second": 2.285,
"step": 121000
},
{
"epoch": 0.88,
"learning_rate": 1.1249113975321502e-05,
"loss": 2.7682,
"step": 121100
},
{
"epoch": 0.88,
"learning_rate": 1.1241881120803137e-05,
"loss": 2.7752,
"step": 121200
},
{
"epoch": 0.88,
"learning_rate": 1.1234648266284774e-05,
"loss": 2.7708,
"step": 121300
},
{
"epoch": 0.88,
"learning_rate": 1.1227415411766409e-05,
"loss": 2.7718,
"step": 121400
},
{
"epoch": 0.88,
"learning_rate": 1.1220182557248045e-05,
"loss": 2.7701,
"step": 121500
},
{
"epoch": 0.88,
"learning_rate": 1.1212949702729682e-05,
"loss": 2.7732,
"step": 121600
},
{
"epoch": 0.88,
"learning_rate": 1.1205716848211316e-05,
"loss": 2.7709,
"step": 121700
},
{
"epoch": 0.88,
"learning_rate": 1.1198483993692953e-05,
"loss": 2.7735,
"step": 121800
},
{
"epoch": 0.88,
"learning_rate": 1.1191251139174588e-05,
"loss": 2.7724,
"step": 121900
},
{
"epoch": 0.88,
"learning_rate": 1.1184018284656224e-05,
"loss": 2.7654,
"step": 122000
},
{
"epoch": 0.88,
"eval_accuracy": 0.45778854500306987,
"eval_loss": 2.787409782409668,
"eval_runtime": 30.1905,
"eval_samples_per_second": 214.736,
"eval_steps_per_second": 2.252,
"step": 122000
},
{
"epoch": 0.88,
"learning_rate": 1.1176785430137859e-05,
"loss": 2.7705,
"step": 122100
},
{
"epoch": 0.88,
"learning_rate": 1.1169552575619496e-05,
"loss": 2.7753,
"step": 122200
},
{
"epoch": 0.88,
"learning_rate": 1.1162392049646314e-05,
"loss": 2.776,
"step": 122300
},
{
"epoch": 0.89,
"learning_rate": 1.1155159195127948e-05,
"loss": 2.7663,
"step": 122400
},
{
"epoch": 0.89,
"learning_rate": 1.1147926340609585e-05,
"loss": 2.7699,
"step": 122500
},
{
"epoch": 0.89,
"learning_rate": 1.1140693486091221e-05,
"loss": 2.7761,
"step": 122600
},
{
"epoch": 0.89,
"learning_rate": 1.1133460631572856e-05,
"loss": 2.7632,
"step": 122700
},
{
"epoch": 0.89,
"learning_rate": 1.1126227777054493e-05,
"loss": 2.7688,
"step": 122800
},
{
"epoch": 0.89,
"learning_rate": 1.1118994922536128e-05,
"loss": 2.7719,
"step": 122900
},
{
"epoch": 0.89,
"learning_rate": 1.1111762068017764e-05,
"loss": 2.7661,
"step": 123000
},
{
"epoch": 0.89,
"eval_accuracy": 0.45800570420980363,
"eval_loss": 2.7868072986602783,
"eval_runtime": 29.8221,
"eval_samples_per_second": 217.389,
"eval_steps_per_second": 2.28,
"step": 123000
},
{
"epoch": 0.89,
"learning_rate": 1.11045292134994e-05,
"loss": 2.7701,
"step": 123100
},
{
"epoch": 0.89,
"learning_rate": 1.1097296358981035e-05,
"loss": 2.763,
"step": 123200
},
{
"epoch": 0.89,
"learning_rate": 1.1090063504462672e-05,
"loss": 2.7767,
"step": 123300
},
{
"epoch": 0.89,
"learning_rate": 1.1082830649944307e-05,
"loss": 2.7696,
"step": 123400
},
{
"epoch": 0.89,
"learning_rate": 1.1075597795425943e-05,
"loss": 2.7704,
"step": 123500
},
{
"epoch": 0.89,
"learning_rate": 1.1068364940907578e-05,
"loss": 2.7772,
"step": 123600
},
{
"epoch": 0.89,
"learning_rate": 1.1061132086389215e-05,
"loss": 2.7761,
"step": 123700
},
{
"epoch": 0.9,
"learning_rate": 1.1053899231870851e-05,
"loss": 2.7743,
"step": 123800
},
{
"epoch": 0.9,
"learning_rate": 1.1046666377352486e-05,
"loss": 2.7656,
"step": 123900
},
{
"epoch": 0.9,
"learning_rate": 1.1039433522834122e-05,
"loss": 2.7718,
"step": 124000
},
{
"epoch": 0.9,
"eval_accuracy": 0.4579833228988032,
"eval_loss": 2.786105155944824,
"eval_runtime": 29.402,
"eval_samples_per_second": 220.495,
"eval_steps_per_second": 2.313,
"step": 124000
},
{
"epoch": 0.9,
"learning_rate": 1.1032200668315757e-05,
"loss": 2.7757,
"step": 124100
},
{
"epoch": 0.9,
"learning_rate": 1.1024967813797394e-05,
"loss": 2.7759,
"step": 124200
},
{
"epoch": 0.9,
"learning_rate": 1.101773495927903e-05,
"loss": 2.7791,
"step": 124300
},
{
"epoch": 0.9,
"learning_rate": 1.1010502104760665e-05,
"loss": 2.7698,
"step": 124400
},
{
"epoch": 0.9,
"learning_rate": 1.1003269250242302e-05,
"loss": 2.7707,
"step": 124500
},
{
"epoch": 0.9,
"learning_rate": 1.0996036395723936e-05,
"loss": 2.7732,
"step": 124600
},
{
"epoch": 0.9,
"learning_rate": 1.0988803541205573e-05,
"loss": 2.7705,
"step": 124700
},
{
"epoch": 0.9,
"learning_rate": 1.0981570686687208e-05,
"loss": 2.7693,
"step": 124800
},
{
"epoch": 0.9,
"learning_rate": 1.0974337832168844e-05,
"loss": 2.7693,
"step": 124900
},
{
"epoch": 0.9,
"learning_rate": 1.0967177306195664e-05,
"loss": 2.7775,
"step": 125000
},
{
"epoch": 0.9,
"eval_accuracy": 0.4579536827842351,
"eval_loss": 2.785790205001831,
"eval_runtime": 30.0386,
"eval_samples_per_second": 215.823,
"eval_steps_per_second": 2.264,
"step": 125000
},
{
"epoch": 0.9,
"learning_rate": 1.09599444516773e-05,
"loss": 2.7759,
"step": 125100
},
{
"epoch": 0.91,
"learning_rate": 1.0952711597158935e-05,
"loss": 2.7729,
"step": 125200
},
{
"epoch": 0.91,
"learning_rate": 1.0945478742640572e-05,
"loss": 2.7698,
"step": 125300
},
{
"epoch": 0.91,
"learning_rate": 1.0938245888122208e-05,
"loss": 2.7665,
"step": 125400
},
{
"epoch": 0.91,
"learning_rate": 1.0931013033603843e-05,
"loss": 2.7668,
"step": 125500
},
{
"epoch": 0.91,
"learning_rate": 1.092378017908548e-05,
"loss": 2.771,
"step": 125600
},
{
"epoch": 0.91,
"learning_rate": 1.0916547324567115e-05,
"loss": 2.7749,
"step": 125700
},
{
"epoch": 0.91,
"learning_rate": 1.0909386798593934e-05,
"loss": 2.7676,
"step": 125800
},
{
"epoch": 0.91,
"learning_rate": 1.090215394407557e-05,
"loss": 2.7716,
"step": 125900
},
{
"epoch": 0.91,
"learning_rate": 1.0894921089557207e-05,
"loss": 2.7835,
"step": 126000
},
{
"epoch": 0.91,
"eval_accuracy": 0.45800933361158747,
"eval_loss": 2.7854855060577393,
"eval_runtime": 27.8858,
"eval_samples_per_second": 232.484,
"eval_steps_per_second": 2.439,
"step": 126000
},
{
"epoch": 0.91,
"learning_rate": 1.0887688235038842e-05,
"loss": 2.7658,
"step": 126100
},
{
"epoch": 0.91,
"learning_rate": 1.0880455380520475e-05,
"loss": 2.7706,
"step": 126200
},
{
"epoch": 0.91,
"learning_rate": 1.0873222526002112e-05,
"loss": 2.7689,
"step": 126300
},
{
"epoch": 0.91,
"learning_rate": 1.0865989671483748e-05,
"loss": 2.768,
"step": 126400
},
{
"epoch": 0.91,
"learning_rate": 1.0858756816965383e-05,
"loss": 2.7656,
"step": 126500
},
{
"epoch": 0.92,
"learning_rate": 1.085152396244702e-05,
"loss": 2.769,
"step": 126600
},
{
"epoch": 0.92,
"learning_rate": 1.0844291107928654e-05,
"loss": 2.7722,
"step": 126700
},
{
"epoch": 0.92,
"learning_rate": 1.0837058253410291e-05,
"loss": 2.7725,
"step": 126800
},
{
"epoch": 0.92,
"learning_rate": 1.0829825398891927e-05,
"loss": 2.7739,
"step": 126900
},
{
"epoch": 0.92,
"learning_rate": 1.0822592544373562e-05,
"loss": 2.768,
"step": 127000
},
{
"epoch": 0.92,
"eval_accuracy": 0.4580704285416156,
"eval_loss": 2.784817695617676,
"eval_runtime": 30.4636,
"eval_samples_per_second": 212.811,
"eval_steps_per_second": 2.232,
"step": 127000
},
{
"epoch": 0.92,
"learning_rate": 1.0815359689855199e-05,
"loss": 2.7736,
"step": 127100
},
{
"epoch": 0.92,
"learning_rate": 1.0808126835336834e-05,
"loss": 2.7665,
"step": 127200
},
{
"epoch": 0.92,
"learning_rate": 1.0800966309363653e-05,
"loss": 2.7702,
"step": 127300
},
{
"epoch": 0.92,
"learning_rate": 1.079373345484529e-05,
"loss": 2.7739,
"step": 127400
},
{
"epoch": 0.92,
"learning_rate": 1.0786500600326926e-05,
"loss": 2.7685,
"step": 127500
},
{
"epoch": 0.92,
"learning_rate": 1.0779267745808561e-05,
"loss": 2.7725,
"step": 127600
},
{
"epoch": 0.92,
"learning_rate": 1.0772034891290198e-05,
"loss": 2.7645,
"step": 127700
},
{
"epoch": 0.92,
"learning_rate": 1.0764802036771833e-05,
"loss": 2.7751,
"step": 127800
},
{
"epoch": 0.93,
"learning_rate": 1.0757569182253469e-05,
"loss": 2.7822,
"step": 127900
},
{
"epoch": 0.93,
"learning_rate": 1.0750336327735106e-05,
"loss": 2.7701,
"step": 128000
},
{
"epoch": 0.93,
"eval_accuracy": 0.45819806250434775,
"eval_loss": 2.7843172550201416,
"eval_runtime": 29.9391,
"eval_samples_per_second": 216.54,
"eval_steps_per_second": 2.271,
"step": 128000
},
{
"epoch": 0.93,
"learning_rate": 1.074310347321674e-05,
"loss": 2.7667,
"step": 128100
},
{
"epoch": 0.93,
"learning_rate": 1.0735870618698377e-05,
"loss": 2.7741,
"step": 128200
},
{
"epoch": 0.93,
"learning_rate": 1.0728637764180012e-05,
"loss": 2.767,
"step": 128300
},
{
"epoch": 0.93,
"learning_rate": 1.0721404909661648e-05,
"loss": 2.7672,
"step": 128400
},
{
"epoch": 0.93,
"learning_rate": 1.0714172055143283e-05,
"loss": 2.7694,
"step": 128500
},
{
"epoch": 0.93,
"learning_rate": 1.070693920062492e-05,
"loss": 2.775,
"step": 128600
},
{
"epoch": 0.93,
"learning_rate": 1.0699706346106556e-05,
"loss": 2.7747,
"step": 128700
},
{
"epoch": 0.93,
"learning_rate": 1.0692473491588191e-05,
"loss": 2.7689,
"step": 128800
},
{
"epoch": 0.93,
"learning_rate": 1.0685240637069827e-05,
"loss": 2.7631,
"step": 128900
},
{
"epoch": 0.93,
"learning_rate": 1.0678007782551462e-05,
"loss": 2.7682,
"step": 129000
},
{
"epoch": 0.93,
"eval_accuracy": 0.45829424165161975,
"eval_loss": 2.783777952194214,
"eval_runtime": 29.7932,
"eval_samples_per_second": 217.6,
"eval_steps_per_second": 2.282,
"step": 129000
},
{
"epoch": 0.93,
"learning_rate": 1.0670774928033099e-05,
"loss": 2.7683,
"step": 129100
},
{
"epoch": 0.93,
"learning_rate": 1.0663542073514734e-05,
"loss": 2.773,
"step": 129200
},
{
"epoch": 0.94,
"learning_rate": 1.065630921899637e-05,
"loss": 2.769,
"step": 129300
},
{
"epoch": 0.94,
"learning_rate": 1.0649076364478007e-05,
"loss": 2.7738,
"step": 129400
},
{
"epoch": 0.94,
"learning_rate": 1.0641915838504826e-05,
"loss": 2.7609,
"step": 129500
},
{
"epoch": 0.94,
"learning_rate": 1.0634682983986461e-05,
"loss": 2.7699,
"step": 129600
},
{
"epoch": 0.94,
"learning_rate": 1.062752245801328e-05,
"loss": 2.7711,
"step": 129700
},
{
"epoch": 0.94,
"learning_rate": 1.0620289603494916e-05,
"loss": 2.7743,
"step": 129800
},
{
"epoch": 0.94,
"learning_rate": 1.061305674897655e-05,
"loss": 2.768,
"step": 129900
},
{
"epoch": 0.94,
"learning_rate": 1.0605823894458187e-05,
"loss": 2.7595,
"step": 130000
},
{
"epoch": 0.94,
"eval_accuracy": 0.458337794473026,
"eval_loss": 2.7834300994873047,
"eval_runtime": 28.4215,
"eval_samples_per_second": 228.102,
"eval_steps_per_second": 2.393,
"step": 130000
},
{
"epoch": 0.94,
"learning_rate": 1.0598591039939824e-05,
"loss": 2.7653,
"step": 130100
},
{
"epoch": 0.94,
"learning_rate": 1.0591358185421458e-05,
"loss": 2.7622,
"step": 130200
},
{
"epoch": 0.94,
"learning_rate": 1.0584125330903095e-05,
"loss": 2.7737,
"step": 130300
},
{
"epoch": 0.94,
"learning_rate": 1.057689247638473e-05,
"loss": 2.7744,
"step": 130400
},
{
"epoch": 0.94,
"learning_rate": 1.0569659621866366e-05,
"loss": 2.7739,
"step": 130500
},
{
"epoch": 0.94,
"learning_rate": 1.0562426767348001e-05,
"loss": 2.7748,
"step": 130600
},
{
"epoch": 0.95,
"learning_rate": 1.0555193912829638e-05,
"loss": 2.7696,
"step": 130700
},
{
"epoch": 0.95,
"learning_rate": 1.0547961058311274e-05,
"loss": 2.7628,
"step": 130800
},
{
"epoch": 0.95,
"learning_rate": 1.0540728203792909e-05,
"loss": 2.7739,
"step": 130900
},
{
"epoch": 0.95,
"learning_rate": 1.0533495349274545e-05,
"loss": 2.7627,
"step": 131000
},
{
"epoch": 0.95,
"eval_accuracy": 0.45831420336143097,
"eval_loss": 2.783060312271118,
"eval_runtime": 31.194,
"eval_samples_per_second": 207.829,
"eval_steps_per_second": 2.18,
"step": 131000
},
{
"epoch": 0.95,
"learning_rate": 1.052626249475618e-05,
"loss": 2.7654,
"step": 131100
},
{
"epoch": 0.95,
"learning_rate": 1.0519029640237817e-05,
"loss": 2.7662,
"step": 131200
},
{
"epoch": 0.95,
"learning_rate": 1.0511796785719453e-05,
"loss": 2.7627,
"step": 131300
},
{
"epoch": 0.95,
"learning_rate": 1.0504636259746273e-05,
"loss": 2.7735,
"step": 131400
},
{
"epoch": 0.95,
"learning_rate": 1.0497403405227908e-05,
"loss": 2.7787,
"step": 131500
},
{
"epoch": 0.95,
"learning_rate": 1.0490170550709544e-05,
"loss": 2.7708,
"step": 131600
},
{
"epoch": 0.95,
"learning_rate": 1.0482937696191179e-05,
"loss": 2.7678,
"step": 131700
},
{
"epoch": 0.95,
"learning_rate": 1.0475704841672816e-05,
"loss": 2.7681,
"step": 131800
},
{
"epoch": 0.95,
"learning_rate": 1.0468471987154452e-05,
"loss": 2.7792,
"step": 131900
},
{
"epoch": 0.95,
"learning_rate": 1.0461239132636087e-05,
"loss": 2.7716,
"step": 132000
},
{
"epoch": 0.95,
"eval_accuracy": 0.45840554330632455,
"eval_loss": 2.7826600074768066,
"eval_runtime": 29.7072,
"eval_samples_per_second": 218.23,
"eval_steps_per_second": 2.289,
"step": 132000
},
{
"epoch": 0.96,
"learning_rate": 1.0454006278117723e-05,
"loss": 2.7721,
"step": 132100
},
{
"epoch": 0.96,
"learning_rate": 1.0446773423599358e-05,
"loss": 2.7637,
"step": 132200
},
{
"epoch": 0.96,
"learning_rate": 1.0439540569080995e-05,
"loss": 2.7743,
"step": 132300
},
{
"epoch": 0.96,
"learning_rate": 1.0432307714562631e-05,
"loss": 2.7649,
"step": 132400
},
{
"epoch": 0.96,
"learning_rate": 1.0425074860044266e-05,
"loss": 2.7672,
"step": 132500
},
{
"epoch": 0.96,
"learning_rate": 1.0417842005525903e-05,
"loss": 2.7683,
"step": 132600
},
{
"epoch": 0.96,
"learning_rate": 1.0410609151007537e-05,
"loss": 2.7668,
"step": 132700
},
{
"epoch": 0.96,
"learning_rate": 1.0403376296489174e-05,
"loss": 2.7582,
"step": 132800
},
{
"epoch": 0.96,
"learning_rate": 1.0396143441970809e-05,
"loss": 2.7663,
"step": 132900
},
{
"epoch": 0.96,
"learning_rate": 1.0388910587452445e-05,
"loss": 2.7719,
"step": 133000
},
{
"epoch": 0.96,
"eval_accuracy": 0.45850656165597503,
"eval_loss": 2.782144069671631,
"eval_runtime": 29.573,
"eval_samples_per_second": 219.22,
"eval_steps_per_second": 2.299,
"step": 133000
},
{
"epoch": 0.96,
"learning_rate": 1.0381677732934082e-05,
"loss": 2.77,
"step": 133100
},
{
"epoch": 0.96,
"learning_rate": 1.0374444878415717e-05,
"loss": 2.7684,
"step": 133200
},
{
"epoch": 0.96,
"learning_rate": 1.0367212023897353e-05,
"loss": 2.7562,
"step": 133300
},
{
"epoch": 0.96,
"learning_rate": 1.0359979169378988e-05,
"loss": 2.7682,
"step": 133400
},
{
"epoch": 0.97,
"learning_rate": 1.0352746314860625e-05,
"loss": 2.7686,
"step": 133500
},
{
"epoch": 0.97,
"learning_rate": 1.0345513460342261e-05,
"loss": 2.7624,
"step": 133600
},
{
"epoch": 0.97,
"learning_rate": 1.0338352934369077e-05,
"loss": 2.7643,
"step": 133700
},
{
"epoch": 0.97,
"learning_rate": 1.0331120079850714e-05,
"loss": 2.7725,
"step": 133800
},
{
"epoch": 0.97,
"learning_rate": 1.0323959553877534e-05,
"loss": 2.7617,
"step": 133900
},
{
"epoch": 0.97,
"learning_rate": 1.031672669935917e-05,
"loss": 2.7723,
"step": 134000
},
{
"epoch": 0.97,
"eval_accuracy": 0.45827972404448436,
"eval_loss": 2.781625747680664,
"eval_runtime": 30.6595,
"eval_samples_per_second": 211.452,
"eval_steps_per_second": 2.218,
"step": 134000
},
{
"epoch": 0.97,
"learning_rate": 1.0309493844840805e-05,
"loss": 2.7677,
"step": 134100
},
{
"epoch": 0.97,
"learning_rate": 1.0302260990322441e-05,
"loss": 2.7655,
"step": 134200
},
{
"epoch": 0.97,
"learning_rate": 1.0295028135804076e-05,
"loss": 2.7718,
"step": 134300
},
{
"epoch": 0.97,
"learning_rate": 1.0287795281285713e-05,
"loss": 2.7667,
"step": 134400
},
{
"epoch": 0.97,
"learning_rate": 1.028056242676735e-05,
"loss": 2.7639,
"step": 134500
},
{
"epoch": 0.97,
"learning_rate": 1.0273329572248984e-05,
"loss": 2.771,
"step": 134600
},
{
"epoch": 0.97,
"learning_rate": 1.026609671773062e-05,
"loss": 2.7588,
"step": 134700
},
{
"epoch": 0.97,
"learning_rate": 1.0258863863212255e-05,
"loss": 2.7687,
"step": 134800
},
{
"epoch": 0.98,
"learning_rate": 1.0251631008693892e-05,
"loss": 2.7606,
"step": 134900
},
{
"epoch": 0.98,
"learning_rate": 1.0244398154175529e-05,
"loss": 2.7736,
"step": 135000
},
{
"epoch": 0.98,
"eval_accuracy": 0.45850595675567773,
"eval_loss": 2.7812275886535645,
"eval_runtime": 31.0326,
"eval_samples_per_second": 208.91,
"eval_steps_per_second": 2.191,
"step": 135000
},
{
"epoch": 0.98,
"learning_rate": 1.0237165299657163e-05,
"loss": 2.768,
"step": 135100
},
{
"epoch": 0.98,
"learning_rate": 1.02299324451388e-05,
"loss": 2.7736,
"step": 135200
},
{
"epoch": 0.98,
"learning_rate": 1.0222699590620435e-05,
"loss": 2.7712,
"step": 135300
},
{
"epoch": 0.98,
"learning_rate": 1.0215466736102071e-05,
"loss": 2.7636,
"step": 135400
},
{
"epoch": 0.98,
"learning_rate": 1.0208306210128891e-05,
"loss": 2.7671,
"step": 135500
},
{
"epoch": 0.98,
"learning_rate": 1.0201073355610527e-05,
"loss": 2.7746,
"step": 135600
},
{
"epoch": 0.98,
"learning_rate": 1.0193840501092162e-05,
"loss": 2.7717,
"step": 135700
},
{
"epoch": 0.98,
"learning_rate": 1.0186607646573799e-05,
"loss": 2.7739,
"step": 135800
},
{
"epoch": 0.98,
"learning_rate": 1.0179374792055434e-05,
"loss": 2.766,
"step": 135900
},
{
"epoch": 0.98,
"learning_rate": 1.017214193753707e-05,
"loss": 2.7646,
"step": 136000
},
{
"epoch": 0.98,
"eval_accuracy": 0.4585743104892736,
"eval_loss": 2.7808570861816406,
"eval_runtime": 30.7384,
"eval_samples_per_second": 210.909,
"eval_steps_per_second": 2.212,
"step": 136000
},
{
"epoch": 0.98,
"learning_rate": 1.0164909083018707e-05,
"loss": 2.7681,
"step": 136100
},
{
"epoch": 0.99,
"learning_rate": 1.0157676228500341e-05,
"loss": 2.766,
"step": 136200
},
{
"epoch": 0.99,
"learning_rate": 1.0150443373981978e-05,
"loss": 2.7689,
"step": 136300
},
{
"epoch": 0.99,
"learning_rate": 1.0143210519463613e-05,
"loss": 2.7692,
"step": 136400
},
{
"epoch": 0.99,
"learning_rate": 1.013597766494525e-05,
"loss": 2.766,
"step": 136500
},
{
"epoch": 0.99,
"learning_rate": 1.0128744810426884e-05,
"loss": 2.7685,
"step": 136600
},
{
"epoch": 0.99,
"learning_rate": 1.012151195590852e-05,
"loss": 2.7625,
"step": 136700
},
{
"epoch": 0.99,
"learning_rate": 1.0114351429935339e-05,
"loss": 2.7596,
"step": 136800
},
{
"epoch": 0.99,
"learning_rate": 1.0107118575416973e-05,
"loss": 2.7644,
"step": 136900
},
{
"epoch": 0.99,
"learning_rate": 1.009988572089861e-05,
"loss": 2.76,
"step": 137000
},
{
"epoch": 0.99,
"eval_accuracy": 0.458596691800274,
"eval_loss": 2.7804572582244873,
"eval_runtime": 29.4041,
"eval_samples_per_second": 220.48,
"eval_steps_per_second": 2.313,
"step": 137000
},
{
"epoch": 0.99,
"learning_rate": 1.0092652866380247e-05,
"loss": 2.7618,
"step": 137100
},
{
"epoch": 0.99,
"learning_rate": 1.0085420011861881e-05,
"loss": 2.7663,
"step": 137200
},
{
"epoch": 0.99,
"learning_rate": 1.0078187157343518e-05,
"loss": 2.7684,
"step": 137300
},
{
"epoch": 0.99,
"learning_rate": 1.0071026631370338e-05,
"loss": 2.7632,
"step": 137400
},
{
"epoch": 0.99,
"learning_rate": 1.0063793776851974e-05,
"loss": 2.7688,
"step": 137500
},
{
"epoch": 1.0,
"learning_rate": 1.0056560922333609e-05,
"loss": 2.7616,
"step": 137600
},
{
"epoch": 1.0,
"learning_rate": 1.0049328067815245e-05,
"loss": 2.7634,
"step": 137700
},
{
"epoch": 1.0,
"learning_rate": 1.004209521329688e-05,
"loss": 2.7608,
"step": 137800
},
{
"epoch": 1.0,
"learning_rate": 1.0034862358778517e-05,
"loss": 2.7697,
"step": 137900
},
{
"epoch": 1.0,
"learning_rate": 1.0027629504260152e-05,
"loss": 2.7659,
"step": 138000
},
{
"epoch": 1.0,
"eval_accuracy": 0.4585610026827328,
"eval_loss": 2.780273914337158,
"eval_runtime": 29.5606,
"eval_samples_per_second": 219.313,
"eval_steps_per_second": 2.3,
"step": 138000
},
{
"epoch": 1.0,
"learning_rate": 1.0020396649741788e-05,
"loss": 2.7704,
"step": 138100
},
{
"epoch": 1.0,
"learning_rate": 1.0013163795223425e-05,
"loss": 2.7639,
"step": 138200
},
{
"epoch": 1.0,
"learning_rate": 1.000593094070506e-05,
"loss": 2.7586,
"step": 138300
},
{
"epoch": 1.0,
"learning_rate": 9.998698086186696e-06,
"loss": 2.7646,
"step": 138400
},
{
"epoch": 1.0,
"learning_rate": 9.99146523166833e-06,
"loss": 2.7588,
"step": 138500
},
{
"epoch": 1.0,
"learning_rate": 9.984232377149967e-06,
"loss": 2.7584,
"step": 138600
},
{
"epoch": 1.0,
"learning_rate": 9.976999522631604e-06,
"loss": 2.7627,
"step": 138700
},
{
"epoch": 1.0,
"learning_rate": 9.969766668113239e-06,
"loss": 2.7523,
"step": 138800
},
{
"epoch": 1.0,
"learning_rate": 9.962533813594875e-06,
"loss": 2.7636,
"step": 138900
},
{
"epoch": 1.01,
"learning_rate": 9.95530095907651e-06,
"loss": 2.7604,
"step": 139000
},
{
"epoch": 1.01,
"eval_accuracy": 0.45872069636122226,
"eval_loss": 2.7798776626586914,
"eval_runtime": 29.5234,
"eval_samples_per_second": 219.589,
"eval_steps_per_second": 2.303,
"step": 139000
},
{
"epoch": 1.01,
"learning_rate": 9.948068104558146e-06,
"loss": 2.7533,
"step": 139100
},
{
"epoch": 1.01,
"learning_rate": 9.940835250039781e-06,
"loss": 2.7478,
"step": 139200
},
{
"epoch": 1.01,
"learning_rate": 9.933602395521418e-06,
"loss": 2.762,
"step": 139300
},
{
"epoch": 1.01,
"learning_rate": 9.926369541003054e-06,
"loss": 2.7616,
"step": 139400
},
{
"epoch": 1.01,
"learning_rate": 9.919136686484689e-06,
"loss": 2.766,
"step": 139500
},
{
"epoch": 1.01,
"learning_rate": 9.911903831966324e-06,
"loss": 2.762,
"step": 139600
},
{
"epoch": 1.01,
"learning_rate": 9.90467097744796e-06,
"loss": 2.7599,
"step": 139700
},
{
"epoch": 1.01,
"learning_rate": 9.897438122929595e-06,
"loss": 2.7588,
"step": 139800
},
{
"epoch": 1.01,
"learning_rate": 9.890205268411232e-06,
"loss": 2.752,
"step": 139900
},
{
"epoch": 1.01,
"learning_rate": 9.882972413892867e-06,
"loss": 2.7597,
"step": 140000
},
{
"epoch": 1.01,
"eval_accuracy": 0.4586741190383295,
"eval_loss": 2.7793562412261963,
"eval_runtime": 32.292,
"eval_samples_per_second": 200.762,
"eval_steps_per_second": 2.106,
"step": 140000
},
{
"epoch": 1.01,
"learning_rate": 9.875739559374503e-06,
"loss": 2.7523,
"step": 140100
},
{
"epoch": 1.01,
"learning_rate": 9.86850670485614e-06,
"loss": 2.7645,
"step": 140200
},
{
"epoch": 1.01,
"learning_rate": 9.861273850337774e-06,
"loss": 2.7598,
"step": 140300
},
{
"epoch": 1.02,
"learning_rate": 9.854040995819411e-06,
"loss": 2.7589,
"step": 140400
},
{
"epoch": 1.02,
"learning_rate": 9.846808141301046e-06,
"loss": 2.755,
"step": 140500
},
{
"epoch": 1.02,
"learning_rate": 9.839575286782682e-06,
"loss": 2.7631,
"step": 140600
},
{
"epoch": 1.02,
"learning_rate": 9.832342432264317e-06,
"loss": 2.7571,
"step": 140700
},
{
"epoch": 1.02,
"learning_rate": 9.825109577745954e-06,
"loss": 2.7596,
"step": 140800
},
{
"epoch": 1.02,
"learning_rate": 9.817949051772773e-06,
"loss": 2.7562,
"step": 140900
},
{
"epoch": 1.02,
"learning_rate": 9.81071619725441e-06,
"loss": 2.7551,
"step": 141000
},
{
"epoch": 1.02,
"eval_accuracy": 0.45877937169006117,
"eval_loss": 2.7791290283203125,
"eval_runtime": 29.4298,
"eval_samples_per_second": 220.287,
"eval_steps_per_second": 2.311,
"step": 141000
},
{
"epoch": 1.02,
"learning_rate": 9.803483342736045e-06,
"loss": 2.753,
"step": 141100
},
{
"epoch": 1.02,
"learning_rate": 9.796250488217681e-06,
"loss": 2.7499,
"step": 141200
},
{
"epoch": 1.02,
"learning_rate": 9.789017633699318e-06,
"loss": 2.7595,
"step": 141300
},
{
"epoch": 1.02,
"learning_rate": 9.781784779180953e-06,
"loss": 2.7612,
"step": 141400
},
{
"epoch": 1.02,
"learning_rate": 9.774551924662589e-06,
"loss": 2.7634,
"step": 141500
},
{
"epoch": 1.02,
"learning_rate": 9.767319070144224e-06,
"loss": 2.749,
"step": 141600
},
{
"epoch": 1.02,
"learning_rate": 9.76008621562586e-06,
"loss": 2.7567,
"step": 141700
},
{
"epoch": 1.03,
"learning_rate": 9.752853361107495e-06,
"loss": 2.7546,
"step": 141800
},
{
"epoch": 1.03,
"learning_rate": 9.74562050658913e-06,
"loss": 2.7588,
"step": 141900
},
{
"epoch": 1.03,
"learning_rate": 9.738387652070767e-06,
"loss": 2.7619,
"step": 142000
},
{
"epoch": 1.03,
"eval_accuracy": 0.4588090118046293,
"eval_loss": 2.7787861824035645,
"eval_runtime": 29.6893,
"eval_samples_per_second": 218.361,
"eval_steps_per_second": 2.29,
"step": 142000
},
{
"epoch": 1.03,
"learning_rate": 9.731227126097586e-06,
"loss": 2.7533,
"step": 142100
},
{
"epoch": 1.03,
"learning_rate": 9.723994271579223e-06,
"loss": 2.7554,
"step": 142200
},
{
"epoch": 1.03,
"learning_rate": 9.716761417060858e-06,
"loss": 2.7627,
"step": 142300
},
{
"epoch": 1.03,
"learning_rate": 9.709528562542494e-06,
"loss": 2.7635,
"step": 142400
},
{
"epoch": 1.03,
"learning_rate": 9.702368036569312e-06,
"loss": 2.757,
"step": 142500
},
{
"epoch": 1.03,
"learning_rate": 9.695135182050949e-06,
"loss": 2.7527,
"step": 142600
},
{
"epoch": 1.03,
"learning_rate": 9.687902327532585e-06,
"loss": 2.761,
"step": 142700
},
{
"epoch": 1.03,
"learning_rate": 9.68066947301422e-06,
"loss": 2.7562,
"step": 142800
},
{
"epoch": 1.03,
"learning_rate": 9.673436618495857e-06,
"loss": 2.7565,
"step": 142900
},
{
"epoch": 1.03,
"learning_rate": 9.666203763977491e-06,
"loss": 2.7658,
"step": 143000
},
{
"epoch": 1.03,
"eval_accuracy": 0.45889793214833363,
"eval_loss": 2.778470754623413,
"eval_runtime": 31.4699,
"eval_samples_per_second": 206.006,
"eval_steps_per_second": 2.161,
"step": 143000
},
{
"epoch": 1.04,
"learning_rate": 9.658970909459128e-06,
"loss": 2.7582,
"step": 143100
},
{
"epoch": 1.04,
"learning_rate": 9.651738054940764e-06,
"loss": 2.7627,
"step": 143200
},
{
"epoch": 1.04,
"learning_rate": 9.6445052004224e-06,
"loss": 2.7568,
"step": 143300
},
{
"epoch": 1.04,
"learning_rate": 9.637272345904036e-06,
"loss": 2.7529,
"step": 143400
},
{
"epoch": 1.04,
"learning_rate": 9.63003949138567e-06,
"loss": 2.7641,
"step": 143500
},
{
"epoch": 1.04,
"learning_rate": 9.622806636867307e-06,
"loss": 2.7539,
"step": 143600
},
{
"epoch": 1.04,
"learning_rate": 9.615573782348942e-06,
"loss": 2.7585,
"step": 143700
},
{
"epoch": 1.04,
"learning_rate": 9.608340927830578e-06,
"loss": 2.7628,
"step": 143800
},
{
"epoch": 1.04,
"learning_rate": 9.601108073312215e-06,
"loss": 2.763,
"step": 143900
},
{
"epoch": 1.04,
"learning_rate": 9.59387521879385e-06,
"loss": 2.751,
"step": 144000
},
{
"epoch": 1.04,
"eval_accuracy": 0.45885679892811665,
"eval_loss": 2.778137683868408,
"eval_runtime": 32.5333,
"eval_samples_per_second": 199.273,
"eval_steps_per_second": 2.09,
"step": 144000
},
{
"epoch": 1.04,
"learning_rate": 9.586642364275486e-06,
"loss": 2.7642,
"step": 144100
},
{
"epoch": 1.04,
"learning_rate": 9.579409509757121e-06,
"loss": 2.7635,
"step": 144200
},
{
"epoch": 1.04,
"learning_rate": 9.572176655238758e-06,
"loss": 2.7698,
"step": 144300
},
{
"epoch": 1.04,
"learning_rate": 9.564943800720392e-06,
"loss": 2.7617,
"step": 144400
},
{
"epoch": 1.05,
"learning_rate": 9.557710946202029e-06,
"loss": 2.7541,
"step": 144500
},
{
"epoch": 1.05,
"learning_rate": 9.550478091683665e-06,
"loss": 2.7584,
"step": 144600
},
{
"epoch": 1.05,
"learning_rate": 9.5432452371653e-06,
"loss": 2.7594,
"step": 144700
},
{
"epoch": 1.05,
"learning_rate": 9.536012382646937e-06,
"loss": 2.7506,
"step": 144800
},
{
"epoch": 1.05,
"learning_rate": 9.528779528128572e-06,
"loss": 2.7573,
"step": 144900
},
{
"epoch": 1.05,
"learning_rate": 9.521546673610208e-06,
"loss": 2.7589,
"step": 145000
},
{
"epoch": 1.05,
"eval_accuracy": 0.45895902707836184,
"eval_loss": 2.7777721881866455,
"eval_runtime": 29.5823,
"eval_samples_per_second": 219.151,
"eval_steps_per_second": 2.299,
"step": 145000
},
{
"epoch": 1.05,
"learning_rate": 9.514313819091845e-06,
"loss": 2.7514,
"step": 145100
},
{
"epoch": 1.05,
"learning_rate": 9.50708096457348e-06,
"loss": 2.7523,
"step": 145200
},
{
"epoch": 1.05,
"learning_rate": 9.499848110055116e-06,
"loss": 2.7626,
"step": 145300
},
{
"epoch": 1.05,
"learning_rate": 9.49261525553675e-06,
"loss": 2.7541,
"step": 145400
},
{
"epoch": 1.05,
"learning_rate": 9.485382401018387e-06,
"loss": 2.7624,
"step": 145500
},
{
"epoch": 1.05,
"learning_rate": 9.478221875045205e-06,
"loss": 2.7604,
"step": 145600
},
{
"epoch": 1.05,
"learning_rate": 9.470989020526842e-06,
"loss": 2.7576,
"step": 145700
},
{
"epoch": 1.05,
"learning_rate": 9.463828494553662e-06,
"loss": 2.7547,
"step": 145800
},
{
"epoch": 1.06,
"learning_rate": 9.456595640035298e-06,
"loss": 2.7526,
"step": 145900
},
{
"epoch": 1.06,
"learning_rate": 9.449362785516933e-06,
"loss": 2.7459,
"step": 146000
},
{
"epoch": 1.06,
"eval_accuracy": 0.4589529780753887,
"eval_loss": 2.777561902999878,
"eval_runtime": 29.3866,
"eval_samples_per_second": 220.611,
"eval_steps_per_second": 2.314,
"step": 146000
},
{
"epoch": 1.06,
"learning_rate": 9.442129930998568e-06,
"loss": 2.7579,
"step": 146100
},
{
"epoch": 1.06,
"learning_rate": 9.434897076480204e-06,
"loss": 2.7543,
"step": 146200
},
{
"epoch": 1.06,
"learning_rate": 9.427664221961839e-06,
"loss": 2.7608,
"step": 146300
},
{
"epoch": 1.06,
"learning_rate": 9.420431367443476e-06,
"loss": 2.7595,
"step": 146400
},
{
"epoch": 1.06,
"learning_rate": 9.413198512925112e-06,
"loss": 2.7531,
"step": 146500
},
{
"epoch": 1.06,
"learning_rate": 9.405965658406747e-06,
"loss": 2.7576,
"step": 146600
},
{
"epoch": 1.06,
"learning_rate": 9.398732803888383e-06,
"loss": 2.7546,
"step": 146700
},
{
"epoch": 1.06,
"learning_rate": 9.391499949370018e-06,
"loss": 2.7465,
"step": 146800
},
{
"epoch": 1.06,
"learning_rate": 9.384267094851655e-06,
"loss": 2.7597,
"step": 146900
},
{
"epoch": 1.06,
"learning_rate": 9.37703424033329e-06,
"loss": 2.7646,
"step": 147000
},
{
"epoch": 1.06,
"eval_accuracy": 0.4591423118684463,
"eval_loss": 2.7770681381225586,
"eval_runtime": 29.9251,
"eval_samples_per_second": 216.641,
"eval_steps_per_second": 2.272,
"step": 147000
},
{
"epoch": 1.06,
"learning_rate": 9.369801385814926e-06,
"loss": 2.7594,
"step": 147100
},
{
"epoch": 1.06,
"learning_rate": 9.362568531296563e-06,
"loss": 2.7502,
"step": 147200
},
{
"epoch": 1.07,
"learning_rate": 9.355335676778197e-06,
"loss": 2.7557,
"step": 147300
},
{
"epoch": 1.07,
"learning_rate": 9.348102822259834e-06,
"loss": 2.7589,
"step": 147400
},
{
"epoch": 1.07,
"learning_rate": 9.340942296286654e-06,
"loss": 2.7468,
"step": 147500
},
{
"epoch": 1.07,
"learning_rate": 9.33370944176829e-06,
"loss": 2.7484,
"step": 147600
},
{
"epoch": 1.07,
"learning_rate": 9.326476587249925e-06,
"loss": 2.7559,
"step": 147700
},
{
"epoch": 1.07,
"learning_rate": 9.319243732731561e-06,
"loss": 2.7502,
"step": 147800
},
{
"epoch": 1.07,
"learning_rate": 9.312010878213196e-06,
"loss": 2.7512,
"step": 147900
},
{
"epoch": 1.07,
"learning_rate": 9.304778023694833e-06,
"loss": 2.7529,
"step": 148000
},
{
"epoch": 1.07,
"eval_accuracy": 0.4589487437733076,
"eval_loss": 2.7767789363861084,
"eval_runtime": 29.4704,
"eval_samples_per_second": 219.984,
"eval_steps_per_second": 2.307,
"step": 148000
},
{
"epoch": 1.07,
"learning_rate": 9.297545169176468e-06,
"loss": 2.7583,
"step": 148100
},
{
"epoch": 1.07,
"learning_rate": 9.290312314658104e-06,
"loss": 2.7579,
"step": 148200
},
{
"epoch": 1.07,
"learning_rate": 9.283151788684924e-06,
"loss": 2.7561,
"step": 148300
},
{
"epoch": 1.07,
"learning_rate": 9.275918934166559e-06,
"loss": 2.7596,
"step": 148400
},
{
"epoch": 1.07,
"learning_rate": 9.268686079648195e-06,
"loss": 2.7601,
"step": 148500
},
{
"epoch": 1.07,
"learning_rate": 9.26145322512983e-06,
"loss": 2.7478,
"step": 148600
},
{
"epoch": 1.08,
"learning_rate": 9.254220370611467e-06,
"loss": 2.7583,
"step": 148700
},
{
"epoch": 1.08,
"learning_rate": 9.246987516093101e-06,
"loss": 2.7545,
"step": 148800
},
{
"epoch": 1.08,
"learning_rate": 9.239754661574738e-06,
"loss": 2.7507,
"step": 148900
},
{
"epoch": 1.08,
"learning_rate": 9.232521807056374e-06,
"loss": 2.7573,
"step": 149000
},
{
"epoch": 1.08,
"eval_accuracy": 0.4591822352880686,
"eval_loss": 2.7764034271240234,
"eval_runtime": 29.9267,
"eval_samples_per_second": 216.63,
"eval_steps_per_second": 2.272,
"step": 149000
},
{
"epoch": 1.08,
"learning_rate": 9.22528895253801e-06,
"loss": 2.7514,
"step": 149100
},
{
"epoch": 1.08,
"learning_rate": 9.218056098019646e-06,
"loss": 2.7569,
"step": 149200
},
{
"epoch": 1.08,
"learning_rate": 9.21082324350128e-06,
"loss": 2.757,
"step": 149300
},
{
"epoch": 1.08,
"learning_rate": 9.203590388982917e-06,
"loss": 2.7477,
"step": 149400
},
{
"epoch": 1.08,
"learning_rate": 9.196357534464554e-06,
"loss": 2.7616,
"step": 149500
},
{
"epoch": 1.08,
"learning_rate": 9.189124679946188e-06,
"loss": 2.7647,
"step": 149600
},
{
"epoch": 1.08,
"learning_rate": 9.181891825427825e-06,
"loss": 2.7509,
"step": 149700
},
{
"epoch": 1.08,
"learning_rate": 9.174731299454643e-06,
"loss": 2.7546,
"step": 149800
},
{
"epoch": 1.08,
"learning_rate": 9.16749844493628e-06,
"loss": 2.7492,
"step": 149900
},
{
"epoch": 1.08,
"learning_rate": 9.160265590417914e-06,
"loss": 2.754,
"step": 150000
},
{
"epoch": 1.08,
"eval_accuracy": 0.459124164859527,
"eval_loss": 2.7761712074279785,
"eval_runtime": 30.2068,
"eval_samples_per_second": 214.621,
"eval_steps_per_second": 2.251,
"step": 150000
},
{
"epoch": 1.09,
"learning_rate": 9.15303273589955e-06,
"loss": 2.7537,
"step": 150100
},
{
"epoch": 1.09,
"learning_rate": 9.145799881381187e-06,
"loss": 2.753,
"step": 150200
},
{
"epoch": 1.09,
"learning_rate": 9.138567026862822e-06,
"loss": 2.7594,
"step": 150300
},
{
"epoch": 1.09,
"learning_rate": 9.131334172344459e-06,
"loss": 2.7595,
"step": 150400
},
{
"epoch": 1.09,
"learning_rate": 9.124101317826093e-06,
"loss": 2.7532,
"step": 150500
},
{
"epoch": 1.09,
"learning_rate": 9.11686846330773e-06,
"loss": 2.7571,
"step": 150600
},
{
"epoch": 1.09,
"learning_rate": 9.109635608789365e-06,
"loss": 2.7499,
"step": 150700
},
{
"epoch": 1.09,
"learning_rate": 9.102475082816185e-06,
"loss": 2.7486,
"step": 150800
},
{
"epoch": 1.09,
"learning_rate": 9.095242228297821e-06,
"loss": 2.7537,
"step": 150900
},
{
"epoch": 1.09,
"learning_rate": 9.088009373779456e-06,
"loss": 2.7553,
"step": 151000
},
{
"epoch": 1.09,
"eval_accuracy": 0.45908545124049926,
"eval_loss": 2.7759199142456055,
"eval_runtime": 31.1913,
"eval_samples_per_second": 207.846,
"eval_steps_per_second": 2.18,
"step": 151000
},
{
"epoch": 1.09,
"learning_rate": 9.080776519261092e-06,
"loss": 2.7495,
"step": 151100
},
{
"epoch": 1.09,
"learning_rate": 9.073543664742727e-06,
"loss": 2.7562,
"step": 151200
},
{
"epoch": 1.09,
"learning_rate": 9.066310810224364e-06,
"loss": 2.7567,
"step": 151300
},
{
"epoch": 1.1,
"learning_rate": 9.059077955705999e-06,
"loss": 2.7532,
"step": 151400
},
{
"epoch": 1.1,
"learning_rate": 9.051845101187635e-06,
"loss": 2.7485,
"step": 151500
},
{
"epoch": 1.1,
"learning_rate": 9.044612246669272e-06,
"loss": 2.7502,
"step": 151600
},
{
"epoch": 1.1,
"learning_rate": 9.037379392150906e-06,
"loss": 2.756,
"step": 151700
},
{
"epoch": 1.1,
"learning_rate": 9.030146537632543e-06,
"loss": 2.7559,
"step": 151800
},
{
"epoch": 1.1,
"learning_rate": 9.022913683114178e-06,
"loss": 2.753,
"step": 151900
},
{
"epoch": 1.1,
"learning_rate": 9.015680828595814e-06,
"loss": 2.7485,
"step": 152000
},
{
"epoch": 1.1,
"eval_accuracy": 0.4592844634383138,
"eval_loss": 2.7755496501922607,
"eval_runtime": 34.9091,
"eval_samples_per_second": 185.711,
"eval_steps_per_second": 1.948,
"step": 152000
},
{
"epoch": 1.1,
"learning_rate": 9.00844797407745e-06,
"loss": 2.7569,
"step": 152100
},
{
"epoch": 1.1,
"learning_rate": 9.001215119559086e-06,
"loss": 2.7532,
"step": 152200
},
{
"epoch": 1.1,
"learning_rate": 8.993982265040722e-06,
"loss": 2.7619,
"step": 152300
},
{
"epoch": 1.1,
"learning_rate": 8.986749410522357e-06,
"loss": 2.754,
"step": 152400
},
{
"epoch": 1.1,
"learning_rate": 8.979516556003993e-06,
"loss": 2.7528,
"step": 152500
},
{
"epoch": 1.1,
"learning_rate": 8.972283701485628e-06,
"loss": 2.761,
"step": 152600
},
{
"epoch": 1.1,
"learning_rate": 8.965050846967265e-06,
"loss": 2.7578,
"step": 152700
},
{
"epoch": 1.11,
"learning_rate": 8.957817992448901e-06,
"loss": 2.7531,
"step": 152800
},
{
"epoch": 1.11,
"learning_rate": 8.950585137930536e-06,
"loss": 2.7593,
"step": 152900
},
{
"epoch": 1.11,
"learning_rate": 8.943352283412173e-06,
"loss": 2.7558,
"step": 153000
},
{
"epoch": 1.11,
"eval_accuracy": 0.45928869774039494,
"eval_loss": 2.7751994132995605,
"eval_runtime": 29.596,
"eval_samples_per_second": 219.05,
"eval_steps_per_second": 2.298,
"step": 153000
},
{
"epoch": 1.11,
"learning_rate": 8.936119428893807e-06,
"loss": 2.7529,
"step": 153100
},
{
"epoch": 1.11,
"learning_rate": 8.928886574375444e-06,
"loss": 2.7601,
"step": 153200
},
{
"epoch": 1.11,
"learning_rate": 8.92165371985708e-06,
"loss": 2.766,
"step": 153300
},
{
"epoch": 1.11,
"learning_rate": 8.914420865338715e-06,
"loss": 2.7564,
"step": 153400
},
{
"epoch": 1.11,
"learning_rate": 8.907188010820352e-06,
"loss": 2.7583,
"step": 153500
},
{
"epoch": 1.11,
"learning_rate": 8.899955156301987e-06,
"loss": 2.7546,
"step": 153600
},
{
"epoch": 1.11,
"learning_rate": 8.892722301783623e-06,
"loss": 2.752,
"step": 153700
},
{
"epoch": 1.11,
"learning_rate": 8.885489447265258e-06,
"loss": 2.7557,
"step": 153800
},
{
"epoch": 1.11,
"learning_rate": 8.878256592746894e-06,
"loss": 2.7539,
"step": 153900
},
{
"epoch": 1.11,
"learning_rate": 8.871023738228531e-06,
"loss": 2.7563,
"step": 154000
},
{
"epoch": 1.11,
"eval_accuracy": 0.459253613523151,
"eval_loss": 2.774827003479004,
"eval_runtime": 31.2857,
"eval_samples_per_second": 207.22,
"eval_steps_per_second": 2.174,
"step": 154000
},
{
"epoch": 1.11,
"learning_rate": 8.863790883710166e-06,
"loss": 2.7591,
"step": 154100
},
{
"epoch": 1.12,
"learning_rate": 8.856558029191802e-06,
"loss": 2.7548,
"step": 154200
},
{
"epoch": 1.12,
"learning_rate": 8.84939750321862e-06,
"loss": 2.7506,
"step": 154300
},
{
"epoch": 1.12,
"learning_rate": 8.842164648700257e-06,
"loss": 2.7549,
"step": 154400
},
{
"epoch": 1.12,
"learning_rate": 8.834931794181892e-06,
"loss": 2.7589,
"step": 154500
},
{
"epoch": 1.12,
"learning_rate": 8.827698939663528e-06,
"loss": 2.7581,
"step": 154600
},
{
"epoch": 1.12,
"learning_rate": 8.820466085145165e-06,
"loss": 2.7543,
"step": 154700
},
{
"epoch": 1.12,
"learning_rate": 8.8132332306268e-06,
"loss": 2.7567,
"step": 154800
},
{
"epoch": 1.12,
"learning_rate": 8.806000376108436e-06,
"loss": 2.7518,
"step": 154900
},
{
"epoch": 1.12,
"learning_rate": 8.798767521590071e-06,
"loss": 2.7557,
"step": 155000
},
{
"epoch": 1.12,
"eval_accuracy": 0.4593570514739908,
"eval_loss": 2.774669647216797,
"eval_runtime": 29.7619,
"eval_samples_per_second": 217.829,
"eval_steps_per_second": 2.285,
"step": 155000
},
{
"epoch": 1.12,
"learning_rate": 8.791534667071707e-06,
"loss": 2.7495,
"step": 155100
},
{
"epoch": 1.12,
"learning_rate": 8.784301812553342e-06,
"loss": 2.7528,
"step": 155200
},
{
"epoch": 1.12,
"learning_rate": 8.777068958034979e-06,
"loss": 2.7503,
"step": 155300
},
{
"epoch": 1.12,
"learning_rate": 8.769836103516615e-06,
"loss": 2.7503,
"step": 155400
},
{
"epoch": 1.12,
"learning_rate": 8.762675577543433e-06,
"loss": 2.7492,
"step": 155500
},
{
"epoch": 1.13,
"learning_rate": 8.75544272302507e-06,
"loss": 2.7544,
"step": 155600
},
{
"epoch": 1.13,
"learning_rate": 8.748209868506705e-06,
"loss": 2.7547,
"step": 155700
},
{
"epoch": 1.13,
"learning_rate": 8.740977013988341e-06,
"loss": 2.7446,
"step": 155800
},
{
"epoch": 1.13,
"learning_rate": 8.733744159469978e-06,
"loss": 2.753,
"step": 155900
},
{
"epoch": 1.13,
"learning_rate": 8.726511304951612e-06,
"loss": 2.7593,
"step": 156000
},
{
"epoch": 1.13,
"eval_accuracy": 0.4591931234934202,
"eval_loss": 2.7744040489196777,
"eval_runtime": 29.5223,
"eval_samples_per_second": 219.597,
"eval_steps_per_second": 2.303,
"step": 156000
},
{
"epoch": 1.13,
"learning_rate": 8.719278450433249e-06,
"loss": 2.758,
"step": 156100
},
{
"epoch": 1.13,
"learning_rate": 8.712045595914884e-06,
"loss": 2.761,
"step": 156200
},
{
"epoch": 1.13,
"learning_rate": 8.70481274139652e-06,
"loss": 2.756,
"step": 156300
},
{
"epoch": 1.13,
"learning_rate": 8.697579886878155e-06,
"loss": 2.7484,
"step": 156400
},
{
"epoch": 1.13,
"learning_rate": 8.690347032359792e-06,
"loss": 2.7491,
"step": 156500
},
{
"epoch": 1.13,
"learning_rate": 8.683114177841428e-06,
"loss": 2.7551,
"step": 156600
},
{
"epoch": 1.13,
"learning_rate": 8.675881323323063e-06,
"loss": 2.7515,
"step": 156700
},
{
"epoch": 1.13,
"learning_rate": 8.6686484688047e-06,
"loss": 2.7541,
"step": 156800
},
{
"epoch": 1.13,
"learning_rate": 8.66148794283152e-06,
"loss": 2.7546,
"step": 156900
},
{
"epoch": 1.14,
"learning_rate": 8.654255088313154e-06,
"loss": 2.752,
"step": 157000
},
{
"epoch": 1.14,
"eval_accuracy": 0.4592584527255295,
"eval_loss": 2.774146795272827,
"eval_runtime": 30.0141,
"eval_samples_per_second": 215.999,
"eval_steps_per_second": 2.266,
"step": 157000
},
{
"epoch": 1.14,
"learning_rate": 8.64702223379479e-06,
"loss": 2.7488,
"step": 157100
},
{
"epoch": 1.14,
"learning_rate": 8.639789379276427e-06,
"loss": 2.7539,
"step": 157200
},
{
"epoch": 1.14,
"learning_rate": 8.632556524758062e-06,
"loss": 2.7552,
"step": 157300
},
{
"epoch": 1.14,
"learning_rate": 8.625323670239697e-06,
"loss": 2.7566,
"step": 157400
},
{
"epoch": 1.14,
"learning_rate": 8.618090815721333e-06,
"loss": 2.7454,
"step": 157500
},
{
"epoch": 1.14,
"learning_rate": 8.610930289748153e-06,
"loss": 2.7527,
"step": 157600
},
{
"epoch": 1.14,
"learning_rate": 8.60369743522979e-06,
"loss": 2.7558,
"step": 157700
},
{
"epoch": 1.14,
"learning_rate": 8.596464580711424e-06,
"loss": 2.7579,
"step": 157800
},
{
"epoch": 1.14,
"learning_rate": 8.58923172619306e-06,
"loss": 2.7495,
"step": 157900
},
{
"epoch": 1.14,
"learning_rate": 8.581998871674696e-06,
"loss": 2.748,
"step": 158000
},
{
"epoch": 1.14,
"eval_accuracy": 0.4593262015588281,
"eval_loss": 2.773747205734253,
"eval_runtime": 30.6235,
"eval_samples_per_second": 211.7,
"eval_steps_per_second": 2.221,
"step": 158000
},
{
"epoch": 1.14,
"learning_rate": 8.574766017156332e-06,
"loss": 2.7465,
"step": 158100
},
{
"epoch": 1.14,
"learning_rate": 8.567533162637967e-06,
"loss": 2.7549,
"step": 158200
},
{
"epoch": 1.14,
"learning_rate": 8.560300308119603e-06,
"loss": 2.7489,
"step": 158300
},
{
"epoch": 1.15,
"learning_rate": 8.55306745360124e-06,
"loss": 2.7508,
"step": 158400
},
{
"epoch": 1.15,
"learning_rate": 8.545834599082875e-06,
"loss": 2.7512,
"step": 158500
},
{
"epoch": 1.15,
"learning_rate": 8.538601744564511e-06,
"loss": 2.7481,
"step": 158600
},
{
"epoch": 1.15,
"learning_rate": 8.531368890046146e-06,
"loss": 2.7517,
"step": 158700
},
{
"epoch": 1.15,
"learning_rate": 8.524136035527783e-06,
"loss": 2.7504,
"step": 158800
},
{
"epoch": 1.15,
"learning_rate": 8.516903181009417e-06,
"loss": 2.7538,
"step": 158900
},
{
"epoch": 1.15,
"learning_rate": 8.509670326491054e-06,
"loss": 2.7549,
"step": 159000
},
{
"epoch": 1.15,
"eval_accuracy": 0.4593860866882616,
"eval_loss": 2.773451566696167,
"eval_runtime": 27.8905,
"eval_samples_per_second": 232.445,
"eval_steps_per_second": 2.438,
"step": 159000
},
{
"epoch": 1.15,
"learning_rate": 8.50243747197269e-06,
"loss": 2.7499,
"step": 159100
},
{
"epoch": 1.15,
"learning_rate": 8.495204617454325e-06,
"loss": 2.7457,
"step": 159200
},
{
"epoch": 1.15,
"learning_rate": 8.487971762935962e-06,
"loss": 2.7566,
"step": 159300
},
{
"epoch": 1.15,
"learning_rate": 8.480738908417597e-06,
"loss": 2.7566,
"step": 159400
},
{
"epoch": 1.15,
"learning_rate": 8.473506053899233e-06,
"loss": 2.7539,
"step": 159500
},
{
"epoch": 1.15,
"learning_rate": 8.46627319938087e-06,
"loss": 2.7471,
"step": 159600
},
{
"epoch": 1.16,
"learning_rate": 8.459040344862503e-06,
"loss": 2.7586,
"step": 159700
},
{
"epoch": 1.16,
"learning_rate": 8.45180749034414e-06,
"loss": 2.7493,
"step": 159800
},
{
"epoch": 1.16,
"learning_rate": 8.444574635825776e-06,
"loss": 2.7554,
"step": 159900
},
{
"epoch": 1.16,
"learning_rate": 8.437414109852596e-06,
"loss": 2.7455,
"step": 160000
},
{
"epoch": 1.16,
"eval_accuracy": 0.45956574207656226,
"eval_loss": 2.7732744216918945,
"eval_runtime": 30.512,
"eval_samples_per_second": 212.474,
"eval_steps_per_second": 2.229,
"step": 160000
},
{
"epoch": 1.16,
"learning_rate": 8.43018125533423e-06,
"loss": 2.7551,
"step": 160100
},
{
"epoch": 1.16,
"learning_rate": 8.422948400815867e-06,
"loss": 2.7513,
"step": 160200
},
{
"epoch": 1.16,
"learning_rate": 8.415715546297503e-06,
"loss": 2.7594,
"step": 160300
},
{
"epoch": 1.16,
"learning_rate": 8.408555020324321e-06,
"loss": 2.7598,
"step": 160400
},
{
"epoch": 1.16,
"learning_rate": 8.401322165805958e-06,
"loss": 2.7547,
"step": 160500
},
{
"epoch": 1.16,
"learning_rate": 8.394089311287593e-06,
"loss": 2.7433,
"step": 160600
},
{
"epoch": 1.16,
"learning_rate": 8.38685645676923e-06,
"loss": 2.7512,
"step": 160700
},
{
"epoch": 1.16,
"learning_rate": 8.379623602250864e-06,
"loss": 2.7544,
"step": 160800
},
{
"epoch": 1.16,
"learning_rate": 8.3723907477325e-06,
"loss": 2.7475,
"step": 160900
},
{
"epoch": 1.16,
"learning_rate": 8.365157893214137e-06,
"loss": 2.7582,
"step": 161000
},
{
"epoch": 1.16,
"eval_accuracy": 0.45939636999331585,
"eval_loss": 2.7731149196624756,
"eval_runtime": 31.8214,
"eval_samples_per_second": 203.731,
"eval_steps_per_second": 2.137,
"step": 161000
},
{
"epoch": 1.17,
"learning_rate": 8.357925038695772e-06,
"loss": 2.747,
"step": 161100
},
{
"epoch": 1.17,
"learning_rate": 8.350692184177408e-06,
"loss": 2.7507,
"step": 161200
},
{
"epoch": 1.17,
"learning_rate": 8.343459329659043e-06,
"loss": 2.7441,
"step": 161300
},
{
"epoch": 1.17,
"learning_rate": 8.33622647514068e-06,
"loss": 2.7502,
"step": 161400
},
{
"epoch": 1.17,
"learning_rate": 8.328993620622315e-06,
"loss": 2.7447,
"step": 161500
},
{
"epoch": 1.17,
"learning_rate": 8.321760766103951e-06,
"loss": 2.7473,
"step": 161600
},
{
"epoch": 1.17,
"learning_rate": 8.314527911585588e-06,
"loss": 2.7443,
"step": 161700
},
{
"epoch": 1.17,
"learning_rate": 8.307295057067222e-06,
"loss": 2.7603,
"step": 161800
},
{
"epoch": 1.17,
"learning_rate": 8.300062202548859e-06,
"loss": 2.7555,
"step": 161900
},
{
"epoch": 1.17,
"learning_rate": 8.292829348030494e-06,
"loss": 2.7532,
"step": 162000
},
{
"epoch": 1.17,
"eval_accuracy": 0.45951795495307485,
"eval_loss": 2.7727766036987305,
"eval_runtime": 30.0745,
"eval_samples_per_second": 215.565,
"eval_steps_per_second": 2.261,
"step": 162000
},
{
"epoch": 1.17,
"learning_rate": 8.28559649351213e-06,
"loss": 2.7477,
"step": 162100
},
{
"epoch": 1.17,
"learning_rate": 8.278435967538948e-06,
"loss": 2.7539,
"step": 162200
},
{
"epoch": 1.17,
"learning_rate": 8.271203113020585e-06,
"loss": 2.7502,
"step": 162300
},
{
"epoch": 1.17,
"learning_rate": 8.263970258502221e-06,
"loss": 2.752,
"step": 162400
},
{
"epoch": 1.18,
"learning_rate": 8.256737403983856e-06,
"loss": 2.7551,
"step": 162500
},
{
"epoch": 1.18,
"learning_rate": 8.249504549465493e-06,
"loss": 2.7514,
"step": 162600
},
{
"epoch": 1.18,
"learning_rate": 8.242271694947128e-06,
"loss": 2.7475,
"step": 162700
},
{
"epoch": 1.18,
"learning_rate": 8.235038840428764e-06,
"loss": 2.751,
"step": 162800
},
{
"epoch": 1.18,
"learning_rate": 8.2278059859104e-06,
"loss": 2.7475,
"step": 162900
},
{
"epoch": 1.18,
"learning_rate": 8.220573131392035e-06,
"loss": 2.7496,
"step": 163000
},
{
"epoch": 1.18,
"eval_accuracy": 0.4595270284575345,
"eval_loss": 2.772428512573242,
"eval_runtime": 30.2915,
"eval_samples_per_second": 214.021,
"eval_steps_per_second": 2.245,
"step": 163000
},
{
"epoch": 1.18,
"learning_rate": 8.213340276873672e-06,
"loss": 2.7493,
"step": 163100
},
{
"epoch": 1.18,
"learning_rate": 8.206107422355307e-06,
"loss": 2.7641,
"step": 163200
},
{
"epoch": 1.18,
"learning_rate": 8.198874567836943e-06,
"loss": 2.7511,
"step": 163300
},
{
"epoch": 1.18,
"learning_rate": 8.191641713318578e-06,
"loss": 2.7539,
"step": 163400
},
{
"epoch": 1.18,
"learning_rate": 8.184408858800215e-06,
"loss": 2.7525,
"step": 163500
},
{
"epoch": 1.18,
"learning_rate": 8.177176004281851e-06,
"loss": 2.7518,
"step": 163600
},
{
"epoch": 1.18,
"learning_rate": 8.169943149763486e-06,
"loss": 2.7566,
"step": 163700
},
{
"epoch": 1.18,
"learning_rate": 8.162782623790306e-06,
"loss": 2.752,
"step": 163800
},
{
"epoch": 1.19,
"learning_rate": 8.155549769271942e-06,
"loss": 2.7538,
"step": 163900
},
{
"epoch": 1.19,
"learning_rate": 8.148316914753577e-06,
"loss": 2.75,
"step": 164000
},
{
"epoch": 1.19,
"eval_accuracy": 0.45964437911521233,
"eval_loss": 2.7721121311187744,
"eval_runtime": 29.4536,
"eval_samples_per_second": 220.109,
"eval_steps_per_second": 2.309,
"step": 164000
},
{
"epoch": 1.19,
"learning_rate": 8.141084060235212e-06,
"loss": 2.7486,
"step": 164100
},
{
"epoch": 1.19,
"learning_rate": 8.133851205716848e-06,
"loss": 2.7547,
"step": 164200
},
{
"epoch": 1.19,
"learning_rate": 8.126618351198485e-06,
"loss": 2.749,
"step": 164300
},
{
"epoch": 1.19,
"learning_rate": 8.11938549668012e-06,
"loss": 2.7498,
"step": 164400
},
{
"epoch": 1.19,
"learning_rate": 8.11222497070694e-06,
"loss": 2.7537,
"step": 164500
},
{
"epoch": 1.19,
"learning_rate": 8.104992116188576e-06,
"loss": 2.754,
"step": 164600
},
{
"epoch": 1.19,
"learning_rate": 8.097759261670212e-06,
"loss": 2.7577,
"step": 164700
},
{
"epoch": 1.19,
"learning_rate": 8.090526407151847e-06,
"loss": 2.7514,
"step": 164800
},
{
"epoch": 1.19,
"learning_rate": 8.083293552633484e-06,
"loss": 2.755,
"step": 164900
},
{
"epoch": 1.19,
"learning_rate": 8.076060698115119e-06,
"loss": 2.7517,
"step": 165000
},
{
"epoch": 1.19,
"eval_accuracy": 0.45970123974315935,
"eval_loss": 2.7717862129211426,
"eval_runtime": 29.3512,
"eval_samples_per_second": 220.877,
"eval_steps_per_second": 2.317,
"step": 165000
},
{
"epoch": 1.19,
"learning_rate": 8.068827843596755e-06,
"loss": 2.7543,
"step": 165100
},
{
"epoch": 1.19,
"learning_rate": 8.06159498907839e-06,
"loss": 2.744,
"step": 165200
},
{
"epoch": 1.2,
"learning_rate": 8.054362134560026e-06,
"loss": 2.7539,
"step": 165300
},
{
"epoch": 1.2,
"learning_rate": 8.047129280041663e-06,
"loss": 2.7509,
"step": 165400
},
{
"epoch": 1.2,
"learning_rate": 8.039896425523298e-06,
"loss": 2.7535,
"step": 165500
},
{
"epoch": 1.2,
"learning_rate": 8.032663571004934e-06,
"loss": 2.7485,
"step": 165600
},
{
"epoch": 1.2,
"learning_rate": 8.025430716486569e-06,
"loss": 2.7493,
"step": 165700
},
{
"epoch": 1.2,
"learning_rate": 8.018197861968206e-06,
"loss": 2.7477,
"step": 165800
},
{
"epoch": 1.2,
"learning_rate": 8.010965007449842e-06,
"loss": 2.7504,
"step": 165900
},
{
"epoch": 1.2,
"learning_rate": 8.003732152931477e-06,
"loss": 2.7522,
"step": 166000
},
{
"epoch": 1.2,
"eval_accuracy": 0.4596595016226451,
"eval_loss": 2.7715861797332764,
"eval_runtime": 29.3689,
"eval_samples_per_second": 220.744,
"eval_steps_per_second": 2.315,
"step": 166000
},
{
"epoch": 1.2,
"learning_rate": 7.996499298413113e-06,
"loss": 2.7494,
"step": 166100
},
{
"epoch": 1.2,
"learning_rate": 7.989266443894748e-06,
"loss": 2.7555,
"step": 166200
},
{
"epoch": 1.2,
"learning_rate": 7.982033589376383e-06,
"loss": 2.7561,
"step": 166300
},
{
"epoch": 1.2,
"learning_rate": 7.974873063403203e-06,
"loss": 2.7441,
"step": 166400
},
{
"epoch": 1.2,
"learning_rate": 7.96764020888484e-06,
"loss": 2.7574,
"step": 166500
},
{
"epoch": 1.2,
"learning_rate": 7.960407354366476e-06,
"loss": 2.7528,
"step": 166600
},
{
"epoch": 1.21,
"learning_rate": 7.95317449984811e-06,
"loss": 2.7582,
"step": 166700
},
{
"epoch": 1.21,
"learning_rate": 7.945941645329747e-06,
"loss": 2.7543,
"step": 166800
},
{
"epoch": 1.21,
"learning_rate": 7.938708790811382e-06,
"loss": 2.7501,
"step": 166900
},
{
"epoch": 1.21,
"learning_rate": 7.931475936293019e-06,
"loss": 2.7514,
"step": 167000
},
{
"epoch": 1.21,
"eval_accuracy": 0.45986395792313534,
"eval_loss": 2.771327495574951,
"eval_runtime": 32.3724,
"eval_samples_per_second": 200.263,
"eval_steps_per_second": 2.101,
"step": 167000
},
{
"epoch": 1.21,
"learning_rate": 7.924243081774653e-06,
"loss": 2.7594,
"step": 167100
},
{
"epoch": 1.21,
"learning_rate": 7.91701022725629e-06,
"loss": 2.7553,
"step": 167200
},
{
"epoch": 1.21,
"learning_rate": 7.909777372737926e-06,
"loss": 2.7411,
"step": 167300
},
{
"epoch": 1.21,
"learning_rate": 7.902544518219561e-06,
"loss": 2.7499,
"step": 167400
},
{
"epoch": 1.21,
"learning_rate": 7.895311663701198e-06,
"loss": 2.7544,
"step": 167500
},
{
"epoch": 1.21,
"learning_rate": 7.888078809182833e-06,
"loss": 2.7509,
"step": 167600
},
{
"epoch": 1.21,
"learning_rate": 7.880845954664469e-06,
"loss": 2.7516,
"step": 167700
},
{
"epoch": 1.21,
"learning_rate": 7.873613100146106e-06,
"loss": 2.7502,
"step": 167800
},
{
"epoch": 1.21,
"learning_rate": 7.86638024562774e-06,
"loss": 2.7581,
"step": 167900
},
{
"epoch": 1.22,
"learning_rate": 7.859147391109377e-06,
"loss": 2.7515,
"step": 168000
},
{
"epoch": 1.22,
"eval_accuracy": 0.4597810865824041,
"eval_loss": 2.7710556983947754,
"eval_runtime": 30.9587,
"eval_samples_per_second": 209.408,
"eval_steps_per_second": 2.196,
"step": 168000
},
{
"epoch": 1.22,
"learning_rate": 7.851914536591012e-06,
"loss": 2.75,
"step": 168100
},
{
"epoch": 1.22,
"learning_rate": 7.844681682072648e-06,
"loss": 2.7496,
"step": 168200
},
{
"epoch": 1.22,
"learning_rate": 7.837448827554283e-06,
"loss": 2.747,
"step": 168300
},
{
"epoch": 1.22,
"learning_rate": 7.83021597303592e-06,
"loss": 2.7516,
"step": 168400
},
{
"epoch": 1.22,
"learning_rate": 7.822983118517556e-06,
"loss": 2.7546,
"step": 168500
},
{
"epoch": 1.22,
"learning_rate": 7.81575026399919e-06,
"loss": 2.7462,
"step": 168600
},
{
"epoch": 1.22,
"learning_rate": 7.808517409480826e-06,
"loss": 2.7511,
"step": 168700
},
{
"epoch": 1.22,
"learning_rate": 7.801284554962462e-06,
"loss": 2.7571,
"step": 168800
},
{
"epoch": 1.22,
"learning_rate": 7.794051700444097e-06,
"loss": 2.7533,
"step": 168900
},
{
"epoch": 1.22,
"learning_rate": 7.786818845925734e-06,
"loss": 2.7493,
"step": 169000
},
{
"epoch": 1.22,
"eval_accuracy": 0.45982221980262106,
"eval_loss": 2.7707936763763428,
"eval_runtime": 30.5041,
"eval_samples_per_second": 212.529,
"eval_steps_per_second": 2.229,
"step": 169000
},
{
"epoch": 1.22,
"learning_rate": 7.779585991407368e-06,
"loss": 2.7488,
"step": 169100
},
{
"epoch": 1.22,
"learning_rate": 7.772353136889005e-06,
"loss": 2.7433,
"step": 169200
},
{
"epoch": 1.22,
"learning_rate": 7.765120282370641e-06,
"loss": 2.7549,
"step": 169300
},
{
"epoch": 1.23,
"learning_rate": 7.757887427852276e-06,
"loss": 2.7575,
"step": 169400
},
{
"epoch": 1.23,
"learning_rate": 7.750726901879096e-06,
"loss": 2.7478,
"step": 169500
},
{
"epoch": 1.23,
"learning_rate": 7.743566375905916e-06,
"loss": 2.7501,
"step": 169600
},
{
"epoch": 1.23,
"learning_rate": 7.73633352138755e-06,
"loss": 2.7466,
"step": 169700
},
{
"epoch": 1.23,
"learning_rate": 7.729100666869187e-06,
"loss": 2.7534,
"step": 169800
},
{
"epoch": 1.23,
"learning_rate": 7.721867812350824e-06,
"loss": 2.7458,
"step": 169900
},
{
"epoch": 1.23,
"learning_rate": 7.714634957832458e-06,
"loss": 2.7491,
"step": 170000
},
{
"epoch": 1.23,
"eval_accuracy": 0.45979620908983676,
"eval_loss": 2.7705278396606445,
"eval_runtime": 32.0791,
"eval_samples_per_second": 202.094,
"eval_steps_per_second": 2.12,
"step": 170000
},
{
"epoch": 1.23,
"learning_rate": 7.707402103314095e-06,
"loss": 2.7529,
"step": 170100
},
{
"epoch": 1.23,
"learning_rate": 7.70016924879573e-06,
"loss": 2.7448,
"step": 170200
},
{
"epoch": 1.23,
"learning_rate": 7.692936394277366e-06,
"loss": 2.7452,
"step": 170300
},
{
"epoch": 1.23,
"learning_rate": 7.685703539759003e-06,
"loss": 2.7512,
"step": 170400
},
{
"epoch": 1.23,
"learning_rate": 7.678470685240638e-06,
"loss": 2.7513,
"step": 170500
},
{
"epoch": 1.23,
"learning_rate": 7.671237830722274e-06,
"loss": 2.7544,
"step": 170600
},
{
"epoch": 1.23,
"learning_rate": 7.664004976203909e-06,
"loss": 2.749,
"step": 170700
},
{
"epoch": 1.24,
"learning_rate": 7.656772121685545e-06,
"loss": 2.7389,
"step": 170800
},
{
"epoch": 1.24,
"learning_rate": 7.64953926716718e-06,
"loss": 2.7474,
"step": 170900
},
{
"epoch": 1.24,
"learning_rate": 7.642306412648817e-06,
"loss": 2.7552,
"step": 171000
},
{
"epoch": 1.24,
"eval_accuracy": 0.45987726572967613,
"eval_loss": 2.7704155445098877,
"eval_runtime": 30.2626,
"eval_samples_per_second": 214.224,
"eval_steps_per_second": 2.247,
"step": 171000
},
{
"epoch": 1.24,
"learning_rate": 7.635073558130453e-06,
"loss": 2.7507,
"step": 171100
},
{
"epoch": 1.24,
"learning_rate": 7.627840703612088e-06,
"loss": 2.7527,
"step": 171200
},
{
"epoch": 1.24,
"learning_rate": 7.620607849093724e-06,
"loss": 2.7471,
"step": 171300
},
{
"epoch": 1.24,
"learning_rate": 7.61337499457536e-06,
"loss": 2.7481,
"step": 171400
},
{
"epoch": 1.24,
"learning_rate": 7.606142140056996e-06,
"loss": 2.7542,
"step": 171500
},
{
"epoch": 1.24,
"learning_rate": 7.5989092855386315e-06,
"loss": 2.7582,
"step": 171600
},
{
"epoch": 1.24,
"learning_rate": 7.5917487595654504e-06,
"loss": 2.7523,
"step": 171700
},
{
"epoch": 1.24,
"learning_rate": 7.58458823359227e-06,
"loss": 2.7543,
"step": 171800
},
{
"epoch": 1.24,
"learning_rate": 7.577355379073907e-06,
"loss": 2.7501,
"step": 171900
},
{
"epoch": 1.24,
"learning_rate": 7.570122524555542e-06,
"loss": 2.7536,
"step": 172000
},
{
"epoch": 1.24,
"eval_accuracy": 0.4599921967861647,
"eval_loss": 2.7700235843658447,
"eval_runtime": 30.9099,
"eval_samples_per_second": 209.739,
"eval_steps_per_second": 2.2,
"step": 172000
},
{
"epoch": 1.24,
"learning_rate": 7.562889670037178e-06,
"loss": 2.7438,
"step": 172100
},
{
"epoch": 1.25,
"learning_rate": 7.555656815518814e-06,
"loss": 2.7532,
"step": 172200
},
{
"epoch": 1.25,
"learning_rate": 7.548423961000449e-06,
"loss": 2.7427,
"step": 172300
},
{
"epoch": 1.25,
"learning_rate": 7.541191106482085e-06,
"loss": 2.7507,
"step": 172400
},
{
"epoch": 1.25,
"learning_rate": 7.533958251963721e-06,
"loss": 2.7479,
"step": 172500
},
{
"epoch": 1.25,
"learning_rate": 7.526725397445357e-06,
"loss": 2.7546,
"step": 172600
},
{
"epoch": 1.25,
"learning_rate": 7.519492542926993e-06,
"loss": 2.7616,
"step": 172700
},
{
"epoch": 1.25,
"learning_rate": 7.5122596884086285e-06,
"loss": 2.752,
"step": 172800
},
{
"epoch": 1.25,
"learning_rate": 7.505026833890263e-06,
"loss": 2.7501,
"step": 172900
},
{
"epoch": 1.25,
"learning_rate": 7.497793979371899e-06,
"loss": 2.7485,
"step": 173000
},
{
"epoch": 1.25,
"eval_accuracy": 0.4599425949617854,
"eval_loss": 2.769742727279663,
"eval_runtime": 29.9989,
"eval_samples_per_second": 216.108,
"eval_steps_per_second": 2.267,
"step": 173000
},
{
"epoch": 1.25,
"learning_rate": 7.490561124853535e-06,
"loss": 2.7499,
"step": 173100
},
{
"epoch": 1.25,
"learning_rate": 7.48332827033517e-06,
"loss": 2.7605,
"step": 173200
},
{
"epoch": 1.25,
"learning_rate": 7.476095415816806e-06,
"loss": 2.7559,
"step": 173300
},
{
"epoch": 1.25,
"learning_rate": 7.4688625612984425e-06,
"loss": 2.7446,
"step": 173400
},
{
"epoch": 1.25,
"learning_rate": 7.461629706780078e-06,
"loss": 2.7475,
"step": 173500
},
{
"epoch": 1.26,
"learning_rate": 7.454396852261714e-06,
"loss": 2.7498,
"step": 173600
},
{
"epoch": 1.26,
"learning_rate": 7.4471639977433495e-06,
"loss": 2.7551,
"step": 173700
},
{
"epoch": 1.26,
"learning_rate": 7.439931143224985e-06,
"loss": 2.7502,
"step": 173800
},
{
"epoch": 1.26,
"learning_rate": 7.432698288706621e-06,
"loss": 2.7391,
"step": 173900
},
{
"epoch": 1.26,
"learning_rate": 7.425465434188257e-06,
"loss": 2.7455,
"step": 174000
},
{
"epoch": 1.26,
"eval_accuracy": 0.45988815393502763,
"eval_loss": 2.7696611881256104,
"eval_runtime": 29.7398,
"eval_samples_per_second": 217.991,
"eval_steps_per_second": 2.286,
"step": 174000
},
{
"epoch": 1.26,
"learning_rate": 7.418232579669893e-06,
"loss": 2.7492,
"step": 174100
},
{
"epoch": 1.26,
"learning_rate": 7.410999725151529e-06,
"loss": 2.7532,
"step": 174200
},
{
"epoch": 1.26,
"learning_rate": 7.4038391991783485e-06,
"loss": 2.7515,
"step": 174300
},
{
"epoch": 1.26,
"learning_rate": 7.396606344659984e-06,
"loss": 2.746,
"step": 174400
},
{
"epoch": 1.26,
"learning_rate": 7.389373490141621e-06,
"loss": 2.7493,
"step": 174500
},
{
"epoch": 1.26,
"learning_rate": 7.382140635623256e-06,
"loss": 2.7509,
"step": 174600
},
{
"epoch": 1.26,
"learning_rate": 7.374907781104892e-06,
"loss": 2.7518,
"step": 174700
},
{
"epoch": 1.26,
"learning_rate": 7.367674926586528e-06,
"loss": 2.748,
"step": 174800
},
{
"epoch": 1.27,
"learning_rate": 7.360442072068163e-06,
"loss": 2.7488,
"step": 174900
},
{
"epoch": 1.27,
"learning_rate": 7.353209217549799e-06,
"loss": 2.7516,
"step": 175000
},
{
"epoch": 1.27,
"eval_accuracy": 0.45990811564483886,
"eval_loss": 2.7693846225738525,
"eval_runtime": 29.4233,
"eval_samples_per_second": 220.336,
"eval_steps_per_second": 2.311,
"step": 175000
},
{
"epoch": 1.27,
"learning_rate": 7.346048691576618e-06,
"loss": 2.7507,
"step": 175100
},
{
"epoch": 1.27,
"learning_rate": 7.338815837058254e-06,
"loss": 2.7448,
"step": 175200
},
{
"epoch": 1.27,
"learning_rate": 7.33158298253989e-06,
"loss": 2.7467,
"step": 175300
},
{
"epoch": 1.27,
"learning_rate": 7.324350128021526e-06,
"loss": 2.7441,
"step": 175400
},
{
"epoch": 1.27,
"learning_rate": 7.317117273503161e-06,
"loss": 2.7504,
"step": 175500
},
{
"epoch": 1.27,
"learning_rate": 7.309884418984797e-06,
"loss": 2.753,
"step": 175600
},
{
"epoch": 1.27,
"learning_rate": 7.302651564466433e-06,
"loss": 2.7544,
"step": 175700
},
{
"epoch": 1.27,
"learning_rate": 7.295418709948069e-06,
"loss": 2.7518,
"step": 175800
},
{
"epoch": 1.27,
"learning_rate": 7.288185855429705e-06,
"loss": 2.7521,
"step": 175900
},
{
"epoch": 1.27,
"learning_rate": 7.2809530009113406e-06,
"loss": 2.754,
"step": 176000
},
{
"epoch": 1.27,
"eval_accuracy": 0.460049662314409,
"eval_loss": 2.7690155506134033,
"eval_runtime": 29.3683,
"eval_samples_per_second": 220.749,
"eval_steps_per_second": 2.315,
"step": 176000
},
{
"epoch": 1.27,
"learning_rate": 7.273720146392976e-06,
"loss": 2.7484,
"step": 176100
},
{
"epoch": 1.27,
"learning_rate": 7.266487291874612e-06,
"loss": 2.7408,
"step": 176200
},
{
"epoch": 1.28,
"learning_rate": 7.2592544373562476e-06,
"loss": 2.7564,
"step": 176300
},
{
"epoch": 1.28,
"learning_rate": 7.2520939113830664e-06,
"loss": 2.7572,
"step": 176400
},
{
"epoch": 1.28,
"learning_rate": 7.244861056864703e-06,
"loss": 2.754,
"step": 176500
},
{
"epoch": 1.28,
"learning_rate": 7.237628202346339e-06,
"loss": 2.7524,
"step": 176600
},
{
"epoch": 1.28,
"learning_rate": 7.230395347827974e-06,
"loss": 2.7537,
"step": 176700
},
{
"epoch": 1.28,
"learning_rate": 7.22316249330961e-06,
"loss": 2.7523,
"step": 176800
},
{
"epoch": 1.28,
"learning_rate": 7.215929638791246e-06,
"loss": 2.7486,
"step": 176900
},
{
"epoch": 1.28,
"learning_rate": 7.208696784272881e-06,
"loss": 2.7489,
"step": 177000
},
{
"epoch": 1.28,
"eval_accuracy": 0.4598288737058914,
"eval_loss": 2.76901912689209,
"eval_runtime": 29.8199,
"eval_samples_per_second": 217.405,
"eval_steps_per_second": 2.28,
"step": 177000
},
{
"epoch": 1.28,
"learning_rate": 7.201463929754518e-06,
"loss": 2.7491,
"step": 177100
},
{
"epoch": 1.28,
"learning_rate": 7.1942310752361535e-06,
"loss": 2.7486,
"step": 177200
},
{
"epoch": 1.28,
"learning_rate": 7.186998220717789e-06,
"loss": 2.7605,
"step": 177300
},
{
"epoch": 1.28,
"learning_rate": 7.179765366199425e-06,
"loss": 2.7506,
"step": 177400
},
{
"epoch": 1.28,
"learning_rate": 7.1725325116810605e-06,
"loss": 2.7473,
"step": 177500
},
{
"epoch": 1.28,
"learning_rate": 7.165299657162696e-06,
"loss": 2.7484,
"step": 177600
},
{
"epoch": 1.29,
"learning_rate": 7.158066802644333e-06,
"loss": 2.7501,
"step": 177700
},
{
"epoch": 1.29,
"learning_rate": 7.150833948125968e-06,
"loss": 2.7492,
"step": 177800
},
{
"epoch": 1.29,
"learning_rate": 7.143673422152787e-06,
"loss": 2.7521,
"step": 177900
},
{
"epoch": 1.29,
"learning_rate": 7.136440567634423e-06,
"loss": 2.7491,
"step": 178000
},
{
"epoch": 1.29,
"eval_accuracy": 0.4601452365613838,
"eval_loss": 2.7686147689819336,
"eval_runtime": 29.3757,
"eval_samples_per_second": 220.693,
"eval_steps_per_second": 2.315,
"step": 178000
},
{
"epoch": 1.29,
"learning_rate": 7.1292077131160585e-06,
"loss": 2.7487,
"step": 178100
},
{
"epoch": 1.29,
"learning_rate": 7.121974858597694e-06,
"loss": 2.748,
"step": 178200
},
{
"epoch": 1.29,
"learning_rate": 7.11474200407933e-06,
"loss": 2.7602,
"step": 178300
},
{
"epoch": 1.29,
"learning_rate": 7.107509149560966e-06,
"loss": 2.7475,
"step": 178400
},
{
"epoch": 1.29,
"learning_rate": 7.100276295042602e-06,
"loss": 2.7534,
"step": 178500
},
{
"epoch": 1.29,
"learning_rate": 7.093043440524238e-06,
"loss": 2.7507,
"step": 178600
},
{
"epoch": 1.29,
"learning_rate": 7.085810586005873e-06,
"loss": 2.7415,
"step": 178700
},
{
"epoch": 1.29,
"learning_rate": 7.078577731487509e-06,
"loss": 2.7544,
"step": 178800
},
{
"epoch": 1.29,
"learning_rate": 7.071344876969145e-06,
"loss": 2.7539,
"step": 178900
},
{
"epoch": 1.29,
"learning_rate": 7.064112022450781e-06,
"loss": 2.7432,
"step": 179000
},
{
"epoch": 1.29,
"eval_accuracy": 0.46003030550489515,
"eval_loss": 2.768362283706665,
"eval_runtime": 29.3614,
"eval_samples_per_second": 220.8,
"eval_steps_per_second": 2.316,
"step": 179000
},
{
"epoch": 1.3,
"learning_rate": 7.056879167932417e-06,
"loss": 2.74,
"step": 179100
},
{
"epoch": 1.3,
"learning_rate": 7.0496463134140526e-06,
"loss": 2.7498,
"step": 179200
},
{
"epoch": 1.3,
"learning_rate": 7.042413458895688e-06,
"loss": 2.7522,
"step": 179300
},
{
"epoch": 1.3,
"learning_rate": 7.035180604377324e-06,
"loss": 2.7575,
"step": 179400
},
{
"epoch": 1.3,
"learning_rate": 7.0279477498589596e-06,
"loss": 2.7505,
"step": 179500
},
{
"epoch": 1.3,
"learning_rate": 7.020714895340596e-06,
"loss": 2.7489,
"step": 179600
},
{
"epoch": 1.3,
"learning_rate": 7.013482040822232e-06,
"loss": 2.7459,
"step": 179700
},
{
"epoch": 1.3,
"learning_rate": 7.006321514849051e-06,
"loss": 2.7523,
"step": 179800
},
{
"epoch": 1.3,
"learning_rate": 6.999088660330686e-06,
"loss": 2.7475,
"step": 179900
},
{
"epoch": 1.3,
"learning_rate": 6.991855805812322e-06,
"loss": 2.7388,
"step": 180000
},
{
"epoch": 1.3,
"eval_accuracy": 0.460155519866438,
"eval_loss": 2.768120050430298,
"eval_runtime": 29.4147,
"eval_samples_per_second": 220.4,
"eval_steps_per_second": 2.312,
"step": 180000
},
{
"epoch": 1.3,
"learning_rate": 6.984622951293958e-06,
"loss": 2.7413,
"step": 180100
},
{
"epoch": 1.3,
"learning_rate": 6.977390096775593e-06,
"loss": 2.743,
"step": 180200
},
{
"epoch": 1.3,
"learning_rate": 6.97015724225723e-06,
"loss": 2.7562,
"step": 180300
},
{
"epoch": 1.3,
"learning_rate": 6.9629243877388655e-06,
"loss": 2.7498,
"step": 180400
},
{
"epoch": 1.31,
"learning_rate": 6.955691533220501e-06,
"loss": 2.7485,
"step": 180500
},
{
"epoch": 1.31,
"learning_rate": 6.948458678702137e-06,
"loss": 2.7551,
"step": 180600
},
{
"epoch": 1.31,
"learning_rate": 6.9412258241837725e-06,
"loss": 2.7558,
"step": 180700
},
{
"epoch": 1.31,
"learning_rate": 6.933992969665408e-06,
"loss": 2.7451,
"step": 180800
},
{
"epoch": 1.31,
"learning_rate": 6.926760115147045e-06,
"loss": 2.7498,
"step": 180900
},
{
"epoch": 1.31,
"learning_rate": 6.91952726062868e-06,
"loss": 2.7501,
"step": 181000
},
{
"epoch": 1.31,
"eval_accuracy": 0.46024988431281816,
"eval_loss": 2.767861843109131,
"eval_runtime": 30.1014,
"eval_samples_per_second": 215.372,
"eval_steps_per_second": 2.259,
"step": 181000
},
{
"epoch": 1.31,
"learning_rate": 6.912294406110316e-06,
"loss": 2.7455,
"step": 181100
},
{
"epoch": 1.31,
"learning_rate": 6.905061551591952e-06,
"loss": 2.7505,
"step": 181200
},
{
"epoch": 1.31,
"learning_rate": 6.897828697073587e-06,
"loss": 2.7485,
"step": 181300
},
{
"epoch": 1.31,
"learning_rate": 6.890595842555223e-06,
"loss": 2.7509,
"step": 181400
},
{
"epoch": 1.31,
"learning_rate": 6.8833629880368595e-06,
"loss": 2.7526,
"step": 181500
},
{
"epoch": 1.31,
"learning_rate": 6.876130133518495e-06,
"loss": 2.7452,
"step": 181600
},
{
"epoch": 1.31,
"learning_rate": 6.868897279000131e-06,
"loss": 2.7528,
"step": 181700
},
{
"epoch": 1.31,
"learning_rate": 6.8616644244817665e-06,
"loss": 2.7542,
"step": 181800
},
{
"epoch": 1.32,
"learning_rate": 6.854431569963402e-06,
"loss": 2.7439,
"step": 181900
},
{
"epoch": 1.32,
"learning_rate": 6.847198715445038e-06,
"loss": 2.7526,
"step": 182000
},
{
"epoch": 1.32,
"eval_accuracy": 0.4602668215211428,
"eval_loss": 2.767512798309326,
"eval_runtime": 29.7082,
"eval_samples_per_second": 218.222,
"eval_steps_per_second": 2.289,
"step": 182000
},
{
"epoch": 1.32,
"learning_rate": 6.8399658609266735e-06,
"loss": 2.7462,
"step": 182100
},
{
"epoch": 1.32,
"learning_rate": 6.832877663498677e-06,
"loss": 2.7469,
"step": 182200
},
{
"epoch": 1.32,
"learning_rate": 6.825644808980313e-06,
"loss": 2.7407,
"step": 182300
},
{
"epoch": 1.32,
"learning_rate": 6.818411954461949e-06,
"loss": 2.751,
"step": 182400
},
{
"epoch": 1.32,
"learning_rate": 6.811179099943584e-06,
"loss": 2.7415,
"step": 182500
},
{
"epoch": 1.32,
"learning_rate": 6.80394624542522e-06,
"loss": 2.7469,
"step": 182600
},
{
"epoch": 1.32,
"learning_rate": 6.7967133909068565e-06,
"loss": 2.7459,
"step": 182700
},
{
"epoch": 1.32,
"learning_rate": 6.789480536388492e-06,
"loss": 2.7417,
"step": 182800
},
{
"epoch": 1.32,
"learning_rate": 6.782247681870128e-06,
"loss": 2.7458,
"step": 182900
},
{
"epoch": 1.32,
"learning_rate": 6.7750148273517635e-06,
"loss": 2.7478,
"step": 183000
},
{
"epoch": 1.32,
"eval_accuracy": 0.46032186744819786,
"eval_loss": 2.7674057483673096,
"eval_runtime": 30.0098,
"eval_samples_per_second": 216.029,
"eval_steps_per_second": 2.266,
"step": 183000
},
{
"epoch": 1.32,
"learning_rate": 6.767781972833399e-06,
"loss": 2.7386,
"step": 183100
},
{
"epoch": 1.33,
"learning_rate": 6.760549118315035e-06,
"loss": 2.7463,
"step": 183200
},
{
"epoch": 1.33,
"learning_rate": 6.753316263796671e-06,
"loss": 2.7496,
"step": 183300
},
{
"epoch": 1.33,
"learning_rate": 6.74615573782349e-06,
"loss": 2.7441,
"step": 183400
},
{
"epoch": 1.33,
"learning_rate": 6.738922883305126e-06,
"loss": 2.752,
"step": 183500
},
{
"epoch": 1.33,
"learning_rate": 6.7316900287867616e-06,
"loss": 2.753,
"step": 183600
},
{
"epoch": 1.33,
"learning_rate": 6.724457174268397e-06,
"loss": 2.7478,
"step": 183700
},
{
"epoch": 1.33,
"learning_rate": 6.717224319750033e-06,
"loss": 2.7465,
"step": 183800
},
{
"epoch": 1.33,
"learning_rate": 6.7099914652316686e-06,
"loss": 2.7441,
"step": 183900
},
{
"epoch": 1.33,
"learning_rate": 6.702758610713305e-06,
"loss": 2.7491,
"step": 184000
},
{
"epoch": 1.33,
"eval_accuracy": 0.46035937126663096,
"eval_loss": 2.767024040222168,
"eval_runtime": 29.3058,
"eval_samples_per_second": 221.219,
"eval_steps_per_second": 2.32,
"step": 184000
},
{
"epoch": 1.33,
"learning_rate": 6.695525756194941e-06,
"loss": 2.7492,
"step": 184100
},
{
"epoch": 1.33,
"learning_rate": 6.688292901676576e-06,
"loss": 2.7514,
"step": 184200
},
{
"epoch": 1.33,
"learning_rate": 6.681060047158212e-06,
"loss": 2.7474,
"step": 184300
},
{
"epoch": 1.33,
"learning_rate": 6.673827192639848e-06,
"loss": 2.7496,
"step": 184400
},
{
"epoch": 1.33,
"learning_rate": 6.666594338121483e-06,
"loss": 2.7429,
"step": 184500
},
{
"epoch": 1.34,
"learning_rate": 6.65936148360312e-06,
"loss": 2.7572,
"step": 184600
},
{
"epoch": 1.34,
"learning_rate": 6.652128629084756e-06,
"loss": 2.7417,
"step": 184700
},
{
"epoch": 1.34,
"learning_rate": 6.644895774566391e-06,
"loss": 2.7459,
"step": 184800
},
{
"epoch": 1.34,
"learning_rate": 6.637662920048027e-06,
"loss": 2.7471,
"step": 184900
},
{
"epoch": 1.34,
"learning_rate": 6.630430065529663e-06,
"loss": 2.7505,
"step": 185000
},
{
"epoch": 1.34,
"eval_accuracy": 0.46044466220855146,
"eval_loss": 2.7669997215270996,
"eval_runtime": 30.3971,
"eval_samples_per_second": 213.277,
"eval_steps_per_second": 2.237,
"step": 185000
},
{
"epoch": 1.34,
"learning_rate": 6.623197211011298e-06,
"loss": 2.7449,
"step": 185100
},
{
"epoch": 1.34,
"learning_rate": 6.615964356492934e-06,
"loss": 2.7466,
"step": 185200
},
{
"epoch": 1.34,
"learning_rate": 6.6087315019745704e-06,
"loss": 2.7491,
"step": 185300
},
{
"epoch": 1.34,
"learning_rate": 6.601498647456206e-06,
"loss": 2.753,
"step": 185400
},
{
"epoch": 1.34,
"learning_rate": 6.594265792937842e-06,
"loss": 2.7509,
"step": 185500
},
{
"epoch": 1.34,
"learning_rate": 6.5870329384194774e-06,
"loss": 2.7459,
"step": 185600
},
{
"epoch": 1.34,
"learning_rate": 6.579800083901113e-06,
"loss": 2.7465,
"step": 185700
},
{
"epoch": 1.34,
"learning_rate": 6.572567229382749e-06,
"loss": 2.7477,
"step": 185800
},
{
"epoch": 1.34,
"learning_rate": 6.565334374864385e-06,
"loss": 2.7506,
"step": 185900
},
{
"epoch": 1.35,
"learning_rate": 6.558101520346021e-06,
"loss": 2.7436,
"step": 186000
},
{
"epoch": 1.35,
"eval_accuracy": 0.46046341411776803,
"eval_loss": 2.766613245010376,
"eval_runtime": 30.3071,
"eval_samples_per_second": 213.91,
"eval_steps_per_second": 2.244,
"step": 186000
},
{
"epoch": 1.35,
"learning_rate": 6.550868665827657e-06,
"loss": 2.7414,
"step": 186100
},
{
"epoch": 1.35,
"learning_rate": 6.543635811309292e-06,
"loss": 2.7517,
"step": 186200
},
{
"epoch": 1.35,
"learning_rate": 6.536402956790928e-06,
"loss": 2.7479,
"step": 186300
},
{
"epoch": 1.35,
"learning_rate": 6.529170102272563e-06,
"loss": 2.7473,
"step": 186400
},
{
"epoch": 1.35,
"learning_rate": 6.5220095762993825e-06,
"loss": 2.7451,
"step": 186500
},
{
"epoch": 1.35,
"learning_rate": 6.514776721781019e-06,
"loss": 2.7426,
"step": 186600
},
{
"epoch": 1.35,
"learning_rate": 6.507543867262655e-06,
"loss": 2.7463,
"step": 186700
},
{
"epoch": 1.35,
"learning_rate": 6.50031101274429e-06,
"loss": 2.7519,
"step": 186800
},
{
"epoch": 1.35,
"learning_rate": 6.493078158225926e-06,
"loss": 2.7468,
"step": 186900
},
{
"epoch": 1.35,
"learning_rate": 6.485845303707562e-06,
"loss": 2.7389,
"step": 187000
},
{
"epoch": 1.35,
"eval_accuracy": 0.4603109792428463,
"eval_loss": 2.7665233612060547,
"eval_runtime": 30.7223,
"eval_samples_per_second": 211.019,
"eval_steps_per_second": 2.213,
"step": 187000
},
{
"epoch": 1.35,
"learning_rate": 6.478612449189197e-06,
"loss": 2.7471,
"step": 187100
},
{
"epoch": 1.35,
"learning_rate": 6.471379594670834e-06,
"loss": 2.7468,
"step": 187200
},
{
"epoch": 1.35,
"learning_rate": 6.4641467401524695e-06,
"loss": 2.7295,
"step": 187300
},
{
"epoch": 1.36,
"learning_rate": 6.456913885634105e-06,
"loss": 2.7394,
"step": 187400
},
{
"epoch": 1.36,
"learning_rate": 6.449681031115741e-06,
"loss": 2.7495,
"step": 187500
},
{
"epoch": 1.36,
"learning_rate": 6.44252050514256e-06,
"loss": 2.7429,
"step": 187600
},
{
"epoch": 1.36,
"learning_rate": 6.4352876506241954e-06,
"loss": 2.7461,
"step": 187700
},
{
"epoch": 1.36,
"learning_rate": 6.428054796105831e-06,
"loss": 2.7471,
"step": 187800
},
{
"epoch": 1.36,
"learning_rate": 6.420894270132652e-06,
"loss": 2.75,
"step": 187900
},
{
"epoch": 1.36,
"learning_rate": 6.413661415614287e-06,
"loss": 2.7564,
"step": 188000
},
{
"epoch": 1.36,
"eval_accuracy": 0.4603660251699014,
"eval_loss": 2.7662112712860107,
"eval_runtime": 28.077,
"eval_samples_per_second": 230.901,
"eval_steps_per_second": 2.422,
"step": 188000
},
{
"epoch": 1.36,
"learning_rate": 6.406428561095923e-06,
"loss": 2.7547,
"step": 188100
},
{
"epoch": 1.36,
"learning_rate": 6.399195706577559e-06,
"loss": 2.7463,
"step": 188200
},
{
"epoch": 1.36,
"learning_rate": 6.391962852059194e-06,
"loss": 2.7426,
"step": 188300
},
{
"epoch": 1.36,
"learning_rate": 6.384729997540829e-06,
"loss": 2.7424,
"step": 188400
},
{
"epoch": 1.36,
"learning_rate": 6.377497143022465e-06,
"loss": 2.7434,
"step": 188500
},
{
"epoch": 1.36,
"learning_rate": 6.370264288504101e-06,
"loss": 2.7483,
"step": 188600
},
{
"epoch": 1.36,
"learning_rate": 6.363031433985737e-06,
"loss": 2.752,
"step": 188700
},
{
"epoch": 1.37,
"learning_rate": 6.355798579467373e-06,
"loss": 2.7474,
"step": 188800
},
{
"epoch": 1.37,
"learning_rate": 6.348565724949008e-06,
"loss": 2.7408,
"step": 188900
},
{
"epoch": 1.37,
"learning_rate": 6.341332870430644e-06,
"loss": 2.7464,
"step": 189000
},
{
"epoch": 1.37,
"eval_accuracy": 0.4603968750850641,
"eval_loss": 2.76613712310791,
"eval_runtime": 28.0132,
"eval_samples_per_second": 231.427,
"eval_steps_per_second": 2.427,
"step": 189000
},
{
"epoch": 1.37,
"learning_rate": 6.33410001591228e-06,
"loss": 2.7481,
"step": 189100
},
{
"epoch": 1.37,
"learning_rate": 6.3269394899391e-06,
"loss": 2.7505,
"step": 189200
},
{
"epoch": 1.37,
"learning_rate": 6.319706635420736e-06,
"loss": 2.7506,
"step": 189300
},
{
"epoch": 1.37,
"learning_rate": 6.312473780902372e-06,
"loss": 2.7421,
"step": 189400
},
{
"epoch": 1.37,
"learning_rate": 6.305240926384007e-06,
"loss": 2.7442,
"step": 189500
},
{
"epoch": 1.37,
"learning_rate": 6.298008071865643e-06,
"loss": 2.739,
"step": 189600
},
{
"epoch": 1.37,
"learning_rate": 6.2907752173472795e-06,
"loss": 2.7498,
"step": 189700
},
{
"epoch": 1.37,
"learning_rate": 6.283542362828915e-06,
"loss": 2.7451,
"step": 189800
},
{
"epoch": 1.37,
"learning_rate": 6.276309508310551e-06,
"loss": 2.7447,
"step": 189900
},
{
"epoch": 1.37,
"learning_rate": 6.2690766537921865e-06,
"loss": 2.7459,
"step": 190000
},
{
"epoch": 1.37,
"eval_accuracy": 0.4604603896162815,
"eval_loss": 2.7658865451812744,
"eval_runtime": 28.2461,
"eval_samples_per_second": 229.518,
"eval_steps_per_second": 2.407,
"step": 190000
},
{
"epoch": 1.37,
"learning_rate": 6.261843799273822e-06,
"loss": 2.7398,
"step": 190100
},
{
"epoch": 1.38,
"learning_rate": 6.254610944755458e-06,
"loss": 2.7458,
"step": 190200
},
{
"epoch": 1.38,
"learning_rate": 6.247378090237094e-06,
"loss": 2.7484,
"step": 190300
},
{
"epoch": 1.38,
"learning_rate": 6.24014523571873e-06,
"loss": 2.7425,
"step": 190400
},
{
"epoch": 1.38,
"learning_rate": 6.232984709745549e-06,
"loss": 2.7482,
"step": 190500
},
{
"epoch": 1.38,
"learning_rate": 6.2257518552271845e-06,
"loss": 2.7479,
"step": 190600
},
{
"epoch": 1.38,
"learning_rate": 6.21851900070882e-06,
"loss": 2.7382,
"step": 190700
},
{
"epoch": 1.38,
"learning_rate": 6.211286146190456e-06,
"loss": 2.7414,
"step": 190800
},
{
"epoch": 1.38,
"learning_rate": 6.2040532916720915e-06,
"loss": 2.7534,
"step": 190900
},
{
"epoch": 1.38,
"learning_rate": 6.196820437153728e-06,
"loss": 2.7481,
"step": 191000
},
{
"epoch": 1.38,
"eval_accuracy": 0.4605136208424446,
"eval_loss": 2.765713691711426,
"eval_runtime": 28.3745,
"eval_samples_per_second": 228.48,
"eval_steps_per_second": 2.397,
"step": 191000
},
{
"epoch": 1.38,
"learning_rate": 6.189587582635364e-06,
"loss": 2.7396,
"step": 191100
},
{
"epoch": 1.38,
"learning_rate": 6.182354728116999e-06,
"loss": 2.7452,
"step": 191200
},
{
"epoch": 1.38,
"learning_rate": 6.175121873598635e-06,
"loss": 2.7406,
"step": 191300
},
{
"epoch": 1.38,
"learning_rate": 6.167889019080271e-06,
"loss": 2.7457,
"step": 191400
},
{
"epoch": 1.39,
"learning_rate": 6.160656164561906e-06,
"loss": 2.758,
"step": 191500
},
{
"epoch": 1.39,
"learning_rate": 6.153423310043543e-06,
"loss": 2.7454,
"step": 191600
},
{
"epoch": 1.39,
"learning_rate": 6.1461904555251785e-06,
"loss": 2.7408,
"step": 191700
},
{
"epoch": 1.39,
"learning_rate": 6.138957601006814e-06,
"loss": 2.7432,
"step": 191800
},
{
"epoch": 1.39,
"learning_rate": 6.131797075033633e-06,
"loss": 2.7435,
"step": 191900
},
{
"epoch": 1.39,
"learning_rate": 6.124564220515269e-06,
"loss": 2.7458,
"step": 192000
},
{
"epoch": 1.39,
"eval_accuracy": 0.46044345240795687,
"eval_loss": 2.765500783920288,
"eval_runtime": 32.8692,
"eval_samples_per_second": 197.236,
"eval_steps_per_second": 2.069,
"step": 192000
},
{
"epoch": 1.39,
"learning_rate": 6.1173313659969044e-06,
"loss": 2.7541,
"step": 192100
},
{
"epoch": 1.39,
"learning_rate": 6.11009851147854e-06,
"loss": 2.743,
"step": 192200
},
{
"epoch": 1.39,
"learning_rate": 6.102865656960177e-06,
"loss": 2.752,
"step": 192300
},
{
"epoch": 1.39,
"learning_rate": 6.095632802441812e-06,
"loss": 2.7452,
"step": 192400
},
{
"epoch": 1.39,
"learning_rate": 6.088399947923448e-06,
"loss": 2.7477,
"step": 192500
},
{
"epoch": 1.39,
"learning_rate": 6.081167093405084e-06,
"loss": 2.7538,
"step": 192600
},
{
"epoch": 1.39,
"learning_rate": 6.073934238886719e-06,
"loss": 2.7416,
"step": 192700
},
{
"epoch": 1.39,
"learning_rate": 6.066701384368355e-06,
"loss": 2.7492,
"step": 192800
},
{
"epoch": 1.4,
"learning_rate": 6.0594685298499915e-06,
"loss": 2.7435,
"step": 192900
},
{
"epoch": 1.4,
"learning_rate": 6.052235675331627e-06,
"loss": 2.7427,
"step": 193000
},
{
"epoch": 1.4,
"eval_accuracy": 0.4605184600448231,
"eval_loss": 2.7652785778045654,
"eval_runtime": 31.9488,
"eval_samples_per_second": 202.919,
"eval_steps_per_second": 2.128,
"step": 193000
},
{
"epoch": 1.4,
"learning_rate": 6.045002820813263e-06,
"loss": 2.7376,
"step": 193100
},
{
"epoch": 1.4,
"learning_rate": 6.0377699662948985e-06,
"loss": 2.7449,
"step": 193200
},
{
"epoch": 1.4,
"learning_rate": 6.030537111776534e-06,
"loss": 2.7361,
"step": 193300
},
{
"epoch": 1.4,
"learning_rate": 6.02330425725817e-06,
"loss": 2.747,
"step": 193400
},
{
"epoch": 1.4,
"learning_rate": 6.016071402739806e-06,
"loss": 2.748,
"step": 193500
},
{
"epoch": 1.4,
"learning_rate": 6.008838548221442e-06,
"loss": 2.7475,
"step": 193600
},
{
"epoch": 1.4,
"learning_rate": 6.001605693703078e-06,
"loss": 2.7504,
"step": 193700
},
{
"epoch": 1.4,
"learning_rate": 5.994372839184713e-06,
"loss": 2.7381,
"step": 193800
},
{
"epoch": 1.4,
"learning_rate": 5.987212313211532e-06,
"loss": 2.7453,
"step": 193900
},
{
"epoch": 1.4,
"learning_rate": 5.980051787238352e-06,
"loss": 2.741,
"step": 194000
},
{
"epoch": 1.4,
"eval_accuracy": 0.4605601981653374,
"eval_loss": 2.7650601863861084,
"eval_runtime": 29.1266,
"eval_samples_per_second": 222.58,
"eval_steps_per_second": 2.335,
"step": 194000
},
{
"epoch": 1.4,
"learning_rate": 5.9728189327199885e-06,
"loss": 2.7422,
"step": 194100
},
{
"epoch": 1.4,
"learning_rate": 5.965586078201624e-06,
"loss": 2.7339,
"step": 194200
},
{
"epoch": 1.41,
"learning_rate": 5.95835322368326e-06,
"loss": 2.746,
"step": 194300
},
{
"epoch": 1.41,
"learning_rate": 5.9511203691648955e-06,
"loss": 2.7438,
"step": 194400
},
{
"epoch": 1.41,
"learning_rate": 5.943887514646531e-06,
"loss": 2.749,
"step": 194500
},
{
"epoch": 1.41,
"learning_rate": 5.936654660128167e-06,
"loss": 2.7492,
"step": 194600
},
{
"epoch": 1.41,
"learning_rate": 5.929494134154986e-06,
"loss": 2.7466,
"step": 194700
},
{
"epoch": 1.41,
"learning_rate": 5.922261279636622e-06,
"loss": 2.7385,
"step": 194800
},
{
"epoch": 1.41,
"learning_rate": 5.915028425118258e-06,
"loss": 2.7399,
"step": 194900
},
{
"epoch": 1.41,
"learning_rate": 5.9077955705998935e-06,
"loss": 2.7488,
"step": 195000
},
{
"epoch": 1.41,
"eval_accuracy": 0.4605995166846624,
"eval_loss": 2.7648675441741943,
"eval_runtime": 29.4441,
"eval_samples_per_second": 220.18,
"eval_steps_per_second": 2.309,
"step": 195000
},
{
"epoch": 1.41,
"learning_rate": 5.900562716081529e-06,
"loss": 2.7498,
"step": 195100
},
{
"epoch": 1.41,
"learning_rate": 5.893329861563165e-06,
"loss": 2.746,
"step": 195200
},
{
"epoch": 1.41,
"learning_rate": 5.8860970070448005e-06,
"loss": 2.7481,
"step": 195300
},
{
"epoch": 1.41,
"learning_rate": 5.878864152526437e-06,
"loss": 2.7438,
"step": 195400
},
{
"epoch": 1.41,
"learning_rate": 5.871631298008073e-06,
"loss": 2.7449,
"step": 195500
},
{
"epoch": 1.41,
"learning_rate": 5.864398443489708e-06,
"loss": 2.7426,
"step": 195600
},
{
"epoch": 1.42,
"learning_rate": 5.857165588971344e-06,
"loss": 2.744,
"step": 195700
},
{
"epoch": 1.42,
"learning_rate": 5.84993273445298e-06,
"loss": 2.7455,
"step": 195800
},
{
"epoch": 1.42,
"learning_rate": 5.842699879934615e-06,
"loss": 2.7543,
"step": 195900
},
{
"epoch": 1.42,
"learning_rate": 5.835467025416252e-06,
"loss": 2.7353,
"step": 196000
},
{
"epoch": 1.42,
"eval_accuracy": 0.4605263237486881,
"eval_loss": 2.7647223472595215,
"eval_runtime": 32.6984,
"eval_samples_per_second": 198.266,
"eval_steps_per_second": 2.08,
"step": 196000
},
{
"epoch": 1.42,
"learning_rate": 5.8282341708978876e-06,
"loss": 2.7424,
"step": 196100
},
{
"epoch": 1.42,
"learning_rate": 5.821001316379523e-06,
"loss": 2.7506,
"step": 196200
},
{
"epoch": 1.42,
"learning_rate": 5.813768461861159e-06,
"loss": 2.7501,
"step": 196300
},
{
"epoch": 1.42,
"learning_rate": 5.8065356073427946e-06,
"loss": 2.7455,
"step": 196400
},
{
"epoch": 1.42,
"learning_rate": 5.79930275282443e-06,
"loss": 2.7528,
"step": 196500
},
{
"epoch": 1.42,
"learning_rate": 5.792069898306067e-06,
"loss": 2.7473,
"step": 196600
},
{
"epoch": 1.42,
"learning_rate": 5.784909372332886e-06,
"loss": 2.7343,
"step": 196700
},
{
"epoch": 1.42,
"learning_rate": 5.777676517814521e-06,
"loss": 2.7428,
"step": 196800
},
{
"epoch": 1.42,
"learning_rate": 5.770443663296157e-06,
"loss": 2.7387,
"step": 196900
},
{
"epoch": 1.42,
"learning_rate": 5.763210808777793e-06,
"loss": 2.7503,
"step": 197000
},
{
"epoch": 1.42,
"eval_accuracy": 0.46069085662955606,
"eval_loss": 2.7644920349121094,
"eval_runtime": 29.638,
"eval_samples_per_second": 218.74,
"eval_steps_per_second": 2.294,
"step": 197000
},
{
"epoch": 1.43,
"learning_rate": 5.755977954259428e-06,
"loss": 2.7531,
"step": 197100
},
{
"epoch": 1.43,
"learning_rate": 5.748745099741064e-06,
"loss": 2.7383,
"step": 197200
},
{
"epoch": 1.43,
"learning_rate": 5.7415122452227005e-06,
"loss": 2.7487,
"step": 197300
},
{
"epoch": 1.43,
"learning_rate": 5.734279390704336e-06,
"loss": 2.7425,
"step": 197400
},
{
"epoch": 1.43,
"learning_rate": 5.727046536185972e-06,
"loss": 2.7485,
"step": 197500
},
{
"epoch": 1.43,
"learning_rate": 5.7198136816676075e-06,
"loss": 2.7438,
"step": 197600
},
{
"epoch": 1.43,
"learning_rate": 5.712653155694426e-06,
"loss": 2.7458,
"step": 197700
},
{
"epoch": 1.43,
"learning_rate": 5.705420301176062e-06,
"loss": 2.7499,
"step": 197800
},
{
"epoch": 1.43,
"learning_rate": 5.698187446657698e-06,
"loss": 2.7384,
"step": 197900
},
{
"epoch": 1.43,
"learning_rate": 5.690954592139334e-06,
"loss": 2.7446,
"step": 198000
},
{
"epoch": 1.43,
"eval_accuracy": 0.46069690563252913,
"eval_loss": 2.7643613815307617,
"eval_runtime": 29.4958,
"eval_samples_per_second": 219.794,
"eval_steps_per_second": 2.305,
"step": 198000
},
{
"epoch": 1.43,
"learning_rate": 5.68372173762097e-06,
"loss": 2.7482,
"step": 198100
},
{
"epoch": 1.43,
"learning_rate": 5.6764888831026055e-06,
"loss": 2.745,
"step": 198200
},
{
"epoch": 1.43,
"learning_rate": 5.669256028584241e-06,
"loss": 2.743,
"step": 198300
},
{
"epoch": 1.43,
"learning_rate": 5.662023174065877e-06,
"loss": 2.7392,
"step": 198400
},
{
"epoch": 1.44,
"learning_rate": 5.6547903195475125e-06,
"loss": 2.7378,
"step": 198500
},
{
"epoch": 1.44,
"learning_rate": 5.647557465029149e-06,
"loss": 2.747,
"step": 198600
},
{
"epoch": 1.44,
"learning_rate": 5.640324610510785e-06,
"loss": 2.7432,
"step": 198700
},
{
"epoch": 1.44,
"learning_rate": 5.6331640845376045e-06,
"loss": 2.7402,
"step": 198800
},
{
"epoch": 1.44,
"learning_rate": 5.62593123001924e-06,
"loss": 2.7439,
"step": 198900
},
{
"epoch": 1.44,
"learning_rate": 5.618698375500876e-06,
"loss": 2.748,
"step": 199000
},
{
"epoch": 1.44,
"eval_accuracy": 0.460747717257503,
"eval_loss": 2.764165163040161,
"eval_runtime": 31.2501,
"eval_samples_per_second": 207.455,
"eval_steps_per_second": 2.176,
"step": 199000
},
{
"epoch": 1.44,
"learning_rate": 5.611465520982512e-06,
"loss": 2.7423,
"step": 199100
},
{
"epoch": 1.44,
"learning_rate": 5.604232666464146e-06,
"loss": 2.7383,
"step": 199200
},
{
"epoch": 1.44,
"learning_rate": 5.596999811945783e-06,
"loss": 2.748,
"step": 199300
},
{
"epoch": 1.44,
"learning_rate": 5.5897669574274185e-06,
"loss": 2.7418,
"step": 199400
},
{
"epoch": 1.44,
"learning_rate": 5.582534102909054e-06,
"loss": 2.7437,
"step": 199500
},
{
"epoch": 1.44,
"learning_rate": 5.57530124839069e-06,
"loss": 2.7444,
"step": 199600
},
{
"epoch": 1.44,
"learning_rate": 5.5680683938723255e-06,
"loss": 2.7405,
"step": 199700
},
{
"epoch": 1.45,
"learning_rate": 5.560835539353961e-06,
"loss": 2.7555,
"step": 199800
},
{
"epoch": 1.45,
"learning_rate": 5.553602684835598e-06,
"loss": 2.7455,
"step": 199900
},
{
"epoch": 1.45,
"learning_rate": 5.546369830317233e-06,
"loss": 2.7394,
"step": 200000
},
{
"epoch": 1.45,
"eval_accuracy": 0.46070537423669145,
"eval_loss": 2.7640960216522217,
"eval_runtime": 29.999,
"eval_samples_per_second": 216.107,
"eval_steps_per_second": 2.267,
"step": 200000
},
{
"epoch": 1.45,
"learning_rate": 5.539136975798869e-06,
"loss": 2.7442,
"step": 200100
},
{
"epoch": 1.45,
"learning_rate": 5.531904121280505e-06,
"loss": 2.746,
"step": 200200
},
{
"epoch": 1.45,
"learning_rate": 5.524743595307324e-06,
"loss": 2.7457,
"step": 200300
},
{
"epoch": 1.45,
"learning_rate": 5.517510740788961e-06,
"loss": 2.738,
"step": 200400
},
{
"epoch": 1.45,
"learning_rate": 5.5102778862705966e-06,
"loss": 2.7424,
"step": 200500
},
{
"epoch": 1.45,
"learning_rate": 5.503045031752232e-06,
"loss": 2.7345,
"step": 200600
},
{
"epoch": 1.45,
"learning_rate": 5.495812177233868e-06,
"loss": 2.7399,
"step": 200700
},
{
"epoch": 1.45,
"learning_rate": 5.4885793227155036e-06,
"loss": 2.7494,
"step": 200800
},
{
"epoch": 1.45,
"learning_rate": 5.481346468197139e-06,
"loss": 2.7468,
"step": 200900
},
{
"epoch": 1.45,
"learning_rate": 5.474113613678776e-06,
"loss": 2.7403,
"step": 201000
},
{
"epoch": 1.45,
"eval_accuracy": 0.46072291634531337,
"eval_loss": 2.7638235092163086,
"eval_runtime": 29.4493,
"eval_samples_per_second": 220.141,
"eval_steps_per_second": 2.309,
"step": 201000
},
{
"epoch": 1.45,
"learning_rate": 5.466880759160411e-06,
"loss": 2.7505,
"step": 201100
},
{
"epoch": 1.46,
"learning_rate": 5.459647904642047e-06,
"loss": 2.7523,
"step": 201200
},
{
"epoch": 1.46,
"learning_rate": 5.452415050123683e-06,
"loss": 2.7507,
"step": 201300
},
{
"epoch": 1.46,
"learning_rate": 5.445182195605318e-06,
"loss": 2.7374,
"step": 201400
},
{
"epoch": 1.46,
"learning_rate": 5.437949341086953e-06,
"loss": 2.7449,
"step": 201500
},
{
"epoch": 1.46,
"learning_rate": 5.430788815113773e-06,
"loss": 2.739,
"step": 201600
},
{
"epoch": 1.46,
"learning_rate": 5.4235559605954095e-06,
"loss": 2.7527,
"step": 201700
},
{
"epoch": 1.46,
"learning_rate": 5.416323106077045e-06,
"loss": 2.7362,
"step": 201800
},
{
"epoch": 1.46,
"learning_rate": 5.409090251558681e-06,
"loss": 2.7489,
"step": 201900
},
{
"epoch": 1.46,
"learning_rate": 5.4018573970403165e-06,
"loss": 2.7467,
"step": 202000
},
{
"epoch": 1.46,
"eval_accuracy": 0.46072291634531337,
"eval_loss": 2.763704299926758,
"eval_runtime": 30.6951,
"eval_samples_per_second": 211.206,
"eval_steps_per_second": 2.215,
"step": 202000
},
{
"epoch": 1.46,
"learning_rate": 5.394624542521952e-06,
"loss": 2.7419,
"step": 202100
},
{
"epoch": 1.46,
"learning_rate": 5.387391688003588e-06,
"loss": 2.7384,
"step": 202200
},
{
"epoch": 1.46,
"learning_rate": 5.380158833485224e-06,
"loss": 2.7425,
"step": 202300
},
{
"epoch": 1.46,
"learning_rate": 5.37292597896686e-06,
"loss": 2.7346,
"step": 202400
},
{
"epoch": 1.46,
"learning_rate": 5.365693124448496e-06,
"loss": 2.7527,
"step": 202500
},
{
"epoch": 1.47,
"learning_rate": 5.358460269930131e-06,
"loss": 2.7396,
"step": 202600
},
{
"epoch": 1.47,
"learning_rate": 5.351227415411767e-06,
"loss": 2.7407,
"step": 202700
},
{
"epoch": 1.47,
"learning_rate": 5.343994560893403e-06,
"loss": 2.7501,
"step": 202800
},
{
"epoch": 1.47,
"learning_rate": 5.3368340349202216e-06,
"loss": 2.7392,
"step": 202900
},
{
"epoch": 1.47,
"learning_rate": 5.329601180401858e-06,
"loss": 2.7532,
"step": 203000
},
{
"epoch": 1.47,
"eval_accuracy": 0.4608082072872339,
"eval_loss": 2.7634613513946533,
"eval_runtime": 27.9391,
"eval_samples_per_second": 232.04,
"eval_steps_per_second": 2.434,
"step": 203000
},
{
"epoch": 1.47,
"learning_rate": 5.322368325883494e-06,
"loss": 2.738,
"step": 203100
},
{
"epoch": 1.47,
"learning_rate": 5.315135471365129e-06,
"loss": 2.7442,
"step": 203200
},
{
"epoch": 1.47,
"learning_rate": 5.307902616846765e-06,
"loss": 2.7478,
"step": 203300
},
{
"epoch": 1.47,
"learning_rate": 5.300669762328401e-06,
"loss": 2.7452,
"step": 203400
},
{
"epoch": 1.47,
"learning_rate": 5.293436907810036e-06,
"loss": 2.7407,
"step": 203500
},
{
"epoch": 1.47,
"learning_rate": 5.286204053291673e-06,
"loss": 2.7475,
"step": 203600
},
{
"epoch": 1.47,
"learning_rate": 5.278971198773309e-06,
"loss": 2.7537,
"step": 203700
},
{
"epoch": 1.47,
"learning_rate": 5.271738344254944e-06,
"loss": 2.7459,
"step": 203800
},
{
"epoch": 1.47,
"learning_rate": 5.26450548973658e-06,
"loss": 2.7525,
"step": 203900
},
{
"epoch": 1.48,
"learning_rate": 5.257344963763399e-06,
"loss": 2.7431,
"step": 204000
},
{
"epoch": 1.48,
"eval_accuracy": 0.4608662777157755,
"eval_loss": 2.763364553451538,
"eval_runtime": 32.4224,
"eval_samples_per_second": 199.954,
"eval_steps_per_second": 2.097,
"step": 204000
},
{
"epoch": 1.48,
"learning_rate": 5.2501121092450345e-06,
"loss": 2.7486,
"step": 204100
},
{
"epoch": 1.48,
"learning_rate": 5.24287925472667e-06,
"loss": 2.7437,
"step": 204200
},
{
"epoch": 1.48,
"learning_rate": 5.235646400208307e-06,
"loss": 2.7369,
"step": 204300
},
{
"epoch": 1.48,
"learning_rate": 5.228413545689942e-06,
"loss": 2.7487,
"step": 204400
},
{
"epoch": 1.48,
"learning_rate": 5.221180691171578e-06,
"loss": 2.7439,
"step": 204500
},
{
"epoch": 1.48,
"learning_rate": 5.213947836653214e-06,
"loss": 2.7435,
"step": 204600
},
{
"epoch": 1.48,
"learning_rate": 5.206714982134849e-06,
"loss": 2.7452,
"step": 204700
},
{
"epoch": 1.48,
"learning_rate": 5.199482127616485e-06,
"loss": 2.7409,
"step": 204800
},
{
"epoch": 1.48,
"learning_rate": 5.1922492730981215e-06,
"loss": 2.7448,
"step": 204900
},
{
"epoch": 1.48,
"learning_rate": 5.185016418579757e-06,
"loss": 2.7433,
"step": 205000
},
{
"epoch": 1.48,
"eval_accuracy": 0.4608082072872339,
"eval_loss": 2.7632086277008057,
"eval_runtime": 29.7969,
"eval_samples_per_second": 217.573,
"eval_steps_per_second": 2.282,
"step": 205000
},
{
"epoch": 1.48,
"learning_rate": 5.177855892606577e-06,
"loss": 2.7424,
"step": 205100
},
{
"epoch": 1.48,
"learning_rate": 5.1706230380882126e-06,
"loss": 2.7355,
"step": 205200
},
{
"epoch": 1.48,
"learning_rate": 5.163390183569848e-06,
"loss": 2.7338,
"step": 205300
},
{
"epoch": 1.49,
"learning_rate": 5.156157329051485e-06,
"loss": 2.7346,
"step": 205400
},
{
"epoch": 1.49,
"learning_rate": 5.14892447453312e-06,
"loss": 2.7495,
"step": 205500
},
{
"epoch": 1.49,
"learning_rate": 5.141691620014756e-06,
"loss": 2.7364,
"step": 205600
},
{
"epoch": 1.49,
"learning_rate": 5.134458765496392e-06,
"loss": 2.7424,
"step": 205700
},
{
"epoch": 1.49,
"learning_rate": 5.1272259109780266e-06,
"loss": 2.74,
"step": 205800
},
{
"epoch": 1.49,
"learning_rate": 5.119993056459662e-06,
"loss": 2.748,
"step": 205900
},
{
"epoch": 1.49,
"learning_rate": 5.112760201941298e-06,
"loss": 2.7436,
"step": 206000
},
{
"epoch": 1.49,
"eval_accuracy": 0.4609152746398575,
"eval_loss": 2.762951135635376,
"eval_runtime": 29.8903,
"eval_samples_per_second": 216.893,
"eval_steps_per_second": 2.275,
"step": 206000
},
{
"epoch": 1.49,
"learning_rate": 5.1055273474229336e-06,
"loss": 2.7419,
"step": 206100
},
{
"epoch": 1.49,
"learning_rate": 5.09829449290457e-06,
"loss": 2.7369,
"step": 206200
},
{
"epoch": 1.49,
"learning_rate": 5.091061638386206e-06,
"loss": 2.7427,
"step": 206300
},
{
"epoch": 1.49,
"learning_rate": 5.0839011124130255e-06,
"loss": 2.747,
"step": 206400
},
{
"epoch": 1.49,
"learning_rate": 5.076668257894661e-06,
"loss": 2.7469,
"step": 206500
},
{
"epoch": 1.49,
"learning_rate": 5.069435403376297e-06,
"loss": 2.7459,
"step": 206600
},
{
"epoch": 1.5,
"learning_rate": 5.062202548857933e-06,
"loss": 2.7446,
"step": 206700
},
{
"epoch": 1.5,
"learning_rate": 5.054969694339569e-06,
"loss": 2.7419,
"step": 206800
},
{
"epoch": 1.5,
"learning_rate": 5.047736839821205e-06,
"loss": 2.749,
"step": 206900
},
{
"epoch": 1.5,
"learning_rate": 5.04050398530284e-06,
"loss": 2.747,
"step": 207000
},
{
"epoch": 1.5,
"eval_accuracy": 0.46086809241666743,
"eval_loss": 2.7627713680267334,
"eval_runtime": 29.635,
"eval_samples_per_second": 218.761,
"eval_steps_per_second": 2.295,
"step": 207000
},
{
"epoch": 1.5,
"learning_rate": 5.033271130784476e-06,
"loss": 2.7381,
"step": 207100
},
{
"epoch": 1.5,
"learning_rate": 5.026038276266112e-06,
"loss": 2.7322,
"step": 207200
},
{
"epoch": 1.5,
"learning_rate": 5.018805421747748e-06,
"loss": 2.7446,
"step": 207300
},
{
"epoch": 1.5,
"learning_rate": 5.011572567229384e-06,
"loss": 2.7473,
"step": 207400
},
{
"epoch": 1.5,
"learning_rate": 5.0043397127110195e-06,
"loss": 2.7449,
"step": 207500
},
{
"epoch": 1.5,
"learning_rate": 4.997106858192654e-06,
"loss": 2.7365,
"step": 207600
},
{
"epoch": 1.5,
"learning_rate": 4.98987400367429e-06,
"loss": 2.7431,
"step": 207700
},
{
"epoch": 1.5,
"learning_rate": 4.98271347770111e-06,
"loss": 2.7405,
"step": 207800
},
{
"epoch": 1.5,
"learning_rate": 4.975480623182745e-06,
"loss": 2.7495,
"step": 207900
},
{
"epoch": 1.5,
"learning_rate": 4.968247768664382e-06,
"loss": 2.7395,
"step": 208000
},
{
"epoch": 1.5,
"eval_accuracy": 0.4608783757217217,
"eval_loss": 2.7625892162323,
"eval_runtime": 28.0543,
"eval_samples_per_second": 231.088,
"eval_steps_per_second": 2.424,
"step": 208000
},
{
"epoch": 1.51,
"learning_rate": 4.961014914146018e-06,
"loss": 2.746,
"step": 208100
},
{
"epoch": 1.51,
"learning_rate": 4.953782059627653e-06,
"loss": 2.7362,
"step": 208200
},
{
"epoch": 1.51,
"learning_rate": 4.946549205109289e-06,
"loss": 2.7453,
"step": 208300
},
{
"epoch": 1.51,
"learning_rate": 4.939316350590925e-06,
"loss": 2.7409,
"step": 208400
},
{
"epoch": 1.51,
"learning_rate": 4.93208349607256e-06,
"loss": 2.7459,
"step": 208500
},
{
"epoch": 1.51,
"learning_rate": 4.924850641554196e-06,
"loss": 2.7428,
"step": 208600
},
{
"epoch": 1.51,
"learning_rate": 4.9176177870358324e-06,
"loss": 2.7507,
"step": 208700
},
{
"epoch": 1.51,
"learning_rate": 4.910384932517468e-06,
"loss": 2.7423,
"step": 208800
},
{
"epoch": 1.51,
"learning_rate": 4.903152077999104e-06,
"loss": 2.7494,
"step": 208900
},
{
"epoch": 1.51,
"learning_rate": 4.8959192234807394e-06,
"loss": 2.7443,
"step": 209000
},
{
"epoch": 1.51,
"eval_accuracy": 0.4609176942410467,
"eval_loss": 2.7624387741088867,
"eval_runtime": 30.0098,
"eval_samples_per_second": 216.029,
"eval_steps_per_second": 2.266,
"step": 209000
},
{
"epoch": 1.51,
"learning_rate": 4.888686368962375e-06,
"loss": 2.7409,
"step": 209100
},
{
"epoch": 1.51,
"learning_rate": 4.881525842989194e-06,
"loss": 2.7504,
"step": 209200
},
{
"epoch": 1.51,
"learning_rate": 4.8742929884708305e-06,
"loss": 2.7428,
"step": 209300
},
{
"epoch": 1.51,
"learning_rate": 4.867060133952466e-06,
"loss": 2.7458,
"step": 209400
},
{
"epoch": 1.52,
"learning_rate": 4.859827279434102e-06,
"loss": 2.7465,
"step": 209500
},
{
"epoch": 1.52,
"learning_rate": 4.8525944249157375e-06,
"loss": 2.7435,
"step": 209600
},
{
"epoch": 1.52,
"learning_rate": 4.845361570397373e-06,
"loss": 2.7431,
"step": 209700
},
{
"epoch": 1.52,
"learning_rate": 4.838128715879009e-06,
"loss": 2.7376,
"step": 209800
},
{
"epoch": 1.52,
"learning_rate": 4.8308958613606445e-06,
"loss": 2.7403,
"step": 209900
},
{
"epoch": 1.52,
"learning_rate": 4.823663006842281e-06,
"loss": 2.7395,
"step": 210000
},
{
"epoch": 1.52,
"eval_accuracy": 0.46079066517861195,
"eval_loss": 2.762295961380005,
"eval_runtime": 30.222,
"eval_samples_per_second": 214.513,
"eval_steps_per_second": 2.25,
"step": 210000
},
{
"epoch": 1.52,
"learning_rate": 4.816430152323917e-06,
"loss": 2.7402,
"step": 210100
},
{
"epoch": 1.52,
"learning_rate": 4.809197297805552e-06,
"loss": 2.7445,
"step": 210200
},
{
"epoch": 1.52,
"learning_rate": 4.802036771832372e-06,
"loss": 2.7432,
"step": 210300
},
{
"epoch": 1.52,
"learning_rate": 4.794803917314008e-06,
"loss": 2.7407,
"step": 210400
},
{
"epoch": 1.52,
"learning_rate": 4.7875710627956434e-06,
"loss": 2.7449,
"step": 210500
},
{
"epoch": 1.52,
"learning_rate": 4.780338208277279e-06,
"loss": 2.7481,
"step": 210600
},
{
"epoch": 1.52,
"learning_rate": 4.773105353758915e-06,
"loss": 2.7456,
"step": 210700
},
{
"epoch": 1.52,
"learning_rate": 4.7658724992405504e-06,
"loss": 2.7489,
"step": 210800
},
{
"epoch": 1.53,
"learning_rate": 4.75871197326737e-06,
"loss": 2.7345,
"step": 210900
},
{
"epoch": 1.53,
"learning_rate": 4.751479118749006e-06,
"loss": 2.7353,
"step": 211000
},
{
"epoch": 1.53,
"eval_accuracy": 0.4608324032991262,
"eval_loss": 2.7621333599090576,
"eval_runtime": 31.0021,
"eval_samples_per_second": 209.115,
"eval_steps_per_second": 2.193,
"step": 211000
},
{
"epoch": 1.53,
"learning_rate": 4.7442462642306415e-06,
"loss": 2.7442,
"step": 211100
},
{
"epoch": 1.53,
"learning_rate": 4.737013409712277e-06,
"loss": 2.7349,
"step": 211200
},
{
"epoch": 1.53,
"learning_rate": 4.729780555193913e-06,
"loss": 2.7421,
"step": 211300
},
{
"epoch": 1.53,
"learning_rate": 4.7225477006755485e-06,
"loss": 2.7441,
"step": 211400
},
{
"epoch": 1.53,
"learning_rate": 4.715314846157185e-06,
"loss": 2.74,
"step": 211500
},
{
"epoch": 1.53,
"learning_rate": 4.708081991638821e-06,
"loss": 2.7479,
"step": 211600
},
{
"epoch": 1.53,
"learning_rate": 4.700849137120456e-06,
"loss": 2.7426,
"step": 211700
},
{
"epoch": 1.53,
"learning_rate": 4.693616282602092e-06,
"loss": 2.7466,
"step": 211800
},
{
"epoch": 1.53,
"learning_rate": 4.686383428083728e-06,
"loss": 2.7438,
"step": 211900
},
{
"epoch": 1.53,
"learning_rate": 4.679150573565363e-06,
"loss": 2.7401,
"step": 212000
},
{
"epoch": 1.53,
"eval_accuracy": 0.46098302347315606,
"eval_loss": 2.7617835998535156,
"eval_runtime": 29.9041,
"eval_samples_per_second": 216.793,
"eval_steps_per_second": 2.274,
"step": 212000
},
{
"epoch": 1.53,
"learning_rate": 4.671917719046999e-06,
"loss": 2.7432,
"step": 212100
},
{
"epoch": 1.53,
"learning_rate": 4.6646848645286355e-06,
"loss": 2.7462,
"step": 212200
},
{
"epoch": 1.54,
"learning_rate": 4.657452010010271e-06,
"loss": 2.7348,
"step": 212300
},
{
"epoch": 1.54,
"learning_rate": 4.650219155491907e-06,
"loss": 2.7424,
"step": 212400
},
{
"epoch": 1.54,
"learning_rate": 4.6429863009735425e-06,
"loss": 2.7427,
"step": 212500
},
{
"epoch": 1.54,
"learning_rate": 4.635753446455178e-06,
"loss": 2.75,
"step": 212600
},
{
"epoch": 1.54,
"learning_rate": 4.628520591936814e-06,
"loss": 2.7498,
"step": 212700
},
{
"epoch": 1.54,
"learning_rate": 4.62128773741845e-06,
"loss": 2.7432,
"step": 212800
},
{
"epoch": 1.54,
"learning_rate": 4.614054882900086e-06,
"loss": 2.7424,
"step": 212900
},
{
"epoch": 1.54,
"learning_rate": 4.606894356926905e-06,
"loss": 2.7371,
"step": 213000
},
{
"epoch": 1.54,
"eval_accuracy": 0.4609551980594798,
"eval_loss": 2.761749744415283,
"eval_runtime": 30.3654,
"eval_samples_per_second": 213.5,
"eval_steps_per_second": 2.239,
"step": 213000
},
{
"epoch": 1.54,
"learning_rate": 4.5996615024085414e-06,
"loss": 2.7503,
"step": 213100
},
{
"epoch": 1.54,
"learning_rate": 4.592428647890177e-06,
"loss": 2.7367,
"step": 213200
},
{
"epoch": 1.54,
"learning_rate": 4.585195793371813e-06,
"loss": 2.7346,
"step": 213300
},
{
"epoch": 1.54,
"learning_rate": 4.577962938853448e-06,
"loss": 2.7493,
"step": 213400
},
{
"epoch": 1.54,
"learning_rate": 4.570730084335084e-06,
"loss": 2.748,
"step": 213500
},
{
"epoch": 1.54,
"learning_rate": 4.563569558361903e-06,
"loss": 2.7412,
"step": 213600
},
{
"epoch": 1.55,
"learning_rate": 4.5563367038435395e-06,
"loss": 2.7452,
"step": 213700
},
{
"epoch": 1.55,
"learning_rate": 4.549103849325175e-06,
"loss": 2.7362,
"step": 213800
},
{
"epoch": 1.55,
"learning_rate": 4.541870994806811e-06,
"loss": 2.7471,
"step": 213900
},
{
"epoch": 1.55,
"learning_rate": 4.5346381402884465e-06,
"loss": 2.7458,
"step": 214000
},
{
"epoch": 1.55,
"eval_accuracy": 0.46101931749099456,
"eval_loss": 2.7615652084350586,
"eval_runtime": 31.5152,
"eval_samples_per_second": 205.71,
"eval_steps_per_second": 2.158,
"step": 214000
},
{
"epoch": 1.55,
"learning_rate": 4.527405285770082e-06,
"loss": 2.7413,
"step": 214100
},
{
"epoch": 1.55,
"learning_rate": 4.520172431251718e-06,
"loss": 2.7408,
"step": 214200
},
{
"epoch": 1.55,
"learning_rate": 4.5129395767333535e-06,
"loss": 2.7456,
"step": 214300
},
{
"epoch": 1.55,
"learning_rate": 4.50570672221499e-06,
"loss": 2.741,
"step": 214400
},
{
"epoch": 1.55,
"learning_rate": 4.498473867696626e-06,
"loss": 2.7465,
"step": 214500
},
{
"epoch": 1.55,
"learning_rate": 4.491241013178261e-06,
"loss": 2.7435,
"step": 214600
},
{
"epoch": 1.55,
"learning_rate": 4.484008158659897e-06,
"loss": 2.7377,
"step": 214700
},
{
"epoch": 1.55,
"learning_rate": 4.476775304141533e-06,
"loss": 2.7381,
"step": 214800
},
{
"epoch": 1.55,
"learning_rate": 4.469542449623168e-06,
"loss": 2.7445,
"step": 214900
},
{
"epoch": 1.56,
"learning_rate": 4.462309595104805e-06,
"loss": 2.7416,
"step": 215000
},
{
"epoch": 1.56,
"eval_accuracy": 0.4611487661546186,
"eval_loss": 2.7614753246307373,
"eval_runtime": 27.7512,
"eval_samples_per_second": 233.612,
"eval_steps_per_second": 2.45,
"step": 215000
},
{
"epoch": 1.56,
"learning_rate": 4.4550767405864405e-06,
"loss": 2.7414,
"step": 215100
},
{
"epoch": 1.56,
"learning_rate": 4.447843886068076e-06,
"loss": 2.7419,
"step": 215200
},
{
"epoch": 1.56,
"learning_rate": 4.440611031549712e-06,
"loss": 2.7466,
"step": 215300
},
{
"epoch": 1.56,
"learning_rate": 4.4333781770313475e-06,
"loss": 2.7416,
"step": 215400
},
{
"epoch": 1.56,
"learning_rate": 4.426145322512983e-06,
"loss": 2.7512,
"step": 215500
},
{
"epoch": 1.56,
"learning_rate": 4.418984796539802e-06,
"loss": 2.7472,
"step": 215600
},
{
"epoch": 1.56,
"learning_rate": 4.411751942021439e-06,
"loss": 2.7447,
"step": 215700
},
{
"epoch": 1.56,
"learning_rate": 4.404519087503074e-06,
"loss": 2.7415,
"step": 215800
},
{
"epoch": 1.56,
"learning_rate": 4.39728623298471e-06,
"loss": 2.7387,
"step": 215900
},
{
"epoch": 1.56,
"learning_rate": 4.390053378466346e-06,
"loss": 2.7434,
"step": 216000
},
{
"epoch": 1.56,
"eval_accuracy": 0.4610798075207254,
"eval_loss": 2.761385440826416,
"eval_runtime": 27.9023,
"eval_samples_per_second": 232.347,
"eval_steps_per_second": 2.437,
"step": 216000
},
{
"epoch": 1.56,
"learning_rate": 4.382820523947981e-06,
"loss": 2.7392,
"step": 216100
},
{
"epoch": 1.56,
"learning_rate": 4.375587669429617e-06,
"loss": 2.7394,
"step": 216200
},
{
"epoch": 1.56,
"learning_rate": 4.3683548149112535e-06,
"loss": 2.7423,
"step": 216300
},
{
"epoch": 1.57,
"learning_rate": 4.361121960392889e-06,
"loss": 2.7389,
"step": 216400
},
{
"epoch": 1.57,
"learning_rate": 4.353889105874525e-06,
"loss": 2.7334,
"step": 216500
},
{
"epoch": 1.57,
"learning_rate": 4.3466562513561605e-06,
"loss": 2.7384,
"step": 216600
},
{
"epoch": 1.57,
"learning_rate": 4.339423396837796e-06,
"loss": 2.7432,
"step": 216700
},
{
"epoch": 1.57,
"learning_rate": 4.332190542319432e-06,
"loss": 2.7388,
"step": 216800
},
{
"epoch": 1.57,
"learning_rate": 4.324957687801068e-06,
"loss": 2.7417,
"step": 216900
},
{
"epoch": 1.57,
"learning_rate": 4.317724833282704e-06,
"loss": 2.7456,
"step": 217000
},
{
"epoch": 1.57,
"eval_accuracy": 0.4610507723064546,
"eval_loss": 2.7613608837127686,
"eval_runtime": 32.1832,
"eval_samples_per_second": 201.441,
"eval_steps_per_second": 2.113,
"step": 217000
},
{
"epoch": 1.57,
"learning_rate": 4.31049197876434e-06,
"loss": 2.744,
"step": 217100
},
{
"epoch": 1.57,
"learning_rate": 4.303259124245975e-06,
"loss": 2.7446,
"step": 217200
},
{
"epoch": 1.57,
"learning_rate": 4.296026269727611e-06,
"loss": 2.7351,
"step": 217300
},
{
"epoch": 1.57,
"learning_rate": 4.288793415209247e-06,
"loss": 2.7413,
"step": 217400
},
{
"epoch": 1.57,
"learning_rate": 4.281560560690883e-06,
"loss": 2.7478,
"step": 217500
},
{
"epoch": 1.57,
"learning_rate": 4.274327706172519e-06,
"loss": 2.7409,
"step": 217600
},
{
"epoch": 1.57,
"learning_rate": 4.267167180199338e-06,
"loss": 2.731,
"step": 217700
},
{
"epoch": 1.58,
"learning_rate": 4.259934325680974e-06,
"loss": 2.7469,
"step": 217800
},
{
"epoch": 1.58,
"learning_rate": 4.252701471162609e-06,
"loss": 2.7392,
"step": 217900
},
{
"epoch": 1.58,
"learning_rate": 4.245540945189429e-06,
"loss": 2.7499,
"step": 218000
},
{
"epoch": 1.58,
"eval_accuracy": 0.46106226541210343,
"eval_loss": 2.7610652446746826,
"eval_runtime": 29.5792,
"eval_samples_per_second": 219.174,
"eval_steps_per_second": 2.299,
"step": 218000
},
{
"epoch": 1.58,
"learning_rate": 4.2383080906710645e-06,
"loss": 2.7383,
"step": 218100
},
{
"epoch": 1.58,
"learning_rate": 4.2310752361527e-06,
"loss": 2.7398,
"step": 218200
},
{
"epoch": 1.58,
"learning_rate": 4.223842381634336e-06,
"loss": 2.7354,
"step": 218300
},
{
"epoch": 1.58,
"learning_rate": 4.2166095271159715e-06,
"loss": 2.7482,
"step": 218400
},
{
"epoch": 1.58,
"learning_rate": 4.209376672597608e-06,
"loss": 2.7427,
"step": 218500
},
{
"epoch": 1.58,
"learning_rate": 4.202143818079244e-06,
"loss": 2.7346,
"step": 218600
},
{
"epoch": 1.58,
"learning_rate": 4.194910963560879e-06,
"loss": 2.743,
"step": 218700
},
{
"epoch": 1.58,
"learning_rate": 4.187678109042515e-06,
"loss": 2.738,
"step": 218800
},
{
"epoch": 1.58,
"learning_rate": 4.180445254524151e-06,
"loss": 2.7347,
"step": 218900
},
{
"epoch": 1.58,
"learning_rate": 4.173212400005786e-06,
"loss": 2.744,
"step": 219000
},
{
"epoch": 1.58,
"eval_accuracy": 0.46114574165313205,
"eval_loss": 2.760906457901001,
"eval_runtime": 27.8233,
"eval_samples_per_second": 233.006,
"eval_steps_per_second": 2.444,
"step": 219000
},
{
"epoch": 1.58,
"learning_rate": 4.165979545487423e-06,
"loss": 2.7415,
"step": 219100
},
{
"epoch": 1.59,
"learning_rate": 4.158819019514242e-06,
"loss": 2.7429,
"step": 219200
},
{
"epoch": 1.59,
"learning_rate": 4.151586164995877e-06,
"loss": 2.7381,
"step": 219300
},
{
"epoch": 1.59,
"learning_rate": 4.144353310477514e-06,
"loss": 2.7389,
"step": 219400
},
{
"epoch": 1.59,
"learning_rate": 4.1371204559591496e-06,
"loss": 2.7519,
"step": 219500
},
{
"epoch": 1.59,
"learning_rate": 4.129887601440785e-06,
"loss": 2.7505,
"step": 219600
},
{
"epoch": 1.59,
"learning_rate": 4.122727075467605e-06,
"loss": 2.7414,
"step": 219700
},
{
"epoch": 1.59,
"learning_rate": 4.11549422094924e-06,
"loss": 2.7272,
"step": 219800
},
{
"epoch": 1.59,
"learning_rate": 4.1082613664308754e-06,
"loss": 2.7408,
"step": 219900
},
{
"epoch": 1.59,
"learning_rate": 4.101028511912511e-06,
"loss": 2.7375,
"step": 220000
},
{
"epoch": 1.59,
"eval_accuracy": 0.4611257799433208,
"eval_loss": 2.760807514190674,
"eval_runtime": 29.5284,
"eval_samples_per_second": 219.552,
"eval_steps_per_second": 2.303,
"step": 220000
},
{
"epoch": 1.59,
"learning_rate": 4.093795657394148e-06,
"loss": 2.7376,
"step": 220100
},
{
"epoch": 1.59,
"learning_rate": 4.086562802875783e-06,
"loss": 2.7406,
"step": 220200
},
{
"epoch": 1.59,
"learning_rate": 4.079329948357419e-06,
"loss": 2.7457,
"step": 220300
},
{
"epoch": 1.59,
"learning_rate": 4.072097093839055e-06,
"loss": 2.7377,
"step": 220400
},
{
"epoch": 1.59,
"learning_rate": 4.06486423932069e-06,
"loss": 2.7417,
"step": 220500
},
{
"epoch": 1.6,
"learning_rate": 4.057631384802326e-06,
"loss": 2.7429,
"step": 220600
},
{
"epoch": 1.6,
"learning_rate": 4.050470858829146e-06,
"loss": 2.7458,
"step": 220700
},
{
"epoch": 1.6,
"learning_rate": 4.043238004310781e-06,
"loss": 2.74,
"step": 220800
},
{
"epoch": 1.6,
"learning_rate": 4.036005149792417e-06,
"loss": 2.7355,
"step": 220900
},
{
"epoch": 1.6,
"learning_rate": 4.0287722952740535e-06,
"loss": 2.7428,
"step": 221000
},
{
"epoch": 1.6,
"eval_accuracy": 0.4611263848436182,
"eval_loss": 2.7606468200683594,
"eval_runtime": 29.1245,
"eval_samples_per_second": 222.596,
"eval_steps_per_second": 2.335,
"step": 221000
},
{
"epoch": 1.6,
"learning_rate": 4.021539440755689e-06,
"loss": 2.7413,
"step": 221100
},
{
"epoch": 1.6,
"learning_rate": 4.014306586237325e-06,
"loss": 2.7345,
"step": 221200
},
{
"epoch": 1.6,
"learning_rate": 4.0070737317189605e-06,
"loss": 2.7442,
"step": 221300
},
{
"epoch": 1.6,
"learning_rate": 3.999840877200596e-06,
"loss": 2.7402,
"step": 221400
},
{
"epoch": 1.6,
"learning_rate": 3.992608022682232e-06,
"loss": 2.7399,
"step": 221500
},
{
"epoch": 1.6,
"learning_rate": 3.985375168163868e-06,
"loss": 2.7461,
"step": 221600
},
{
"epoch": 1.6,
"learning_rate": 3.978142313645504e-06,
"loss": 2.7288,
"step": 221700
},
{
"epoch": 1.6,
"learning_rate": 3.97090945912714e-06,
"loss": 2.7323,
"step": 221800
},
{
"epoch": 1.6,
"learning_rate": 3.963676604608775e-06,
"loss": 2.735,
"step": 221900
},
{
"epoch": 1.61,
"learning_rate": 3.956443750090411e-06,
"loss": 2.7442,
"step": 222000
},
{
"epoch": 1.61,
"eval_accuracy": 0.4611021888317258,
"eval_loss": 2.7605795860290527,
"eval_runtime": 29.7064,
"eval_samples_per_second": 218.236,
"eval_steps_per_second": 2.289,
"step": 222000
},
{
"epoch": 1.61,
"learning_rate": 3.949210895572047e-06,
"loss": 2.7375,
"step": 222100
},
{
"epoch": 1.61,
"learning_rate": 3.941978041053682e-06,
"loss": 2.7365,
"step": 222200
},
{
"epoch": 1.61,
"learning_rate": 3.934745186535318e-06,
"loss": 2.7442,
"step": 222300
},
{
"epoch": 1.61,
"learning_rate": 3.927512332016954e-06,
"loss": 2.7405,
"step": 222400
},
{
"epoch": 1.61,
"learning_rate": 3.920279477498589e-06,
"loss": 2.7429,
"step": 222500
},
{
"epoch": 1.61,
"learning_rate": 3.913046622980226e-06,
"loss": 2.7361,
"step": 222600
},
{
"epoch": 1.61,
"learning_rate": 3.9058137684618616e-06,
"loss": 2.7376,
"step": 222700
},
{
"epoch": 1.61,
"learning_rate": 3.8986532424886805e-06,
"loss": 2.7357,
"step": 222800
},
{
"epoch": 1.61,
"learning_rate": 3.891420387970317e-06,
"loss": 2.7413,
"step": 222900
},
{
"epoch": 1.61,
"learning_rate": 3.884187533451953e-06,
"loss": 2.7395,
"step": 223000
},
{
"epoch": 1.61,
"eval_accuracy": 0.46116146906086203,
"eval_loss": 2.7603955268859863,
"eval_runtime": 29.5556,
"eval_samples_per_second": 219.349,
"eval_steps_per_second": 2.301,
"step": 223000
},
{
"epoch": 1.61,
"learning_rate": 3.876954678933588e-06,
"loss": 2.7413,
"step": 223100
},
{
"epoch": 1.61,
"learning_rate": 3.869721824415224e-06,
"loss": 2.7495,
"step": 223200
},
{
"epoch": 1.62,
"learning_rate": 3.86248896989686e-06,
"loss": 2.7346,
"step": 223300
},
{
"epoch": 1.62,
"learning_rate": 3.855256115378495e-06,
"loss": 2.7434,
"step": 223400
},
{
"epoch": 1.62,
"learning_rate": 3.848023260860132e-06,
"loss": 2.7414,
"step": 223500
},
{
"epoch": 1.62,
"learning_rate": 3.8407904063417675e-06,
"loss": 2.7452,
"step": 223600
},
{
"epoch": 1.62,
"learning_rate": 3.833557551823403e-06,
"loss": 2.7334,
"step": 223700
},
{
"epoch": 1.62,
"learning_rate": 3.826397025850223e-06,
"loss": 2.734,
"step": 223800
},
{
"epoch": 1.62,
"learning_rate": 3.8191641713318586e-06,
"loss": 2.7417,
"step": 223900
},
{
"epoch": 1.62,
"learning_rate": 3.8119313168134942e-06,
"loss": 2.7445,
"step": 224000
},
{
"epoch": 1.62,
"eval_accuracy": 0.4612249835920794,
"eval_loss": 2.7602407932281494,
"eval_runtime": 27.9042,
"eval_samples_per_second": 232.331,
"eval_steps_per_second": 2.437,
"step": 224000
},
{
"epoch": 1.62,
"learning_rate": 3.80469846229513e-06,
"loss": 2.7441,
"step": 224100
},
{
"epoch": 1.62,
"learning_rate": 3.7974656077767656e-06,
"loss": 2.7424,
"step": 224200
},
{
"epoch": 1.62,
"learning_rate": 3.790232753258401e-06,
"loss": 2.7458,
"step": 224300
},
{
"epoch": 1.62,
"learning_rate": 3.782999898740037e-06,
"loss": 2.7381,
"step": 224400
},
{
"epoch": 1.62,
"learning_rate": 3.7757670442216726e-06,
"loss": 2.7407,
"step": 224500
},
{
"epoch": 1.62,
"learning_rate": 3.7685341897033082e-06,
"loss": 2.7407,
"step": 224600
},
{
"epoch": 1.63,
"learning_rate": 3.7613013351849443e-06,
"loss": 2.7438,
"step": 224700
},
{
"epoch": 1.63,
"learning_rate": 3.75406848066658e-06,
"loss": 2.7364,
"step": 224800
},
{
"epoch": 1.63,
"learning_rate": 3.7468356261482156e-06,
"loss": 2.7412,
"step": 224900
},
{
"epoch": 1.63,
"learning_rate": 3.7396027716298517e-06,
"loss": 2.7394,
"step": 225000
},
{
"epoch": 1.63,
"eval_accuracy": 0.4611070280341043,
"eval_loss": 2.760154962539673,
"eval_runtime": 30.9249,
"eval_samples_per_second": 209.637,
"eval_steps_per_second": 2.199,
"step": 225000
},
{
"epoch": 1.63,
"learning_rate": 3.732442245656671e-06,
"loss": 2.744,
"step": 225100
},
{
"epoch": 1.63,
"learning_rate": 3.7252093911383067e-06,
"loss": 2.7371,
"step": 225200
},
{
"epoch": 1.63,
"learning_rate": 3.717976536619943e-06,
"loss": 2.7374,
"step": 225300
},
{
"epoch": 1.63,
"learning_rate": 3.7107436821015785e-06,
"loss": 2.7375,
"step": 225400
},
{
"epoch": 1.63,
"learning_rate": 3.703510827583214e-06,
"loss": 2.7368,
"step": 225500
},
{
"epoch": 1.63,
"learning_rate": 3.6962779730648502e-06,
"loss": 2.7399,
"step": 225600
},
{
"epoch": 1.63,
"learning_rate": 3.689045118546486e-06,
"loss": 2.7354,
"step": 225700
},
{
"epoch": 1.63,
"learning_rate": 3.6818122640281216e-06,
"loss": 2.7475,
"step": 225800
},
{
"epoch": 1.63,
"learning_rate": 3.6745794095097577e-06,
"loss": 2.749,
"step": 225900
},
{
"epoch": 1.63,
"learning_rate": 3.6673465549913933e-06,
"loss": 2.7403,
"step": 226000
},
{
"epoch": 1.63,
"eval_accuracy": 0.46118143077067325,
"eval_loss": 2.7599334716796875,
"eval_runtime": 29.7721,
"eval_samples_per_second": 217.754,
"eval_steps_per_second": 2.284,
"step": 226000
},
{
"epoch": 1.64,
"learning_rate": 3.6601860290182126e-06,
"loss": 2.7356,
"step": 226100
},
{
"epoch": 1.64,
"learning_rate": 3.6529531744998487e-06,
"loss": 2.7327,
"step": 226200
},
{
"epoch": 1.64,
"learning_rate": 3.6457203199814844e-06,
"loss": 2.7394,
"step": 226300
},
{
"epoch": 1.64,
"learning_rate": 3.6384874654631196e-06,
"loss": 2.7427,
"step": 226400
},
{
"epoch": 1.64,
"learning_rate": 3.6312546109447553e-06,
"loss": 2.7417,
"step": 226500
},
{
"epoch": 1.64,
"learning_rate": 3.6240217564263914e-06,
"loss": 2.7395,
"step": 226600
},
{
"epoch": 1.64,
"learning_rate": 3.616788901908027e-06,
"loss": 2.7454,
"step": 226700
},
{
"epoch": 1.64,
"learning_rate": 3.6095560473896627e-06,
"loss": 2.737,
"step": 226800
},
{
"epoch": 1.64,
"learning_rate": 3.602323192871299e-06,
"loss": 2.7436,
"step": 226900
},
{
"epoch": 1.64,
"learning_rate": 3.5950903383529345e-06,
"loss": 2.738,
"step": 227000
},
{
"epoch": 1.64,
"eval_accuracy": 0.46119715817840323,
"eval_loss": 2.7598636150360107,
"eval_runtime": 27.9537,
"eval_samples_per_second": 231.919,
"eval_steps_per_second": 2.433,
"step": 227000
},
{
"epoch": 1.64,
"learning_rate": 3.58785748383457e-06,
"loss": 2.7383,
"step": 227100
},
{
"epoch": 1.64,
"learning_rate": 3.5806246293162062e-06,
"loss": 2.7407,
"step": 227200
},
{
"epoch": 1.64,
"learning_rate": 3.573391774797842e-06,
"loss": 2.7394,
"step": 227300
},
{
"epoch": 1.64,
"learning_rate": 3.5661589202794776e-06,
"loss": 2.7449,
"step": 227400
},
{
"epoch": 1.65,
"learning_rate": 3.5589260657611137e-06,
"loss": 2.7449,
"step": 227500
},
{
"epoch": 1.65,
"learning_rate": 3.5516932112427493e-06,
"loss": 2.7383,
"step": 227600
},
{
"epoch": 1.65,
"learning_rate": 3.5445326852695686e-06,
"loss": 2.7364,
"step": 227700
},
{
"epoch": 1.65,
"learning_rate": 3.5372998307512047e-06,
"loss": 2.7375,
"step": 227800
},
{
"epoch": 1.65,
"learning_rate": 3.5300669762328404e-06,
"loss": 2.7405,
"step": 227900
},
{
"epoch": 1.65,
"learning_rate": 3.522834121714476e-06,
"loss": 2.7332,
"step": 228000
},
{
"epoch": 1.65,
"eval_accuracy": 0.4612788197185399,
"eval_loss": 2.759690284729004,
"eval_runtime": 32.3658,
"eval_samples_per_second": 200.304,
"eval_steps_per_second": 2.101,
"step": 228000
},
{
"epoch": 1.65,
"learning_rate": 3.515601267196112e-06,
"loss": 2.7336,
"step": 228100
},
{
"epoch": 1.65,
"learning_rate": 3.508368412677748e-06,
"loss": 2.73,
"step": 228200
},
{
"epoch": 1.65,
"learning_rate": 3.5011355581593835e-06,
"loss": 2.7353,
"step": 228300
},
{
"epoch": 1.65,
"learning_rate": 3.4939027036410196e-06,
"loss": 2.7394,
"step": 228400
},
{
"epoch": 1.65,
"learning_rate": 3.4866698491226552e-06,
"loss": 2.7291,
"step": 228500
},
{
"epoch": 1.65,
"learning_rate": 3.479436994604291e-06,
"loss": 2.7395,
"step": 228600
},
{
"epoch": 1.65,
"learning_rate": 3.472204140085926e-06,
"loss": 2.7351,
"step": 228700
},
{
"epoch": 1.65,
"learning_rate": 3.4649712855675622e-06,
"loss": 2.7361,
"step": 228800
},
{
"epoch": 1.66,
"learning_rate": 3.457738431049198e-06,
"loss": 2.7387,
"step": 228900
},
{
"epoch": 1.66,
"learning_rate": 3.4505055765308336e-06,
"loss": 2.7388,
"step": 229000
},
{
"epoch": 1.66,
"eval_accuracy": 0.46129575692686453,
"eval_loss": 2.759584903717041,
"eval_runtime": 29.3954,
"eval_samples_per_second": 220.545,
"eval_steps_per_second": 2.313,
"step": 229000
},
{
"epoch": 1.66,
"learning_rate": 3.4432727220124697e-06,
"loss": 2.7428,
"step": 229100
},
{
"epoch": 1.66,
"learning_rate": 3.4360398674941053e-06,
"loss": 2.7331,
"step": 229200
},
{
"epoch": 1.66,
"learning_rate": 3.428807012975741e-06,
"loss": 2.7316,
"step": 229300
},
{
"epoch": 1.66,
"learning_rate": 3.421574158457377e-06,
"loss": 2.7302,
"step": 229400
},
{
"epoch": 1.66,
"learning_rate": 3.4143413039390128e-06,
"loss": 2.7417,
"step": 229500
},
{
"epoch": 1.66,
"learning_rate": 3.4071084494206484e-06,
"loss": 2.7427,
"step": 229600
},
{
"epoch": 1.66,
"learning_rate": 3.3998755949022845e-06,
"loss": 2.7407,
"step": 229700
},
{
"epoch": 1.66,
"learning_rate": 3.39264274038392e-06,
"loss": 2.7359,
"step": 229800
},
{
"epoch": 1.66,
"learning_rate": 3.385409885865556e-06,
"loss": 2.7396,
"step": 229900
},
{
"epoch": 1.66,
"learning_rate": 3.3782493598923756e-06,
"loss": 2.743,
"step": 230000
},
{
"epoch": 1.66,
"eval_accuracy": 0.46134172934946,
"eval_loss": 2.7594590187072754,
"eval_runtime": 29.5124,
"eval_samples_per_second": 219.67,
"eval_steps_per_second": 2.304,
"step": 230000
},
{
"epoch": 1.66,
"learning_rate": 3.3710165053740113e-06,
"loss": 2.7326,
"step": 230100
},
{
"epoch": 1.66,
"learning_rate": 3.363783650855647e-06,
"loss": 2.7364,
"step": 230200
},
{
"epoch": 1.67,
"learning_rate": 3.356550796337283e-06,
"loss": 2.7352,
"step": 230300
},
{
"epoch": 1.67,
"learning_rate": 3.3493179418189187e-06,
"loss": 2.7397,
"step": 230400
},
{
"epoch": 1.67,
"learning_rate": 3.3420850873005543e-06,
"loss": 2.7393,
"step": 230500
},
{
"epoch": 1.67,
"learning_rate": 3.3348522327821904e-06,
"loss": 2.744,
"step": 230600
},
{
"epoch": 1.67,
"learning_rate": 3.327619378263826e-06,
"loss": 2.7422,
"step": 230700
},
{
"epoch": 1.67,
"learning_rate": 3.3203865237454618e-06,
"loss": 2.7439,
"step": 230800
},
{
"epoch": 1.67,
"learning_rate": 3.3132259977722807e-06,
"loss": 2.746,
"step": 230900
},
{
"epoch": 1.67,
"learning_rate": 3.3059931432539168e-06,
"loss": 2.7368,
"step": 231000
},
{
"epoch": 1.67,
"eval_accuracy": 0.4612715609149722,
"eval_loss": 2.759273052215576,
"eval_runtime": 29.4403,
"eval_samples_per_second": 220.209,
"eval_steps_per_second": 2.31,
"step": 231000
},
{
"epoch": 1.67,
"learning_rate": 3.2987602887355524e-06,
"loss": 2.7397,
"step": 231100
},
{
"epoch": 1.67,
"learning_rate": 3.291527434217188e-06,
"loss": 2.7324,
"step": 231200
},
{
"epoch": 1.67,
"learning_rate": 3.284294579698824e-06,
"loss": 2.7456,
"step": 231300
},
{
"epoch": 1.67,
"learning_rate": 3.27706172518046e-06,
"loss": 2.731,
"step": 231400
},
{
"epoch": 1.67,
"learning_rate": 3.2698288706620955e-06,
"loss": 2.7319,
"step": 231500
},
{
"epoch": 1.68,
"learning_rate": 3.2625960161437316e-06,
"loss": 2.7492,
"step": 231600
},
{
"epoch": 1.68,
"learning_rate": 3.2553631616253673e-06,
"loss": 2.741,
"step": 231700
},
{
"epoch": 1.68,
"learning_rate": 3.248130307107003e-06,
"loss": 2.7384,
"step": 231800
},
{
"epoch": 1.68,
"learning_rate": 3.240897452588639e-06,
"loss": 2.7307,
"step": 231900
},
{
"epoch": 1.68,
"learning_rate": 3.2336645980702747e-06,
"loss": 2.7426,
"step": 232000
},
{
"epoch": 1.68,
"eval_accuracy": 0.4613774184670012,
"eval_loss": 2.7592265605926514,
"eval_runtime": 31.7025,
"eval_samples_per_second": 204.495,
"eval_steps_per_second": 2.145,
"step": 232000
},
{
"epoch": 1.68,
"learning_rate": 3.2264317435519104e-06,
"loss": 2.7339,
"step": 232100
},
{
"epoch": 1.68,
"learning_rate": 3.2191988890335464e-06,
"loss": 2.7413,
"step": 232200
},
{
"epoch": 1.68,
"learning_rate": 3.211966034515182e-06,
"loss": 2.7413,
"step": 232300
},
{
"epoch": 1.68,
"learning_rate": 3.2047331799968178e-06,
"loss": 2.7421,
"step": 232400
},
{
"epoch": 1.68,
"learning_rate": 3.197500325478454e-06,
"loss": 2.7387,
"step": 232500
},
{
"epoch": 1.68,
"learning_rate": 3.1902674709600895e-06,
"loss": 2.7344,
"step": 232600
},
{
"epoch": 1.68,
"learning_rate": 3.183034616441725e-06,
"loss": 2.7488,
"step": 232700
},
{
"epoch": 1.68,
"learning_rate": 3.1758017619233613e-06,
"loss": 2.7391,
"step": 232800
},
{
"epoch": 1.68,
"learning_rate": 3.168568907404997e-06,
"loss": 2.7362,
"step": 232900
},
{
"epoch": 1.69,
"learning_rate": 3.1614083814318163e-06,
"loss": 2.7332,
"step": 233000
},
{
"epoch": 1.69,
"eval_accuracy": 0.4614445624000024,
"eval_loss": 2.7591168880462646,
"eval_runtime": 30.2871,
"eval_samples_per_second": 214.052,
"eval_steps_per_second": 2.245,
"step": 233000
},
{
"epoch": 1.69,
"learning_rate": 3.1541755269134524e-06,
"loss": 2.7441,
"step": 233100
},
{
"epoch": 1.69,
"learning_rate": 3.1469426723950876e-06,
"loss": 2.74,
"step": 233200
},
{
"epoch": 1.69,
"learning_rate": 3.1397098178767233e-06,
"loss": 2.7379,
"step": 233300
},
{
"epoch": 1.69,
"learning_rate": 3.132476963358359e-06,
"loss": 2.7406,
"step": 233400
},
{
"epoch": 1.69,
"learning_rate": 3.1253164373851787e-06,
"loss": 2.7385,
"step": 233500
},
{
"epoch": 1.69,
"learning_rate": 3.1180835828668143e-06,
"loss": 2.7332,
"step": 233600
},
{
"epoch": 1.69,
"learning_rate": 3.11085072834845e-06,
"loss": 2.7396,
"step": 233700
},
{
"epoch": 1.69,
"learning_rate": 3.103617873830086e-06,
"loss": 2.743,
"step": 233800
},
{
"epoch": 1.69,
"learning_rate": 3.0963850193117218e-06,
"loss": 2.7363,
"step": 233900
},
{
"epoch": 1.69,
"learning_rate": 3.0891521647933574e-06,
"loss": 2.7413,
"step": 234000
},
{
"epoch": 1.69,
"eval_accuracy": 0.4613556420562981,
"eval_loss": 2.7590200901031494,
"eval_runtime": 29.9923,
"eval_samples_per_second": 216.155,
"eval_steps_per_second": 2.267,
"step": 234000
},
{
"epoch": 1.69,
"learning_rate": 3.0819193102749935e-06,
"loss": 2.7473,
"step": 234100
},
{
"epoch": 1.69,
"learning_rate": 3.074686455756629e-06,
"loss": 2.7374,
"step": 234200
},
{
"epoch": 1.69,
"learning_rate": 3.067453601238265e-06,
"loss": 2.7358,
"step": 234300
},
{
"epoch": 1.7,
"learning_rate": 3.060220746719901e-06,
"loss": 2.7322,
"step": 234400
},
{
"epoch": 1.7,
"learning_rate": 3.0529878922015366e-06,
"loss": 2.7412,
"step": 234500
},
{
"epoch": 1.7,
"learning_rate": 3.0457550376831723e-06,
"loss": 2.7346,
"step": 234600
},
{
"epoch": 1.7,
"learning_rate": 3.0385221831648084e-06,
"loss": 2.7395,
"step": 234700
},
{
"epoch": 1.7,
"learning_rate": 3.031289328646444e-06,
"loss": 2.7384,
"step": 234800
},
{
"epoch": 1.7,
"learning_rate": 3.0240564741280797e-06,
"loss": 2.7408,
"step": 234900
},
{
"epoch": 1.7,
"learning_rate": 3.016823619609716e-06,
"loss": 2.735,
"step": 235000
},
{
"epoch": 1.7,
"eval_accuracy": 0.46131692843727035,
"eval_loss": 2.7589235305786133,
"eval_runtime": 29.8027,
"eval_samples_per_second": 217.53,
"eval_steps_per_second": 2.282,
"step": 235000
},
{
"epoch": 1.7,
"learning_rate": 3.0095907650913515e-06,
"loss": 2.7375,
"step": 235100
},
{
"epoch": 1.7,
"learning_rate": 3.002357910572987e-06,
"loss": 2.7379,
"step": 235200
},
{
"epoch": 1.7,
"learning_rate": 2.9951250560546232e-06,
"loss": 2.734,
"step": 235300
},
{
"epoch": 1.7,
"learning_rate": 2.987892201536259e-06,
"loss": 2.743,
"step": 235400
},
{
"epoch": 1.7,
"learning_rate": 2.980659347017894e-06,
"loss": 2.7419,
"step": 235500
},
{
"epoch": 1.7,
"learning_rate": 2.97342649249953e-06,
"loss": 2.743,
"step": 235600
},
{
"epoch": 1.7,
"learning_rate": 2.9662659665263495e-06,
"loss": 2.7387,
"step": 235700
},
{
"epoch": 1.71,
"learning_rate": 2.959033112007985e-06,
"loss": 2.7431,
"step": 235800
},
{
"epoch": 1.71,
"learning_rate": 2.951800257489621e-06,
"loss": 2.7385,
"step": 235900
},
{
"epoch": 1.71,
"learning_rate": 2.9446397315164406e-06,
"loss": 2.7393,
"step": 236000
},
{
"epoch": 1.71,
"eval_accuracy": 0.46144032809792124,
"eval_loss": 2.7588789463043213,
"eval_runtime": 29.5655,
"eval_samples_per_second": 219.276,
"eval_steps_per_second": 2.3,
"step": 236000
},
{
"epoch": 1.71,
"learning_rate": 2.9374068769980763e-06,
"loss": 2.7405,
"step": 236100
},
{
"epoch": 1.71,
"learning_rate": 2.930174022479712e-06,
"loss": 2.7432,
"step": 236200
},
{
"epoch": 1.71,
"learning_rate": 2.922941167961348e-06,
"loss": 2.7447,
"step": 236300
},
{
"epoch": 1.71,
"learning_rate": 2.9157083134429837e-06,
"loss": 2.7412,
"step": 236400
},
{
"epoch": 1.71,
"learning_rate": 2.9084754589246194e-06,
"loss": 2.7406,
"step": 236500
},
{
"epoch": 1.71,
"learning_rate": 2.9012426044062555e-06,
"loss": 2.7361,
"step": 236600
},
{
"epoch": 1.71,
"learning_rate": 2.894009749887891e-06,
"loss": 2.7427,
"step": 236700
},
{
"epoch": 1.71,
"learning_rate": 2.8867768953695268e-06,
"loss": 2.732,
"step": 236800
},
{
"epoch": 1.71,
"learning_rate": 2.879544040851163e-06,
"loss": 2.7359,
"step": 236900
},
{
"epoch": 1.71,
"learning_rate": 2.8723111863327985e-06,
"loss": 2.7382,
"step": 237000
},
{
"epoch": 1.71,
"eval_accuracy": 0.4614735976142732,
"eval_loss": 2.7587404251098633,
"eval_runtime": 29.6674,
"eval_samples_per_second": 218.523,
"eval_steps_per_second": 2.292,
"step": 237000
},
{
"epoch": 1.71,
"learning_rate": 2.865078331814434e-06,
"loss": 2.7361,
"step": 237100
},
{
"epoch": 1.72,
"learning_rate": 2.8578454772960703e-06,
"loss": 2.7353,
"step": 237200
},
{
"epoch": 1.72,
"learning_rate": 2.850612622777706e-06,
"loss": 2.7453,
"step": 237300
},
{
"epoch": 1.72,
"learning_rate": 2.8433797682593416e-06,
"loss": 2.7412,
"step": 237400
},
{
"epoch": 1.72,
"learning_rate": 2.8361469137409773e-06,
"loss": 2.7355,
"step": 237500
},
{
"epoch": 1.72,
"learning_rate": 2.828914059222613e-06,
"loss": 2.7308,
"step": 237600
},
{
"epoch": 1.72,
"learning_rate": 2.8216812047042486e-06,
"loss": 2.7353,
"step": 237700
},
{
"epoch": 1.72,
"learning_rate": 2.8144483501858843e-06,
"loss": 2.7321,
"step": 237800
},
{
"epoch": 1.72,
"learning_rate": 2.80721549566752e-06,
"loss": 2.7433,
"step": 237900
},
{
"epoch": 1.72,
"learning_rate": 2.8000549696943397e-06,
"loss": 2.7403,
"step": 238000
},
{
"epoch": 1.72,
"eval_accuracy": 0.46152561903984174,
"eval_loss": 2.7587077617645264,
"eval_runtime": 29.8439,
"eval_samples_per_second": 217.231,
"eval_steps_per_second": 2.279,
"step": 238000
},
{
"epoch": 1.72,
"learning_rate": 2.7928221151759754e-06,
"loss": 2.738,
"step": 238100
},
{
"epoch": 1.72,
"learning_rate": 2.7855892606576115e-06,
"loss": 2.7421,
"step": 238200
},
{
"epoch": 1.72,
"learning_rate": 2.778356406139247e-06,
"loss": 2.747,
"step": 238300
},
{
"epoch": 1.72,
"learning_rate": 2.771123551620883e-06,
"loss": 2.7361,
"step": 238400
},
{
"epoch": 1.73,
"learning_rate": 2.763890697102519e-06,
"loss": 2.7439,
"step": 238500
},
{
"epoch": 1.73,
"learning_rate": 2.7566578425841545e-06,
"loss": 2.7365,
"step": 238600
},
{
"epoch": 1.73,
"learning_rate": 2.7494249880657902e-06,
"loss": 2.744,
"step": 238700
},
{
"epoch": 1.73,
"learning_rate": 2.742192133547426e-06,
"loss": 2.7478,
"step": 238800
},
{
"epoch": 1.73,
"learning_rate": 2.7350316075742456e-06,
"loss": 2.7332,
"step": 238900
},
{
"epoch": 1.73,
"learning_rate": 2.7277987530558813e-06,
"loss": 2.7436,
"step": 239000
},
{
"epoch": 1.73,
"eval_accuracy": 0.46152017493716596,
"eval_loss": 2.7586169242858887,
"eval_runtime": 29.5848,
"eval_samples_per_second": 219.132,
"eval_steps_per_second": 2.298,
"step": 239000
},
{
"epoch": 1.73,
"learning_rate": 2.7205658985375174e-06,
"loss": 2.7422,
"step": 239100
},
{
"epoch": 1.73,
"learning_rate": 2.713333044019153e-06,
"loss": 2.7439,
"step": 239200
},
{
"epoch": 1.73,
"learning_rate": 2.7061001895007887e-06,
"loss": 2.7457,
"step": 239300
},
{
"epoch": 1.73,
"learning_rate": 2.698867334982425e-06,
"loss": 2.7453,
"step": 239400
},
{
"epoch": 1.73,
"learning_rate": 2.6916344804640605e-06,
"loss": 2.7448,
"step": 239500
},
{
"epoch": 1.73,
"learning_rate": 2.684401625945696e-06,
"loss": 2.7451,
"step": 239600
},
{
"epoch": 1.73,
"learning_rate": 2.677168771427332e-06,
"loss": 2.75,
"step": 239700
},
{
"epoch": 1.73,
"learning_rate": 2.669935916908967e-06,
"loss": 2.7402,
"step": 239800
},
{
"epoch": 1.74,
"learning_rate": 2.662703062390603e-06,
"loss": 2.7313,
"step": 239900
},
{
"epoch": 1.74,
"learning_rate": 2.655470207872239e-06,
"loss": 2.7422,
"step": 240000
},
{
"epoch": 1.74,
"eval_accuracy": 0.461479041716949,
"eval_loss": 2.758490562438965,
"eval_runtime": 30.0813,
"eval_samples_per_second": 215.516,
"eval_steps_per_second": 2.261,
"step": 240000
},
{
"epoch": 1.74,
"learning_rate": 2.6482373533538745e-06,
"loss": 2.7413,
"step": 240100
},
{
"epoch": 1.74,
"learning_rate": 2.6410044988355106e-06,
"loss": 2.7452,
"step": 240200
},
{
"epoch": 1.74,
"learning_rate": 2.6337716443171462e-06,
"loss": 2.7438,
"step": 240300
},
{
"epoch": 1.74,
"learning_rate": 2.626538789798782e-06,
"loss": 2.7333,
"step": 240400
},
{
"epoch": 1.74,
"learning_rate": 2.619305935280418e-06,
"loss": 2.7387,
"step": 240500
},
{
"epoch": 1.74,
"learning_rate": 2.6120730807620536e-06,
"loss": 2.7413,
"step": 240600
},
{
"epoch": 1.74,
"learning_rate": 2.6048402262436893e-06,
"loss": 2.7352,
"step": 240700
},
{
"epoch": 1.74,
"learning_rate": 2.5976073717253254e-06,
"loss": 2.7375,
"step": 240800
},
{
"epoch": 1.74,
"learning_rate": 2.590374517206961e-06,
"loss": 2.7369,
"step": 240900
},
{
"epoch": 1.74,
"learning_rate": 2.5832139912337804e-06,
"loss": 2.7257,
"step": 241000
},
{
"epoch": 1.74,
"eval_accuracy": 0.4614288349922724,
"eval_loss": 2.758410930633545,
"eval_runtime": 30.0568,
"eval_samples_per_second": 215.692,
"eval_steps_per_second": 2.262,
"step": 241000
},
{
"epoch": 1.74,
"learning_rate": 2.5759811367154165e-06,
"loss": 2.738,
"step": 241100
},
{
"epoch": 1.74,
"learning_rate": 2.568748282197052e-06,
"loss": 2.7356,
"step": 241200
},
{
"epoch": 1.75,
"learning_rate": 2.561515427678688e-06,
"loss": 2.7441,
"step": 241300
},
{
"epoch": 1.75,
"learning_rate": 2.554282573160324e-06,
"loss": 2.7457,
"step": 241400
},
{
"epoch": 1.75,
"learning_rate": 2.5470497186419596e-06,
"loss": 2.7406,
"step": 241500
},
{
"epoch": 1.75,
"learning_rate": 2.5398168641235952e-06,
"loss": 2.7382,
"step": 241600
},
{
"epoch": 1.75,
"learning_rate": 2.532656338150415e-06,
"loss": 2.742,
"step": 241700
},
{
"epoch": 1.75,
"learning_rate": 2.5254234836320506e-06,
"loss": 2.7328,
"step": 241800
},
{
"epoch": 1.75,
"learning_rate": 2.518190629113686e-06,
"loss": 2.7437,
"step": 241900
},
{
"epoch": 1.75,
"learning_rate": 2.5109577745953215e-06,
"loss": 2.7351,
"step": 242000
},
{
"epoch": 1.75,
"eval_accuracy": 0.461543766048761,
"eval_loss": 2.758322238922119,
"eval_runtime": 29.9387,
"eval_samples_per_second": 216.543,
"eval_steps_per_second": 2.271,
"step": 242000
},
{
"epoch": 1.75,
"learning_rate": 2.5037249200769576e-06,
"loss": 2.7423,
"step": 242100
},
{
"epoch": 1.75,
"learning_rate": 2.4964920655585937e-06,
"loss": 2.7486,
"step": 242200
},
{
"epoch": 1.75,
"learning_rate": 2.4892592110402294e-06,
"loss": 2.7439,
"step": 242300
},
{
"epoch": 1.75,
"learning_rate": 2.482026356521865e-06,
"loss": 2.7344,
"step": 242400
},
{
"epoch": 1.75,
"learning_rate": 2.4747935020035007e-06,
"loss": 2.7396,
"step": 242500
},
{
"epoch": 1.75,
"learning_rate": 2.4675606474851364e-06,
"loss": 2.7368,
"step": 242600
},
{
"epoch": 1.76,
"learning_rate": 2.4603277929667725e-06,
"loss": 2.7349,
"step": 242700
},
{
"epoch": 1.76,
"learning_rate": 2.453094938448408e-06,
"loss": 2.7427,
"step": 242800
},
{
"epoch": 1.76,
"learning_rate": 2.445862083930044e-06,
"loss": 2.7458,
"step": 242900
},
{
"epoch": 1.76,
"learning_rate": 2.43862922941168e-06,
"loss": 2.7391,
"step": 243000
},
{
"epoch": 1.76,
"eval_accuracy": 0.4615431611484637,
"eval_loss": 2.7582499980926514,
"eval_runtime": 29.9454,
"eval_samples_per_second": 216.494,
"eval_steps_per_second": 2.271,
"step": 243000
},
{
"epoch": 1.76,
"learning_rate": 2.4313963748933156e-06,
"loss": 2.7428,
"step": 243100
},
{
"epoch": 1.76,
"learning_rate": 2.4241635203749512e-06,
"loss": 2.7424,
"step": 243200
},
{
"epoch": 1.76,
"learning_rate": 2.4169306658565873e-06,
"loss": 2.7412,
"step": 243300
},
{
"epoch": 1.76,
"learning_rate": 2.409697811338223e-06,
"loss": 2.7338,
"step": 243400
},
{
"epoch": 1.76,
"learning_rate": 2.4024649568198587e-06,
"loss": 2.7389,
"step": 243500
},
{
"epoch": 1.76,
"learning_rate": 2.3952321023014948e-06,
"loss": 2.7363,
"step": 243600
},
{
"epoch": 1.76,
"learning_rate": 2.3879992477831304e-06,
"loss": 2.7472,
"step": 243700
},
{
"epoch": 1.76,
"learning_rate": 2.3808387218099497e-06,
"loss": 2.7422,
"step": 243800
},
{
"epoch": 1.76,
"learning_rate": 2.3736058672915854e-06,
"loss": 2.7432,
"step": 243900
},
{
"epoch": 1.76,
"learning_rate": 2.366373012773221e-06,
"loss": 2.7495,
"step": 244000
},
{
"epoch": 1.76,
"eval_accuracy": 0.4615492101514368,
"eval_loss": 2.758117437362671,
"eval_runtime": 29.5895,
"eval_samples_per_second": 219.098,
"eval_steps_per_second": 2.298,
"step": 244000
},
{
"epoch": 1.77,
"learning_rate": 2.359140158254857e-06,
"loss": 2.7379,
"step": 244100
},
{
"epoch": 1.77,
"learning_rate": 2.351907303736493e-06,
"loss": 2.736,
"step": 244200
},
{
"epoch": 1.77,
"learning_rate": 2.3446744492181285e-06,
"loss": 2.7374,
"step": 244300
},
{
"epoch": 1.77,
"learning_rate": 2.3374415946997646e-06,
"loss": 2.7352,
"step": 244400
},
{
"epoch": 1.77,
"learning_rate": 2.3302087401814002e-06,
"loss": 2.7366,
"step": 244500
},
{
"epoch": 1.77,
"learning_rate": 2.322975885663036e-06,
"loss": 2.742,
"step": 244600
},
{
"epoch": 1.77,
"learning_rate": 2.315743031144672e-06,
"loss": 2.7376,
"step": 244700
},
{
"epoch": 1.77,
"learning_rate": 2.3085101766263077e-06,
"loss": 2.7423,
"step": 244800
},
{
"epoch": 1.77,
"learning_rate": 2.3012773221079433e-06,
"loss": 2.7357,
"step": 244900
},
{
"epoch": 1.77,
"learning_rate": 2.294044467589579e-06,
"loss": 2.7399,
"step": 245000
},
{
"epoch": 1.77,
"eval_accuracy": 0.4614409329982186,
"eval_loss": 2.7580342292785645,
"eval_runtime": 31.4458,
"eval_samples_per_second": 206.164,
"eval_steps_per_second": 2.162,
"step": 245000
},
{
"epoch": 1.77,
"learning_rate": 2.2868116130712147e-06,
"loss": 2.7369,
"step": 245100
},
{
"epoch": 1.77,
"learning_rate": 2.2795787585528508e-06,
"loss": 2.7372,
"step": 245200
},
{
"epoch": 1.77,
"learning_rate": 2.2723459040344864e-06,
"loss": 2.7426,
"step": 245300
},
{
"epoch": 1.77,
"learning_rate": 2.2651853780613057e-06,
"loss": 2.742,
"step": 245400
},
{
"epoch": 1.78,
"learning_rate": 2.257952523542942e-06,
"loss": 2.7461,
"step": 245500
},
{
"epoch": 1.78,
"learning_rate": 2.2507196690245775e-06,
"loss": 2.7337,
"step": 245600
},
{
"epoch": 1.78,
"learning_rate": 2.243486814506213e-06,
"loss": 2.7397,
"step": 245700
},
{
"epoch": 1.78,
"learning_rate": 2.2362539599878493e-06,
"loss": 2.7301,
"step": 245800
},
{
"epoch": 1.78,
"learning_rate": 2.2290211054694845e-06,
"loss": 2.7431,
"step": 245900
},
{
"epoch": 1.78,
"learning_rate": 2.2217882509511206e-06,
"loss": 2.7435,
"step": 246000
},
{
"epoch": 1.78,
"eval_accuracy": 0.4615564689550045,
"eval_loss": 2.757960081100464,
"eval_runtime": 29.7199,
"eval_samples_per_second": 218.136,
"eval_steps_per_second": 2.288,
"step": 246000
},
{
"epoch": 1.78,
"learning_rate": 2.2145553964327563e-06,
"loss": 2.739,
"step": 246100
},
{
"epoch": 1.78,
"learning_rate": 2.2073948704595756e-06,
"loss": 2.7395,
"step": 246200
},
{
"epoch": 1.78,
"learning_rate": 2.2001620159412117e-06,
"loss": 2.7303,
"step": 246300
},
{
"epoch": 1.78,
"learning_rate": 2.1929291614228473e-06,
"loss": 2.7363,
"step": 246400
},
{
"epoch": 1.78,
"learning_rate": 2.185696306904483e-06,
"loss": 2.7318,
"step": 246500
},
{
"epoch": 1.78,
"learning_rate": 2.178463452386119e-06,
"loss": 2.7335,
"step": 246600
},
{
"epoch": 1.78,
"learning_rate": 2.1712305978677548e-06,
"loss": 2.7384,
"step": 246700
},
{
"epoch": 1.79,
"learning_rate": 2.1639977433493904e-06,
"loss": 2.7412,
"step": 246800
},
{
"epoch": 1.79,
"learning_rate": 2.1567648888310265e-06,
"loss": 2.7449,
"step": 246900
},
{
"epoch": 1.79,
"learning_rate": 2.1495320343126617e-06,
"loss": 2.7414,
"step": 247000
},
{
"epoch": 1.79,
"eval_accuracy": 0.46152924844162563,
"eval_loss": 2.7578506469726562,
"eval_runtime": 30.2105,
"eval_samples_per_second": 214.595,
"eval_steps_per_second": 2.251,
"step": 247000
},
{
"epoch": 1.79,
"learning_rate": 2.142299179794298e-06,
"loss": 2.7373,
"step": 247100
},
{
"epoch": 1.79,
"learning_rate": 2.1350663252759335e-06,
"loss": 2.7383,
"step": 247200
},
{
"epoch": 1.79,
"learning_rate": 2.127833470757569e-06,
"loss": 2.7326,
"step": 247300
},
{
"epoch": 1.79,
"learning_rate": 2.1206006162392053e-06,
"loss": 2.7407,
"step": 247400
},
{
"epoch": 1.79,
"learning_rate": 2.113367761720841e-06,
"loss": 2.7294,
"step": 247500
},
{
"epoch": 1.79,
"learning_rate": 2.1061349072024766e-06,
"loss": 2.7417,
"step": 247600
},
{
"epoch": 1.79,
"learning_rate": 2.0989020526841127e-06,
"loss": 2.7314,
"step": 247700
},
{
"epoch": 1.79,
"learning_rate": 2.0916691981657484e-06,
"loss": 2.736,
"step": 247800
},
{
"epoch": 1.79,
"learning_rate": 2.084436343647384e-06,
"loss": 2.7482,
"step": 247900
},
{
"epoch": 1.79,
"learning_rate": 2.07720348912902e-06,
"loss": 2.7478,
"step": 248000
},
{
"epoch": 1.79,
"eval_accuracy": 0.4615903433716538,
"eval_loss": 2.7578227519989014,
"eval_runtime": 29.6353,
"eval_samples_per_second": 218.759,
"eval_steps_per_second": 2.295,
"step": 248000
},
{
"epoch": 1.79,
"learning_rate": 2.0699706346106558e-06,
"loss": 2.7368,
"step": 248100
},
{
"epoch": 1.8,
"learning_rate": 2.0627377800922914e-06,
"loss": 2.735,
"step": 248200
},
{
"epoch": 1.8,
"learning_rate": 2.055504925573927e-06,
"loss": 2.7335,
"step": 248300
},
{
"epoch": 1.8,
"learning_rate": 2.0482720710555628e-06,
"loss": 2.7429,
"step": 248400
},
{
"epoch": 1.8,
"learning_rate": 2.041039216537199e-06,
"loss": 2.7418,
"step": 248500
},
{
"epoch": 1.8,
"learning_rate": 2.0338063620188345e-06,
"loss": 2.7387,
"step": 248600
},
{
"epoch": 1.8,
"learning_rate": 2.02657350750047e-06,
"loss": 2.7381,
"step": 248700
},
{
"epoch": 1.8,
"learning_rate": 2.0193406529821063e-06,
"loss": 2.7433,
"step": 248800
},
{
"epoch": 1.8,
"learning_rate": 2.012107798463742e-06,
"loss": 2.7437,
"step": 248900
},
{
"epoch": 1.8,
"learning_rate": 2.0048749439453776e-06,
"loss": 2.7299,
"step": 249000
},
{
"epoch": 1.8,
"eval_accuracy": 0.4615842943686807,
"eval_loss": 2.7576780319213867,
"eval_runtime": 30.3855,
"eval_samples_per_second": 213.358,
"eval_steps_per_second": 2.238,
"step": 249000
},
{
"epoch": 1.8,
"learning_rate": 1.9976420894270133e-06,
"loss": 2.74,
"step": 249100
},
{
"epoch": 1.8,
"learning_rate": 1.990481563453833e-06,
"loss": 2.7341,
"step": 249200
},
{
"epoch": 1.8,
"learning_rate": 1.9833210374806523e-06,
"loss": 2.7405,
"step": 249300
},
{
"epoch": 1.8,
"learning_rate": 1.976088182962288e-06,
"loss": 2.7356,
"step": 249400
},
{
"epoch": 1.8,
"learning_rate": 1.9689276569891073e-06,
"loss": 2.7424,
"step": 249500
},
{
"epoch": 1.81,
"learning_rate": 1.9616948024707434e-06,
"loss": 2.7359,
"step": 249600
},
{
"epoch": 1.81,
"learning_rate": 1.954461947952379e-06,
"loss": 2.7368,
"step": 249700
},
{
"epoch": 1.81,
"learning_rate": 1.9472290934340148e-06,
"loss": 2.7339,
"step": 249800
},
{
"epoch": 1.81,
"learning_rate": 1.939996238915651e-06,
"loss": 2.7452,
"step": 249900
},
{
"epoch": 1.81,
"learning_rate": 1.9327633843972865e-06,
"loss": 2.7401,
"step": 250000
},
{
"epoch": 1.81,
"eval_accuracy": 0.4615576787555991,
"eval_loss": 2.7575795650482178,
"eval_runtime": 30.2185,
"eval_samples_per_second": 214.538,
"eval_steps_per_second": 2.25,
"step": 250000
},
{
"epoch": 1.81,
"learning_rate": 1.925530529878922e-06,
"loss": 2.7378,
"step": 250100
},
{
"epoch": 1.81,
"learning_rate": 1.918297675360558e-06,
"loss": 2.7439,
"step": 250200
},
{
"epoch": 1.81,
"learning_rate": 1.9110648208421935e-06,
"loss": 2.7375,
"step": 250300
},
{
"epoch": 1.81,
"learning_rate": 1.9038319663238294e-06,
"loss": 2.7377,
"step": 250400
},
{
"epoch": 1.81,
"learning_rate": 1.8965991118054653e-06,
"loss": 2.7367,
"step": 250500
},
{
"epoch": 1.81,
"learning_rate": 1.8893662572871011e-06,
"loss": 2.7409,
"step": 250600
},
{
"epoch": 1.81,
"learning_rate": 1.8821334027687368e-06,
"loss": 2.7352,
"step": 250700
},
{
"epoch": 1.81,
"learning_rate": 1.8749005482503727e-06,
"loss": 2.7421,
"step": 250800
},
{
"epoch": 1.81,
"learning_rate": 1.8676676937320086e-06,
"loss": 2.7286,
"step": 250900
},
{
"epoch": 1.82,
"learning_rate": 1.8604348392136442e-06,
"loss": 2.7395,
"step": 251000
},
{
"epoch": 1.82,
"eval_accuracy": 0.4616157491841407,
"eval_loss": 2.757535219192505,
"eval_runtime": 29.6145,
"eval_samples_per_second": 218.913,
"eval_steps_per_second": 2.296,
"step": 251000
},
{
"epoch": 1.82,
"learning_rate": 1.8532019846952801e-06,
"loss": 2.734,
"step": 251100
},
{
"epoch": 1.82,
"learning_rate": 1.8459691301769158e-06,
"loss": 2.7448,
"step": 251200
},
{
"epoch": 1.82,
"learning_rate": 1.8387362756585517e-06,
"loss": 2.7367,
"step": 251300
},
{
"epoch": 1.82,
"learning_rate": 1.8315034211401871e-06,
"loss": 2.7354,
"step": 251400
},
{
"epoch": 1.82,
"learning_rate": 1.824270566621823e-06,
"loss": 2.7409,
"step": 251500
},
{
"epoch": 1.82,
"learning_rate": 1.8171100406486425e-06,
"loss": 2.7331,
"step": 251600
},
{
"epoch": 1.82,
"learning_rate": 1.8098771861302784e-06,
"loss": 2.7325,
"step": 251700
},
{
"epoch": 1.82,
"learning_rate": 1.802644331611914e-06,
"loss": 2.7439,
"step": 251800
},
{
"epoch": 1.82,
"learning_rate": 1.79541147709355e-06,
"loss": 2.7325,
"step": 251900
},
{
"epoch": 1.82,
"learning_rate": 1.7881786225751856e-06,
"loss": 2.7399,
"step": 252000
},
{
"epoch": 1.82,
"eval_accuracy": 0.46158852867076183,
"eval_loss": 2.757430076599121,
"eval_runtime": 29.3317,
"eval_samples_per_second": 221.024,
"eval_steps_per_second": 2.318,
"step": 252000
},
{
"epoch": 1.82,
"learning_rate": 1.7809457680568215e-06,
"loss": 2.744,
"step": 252100
},
{
"epoch": 1.82,
"learning_rate": 1.7737129135384574e-06,
"loss": 2.7365,
"step": 252200
},
{
"epoch": 1.82,
"learning_rate": 1.766480059020093e-06,
"loss": 2.7367,
"step": 252300
},
{
"epoch": 1.83,
"learning_rate": 1.759247204501729e-06,
"loss": 2.7402,
"step": 252400
},
{
"epoch": 1.83,
"learning_rate": 1.7520143499833644e-06,
"loss": 2.74,
"step": 252500
},
{
"epoch": 1.83,
"learning_rate": 1.7447814954650002e-06,
"loss": 2.7433,
"step": 252600
},
{
"epoch": 1.83,
"learning_rate": 1.7376209694918198e-06,
"loss": 2.7307,
"step": 252700
},
{
"epoch": 1.83,
"learning_rate": 1.7303881149734556e-06,
"loss": 2.7333,
"step": 252800
},
{
"epoch": 1.83,
"learning_rate": 1.7231552604550913e-06,
"loss": 2.7416,
"step": 252900
},
{
"epoch": 1.83,
"learning_rate": 1.7159224059367272e-06,
"loss": 2.7413,
"step": 253000
},
{
"epoch": 1.83,
"eval_accuracy": 0.4616139344832488,
"eval_loss": 2.757355213165283,
"eval_runtime": 29.4627,
"eval_samples_per_second": 220.041,
"eval_steps_per_second": 2.308,
"step": 253000
},
{
"epoch": 1.83,
"learning_rate": 1.7086895514183629e-06,
"loss": 2.7397,
"step": 253100
},
{
"epoch": 1.83,
"learning_rate": 1.7014566968999987e-06,
"loss": 2.7499,
"step": 253200
},
{
"epoch": 1.83,
"learning_rate": 1.6942238423816346e-06,
"loss": 2.7391,
"step": 253300
},
{
"epoch": 1.83,
"learning_rate": 1.6869909878632703e-06,
"loss": 2.7351,
"step": 253400
},
{
"epoch": 1.83,
"learning_rate": 1.6797581333449062e-06,
"loss": 2.7421,
"step": 253500
},
{
"epoch": 1.83,
"learning_rate": 1.6725252788265416e-06,
"loss": 2.7468,
"step": 253600
},
{
"epoch": 1.83,
"learning_rate": 1.6652924243081775e-06,
"loss": 2.7392,
"step": 253700
},
{
"epoch": 1.84,
"learning_rate": 1.6580595697898134e-06,
"loss": 2.7259,
"step": 253800
},
{
"epoch": 1.84,
"learning_rate": 1.650899043816633e-06,
"loss": 2.7404,
"step": 253900
},
{
"epoch": 1.84,
"learning_rate": 1.6436661892982686e-06,
"loss": 2.7294,
"step": 254000
},
{
"epoch": 1.84,
"eval_accuracy": 0.4616357108939519,
"eval_loss": 2.757269859313965,
"eval_runtime": 30.4439,
"eval_samples_per_second": 212.949,
"eval_steps_per_second": 2.234,
"step": 254000
},
{
"epoch": 1.84,
"learning_rate": 1.6364333347799044e-06,
"loss": 2.74,
"step": 254100
},
{
"epoch": 1.84,
"learning_rate": 1.6292004802615401e-06,
"loss": 2.7326,
"step": 254200
},
{
"epoch": 1.84,
"learning_rate": 1.621967625743176e-06,
"loss": 2.7434,
"step": 254300
},
{
"epoch": 1.84,
"learning_rate": 1.6148070997699953e-06,
"loss": 2.7419,
"step": 254400
},
{
"epoch": 1.84,
"learning_rate": 1.607574245251631e-06,
"loss": 2.7295,
"step": 254500
},
{
"epoch": 1.84,
"learning_rate": 1.6003413907332668e-06,
"loss": 2.7313,
"step": 254600
},
{
"epoch": 1.84,
"learning_rate": 1.5931085362149027e-06,
"loss": 2.7341,
"step": 254700
},
{
"epoch": 1.84,
"learning_rate": 1.5858756816965384e-06,
"loss": 2.7482,
"step": 254800
},
{
"epoch": 1.84,
"learning_rate": 1.5786428271781743e-06,
"loss": 2.7316,
"step": 254900
},
{
"epoch": 1.84,
"learning_rate": 1.5714099726598101e-06,
"loss": 2.7329,
"step": 255000
},
{
"epoch": 1.84,
"eval_accuracy": 0.4616484138001954,
"eval_loss": 2.757246732711792,
"eval_runtime": 29.9443,
"eval_samples_per_second": 216.502,
"eval_steps_per_second": 2.271,
"step": 255000
},
{
"epoch": 1.85,
"learning_rate": 1.5641771181414458e-06,
"loss": 2.7439,
"step": 255100
},
{
"epoch": 1.85,
"learning_rate": 1.5569442636230817e-06,
"loss": 2.7369,
"step": 255200
},
{
"epoch": 1.85,
"learning_rate": 1.5497114091047174e-06,
"loss": 2.7376,
"step": 255300
},
{
"epoch": 1.85,
"learning_rate": 1.5424785545863532e-06,
"loss": 2.7391,
"step": 255400
},
{
"epoch": 1.85,
"learning_rate": 1.5352457000679891e-06,
"loss": 2.7324,
"step": 255500
},
{
"epoch": 1.85,
"learning_rate": 1.5280128455496248e-06,
"loss": 2.7475,
"step": 255600
},
{
"epoch": 1.85,
"learning_rate": 1.5207799910312604e-06,
"loss": 2.7312,
"step": 255700
},
{
"epoch": 1.85,
"learning_rate": 1.5135471365128961e-06,
"loss": 2.7444,
"step": 255800
},
{
"epoch": 1.85,
"learning_rate": 1.506314281994532e-06,
"loss": 2.7346,
"step": 255900
},
{
"epoch": 1.85,
"learning_rate": 1.4991537560213515e-06,
"loss": 2.7454,
"step": 256000
},
{
"epoch": 1.85,
"eval_accuracy": 0.46165385790287117,
"eval_loss": 2.757188081741333,
"eval_runtime": 28.8568,
"eval_samples_per_second": 224.661,
"eval_steps_per_second": 2.356,
"step": 256000
},
{
"epoch": 1.85,
"learning_rate": 1.4919209015029872e-06,
"loss": 2.7443,
"step": 256100
},
{
"epoch": 1.85,
"learning_rate": 1.484688046984623e-06,
"loss": 2.7341,
"step": 256200
},
{
"epoch": 1.85,
"learning_rate": 1.477455192466259e-06,
"loss": 2.7427,
"step": 256300
},
{
"epoch": 1.85,
"learning_rate": 1.4702223379478946e-06,
"loss": 2.7386,
"step": 256400
},
{
"epoch": 1.86,
"learning_rate": 1.4629894834295305e-06,
"loss": 2.7349,
"step": 256500
},
{
"epoch": 1.86,
"learning_rate": 1.4559012860015335e-06,
"loss": 2.7334,
"step": 256600
},
{
"epoch": 1.86,
"learning_rate": 1.4486684314831691e-06,
"loss": 2.7423,
"step": 256700
},
{
"epoch": 1.86,
"learning_rate": 1.441435576964805e-06,
"loss": 2.7376,
"step": 256800
},
{
"epoch": 1.86,
"learning_rate": 1.4342027224464409e-06,
"loss": 2.7353,
"step": 256900
},
{
"epoch": 1.86,
"learning_rate": 1.4269698679280765e-06,
"loss": 2.7343,
"step": 257000
},
{
"epoch": 1.86,
"eval_accuracy": 0.46170527442814235,
"eval_loss": 2.757138252258301,
"eval_runtime": 30.9623,
"eval_samples_per_second": 209.383,
"eval_steps_per_second": 2.196,
"step": 257000
},
{
"epoch": 1.86,
"learning_rate": 1.4197370134097124e-06,
"loss": 2.7396,
"step": 257100
},
{
"epoch": 1.86,
"learning_rate": 1.4125041588913483e-06,
"loss": 2.7398,
"step": 257200
},
{
"epoch": 1.86,
"learning_rate": 1.405271304372984e-06,
"loss": 2.7293,
"step": 257300
},
{
"epoch": 1.86,
"learning_rate": 1.3980384498546199e-06,
"loss": 2.7378,
"step": 257400
},
{
"epoch": 1.86,
"learning_rate": 1.3908055953362557e-06,
"loss": 2.7431,
"step": 257500
},
{
"epoch": 1.86,
"learning_rate": 1.3835727408178912e-06,
"loss": 2.7375,
"step": 257600
},
{
"epoch": 1.86,
"learning_rate": 1.376339886299527e-06,
"loss": 2.7346,
"step": 257700
},
{
"epoch": 1.86,
"learning_rate": 1.3691070317811627e-06,
"loss": 2.7308,
"step": 257800
},
{
"epoch": 1.87,
"learning_rate": 1.3618741772627986e-06,
"loss": 2.7364,
"step": 257900
},
{
"epoch": 1.87,
"learning_rate": 1.3546413227444345e-06,
"loss": 2.7356,
"step": 258000
},
{
"epoch": 1.87,
"eval_accuracy": 0.46169559602338545,
"eval_loss": 2.757066488265991,
"eval_runtime": 29.5596,
"eval_samples_per_second": 219.319,
"eval_steps_per_second": 2.3,
"step": 258000
},
{
"epoch": 1.87,
"learning_rate": 1.3474084682260701e-06,
"loss": 2.7362,
"step": 258100
},
{
"epoch": 1.87,
"learning_rate": 1.340175613707706e-06,
"loss": 2.7308,
"step": 258200
},
{
"epoch": 1.87,
"learning_rate": 1.3329427591893417e-06,
"loss": 2.7459,
"step": 258300
},
{
"epoch": 1.87,
"learning_rate": 1.3257099046709776e-06,
"loss": 2.7381,
"step": 258400
},
{
"epoch": 1.87,
"learning_rate": 1.3184770501526135e-06,
"loss": 2.7388,
"step": 258500
},
{
"epoch": 1.87,
"learning_rate": 1.3112441956342491e-06,
"loss": 2.7453,
"step": 258600
},
{
"epoch": 1.87,
"learning_rate": 1.304011341115885e-06,
"loss": 2.7413,
"step": 258700
},
{
"epoch": 1.87,
"learning_rate": 1.2967784865975209e-06,
"loss": 2.7237,
"step": 258800
},
{
"epoch": 1.87,
"learning_rate": 1.2895456320791563e-06,
"loss": 2.7355,
"step": 258900
},
{
"epoch": 1.87,
"learning_rate": 1.2823127775607922e-06,
"loss": 2.7462,
"step": 259000
},
{
"epoch": 1.87,
"eval_accuracy": 0.4617197920352778,
"eval_loss": 2.7570412158966064,
"eval_runtime": 27.9363,
"eval_samples_per_second": 232.064,
"eval_steps_per_second": 2.434,
"step": 259000
},
{
"epoch": 1.87,
"learning_rate": 1.2750799230424279e-06,
"loss": 2.7382,
"step": 259100
},
{
"epoch": 1.87,
"learning_rate": 1.2678470685240638e-06,
"loss": 2.7421,
"step": 259200
},
{
"epoch": 1.88,
"learning_rate": 1.2606142140056996e-06,
"loss": 2.7328,
"step": 259300
},
{
"epoch": 1.88,
"learning_rate": 1.2533813594873353e-06,
"loss": 2.7247,
"step": 259400
},
{
"epoch": 1.88,
"learning_rate": 1.2461485049689712e-06,
"loss": 2.735,
"step": 259500
},
{
"epoch": 1.88,
"learning_rate": 1.238915650450607e-06,
"loss": 2.737,
"step": 259600
},
{
"epoch": 1.88,
"learning_rate": 1.2316827959322427e-06,
"loss": 2.7381,
"step": 259700
},
{
"epoch": 1.88,
"learning_rate": 1.224522269959062e-06,
"loss": 2.7417,
"step": 259800
},
{
"epoch": 1.88,
"learning_rate": 1.217289415440698e-06,
"loss": 2.745,
"step": 259900
},
{
"epoch": 1.88,
"learning_rate": 1.2100565609223338e-06,
"loss": 2.7375,
"step": 260000
},
{
"epoch": 1.88,
"eval_accuracy": 0.4617488272495486,
"eval_loss": 2.7569446563720703,
"eval_runtime": 31.5522,
"eval_samples_per_second": 205.469,
"eval_steps_per_second": 2.155,
"step": 260000
},
{
"epoch": 1.88,
"learning_rate": 1.2028237064039695e-06,
"loss": 2.7467,
"step": 260100
},
{
"epoch": 1.88,
"learning_rate": 1.1955908518856053e-06,
"loss": 2.7409,
"step": 260200
},
{
"epoch": 1.88,
"learning_rate": 1.188357997367241e-06,
"loss": 2.7339,
"step": 260300
},
{
"epoch": 1.88,
"learning_rate": 1.1811251428488769e-06,
"loss": 2.7397,
"step": 260400
},
{
"epoch": 1.88,
"learning_rate": 1.1738922883305125e-06,
"loss": 2.7418,
"step": 260500
},
{
"epoch": 1.88,
"learning_rate": 1.1666594338121484e-06,
"loss": 2.7402,
"step": 260600
},
{
"epoch": 1.89,
"learning_rate": 1.1594265792937843e-06,
"loss": 2.7448,
"step": 260700
},
{
"epoch": 1.89,
"learning_rate": 1.1522660533206036e-06,
"loss": 2.7441,
"step": 260800
},
{
"epoch": 1.89,
"learning_rate": 1.1450331988022393e-06,
"loss": 2.7411,
"step": 260900
},
{
"epoch": 1.89,
"learning_rate": 1.1378003442838752e-06,
"loss": 2.7368,
"step": 261000
},
{
"epoch": 1.89,
"eval_accuracy": 0.46175306155162976,
"eval_loss": 2.7569141387939453,
"eval_runtime": 29.4044,
"eval_samples_per_second": 220.477,
"eval_steps_per_second": 2.313,
"step": 261000
},
{
"epoch": 1.89,
"learning_rate": 1.130567489765511e-06,
"loss": 2.7327,
"step": 261100
},
{
"epoch": 1.89,
"learning_rate": 1.1233346352471467e-06,
"loss": 2.7408,
"step": 261200
},
{
"epoch": 1.89,
"learning_rate": 1.1161017807287824e-06,
"loss": 2.7415,
"step": 261300
},
{
"epoch": 1.89,
"learning_rate": 1.1088689262104183e-06,
"loss": 2.7472,
"step": 261400
},
{
"epoch": 1.89,
"learning_rate": 1.1016360716920541e-06,
"loss": 2.7347,
"step": 261500
},
{
"epoch": 1.89,
"learning_rate": 1.0944032171736898e-06,
"loss": 2.7333,
"step": 261600
},
{
"epoch": 1.89,
"learning_rate": 1.0871703626553257e-06,
"loss": 2.7426,
"step": 261700
},
{
"epoch": 1.89,
"learning_rate": 1.0799375081369616e-06,
"loss": 2.7343,
"step": 261800
},
{
"epoch": 1.89,
"learning_rate": 1.0727046536185972e-06,
"loss": 2.7387,
"step": 261900
},
{
"epoch": 1.89,
"learning_rate": 1.0654717991002329e-06,
"loss": 2.7452,
"step": 262000
},
{
"epoch": 1.89,
"eval_accuracy": 0.4617373341438997,
"eval_loss": 2.7568695545196533,
"eval_runtime": 29.6699,
"eval_samples_per_second": 218.504,
"eval_steps_per_second": 2.292,
"step": 262000
},
{
"epoch": 1.9,
"learning_rate": 1.0582389445818688e-06,
"loss": 2.7431,
"step": 262100
},
{
"epoch": 1.9,
"learning_rate": 1.0510060900635046e-06,
"loss": 2.737,
"step": 262200
},
{
"epoch": 1.9,
"learning_rate": 1.0437732355451403e-06,
"loss": 2.7351,
"step": 262300
},
{
"epoch": 1.9,
"learning_rate": 1.0365403810267762e-06,
"loss": 2.7306,
"step": 262400
},
{
"epoch": 1.9,
"learning_rate": 1.0293075265084119e-06,
"loss": 2.7392,
"step": 262500
},
{
"epoch": 1.9,
"learning_rate": 1.0220746719900475e-06,
"loss": 2.7406,
"step": 262600
},
{
"epoch": 1.9,
"learning_rate": 1.0148418174716834e-06,
"loss": 2.7378,
"step": 262700
},
{
"epoch": 1.9,
"learning_rate": 1.0076089629533193e-06,
"loss": 2.7391,
"step": 262800
},
{
"epoch": 1.9,
"learning_rate": 1.000376108434955e-06,
"loss": 2.7356,
"step": 262900
},
{
"epoch": 1.9,
"learning_rate": 9.931432539165908e-07,
"loss": 2.7394,
"step": 263000
},
{
"epoch": 1.9,
"eval_accuracy": 0.46170285482695317,
"eval_loss": 2.7567996978759766,
"eval_runtime": 27.9535,
"eval_samples_per_second": 231.921,
"eval_steps_per_second": 2.433,
"step": 263000
},
{
"epoch": 1.9,
"learning_rate": 9.859103993982267e-07,
"loss": 2.7391,
"step": 263100
},
{
"epoch": 1.9,
"learning_rate": 9.786775448798624e-07,
"loss": 2.7399,
"step": 263200
},
{
"epoch": 1.9,
"learning_rate": 9.71444690361498e-07,
"loss": 2.7326,
"step": 263300
},
{
"epoch": 1.91,
"learning_rate": 9.64211835843134e-07,
"loss": 2.7277,
"step": 263400
},
{
"epoch": 1.91,
"learning_rate": 9.569789813247698e-07,
"loss": 2.7386,
"step": 263500
},
{
"epoch": 1.91,
"learning_rate": 9.497461268064056e-07,
"loss": 2.7339,
"step": 263600
},
{
"epoch": 1.91,
"learning_rate": 9.425132722880413e-07,
"loss": 2.7326,
"step": 263700
},
{
"epoch": 1.91,
"learning_rate": 9.352804177696771e-07,
"loss": 2.7286,
"step": 263800
},
{
"epoch": 1.91,
"learning_rate": 9.280475632513128e-07,
"loss": 2.7334,
"step": 263900
},
{
"epoch": 1.91,
"learning_rate": 9.208147087329485e-07,
"loss": 2.7378,
"step": 264000
},
{
"epoch": 1.91,
"eval_accuracy": 0.46175245665133247,
"eval_loss": 2.7567875385284424,
"eval_runtime": 29.609,
"eval_samples_per_second": 218.954,
"eval_steps_per_second": 2.297,
"step": 264000
},
{
"epoch": 1.91,
"learning_rate": 9.136541827597681e-07,
"loss": 2.7481,
"step": 264100
},
{
"epoch": 1.91,
"learning_rate": 9.064213282414038e-07,
"loss": 2.7415,
"step": 264200
},
{
"epoch": 1.91,
"learning_rate": 8.991884737230395e-07,
"loss": 2.7386,
"step": 264300
},
{
"epoch": 1.91,
"learning_rate": 8.919556192046754e-07,
"loss": 2.7357,
"step": 264400
},
{
"epoch": 1.91,
"learning_rate": 8.847950932314948e-07,
"loss": 2.743,
"step": 264500
},
{
"epoch": 1.91,
"learning_rate": 8.775622387131307e-07,
"loss": 2.7419,
"step": 264600
},
{
"epoch": 1.91,
"learning_rate": 8.703293841947664e-07,
"loss": 2.7376,
"step": 264700
},
{
"epoch": 1.92,
"learning_rate": 8.630965296764021e-07,
"loss": 2.7359,
"step": 264800
},
{
"epoch": 1.92,
"learning_rate": 8.558636751580379e-07,
"loss": 2.7316,
"step": 264900
},
{
"epoch": 1.92,
"learning_rate": 8.486308206396737e-07,
"loss": 2.7446,
"step": 265000
},
{
"epoch": 1.92,
"eval_accuracy": 0.4617639497569813,
"eval_loss": 2.7567336559295654,
"eval_runtime": 29.2914,
"eval_samples_per_second": 221.328,
"eval_steps_per_second": 2.322,
"step": 265000
},
{
"epoch": 1.92,
"learning_rate": 8.413979661213096e-07,
"loss": 2.7289,
"step": 265100
},
{
"epoch": 1.92,
"learning_rate": 8.341651116029453e-07,
"loss": 2.7291,
"step": 265200
},
{
"epoch": 1.92,
"learning_rate": 8.269322570845811e-07,
"loss": 2.7372,
"step": 265300
},
{
"epoch": 1.92,
"learning_rate": 8.196994025662168e-07,
"loss": 2.7369,
"step": 265400
},
{
"epoch": 1.92,
"learning_rate": 8.124665480478526e-07,
"loss": 2.739,
"step": 265500
},
{
"epoch": 1.92,
"learning_rate": 8.052336935294884e-07,
"loss": 2.7476,
"step": 265600
},
{
"epoch": 1.92,
"learning_rate": 7.980008390111242e-07,
"loss": 2.7415,
"step": 265700
},
{
"epoch": 1.92,
"learning_rate": 7.907679844927601e-07,
"loss": 2.738,
"step": 265800
},
{
"epoch": 1.92,
"learning_rate": 7.835351299743958e-07,
"loss": 2.7455,
"step": 265900
},
{
"epoch": 1.92,
"learning_rate": 7.763022754560315e-07,
"loss": 2.7436,
"step": 266000
},
{
"epoch": 1.92,
"eval_accuracy": 0.46179419477184674,
"eval_loss": 2.756711006164551,
"eval_runtime": 31.9975,
"eval_samples_per_second": 202.61,
"eval_steps_per_second": 2.125,
"step": 266000
},
{
"epoch": 1.92,
"learning_rate": 7.690694209376673e-07,
"loss": 2.7329,
"step": 266100
},
{
"epoch": 1.93,
"learning_rate": 7.61836566419303e-07,
"loss": 2.7423,
"step": 266200
},
{
"epoch": 1.93,
"learning_rate": 7.546037119009389e-07,
"loss": 2.7321,
"step": 266300
},
{
"epoch": 1.93,
"learning_rate": 7.473708573825747e-07,
"loss": 2.7325,
"step": 266400
},
{
"epoch": 1.93,
"learning_rate": 7.401380028642105e-07,
"loss": 2.7361,
"step": 266500
},
{
"epoch": 1.93,
"learning_rate": 7.329051483458461e-07,
"loss": 2.7379,
"step": 266600
},
{
"epoch": 1.93,
"learning_rate": 7.25672293827482e-07,
"loss": 2.7332,
"step": 266700
},
{
"epoch": 1.93,
"learning_rate": 7.184394393091178e-07,
"loss": 2.7375,
"step": 266800
},
{
"epoch": 1.93,
"learning_rate": 7.112065847907536e-07,
"loss": 2.7367,
"step": 266900
},
{
"epoch": 1.93,
"learning_rate": 7.040460588175731e-07,
"loss": 2.7505,
"step": 267000
},
{
"epoch": 1.93,
"eval_accuracy": 0.4617736281617382,
"eval_loss": 2.7566604614257812,
"eval_runtime": 29.751,
"eval_samples_per_second": 217.908,
"eval_steps_per_second": 2.286,
"step": 267000
},
{
"epoch": 1.93,
"learning_rate": 6.968132042992088e-07,
"loss": 2.7389,
"step": 267100
},
{
"epoch": 1.93,
"learning_rate": 6.895803497808445e-07,
"loss": 2.7352,
"step": 267200
},
{
"epoch": 1.93,
"learning_rate": 6.823474952624803e-07,
"loss": 2.7449,
"step": 267300
},
{
"epoch": 1.93,
"learning_rate": 6.751146407441162e-07,
"loss": 2.7338,
"step": 267400
},
{
"epoch": 1.93,
"learning_rate": 6.67881786225752e-07,
"loss": 2.7355,
"step": 267500
},
{
"epoch": 1.94,
"learning_rate": 6.606489317073877e-07,
"loss": 2.7429,
"step": 267600
},
{
"epoch": 1.94,
"learning_rate": 6.534160771890234e-07,
"loss": 2.7364,
"step": 267700
},
{
"epoch": 1.94,
"learning_rate": 6.461832226706593e-07,
"loss": 2.738,
"step": 267800
},
{
"epoch": 1.94,
"learning_rate": 6.38950368152295e-07,
"loss": 2.7408,
"step": 267900
},
{
"epoch": 1.94,
"learning_rate": 6.317175136339308e-07,
"loss": 2.7493,
"step": 268000
},
{
"epoch": 1.94,
"eval_accuracy": 0.4617833065664952,
"eval_loss": 2.7566213607788086,
"eval_runtime": 28.2536,
"eval_samples_per_second": 229.457,
"eval_steps_per_second": 2.407,
"step": 268000
},
{
"epoch": 1.94,
"learning_rate": 6.245569876607502e-07,
"loss": 2.7406,
"step": 268100
},
{
"epoch": 1.94,
"learning_rate": 6.17324133142386e-07,
"loss": 2.7364,
"step": 268200
},
{
"epoch": 1.94,
"learning_rate": 6.100912786240218e-07,
"loss": 2.7406,
"step": 268300
},
{
"epoch": 1.94,
"learning_rate": 6.029307526508412e-07,
"loss": 2.7454,
"step": 268400
},
{
"epoch": 1.94,
"learning_rate": 5.95697898132477e-07,
"loss": 2.7426,
"step": 268500
},
{
"epoch": 1.94,
"learning_rate": 5.884650436141129e-07,
"loss": 2.7343,
"step": 268600
},
{
"epoch": 1.94,
"learning_rate": 5.812321890957485e-07,
"loss": 2.7358,
"step": 268700
},
{
"epoch": 1.94,
"learning_rate": 5.739993345773844e-07,
"loss": 2.736,
"step": 268800
},
{
"epoch": 1.94,
"learning_rate": 5.667664800590202e-07,
"loss": 2.732,
"step": 268900
},
{
"epoch": 1.95,
"learning_rate": 5.595336255406559e-07,
"loss": 2.7391,
"step": 269000
},
{
"epoch": 1.95,
"eval_accuracy": 0.4617845163670898,
"eval_loss": 2.7565996646881104,
"eval_runtime": 30.4715,
"eval_samples_per_second": 212.756,
"eval_steps_per_second": 2.232,
"step": 269000
},
{
"epoch": 1.95,
"learning_rate": 5.523007710222917e-07,
"loss": 2.733,
"step": 269100
},
{
"epoch": 1.95,
"learning_rate": 5.450679165039275e-07,
"loss": 2.7411,
"step": 269200
},
{
"epoch": 1.95,
"learning_rate": 5.378350619855633e-07,
"loss": 2.7453,
"step": 269300
},
{
"epoch": 1.95,
"learning_rate": 5.30602207467199e-07,
"loss": 2.7358,
"step": 269400
},
{
"epoch": 1.95,
"learning_rate": 5.233693529488348e-07,
"loss": 2.7314,
"step": 269500
},
{
"epoch": 1.95,
"learning_rate": 5.161364984304707e-07,
"loss": 2.744,
"step": 269600
},
{
"epoch": 1.95,
"learning_rate": 5.089036439121064e-07,
"loss": 2.7467,
"step": 269700
},
{
"epoch": 1.95,
"learning_rate": 5.016707893937422e-07,
"loss": 2.7395,
"step": 269800
},
{
"epoch": 1.95,
"learning_rate": 4.94437934875378e-07,
"loss": 2.7339,
"step": 269900
},
{
"epoch": 1.95,
"learning_rate": 4.872774089021974e-07,
"loss": 2.7431,
"step": 270000
},
{
"epoch": 1.95,
"eval_accuracy": 0.461747617448954,
"eval_loss": 2.756573438644409,
"eval_runtime": 31.3033,
"eval_samples_per_second": 207.103,
"eval_steps_per_second": 2.172,
"step": 270000
},
{
"epoch": 1.95,
"learning_rate": 4.800445543838331e-07,
"loss": 2.727,
"step": 270100
},
{
"epoch": 1.95,
"learning_rate": 4.7281169986546897e-07,
"loss": 2.7348,
"step": 270200
},
{
"epoch": 1.96,
"learning_rate": 4.6557884534710474e-07,
"loss": 2.7455,
"step": 270300
},
{
"epoch": 1.96,
"learning_rate": 4.5834599082874046e-07,
"loss": 2.7347,
"step": 270400
},
{
"epoch": 1.96,
"learning_rate": 4.511131363103763e-07,
"loss": 2.7418,
"step": 270500
},
{
"epoch": 1.96,
"learning_rate": 4.438802817920121e-07,
"loss": 2.7362,
"step": 270600
},
{
"epoch": 1.96,
"learning_rate": 4.3664742727364783e-07,
"loss": 2.7349,
"step": 270700
},
{
"epoch": 1.96,
"learning_rate": 4.2941457275528366e-07,
"loss": 2.7409,
"step": 270800
},
{
"epoch": 1.96,
"learning_rate": 4.2218171823691943e-07,
"loss": 2.7341,
"step": 270900
},
{
"epoch": 1.96,
"learning_rate": 4.1494886371855515e-07,
"loss": 2.7387,
"step": 271000
},
{
"epoch": 1.96,
"eval_accuracy": 0.46175306155162976,
"eval_loss": 2.7565271854400635,
"eval_runtime": 28.2552,
"eval_samples_per_second": 229.444,
"eval_steps_per_second": 2.407,
"step": 271000
},
{
"epoch": 1.96,
"learning_rate": 4.0771600920019097e-07,
"loss": 2.7284,
"step": 271100
},
{
"epoch": 1.96,
"learning_rate": 4.004831546818268e-07,
"loss": 2.7376,
"step": 271200
},
{
"epoch": 1.96,
"learning_rate": 3.9325030016346257e-07,
"loss": 2.7438,
"step": 271300
},
{
"epoch": 1.96,
"learning_rate": 3.860174456450983e-07,
"loss": 2.73,
"step": 271400
},
{
"epoch": 1.96,
"learning_rate": 3.787845911267341e-07,
"loss": 2.7454,
"step": 271500
},
{
"epoch": 1.96,
"learning_rate": 3.7155173660836994e-07,
"loss": 2.7397,
"step": 271600
},
{
"epoch": 1.97,
"learning_rate": 3.6431888209000566e-07,
"loss": 2.7391,
"step": 271700
},
{
"epoch": 1.97,
"learning_rate": 3.570860275716415e-07,
"loss": 2.7378,
"step": 271800
},
{
"epoch": 1.97,
"learning_rate": 3.4985317305327726e-07,
"loss": 2.7392,
"step": 271900
},
{
"epoch": 1.97,
"learning_rate": 3.42620318534913e-07,
"loss": 2.741,
"step": 272000
},
{
"epoch": 1.97,
"eval_accuracy": 0.46179358987154945,
"eval_loss": 2.7564971446990967,
"eval_runtime": 28.8041,
"eval_samples_per_second": 225.072,
"eval_steps_per_second": 2.361,
"step": 272000
},
{
"epoch": 1.97,
"learning_rate": 3.354597925617324e-07,
"loss": 2.7359,
"step": 272100
},
{
"epoch": 1.97,
"learning_rate": 3.282269380433682e-07,
"loss": 2.7351,
"step": 272200
},
{
"epoch": 1.97,
"learning_rate": 3.2099408352500405e-07,
"loss": 2.7398,
"step": 272300
},
{
"epoch": 1.97,
"learning_rate": 3.1376122900663977e-07,
"loss": 2.7399,
"step": 272400
},
{
"epoch": 1.97,
"learning_rate": 3.0652837448827554e-07,
"loss": 2.7299,
"step": 272500
},
{
"epoch": 1.97,
"learning_rate": 2.9929551996991137e-07,
"loss": 2.7412,
"step": 272600
},
{
"epoch": 1.97,
"learning_rate": 2.9206266545154714e-07,
"loss": 2.7434,
"step": 272700
},
{
"epoch": 1.97,
"learning_rate": 2.848298109331829e-07,
"loss": 2.741,
"step": 272800
},
{
"epoch": 1.97,
"learning_rate": 2.775969564148187e-07,
"loss": 2.7356,
"step": 272900
},
{
"epoch": 1.97,
"learning_rate": 2.7036410189645445e-07,
"loss": 2.7343,
"step": 273000
},
{
"epoch": 1.97,
"eval_accuracy": 0.4617863310679817,
"eval_loss": 2.756471872329712,
"eval_runtime": 29.6952,
"eval_samples_per_second": 218.318,
"eval_steps_per_second": 2.29,
"step": 273000
},
{
"epoch": 1.98,
"learning_rate": 2.6313124737809023e-07,
"loss": 2.7316,
"step": 273100
},
{
"epoch": 1.98,
"learning_rate": 2.5589839285972605e-07,
"loss": 2.7433,
"step": 273200
},
{
"epoch": 1.98,
"learning_rate": 2.486655383413618e-07,
"loss": 2.7338,
"step": 273300
},
{
"epoch": 1.98,
"learning_rate": 2.4150501236818125e-07,
"loss": 2.7303,
"step": 273400
},
{
"epoch": 1.98,
"learning_rate": 2.3427215784981705e-07,
"loss": 2.7432,
"step": 273500
},
{
"epoch": 1.98,
"learning_rate": 2.2703930333145282e-07,
"loss": 2.7416,
"step": 273600
},
{
"epoch": 1.98,
"learning_rate": 2.198064488130886e-07,
"loss": 2.739,
"step": 273700
},
{
"epoch": 1.98,
"learning_rate": 2.125735942947244e-07,
"loss": 2.7426,
"step": 273800
},
{
"epoch": 1.98,
"learning_rate": 2.0534073977636016e-07,
"loss": 2.7369,
"step": 273900
},
{
"epoch": 1.98,
"learning_rate": 1.9810788525799593e-07,
"loss": 2.7378,
"step": 274000
},
{
"epoch": 1.98,
"eval_accuracy": 0.4617851212673871,
"eval_loss": 2.756432056427002,
"eval_runtime": 29.9933,
"eval_samples_per_second": 216.148,
"eval_steps_per_second": 2.267,
"step": 274000
},
{
"epoch": 1.98,
"learning_rate": 1.9087503073963173e-07,
"loss": 2.7462,
"step": 274100
},
{
"epoch": 1.98,
"learning_rate": 1.836421762212675e-07,
"loss": 2.7361,
"step": 274200
},
{
"epoch": 1.98,
"learning_rate": 1.764093217029033e-07,
"loss": 2.7366,
"step": 274300
},
{
"epoch": 1.98,
"learning_rate": 1.6917646718453908e-07,
"loss": 2.7395,
"step": 274400
},
{
"epoch": 1.99,
"learning_rate": 1.6194361266617485e-07,
"loss": 2.7322,
"step": 274500
},
{
"epoch": 1.99,
"learning_rate": 1.5478308669299427e-07,
"loss": 2.7378,
"step": 274600
},
{
"epoch": 1.99,
"learning_rate": 1.4755023217463005e-07,
"loss": 2.7433,
"step": 274700
},
{
"epoch": 1.99,
"learning_rate": 1.4031737765626584e-07,
"loss": 2.7401,
"step": 274800
},
{
"epoch": 1.99,
"learning_rate": 1.3308452313790162e-07,
"loss": 2.7369,
"step": 274900
},
{
"epoch": 1.99,
"learning_rate": 1.2585166861953741e-07,
"loss": 2.737,
"step": 275000
},
{
"epoch": 1.99,
"eval_accuracy": 0.46180387317660365,
"eval_loss": 2.756422996520996,
"eval_runtime": 29.6811,
"eval_samples_per_second": 218.422,
"eval_steps_per_second": 2.291,
"step": 275000
},
{
"epoch": 1.99,
"learning_rate": 1.1861881410117317e-07,
"loss": 2.7343,
"step": 275100
},
{
"epoch": 1.99,
"learning_rate": 1.1138595958280896e-07,
"loss": 2.7331,
"step": 275200
},
{
"epoch": 1.99,
"learning_rate": 1.0415310506444474e-07,
"loss": 2.7372,
"step": 275300
},
{
"epoch": 1.99,
"learning_rate": 9.692025054608053e-08,
"loss": 2.7342,
"step": 275400
},
{
"epoch": 1.99,
"learning_rate": 8.96873960277163e-08,
"loss": 2.7394,
"step": 275500
},
{
"epoch": 1.99,
"learning_rate": 8.252687005453573e-08,
"loss": 2.733,
"step": 275600
},
{
"epoch": 1.99,
"learning_rate": 7.529401553617151e-08,
"loss": 2.7389,
"step": 275700
},
{
"epoch": 1.99,
"learning_rate": 6.80611610178073e-08,
"loss": 2.7343,
"step": 275800
},
{
"epoch": 2.0,
"learning_rate": 6.082830649944307e-08,
"loss": 2.7303,
"step": 275900
},
{
"epoch": 2.0,
"learning_rate": 5.3595451981078855e-08,
"loss": 2.7397,
"step": 276000
},
{
"epoch": 2.0,
"eval_accuracy": 0.4618002437748198,
"eval_loss": 2.756411075592041,
"eval_runtime": 30.3922,
"eval_samples_per_second": 213.311,
"eval_steps_per_second": 2.237,
"step": 276000
},
{
"epoch": 2.0,
"learning_rate": 4.6362597462714634e-08,
"loss": 2.7391,
"step": 276100
},
{
"epoch": 2.0,
"learning_rate": 3.912974294435042e-08,
"loss": 2.739,
"step": 276200
},
{
"epoch": 2.0,
"learning_rate": 3.18968884259862e-08,
"loss": 2.7493,
"step": 276300
},
{
"epoch": 2.0,
"learning_rate": 2.4736362452805626e-08,
"loss": 2.733,
"step": 276400
},
{
"epoch": 2.0,
"learning_rate": 1.7503507934441408e-08,
"loss": 2.7424,
"step": 276500
},
{
"epoch": 2.0,
"step": 276518,
"total_flos": 5.3881880355706765e+20,
"train_loss": 2.8250040690803138,
"train_runtime": 396233.9412,
"train_samples_per_second": 133.99,
"train_steps_per_second": 0.698
}
],
"logging_steps": 100,
"max_steps": 276518,
"num_train_epochs": 2,
"save_steps": 20000,
"total_flos": 5.3881880355706765e+20,
"trial_name": null,
"trial_params": null
}