EthioLLM-b-250K / trainer_state.json
Atnafu's picture
Re-adding model with Git LFS
9d69e15
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 10.0,
"eval_steps": 500,
"global_step": 487330,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 4.994962345843679e-05,
"loss": 8.8184,
"step": 500
},
{
"epoch": 0.02,
"learning_rate": 4.989832351794472e-05,
"loss": 7.8259,
"step": 1000
},
{
"epoch": 0.03,
"learning_rate": 4.984702357745265e-05,
"loss": 7.4946,
"step": 1500
},
{
"epoch": 0.04,
"learning_rate": 4.979572363696058e-05,
"loss": 7.2174,
"step": 2000
},
{
"epoch": 0.05,
"learning_rate": 4.974442369646851e-05,
"loss": 7.0931,
"step": 2500
},
{
"epoch": 0.06,
"learning_rate": 4.969312375597645e-05,
"loss": 6.9066,
"step": 3000
},
{
"epoch": 0.07,
"learning_rate": 4.964182381548438e-05,
"loss": 6.8159,
"step": 3500
},
{
"epoch": 0.08,
"learning_rate": 4.959052387499231e-05,
"loss": 6.7744,
"step": 4000
},
{
"epoch": 0.09,
"learning_rate": 4.953922393450024e-05,
"loss": 6.6722,
"step": 4500
},
{
"epoch": 0.1,
"learning_rate": 4.948792399400817e-05,
"loss": 6.5736,
"step": 5000
},
{
"epoch": 0.11,
"learning_rate": 4.94366240535161e-05,
"loss": 6.5583,
"step": 5500
},
{
"epoch": 0.12,
"learning_rate": 4.9385324113024026e-05,
"loss": 6.4946,
"step": 6000
},
{
"epoch": 0.13,
"learning_rate": 4.933402417253196e-05,
"loss": 6.4229,
"step": 6500
},
{
"epoch": 0.14,
"learning_rate": 4.928272423203989e-05,
"loss": 6.3521,
"step": 7000
},
{
"epoch": 0.15,
"learning_rate": 4.923142429154782e-05,
"loss": 6.2802,
"step": 7500
},
{
"epoch": 0.16,
"learning_rate": 4.918022695093674e-05,
"loss": 6.2379,
"step": 8000
},
{
"epoch": 0.17,
"learning_rate": 4.912892701044467e-05,
"loss": 6.1997,
"step": 8500
},
{
"epoch": 0.18,
"learning_rate": 4.90776270699526e-05,
"loss": 6.1736,
"step": 9000
},
{
"epoch": 0.19,
"learning_rate": 4.902632712946053e-05,
"loss": 6.1003,
"step": 9500
},
{
"epoch": 0.21,
"learning_rate": 4.897502718896846e-05,
"loss": 6.1453,
"step": 10000
},
{
"epoch": 0.22,
"learning_rate": 4.892372724847639e-05,
"loss": 6.1247,
"step": 10500
},
{
"epoch": 0.23,
"learning_rate": 4.887242730798433e-05,
"loss": 6.0404,
"step": 11000
},
{
"epoch": 0.24,
"learning_rate": 4.882112736749226e-05,
"loss": 6.0285,
"step": 11500
},
{
"epoch": 0.25,
"learning_rate": 4.876982742700019e-05,
"loss": 5.9289,
"step": 12000
},
{
"epoch": 0.26,
"learning_rate": 4.871852748650812e-05,
"loss": 5.9026,
"step": 12500
},
{
"epoch": 0.27,
"learning_rate": 4.8667227546016046e-05,
"loss": 5.9464,
"step": 13000
},
{
"epoch": 0.28,
"learning_rate": 4.861592760552398e-05,
"loss": 5.8722,
"step": 13500
},
{
"epoch": 0.29,
"learning_rate": 4.8564730264912896e-05,
"loss": 5.8571,
"step": 14000
},
{
"epoch": 0.3,
"learning_rate": 4.8513430324420826e-05,
"loss": 5.8416,
"step": 14500
},
{
"epoch": 0.31,
"learning_rate": 4.8462130383928756e-05,
"loss": 5.7729,
"step": 15000
},
{
"epoch": 0.32,
"learning_rate": 4.8410830443436686e-05,
"loss": 5.8065,
"step": 15500
},
{
"epoch": 0.33,
"learning_rate": 4.8359530502944615e-05,
"loss": 5.7176,
"step": 16000
},
{
"epoch": 0.34,
"learning_rate": 4.830823056245255e-05,
"loss": 5.7107,
"step": 16500
},
{
"epoch": 0.35,
"learning_rate": 4.825693062196048e-05,
"loss": 5.676,
"step": 17000
},
{
"epoch": 0.36,
"learning_rate": 4.820563068146841e-05,
"loss": 5.6657,
"step": 17500
},
{
"epoch": 0.37,
"learning_rate": 4.815433074097635e-05,
"loss": 5.6036,
"step": 18000
},
{
"epoch": 0.38,
"learning_rate": 4.810303080048427e-05,
"loss": 5.617,
"step": 18500
},
{
"epoch": 0.39,
"learning_rate": 4.805173085999221e-05,
"loss": 5.5956,
"step": 19000
},
{
"epoch": 0.4,
"learning_rate": 4.800053351938112e-05,
"loss": 5.5803,
"step": 19500
},
{
"epoch": 0.41,
"learning_rate": 4.794923357888905e-05,
"loss": 5.5891,
"step": 20000
},
{
"epoch": 0.42,
"learning_rate": 4.789793363839698e-05,
"loss": 5.5668,
"step": 20500
},
{
"epoch": 0.43,
"learning_rate": 4.784663369790492e-05,
"loss": 5.5098,
"step": 21000
},
{
"epoch": 0.44,
"learning_rate": 4.7795538957174814e-05,
"loss": 5.5136,
"step": 21500
},
{
"epoch": 0.45,
"learning_rate": 4.7744239016682744e-05,
"loss": 5.4964,
"step": 22000
},
{
"epoch": 0.46,
"learning_rate": 4.769293907619067e-05,
"loss": 5.433,
"step": 22500
},
{
"epoch": 0.47,
"learning_rate": 4.764163913569861e-05,
"loss": 5.4654,
"step": 23000
},
{
"epoch": 0.48,
"learning_rate": 4.759033919520653e-05,
"loss": 5.3737,
"step": 23500
},
{
"epoch": 0.49,
"learning_rate": 4.753903925471447e-05,
"loss": 5.4051,
"step": 24000
},
{
"epoch": 0.5,
"learning_rate": 4.74877393142224e-05,
"loss": 5.4338,
"step": 24500
},
{
"epoch": 0.51,
"learning_rate": 4.743643937373033e-05,
"loss": 5.4018,
"step": 25000
},
{
"epoch": 0.52,
"learning_rate": 4.738513943323826e-05,
"loss": 5.3666,
"step": 25500
},
{
"epoch": 0.53,
"learning_rate": 4.733383949274619e-05,
"loss": 5.3332,
"step": 26000
},
{
"epoch": 0.54,
"learning_rate": 4.7282539552254125e-05,
"loss": 5.3543,
"step": 26500
},
{
"epoch": 0.55,
"learning_rate": 4.7231239611762054e-05,
"loss": 5.3672,
"step": 27000
},
{
"epoch": 0.56,
"learning_rate": 4.7179939671269984e-05,
"loss": 5.2982,
"step": 27500
},
{
"epoch": 0.57,
"learning_rate": 4.7128639730777914e-05,
"loss": 5.2833,
"step": 28000
},
{
"epoch": 0.58,
"learning_rate": 4.7077339790285844e-05,
"loss": 5.3131,
"step": 28500
},
{
"epoch": 0.6,
"learning_rate": 4.702603984979377e-05,
"loss": 5.2557,
"step": 29000
},
{
"epoch": 0.61,
"learning_rate": 4.697473990930171e-05,
"loss": 5.2611,
"step": 29500
},
{
"epoch": 0.62,
"learning_rate": 4.692343996880964e-05,
"loss": 5.2762,
"step": 30000
},
{
"epoch": 0.63,
"learning_rate": 4.687224262819855e-05,
"loss": 5.2073,
"step": 30500
},
{
"epoch": 0.64,
"learning_rate": 4.682094268770649e-05,
"loss": 5.239,
"step": 31000
},
{
"epoch": 0.65,
"learning_rate": 4.676964274721441e-05,
"loss": 5.2202,
"step": 31500
},
{
"epoch": 0.66,
"learning_rate": 4.671834280672235e-05,
"loss": 5.24,
"step": 32000
},
{
"epoch": 0.67,
"learning_rate": 4.666704286623028e-05,
"loss": 5.1645,
"step": 32500
},
{
"epoch": 0.68,
"learning_rate": 4.661574292573821e-05,
"loss": 5.1957,
"step": 33000
},
{
"epoch": 0.69,
"learning_rate": 4.656444298524614e-05,
"loss": 5.163,
"step": 33500
},
{
"epoch": 0.7,
"learning_rate": 4.651314304475407e-05,
"loss": 5.1395,
"step": 34000
},
{
"epoch": 0.71,
"learning_rate": 4.646194570414298e-05,
"loss": 5.156,
"step": 34500
},
{
"epoch": 0.72,
"learning_rate": 4.641064576365092e-05,
"loss": 5.1425,
"step": 35000
},
{
"epoch": 0.73,
"learning_rate": 4.635934582315885e-05,
"loss": 5.1205,
"step": 35500
},
{
"epoch": 0.74,
"learning_rate": 4.630814848254776e-05,
"loss": 5.0797,
"step": 36000
},
{
"epoch": 0.75,
"learning_rate": 4.625684854205569e-05,
"loss": 5.1206,
"step": 36500
},
{
"epoch": 0.76,
"learning_rate": 4.620554860156363e-05,
"loss": 5.0963,
"step": 37000
},
{
"epoch": 0.77,
"learning_rate": 4.615424866107155e-05,
"loss": 5.047,
"step": 37500
},
{
"epoch": 0.78,
"learning_rate": 4.610294872057949e-05,
"loss": 5.0337,
"step": 38000
},
{
"epoch": 0.79,
"learning_rate": 4.6051648780087416e-05,
"loss": 5.0447,
"step": 38500
},
{
"epoch": 0.8,
"learning_rate": 4.600045143947633e-05,
"loss": 5.0363,
"step": 39000
},
{
"epoch": 0.81,
"learning_rate": 4.594915149898426e-05,
"loss": 5.0541,
"step": 39500
},
{
"epoch": 0.82,
"learning_rate": 4.5897851558492196e-05,
"loss": 5.0135,
"step": 40000
},
{
"epoch": 0.83,
"learning_rate": 4.5846551618000126e-05,
"loss": 5.0131,
"step": 40500
},
{
"epoch": 0.84,
"learning_rate": 4.5795251677508055e-05,
"loss": 5.006,
"step": 41000
},
{
"epoch": 0.85,
"learning_rate": 4.574395173701599e-05,
"loss": 4.9761,
"step": 41500
},
{
"epoch": 0.86,
"learning_rate": 4.5692754396404905e-05,
"loss": 4.9729,
"step": 42000
},
{
"epoch": 0.87,
"learning_rate": 4.5641454455912835e-05,
"loss": 4.9476,
"step": 42500
},
{
"epoch": 0.88,
"learning_rate": 4.5590154515420765e-05,
"loss": 4.9783,
"step": 43000
},
{
"epoch": 0.89,
"learning_rate": 4.5538854574928695e-05,
"loss": 4.934,
"step": 43500
},
{
"epoch": 0.9,
"learning_rate": 4.5487554634436624e-05,
"loss": 4.942,
"step": 44000
},
{
"epoch": 0.91,
"learning_rate": 4.543625469394456e-05,
"loss": 4.971,
"step": 44500
},
{
"epoch": 0.92,
"learning_rate": 4.5384954753452484e-05,
"loss": 4.9091,
"step": 45000
},
{
"epoch": 0.93,
"learning_rate": 4.5333757412841404e-05,
"loss": 4.9071,
"step": 45500
},
{
"epoch": 0.94,
"learning_rate": 4.5282457472349334e-05,
"loss": 4.9298,
"step": 46000
},
{
"epoch": 0.95,
"learning_rate": 4.5231157531857263e-05,
"loss": 4.9334,
"step": 46500
},
{
"epoch": 0.96,
"learning_rate": 4.517996019124618e-05,
"loss": 4.9334,
"step": 47000
},
{
"epoch": 0.97,
"learning_rate": 4.5128660250754113e-05,
"loss": 4.9512,
"step": 47500
},
{
"epoch": 0.98,
"learning_rate": 4.507736031026204e-05,
"loss": 4.9052,
"step": 48000
},
{
"epoch": 1.0,
"learning_rate": 4.502606036976997e-05,
"loss": 4.8881,
"step": 48500
},
{
"epoch": 1.01,
"learning_rate": 4.497476042927791e-05,
"loss": 4.873,
"step": 49000
},
{
"epoch": 1.02,
"learning_rate": 4.492346048878583e-05,
"loss": 4.8642,
"step": 49500
},
{
"epoch": 1.03,
"learning_rate": 4.487216054829377e-05,
"loss": 4.8801,
"step": 50000
},
{
"epoch": 1.04,
"learning_rate": 4.48208606078017e-05,
"loss": 4.8548,
"step": 50500
},
{
"epoch": 1.05,
"learning_rate": 4.476956066730963e-05,
"loss": 4.8291,
"step": 51000
},
{
"epoch": 1.06,
"learning_rate": 4.471826072681756e-05,
"loss": 4.8631,
"step": 51500
},
{
"epoch": 1.07,
"learning_rate": 4.466696078632549e-05,
"loss": 4.8377,
"step": 52000
},
{
"epoch": 1.08,
"learning_rate": 4.46157634457144e-05,
"loss": 4.8645,
"step": 52500
},
{
"epoch": 1.09,
"learning_rate": 4.456446350522234e-05,
"loss": 4.7896,
"step": 53000
},
{
"epoch": 1.1,
"learning_rate": 4.451316356473027e-05,
"loss": 4.8434,
"step": 53500
},
{
"epoch": 1.11,
"learning_rate": 4.44618636242382e-05,
"loss": 4.8492,
"step": 54000
},
{
"epoch": 1.12,
"learning_rate": 4.4410563683746134e-05,
"loss": 4.8399,
"step": 54500
},
{
"epoch": 1.13,
"learning_rate": 4.435936634313505e-05,
"loss": 4.7979,
"step": 55000
},
{
"epoch": 1.14,
"learning_rate": 4.430806640264297e-05,
"loss": 4.8179,
"step": 55500
},
{
"epoch": 1.15,
"learning_rate": 4.4256766462150907e-05,
"loss": 4.821,
"step": 56000
},
{
"epoch": 1.16,
"learning_rate": 4.4205466521658836e-05,
"loss": 4.7995,
"step": 56500
},
{
"epoch": 1.17,
"learning_rate": 4.4154166581166766e-05,
"loss": 4.7851,
"step": 57000
},
{
"epoch": 1.18,
"learning_rate": 4.41028666406747e-05,
"loss": 4.7566,
"step": 57500
},
{
"epoch": 1.19,
"learning_rate": 4.4051566700182625e-05,
"loss": 4.7688,
"step": 58000
},
{
"epoch": 1.2,
"learning_rate": 4.400026675969056e-05,
"loss": 4.787,
"step": 58500
},
{
"epoch": 1.21,
"learning_rate": 4.394917201896046e-05,
"loss": 4.7602,
"step": 59000
},
{
"epoch": 1.22,
"learning_rate": 4.389787207846839e-05,
"loss": 4.7401,
"step": 59500
},
{
"epoch": 1.23,
"learning_rate": 4.384657213797632e-05,
"loss": 4.7633,
"step": 60000
},
{
"epoch": 1.24,
"learning_rate": 4.3795272197484255e-05,
"loss": 4.7525,
"step": 60500
},
{
"epoch": 1.25,
"learning_rate": 4.3743972256992185e-05,
"loss": 4.7159,
"step": 61000
},
{
"epoch": 1.26,
"learning_rate": 4.3692672316500115e-05,
"loss": 4.7274,
"step": 61500
},
{
"epoch": 1.27,
"learning_rate": 4.3641372376008044e-05,
"loss": 4.7038,
"step": 62000
},
{
"epoch": 1.28,
"learning_rate": 4.3590072435515974e-05,
"loss": 4.6976,
"step": 62500
},
{
"epoch": 1.29,
"learning_rate": 4.353887509490489e-05,
"loss": 4.6971,
"step": 63000
},
{
"epoch": 1.3,
"learning_rate": 4.3487575154412824e-05,
"loss": 4.697,
"step": 63500
},
{
"epoch": 1.31,
"learning_rate": 4.3436275213920754e-05,
"loss": 4.7002,
"step": 64000
},
{
"epoch": 1.32,
"learning_rate": 4.3384975273428683e-05,
"loss": 4.6913,
"step": 64500
},
{
"epoch": 1.33,
"learning_rate": 4.333367533293662e-05,
"loss": 4.6913,
"step": 65000
},
{
"epoch": 1.34,
"learning_rate": 4.328237539244455e-05,
"loss": 4.7019,
"step": 65500
},
{
"epoch": 1.35,
"learning_rate": 4.323107545195248e-05,
"loss": 4.6847,
"step": 66000
},
{
"epoch": 1.36,
"learning_rate": 4.317987811134139e-05,
"loss": 4.6411,
"step": 66500
},
{
"epoch": 1.37,
"learning_rate": 4.312857817084933e-05,
"loss": 4.6794,
"step": 67000
},
{
"epoch": 1.39,
"learning_rate": 4.307727823035725e-05,
"loss": 4.6893,
"step": 67500
},
{
"epoch": 1.4,
"learning_rate": 4.302597828986519e-05,
"loss": 4.6423,
"step": 68000
},
{
"epoch": 1.41,
"learning_rate": 4.297467834937312e-05,
"loss": 4.6048,
"step": 68500
},
{
"epoch": 1.42,
"learning_rate": 4.292337840888105e-05,
"loss": 4.6473,
"step": 69000
},
{
"epoch": 1.43,
"learning_rate": 4.287218106826996e-05,
"loss": 4.5908,
"step": 69500
},
{
"epoch": 1.44,
"learning_rate": 4.28208811277779e-05,
"loss": 4.6576,
"step": 70000
},
{
"epoch": 1.45,
"learning_rate": 4.276958118728582e-05,
"loss": 4.6422,
"step": 70500
},
{
"epoch": 1.46,
"learning_rate": 4.271828124679376e-05,
"loss": 4.6184,
"step": 71000
},
{
"epoch": 1.47,
"learning_rate": 4.266698130630169e-05,
"loss": 4.613,
"step": 71500
},
{
"epoch": 1.48,
"learning_rate": 4.261568136580962e-05,
"loss": 4.6211,
"step": 72000
},
{
"epoch": 1.49,
"learning_rate": 4.2564381425317554e-05,
"loss": 4.5722,
"step": 72500
},
{
"epoch": 1.5,
"learning_rate": 4.2513081484825477e-05,
"loss": 4.6272,
"step": 73000
},
{
"epoch": 1.51,
"learning_rate": 4.246188414421439e-05,
"loss": 4.6094,
"step": 73500
},
{
"epoch": 1.52,
"learning_rate": 4.241068680360331e-05,
"loss": 4.5994,
"step": 74000
},
{
"epoch": 1.53,
"learning_rate": 4.235938686311124e-05,
"loss": 4.5916,
"step": 74500
},
{
"epoch": 1.54,
"learning_rate": 4.230808692261917e-05,
"loss": 4.5789,
"step": 75000
},
{
"epoch": 1.55,
"learning_rate": 4.22567869821271e-05,
"loss": 4.5941,
"step": 75500
},
{
"epoch": 1.56,
"learning_rate": 4.2205487041635036e-05,
"loss": 4.5707,
"step": 76000
},
{
"epoch": 1.57,
"learning_rate": 4.2154187101142966e-05,
"loss": 4.6116,
"step": 76500
},
{
"epoch": 1.58,
"learning_rate": 4.2102887160650895e-05,
"loss": 4.598,
"step": 77000
},
{
"epoch": 1.59,
"learning_rate": 4.2051587220158825e-05,
"loss": 4.6079,
"step": 77500
},
{
"epoch": 1.6,
"learning_rate": 4.2000287279666755e-05,
"loss": 4.5904,
"step": 78000
},
{
"epoch": 1.61,
"learning_rate": 4.1949089939055675e-05,
"loss": 4.5436,
"step": 78500
},
{
"epoch": 1.62,
"learning_rate": 4.189789259844459e-05,
"loss": 4.5608,
"step": 79000
},
{
"epoch": 1.63,
"learning_rate": 4.184659265795252e-05,
"loss": 4.539,
"step": 79500
},
{
"epoch": 1.64,
"learning_rate": 4.179529271746045e-05,
"loss": 4.5659,
"step": 80000
},
{
"epoch": 1.65,
"learning_rate": 4.1743992776968384e-05,
"loss": 4.5041,
"step": 80500
},
{
"epoch": 1.66,
"learning_rate": 4.169269283647631e-05,
"loss": 4.5788,
"step": 81000
},
{
"epoch": 1.67,
"learning_rate": 4.1641392895984244e-05,
"loss": 4.542,
"step": 81500
},
{
"epoch": 1.68,
"learning_rate": 4.1590092955492174e-05,
"loss": 4.5193,
"step": 82000
},
{
"epoch": 1.69,
"learning_rate": 4.1538793015000103e-05,
"loss": 4.5562,
"step": 82500
},
{
"epoch": 1.7,
"learning_rate": 4.148749307450804e-05,
"loss": 4.4589,
"step": 83000
},
{
"epoch": 1.71,
"learning_rate": 4.143629573389695e-05,
"loss": 4.5263,
"step": 83500
},
{
"epoch": 1.72,
"learning_rate": 4.1384995793404876e-05,
"loss": 4.4958,
"step": 84000
},
{
"epoch": 1.73,
"learning_rate": 4.1333798452793797e-05,
"loss": 4.5111,
"step": 84500
},
{
"epoch": 1.74,
"learning_rate": 4.1282498512301726e-05,
"loss": 4.5035,
"step": 85000
},
{
"epoch": 1.75,
"learning_rate": 4.123119857180966e-05,
"loss": 4.5078,
"step": 85500
},
{
"epoch": 1.76,
"learning_rate": 4.1179898631317586e-05,
"loss": 4.491,
"step": 86000
},
{
"epoch": 1.77,
"learning_rate": 4.112859869082552e-05,
"loss": 4.4847,
"step": 86500
},
{
"epoch": 1.79,
"learning_rate": 4.107729875033345e-05,
"loss": 4.4864,
"step": 87000
},
{
"epoch": 1.8,
"learning_rate": 4.102599880984138e-05,
"loss": 4.4806,
"step": 87500
},
{
"epoch": 1.81,
"learning_rate": 4.0974801469230295e-05,
"loss": 4.5149,
"step": 88000
},
{
"epoch": 1.82,
"learning_rate": 4.092350152873823e-05,
"loss": 4.4806,
"step": 88500
},
{
"epoch": 1.83,
"learning_rate": 4.087220158824616e-05,
"loss": 4.5255,
"step": 89000
},
{
"epoch": 1.84,
"learning_rate": 4.082090164775409e-05,
"loss": 4.4811,
"step": 89500
},
{
"epoch": 1.85,
"learning_rate": 4.076960170726202e-05,
"loss": 4.4526,
"step": 90000
},
{
"epoch": 1.86,
"learning_rate": 4.071830176676995e-05,
"loss": 4.4444,
"step": 90500
},
{
"epoch": 1.87,
"learning_rate": 4.066700182627789e-05,
"loss": 4.4464,
"step": 91000
},
{
"epoch": 1.88,
"learning_rate": 4.061570188578581e-05,
"loss": 4.4321,
"step": 91500
},
{
"epoch": 1.89,
"learning_rate": 4.0564607145055714e-05,
"loss": 4.4777,
"step": 92000
},
{
"epoch": 1.9,
"learning_rate": 4.0513307204563644e-05,
"loss": 4.4735,
"step": 92500
},
{
"epoch": 1.91,
"learning_rate": 4.046210986395256e-05,
"loss": 4.466,
"step": 93000
},
{
"epoch": 1.92,
"learning_rate": 4.0410809923460494e-05,
"loss": 4.4724,
"step": 93500
},
{
"epoch": 1.93,
"learning_rate": 4.035950998296842e-05,
"loss": 4.431,
"step": 94000
},
{
"epoch": 1.94,
"learning_rate": 4.030821004247635e-05,
"loss": 4.4713,
"step": 94500
},
{
"epoch": 1.95,
"learning_rate": 4.025691010198428e-05,
"loss": 4.4095,
"step": 95000
},
{
"epoch": 1.96,
"learning_rate": 4.020561016149221e-05,
"loss": 4.4054,
"step": 95500
},
{
"epoch": 1.97,
"learning_rate": 4.015431022100015e-05,
"loss": 4.4136,
"step": 96000
},
{
"epoch": 1.98,
"learning_rate": 4.010301028050807e-05,
"loss": 4.4432,
"step": 96500
},
{
"epoch": 1.99,
"learning_rate": 4.005171034001601e-05,
"loss": 4.3887,
"step": 97000
},
{
"epoch": 2.0,
"learning_rate": 4.000041039952394e-05,
"loss": 4.4078,
"step": 97500
},
{
"epoch": 2.01,
"learning_rate": 3.994911045903187e-05,
"loss": 4.4154,
"step": 98000
},
{
"epoch": 2.02,
"learning_rate": 3.9897810518539804e-05,
"loss": 4.3906,
"step": 98500
},
{
"epoch": 2.03,
"learning_rate": 3.984651057804773e-05,
"loss": 4.4291,
"step": 99000
},
{
"epoch": 2.04,
"learning_rate": 3.979531323743665e-05,
"loss": 4.3558,
"step": 99500
},
{
"epoch": 2.05,
"learning_rate": 3.974401329694458e-05,
"loss": 4.3718,
"step": 100000
},
{
"epoch": 2.06,
"learning_rate": 3.969281595633349e-05,
"loss": 4.3902,
"step": 100500
},
{
"epoch": 2.07,
"learning_rate": 3.964151601584142e-05,
"loss": 4.3582,
"step": 101000
},
{
"epoch": 2.08,
"learning_rate": 3.959021607534936e-05,
"loss": 4.3659,
"step": 101500
},
{
"epoch": 2.09,
"learning_rate": 3.953901873473827e-05,
"loss": 4.3793,
"step": 102000
},
{
"epoch": 2.1,
"learning_rate": 3.94877187942462e-05,
"loss": 4.3694,
"step": 102500
},
{
"epoch": 2.11,
"learning_rate": 3.943641885375413e-05,
"loss": 4.3826,
"step": 103000
},
{
"epoch": 2.12,
"learning_rate": 3.9385118913262066e-05,
"loss": 4.375,
"step": 103500
},
{
"epoch": 2.13,
"learning_rate": 3.933381897276999e-05,
"loss": 4.4241,
"step": 104000
},
{
"epoch": 2.14,
"learning_rate": 3.9282519032277926e-05,
"loss": 4.3498,
"step": 104500
},
{
"epoch": 2.15,
"learning_rate": 3.9231219091785856e-05,
"loss": 4.3247,
"step": 105000
},
{
"epoch": 2.16,
"learning_rate": 3.9179919151293785e-05,
"loss": 4.3628,
"step": 105500
},
{
"epoch": 2.18,
"learning_rate": 3.9128619210801715e-05,
"loss": 4.3408,
"step": 106000
},
{
"epoch": 2.19,
"learning_rate": 3.9077319270309645e-05,
"loss": 4.372,
"step": 106500
},
{
"epoch": 2.2,
"learning_rate": 3.902612192969856e-05,
"loss": 4.3608,
"step": 107000
},
{
"epoch": 2.21,
"learning_rate": 3.8974821989206495e-05,
"loss": 4.353,
"step": 107500
},
{
"epoch": 2.22,
"learning_rate": 3.892352204871443e-05,
"loss": 4.3279,
"step": 108000
},
{
"epoch": 2.23,
"learning_rate": 3.8872222108222354e-05,
"loss": 4.3806,
"step": 108500
},
{
"epoch": 2.24,
"learning_rate": 3.882092216773029e-05,
"loss": 4.3484,
"step": 109000
},
{
"epoch": 2.25,
"learning_rate": 3.8769622227238214e-05,
"loss": 4.306,
"step": 109500
},
{
"epoch": 2.26,
"learning_rate": 3.871832228674615e-05,
"loss": 4.3324,
"step": 110000
},
{
"epoch": 2.27,
"learning_rate": 3.866702234625408e-05,
"loss": 4.3572,
"step": 110500
},
{
"epoch": 2.28,
"learning_rate": 3.861572240576201e-05,
"loss": 4.3448,
"step": 111000
},
{
"epoch": 2.29,
"learning_rate": 3.8564422465269946e-05,
"loss": 4.3358,
"step": 111500
},
{
"epoch": 2.3,
"learning_rate": 3.851322512465886e-05,
"loss": 4.2848,
"step": 112000
},
{
"epoch": 2.31,
"learning_rate": 3.846202778404777e-05,
"loss": 4.3378,
"step": 112500
},
{
"epoch": 2.32,
"learning_rate": 3.84107278435557e-05,
"loss": 4.3408,
"step": 113000
},
{
"epoch": 2.33,
"learning_rate": 3.835942790306363e-05,
"loss": 4.3114,
"step": 113500
},
{
"epoch": 2.34,
"learning_rate": 3.830812796257157e-05,
"loss": 4.3221,
"step": 114000
},
{
"epoch": 2.35,
"learning_rate": 3.825682802207949e-05,
"loss": 4.3448,
"step": 114500
},
{
"epoch": 2.36,
"learning_rate": 3.820552808158743e-05,
"loss": 4.2776,
"step": 115000
},
{
"epoch": 2.37,
"learning_rate": 3.815422814109536e-05,
"loss": 4.3126,
"step": 115500
},
{
"epoch": 2.38,
"learning_rate": 3.810292820060329e-05,
"loss": 4.2848,
"step": 116000
},
{
"epoch": 2.39,
"learning_rate": 3.80517308599922e-05,
"loss": 4.277,
"step": 116500
},
{
"epoch": 2.4,
"learning_rate": 3.800043091950014e-05,
"loss": 4.2941,
"step": 117000
},
{
"epoch": 2.41,
"learning_rate": 3.794913097900807e-05,
"loss": 4.3109,
"step": 117500
},
{
"epoch": 2.42,
"learning_rate": 3.7897831038516e-05,
"loss": 4.2839,
"step": 118000
},
{
"epoch": 2.43,
"learning_rate": 3.784663369790491e-05,
"loss": 4.2977,
"step": 118500
},
{
"epoch": 2.44,
"learning_rate": 3.779533375741284e-05,
"loss": 4.2763,
"step": 119000
},
{
"epoch": 2.45,
"learning_rate": 3.774403381692078e-05,
"loss": 4.2716,
"step": 119500
},
{
"epoch": 2.46,
"learning_rate": 3.769273387642871e-05,
"loss": 4.2914,
"step": 120000
},
{
"epoch": 2.47,
"learning_rate": 3.7641433935936636e-05,
"loss": 4.2892,
"step": 120500
},
{
"epoch": 2.48,
"learning_rate": 3.7590133995444566e-05,
"loss": 4.2721,
"step": 121000
},
{
"epoch": 2.49,
"learning_rate": 3.7538834054952496e-05,
"loss": 4.2827,
"step": 121500
},
{
"epoch": 2.5,
"learning_rate": 3.7487534114460426e-05,
"loss": 4.2514,
"step": 122000
},
{
"epoch": 2.51,
"learning_rate": 3.743623417396836e-05,
"loss": 4.3007,
"step": 122500
},
{
"epoch": 2.52,
"learning_rate": 3.7385036833357276e-05,
"loss": 4.3164,
"step": 123000
},
{
"epoch": 2.53,
"learning_rate": 3.7333736892865205e-05,
"loss": 4.2493,
"step": 123500
},
{
"epoch": 2.54,
"learning_rate": 3.7282436952373135e-05,
"loss": 4.272,
"step": 124000
},
{
"epoch": 2.55,
"learning_rate": 3.7231239611762055e-05,
"loss": 4.2676,
"step": 124500
},
{
"epoch": 2.56,
"learning_rate": 3.717993967126998e-05,
"loss": 4.2604,
"step": 125000
},
{
"epoch": 2.58,
"learning_rate": 3.7128639730777915e-05,
"loss": 4.2754,
"step": 125500
},
{
"epoch": 2.59,
"learning_rate": 3.7077339790285844e-05,
"loss": 4.2361,
"step": 126000
},
{
"epoch": 2.6,
"learning_rate": 3.7026039849793774e-05,
"loss": 4.2829,
"step": 126500
},
{
"epoch": 2.61,
"learning_rate": 3.697484250918269e-05,
"loss": 4.2507,
"step": 127000
},
{
"epoch": 2.62,
"learning_rate": 3.6923542568690624e-05,
"loss": 4.235,
"step": 127500
},
{
"epoch": 2.63,
"learning_rate": 3.6872242628198554e-05,
"loss": 4.2501,
"step": 128000
},
{
"epoch": 2.64,
"learning_rate": 3.6820942687706484e-05,
"loss": 4.2579,
"step": 128500
},
{
"epoch": 2.65,
"learning_rate": 3.676964274721442e-05,
"loss": 4.2095,
"step": 129000
},
{
"epoch": 2.66,
"learning_rate": 3.671844540660333e-05,
"loss": 4.2339,
"step": 129500
},
{
"epoch": 2.67,
"learning_rate": 3.666714546611126e-05,
"loss": 4.2573,
"step": 130000
},
{
"epoch": 2.68,
"learning_rate": 3.661584552561919e-05,
"loss": 4.253,
"step": 130500
},
{
"epoch": 2.69,
"learning_rate": 3.656454558512712e-05,
"loss": 4.2388,
"step": 131000
},
{
"epoch": 2.7,
"learning_rate": 3.651324564463505e-05,
"loss": 4.2168,
"step": 131500
},
{
"epoch": 2.71,
"learning_rate": 3.646204830402397e-05,
"loss": 4.2164,
"step": 132000
},
{
"epoch": 2.72,
"learning_rate": 3.64107483635319e-05,
"loss": 4.2099,
"step": 132500
},
{
"epoch": 2.73,
"learning_rate": 3.635944842303983e-05,
"loss": 4.2155,
"step": 133000
},
{
"epoch": 2.74,
"learning_rate": 3.630814848254776e-05,
"loss": 4.2385,
"step": 133500
},
{
"epoch": 2.75,
"learning_rate": 3.625684854205569e-05,
"loss": 4.1949,
"step": 134000
},
{
"epoch": 2.76,
"learning_rate": 3.620554860156362e-05,
"loss": 4.1929,
"step": 134500
},
{
"epoch": 2.77,
"learning_rate": 3.615424866107156e-05,
"loss": 4.2094,
"step": 135000
},
{
"epoch": 2.78,
"learning_rate": 3.610294872057949e-05,
"loss": 4.1766,
"step": 135500
},
{
"epoch": 2.79,
"learning_rate": 3.605164878008742e-05,
"loss": 4.2076,
"step": 136000
},
{
"epoch": 2.8,
"learning_rate": 3.600045143947633e-05,
"loss": 4.224,
"step": 136500
},
{
"epoch": 2.81,
"learning_rate": 3.594915149898426e-05,
"loss": 4.2378,
"step": 137000
},
{
"epoch": 2.82,
"learning_rate": 3.58978515584922e-05,
"loss": 4.1963,
"step": 137500
},
{
"epoch": 2.83,
"learning_rate": 3.584655161800013e-05,
"loss": 4.2194,
"step": 138000
},
{
"epoch": 2.84,
"learning_rate": 3.5795251677508056e-05,
"loss": 4.1876,
"step": 138500
},
{
"epoch": 2.85,
"learning_rate": 3.574405433689697e-05,
"loss": 4.1573,
"step": 139000
},
{
"epoch": 2.86,
"learning_rate": 3.5692754396404906e-05,
"loss": 4.1632,
"step": 139500
},
{
"epoch": 2.87,
"learning_rate": 3.564145445591283e-05,
"loss": 4.1946,
"step": 140000
},
{
"epoch": 2.88,
"learning_rate": 3.5590154515420766e-05,
"loss": 4.2068,
"step": 140500
},
{
"epoch": 2.89,
"learning_rate": 3.553895717480968e-05,
"loss": 4.1915,
"step": 141000
},
{
"epoch": 2.9,
"learning_rate": 3.548765723431761e-05,
"loss": 4.166,
"step": 141500
},
{
"epoch": 2.91,
"learning_rate": 3.543635729382554e-05,
"loss": 4.1631,
"step": 142000
},
{
"epoch": 2.92,
"learning_rate": 3.5385057353333475e-05,
"loss": 4.1724,
"step": 142500
},
{
"epoch": 2.93,
"learning_rate": 3.533386001272239e-05,
"loss": 4.1626,
"step": 143000
},
{
"epoch": 2.94,
"learning_rate": 3.528256007223032e-05,
"loss": 4.1848,
"step": 143500
},
{
"epoch": 2.95,
"learning_rate": 3.523126013173825e-05,
"loss": 4.1468,
"step": 144000
},
{
"epoch": 2.97,
"learning_rate": 3.517996019124618e-05,
"loss": 4.2017,
"step": 144500
},
{
"epoch": 2.98,
"learning_rate": 3.512866025075411e-05,
"loss": 4.2094,
"step": 145000
},
{
"epoch": 2.99,
"learning_rate": 3.5077360310262044e-05,
"loss": 4.1475,
"step": 145500
},
{
"epoch": 3.0,
"learning_rate": 3.502616296965096e-05,
"loss": 4.1887,
"step": 146000
},
{
"epoch": 3.01,
"learning_rate": 3.497486302915889e-05,
"loss": 4.1589,
"step": 146500
},
{
"epoch": 3.02,
"learning_rate": 3.492356308866682e-05,
"loss": 4.1307,
"step": 147000
},
{
"epoch": 3.03,
"learning_rate": 3.487226314817475e-05,
"loss": 4.1373,
"step": 147500
},
{
"epoch": 3.04,
"learning_rate": 3.482096320768268e-05,
"loss": 4.1346,
"step": 148000
},
{
"epoch": 3.05,
"learning_rate": 3.476966326719061e-05,
"loss": 4.1282,
"step": 148500
},
{
"epoch": 3.06,
"learning_rate": 3.471836332669854e-05,
"loss": 4.1611,
"step": 149000
},
{
"epoch": 3.07,
"learning_rate": 3.466706338620647e-05,
"loss": 4.154,
"step": 149500
},
{
"epoch": 3.08,
"learning_rate": 3.46157634457144e-05,
"loss": 4.1765,
"step": 150000
},
{
"epoch": 3.09,
"learning_rate": 3.4564566105103316e-05,
"loss": 4.1181,
"step": 150500
},
{
"epoch": 3.1,
"learning_rate": 3.451326616461125e-05,
"loss": 4.1712,
"step": 151000
},
{
"epoch": 3.11,
"learning_rate": 3.446196622411918e-05,
"loss": 4.1497,
"step": 151500
},
{
"epoch": 3.12,
"learning_rate": 3.441066628362711e-05,
"loss": 4.146,
"step": 152000
},
{
"epoch": 3.13,
"learning_rate": 3.435936634313504e-05,
"loss": 4.1196,
"step": 152500
},
{
"epoch": 3.14,
"learning_rate": 3.430806640264297e-05,
"loss": 4.1407,
"step": 153000
},
{
"epoch": 3.15,
"learning_rate": 3.425676646215091e-05,
"loss": 4.1269,
"step": 153500
},
{
"epoch": 3.16,
"learning_rate": 3.420546652165884e-05,
"loss": 4.1198,
"step": 154000
},
{
"epoch": 3.17,
"learning_rate": 3.415426918104775e-05,
"loss": 4.1264,
"step": 154500
},
{
"epoch": 3.18,
"learning_rate": 3.410307184043667e-05,
"loss": 4.1296,
"step": 155000
},
{
"epoch": 3.19,
"learning_rate": 3.4051771899944594e-05,
"loss": 4.1359,
"step": 155500
},
{
"epoch": 3.2,
"learning_rate": 3.400047195945253e-05,
"loss": 4.1061,
"step": 156000
},
{
"epoch": 3.21,
"learning_rate": 3.394917201896046e-05,
"loss": 4.1364,
"step": 156500
},
{
"epoch": 3.22,
"learning_rate": 3.389787207846839e-05,
"loss": 4.1629,
"step": 157000
},
{
"epoch": 3.23,
"learning_rate": 3.3846572137976326e-05,
"loss": 4.1114,
"step": 157500
},
{
"epoch": 3.24,
"learning_rate": 3.379527219748425e-05,
"loss": 4.1195,
"step": 158000
},
{
"epoch": 3.25,
"learning_rate": 3.374407485687317e-05,
"loss": 4.127,
"step": 158500
},
{
"epoch": 3.26,
"learning_rate": 3.36927749163811e-05,
"loss": 4.0762,
"step": 159000
},
{
"epoch": 3.27,
"learning_rate": 3.364147497588903e-05,
"loss": 4.1274,
"step": 159500
},
{
"epoch": 3.28,
"learning_rate": 3.359017503539696e-05,
"loss": 4.1027,
"step": 160000
},
{
"epoch": 3.29,
"learning_rate": 3.3538875094904895e-05,
"loss": 4.1181,
"step": 160500
},
{
"epoch": 3.3,
"learning_rate": 3.348757515441282e-05,
"loss": 4.1254,
"step": 161000
},
{
"epoch": 3.31,
"learning_rate": 3.3436275213920755e-05,
"loss": 4.0904,
"step": 161500
},
{
"epoch": 3.32,
"learning_rate": 3.338518047319065e-05,
"loss": 4.0986,
"step": 162000
},
{
"epoch": 3.33,
"learning_rate": 3.333388053269859e-05,
"loss": 4.124,
"step": 162500
},
{
"epoch": 3.34,
"learning_rate": 3.328258059220651e-05,
"loss": 4.0905,
"step": 163000
},
{
"epoch": 3.36,
"learning_rate": 3.323128065171445e-05,
"loss": 4.1261,
"step": 163500
},
{
"epoch": 3.37,
"learning_rate": 3.317998071122238e-05,
"loss": 4.0871,
"step": 164000
},
{
"epoch": 3.38,
"learning_rate": 3.312868077073031e-05,
"loss": 4.0878,
"step": 164500
},
{
"epoch": 3.39,
"learning_rate": 3.307738083023824e-05,
"loss": 4.0649,
"step": 165000
},
{
"epoch": 3.4,
"learning_rate": 3.302608088974617e-05,
"loss": 4.1072,
"step": 165500
},
{
"epoch": 3.41,
"learning_rate": 3.29747809492541e-05,
"loss": 4.0528,
"step": 166000
},
{
"epoch": 3.42,
"learning_rate": 3.2923583608643017e-05,
"loss": 4.0903,
"step": 166500
},
{
"epoch": 3.43,
"learning_rate": 3.2872283668150946e-05,
"loss": 4.0848,
"step": 167000
},
{
"epoch": 3.44,
"learning_rate": 3.2820983727658876e-05,
"loss": 4.039,
"step": 167500
},
{
"epoch": 3.45,
"learning_rate": 3.276968378716681e-05,
"loss": 4.0965,
"step": 168000
},
{
"epoch": 3.46,
"learning_rate": 3.2718383846674736e-05,
"loss": 4.1169,
"step": 168500
},
{
"epoch": 3.47,
"learning_rate": 3.266708390618267e-05,
"loss": 4.1324,
"step": 169000
},
{
"epoch": 3.48,
"learning_rate": 3.26157839656906e-05,
"loss": 4.1061,
"step": 169500
},
{
"epoch": 3.49,
"learning_rate": 3.256448402519853e-05,
"loss": 4.089,
"step": 170000
},
{
"epoch": 3.5,
"learning_rate": 3.2513286684587445e-05,
"loss": 4.094,
"step": 170500
},
{
"epoch": 3.51,
"learning_rate": 3.246198674409538e-05,
"loss": 4.0917,
"step": 171000
},
{
"epoch": 3.52,
"learning_rate": 3.2410686803603304e-05,
"loss": 4.0532,
"step": 171500
},
{
"epoch": 3.53,
"learning_rate": 3.235938686311124e-05,
"loss": 4.0729,
"step": 172000
},
{
"epoch": 3.54,
"learning_rate": 3.2308189522500154e-05,
"loss": 4.0459,
"step": 172500
},
{
"epoch": 3.55,
"learning_rate": 3.2256889582008084e-05,
"loss": 4.0761,
"step": 173000
},
{
"epoch": 3.56,
"learning_rate": 3.2205589641516014e-05,
"loss": 4.0617,
"step": 173500
},
{
"epoch": 3.57,
"learning_rate": 3.215428970102395e-05,
"loss": 4.0709,
"step": 174000
},
{
"epoch": 3.58,
"learning_rate": 3.210298976053188e-05,
"loss": 4.0661,
"step": 174500
},
{
"epoch": 3.59,
"learning_rate": 3.205168982003981e-05,
"loss": 4.0281,
"step": 175000
},
{
"epoch": 3.6,
"learning_rate": 3.200049247942872e-05,
"loss": 4.0545,
"step": 175500
},
{
"epoch": 3.61,
"learning_rate": 3.194919253893666e-05,
"loss": 4.0483,
"step": 176000
},
{
"epoch": 3.62,
"learning_rate": 3.189789259844459e-05,
"loss": 4.0512,
"step": 176500
},
{
"epoch": 3.63,
"learning_rate": 3.184659265795252e-05,
"loss": 4.0502,
"step": 177000
},
{
"epoch": 3.64,
"learning_rate": 3.179529271746045e-05,
"loss": 4.0288,
"step": 177500
},
{
"epoch": 3.65,
"learning_rate": 3.174409537684936e-05,
"loss": 4.0851,
"step": 178000
},
{
"epoch": 3.66,
"learning_rate": 3.16927954363573e-05,
"loss": 4.0473,
"step": 178500
},
{
"epoch": 3.67,
"learning_rate": 3.164149549586523e-05,
"loss": 4.0553,
"step": 179000
},
{
"epoch": 3.68,
"learning_rate": 3.159029815525414e-05,
"loss": 4.0329,
"step": 179500
},
{
"epoch": 3.69,
"learning_rate": 3.153899821476207e-05,
"loss": 4.0429,
"step": 180000
},
{
"epoch": 3.7,
"learning_rate": 3.148769827427001e-05,
"loss": 4.0339,
"step": 180500
},
{
"epoch": 3.71,
"learning_rate": 3.143639833377793e-05,
"loss": 4.0411,
"step": 181000
},
{
"epoch": 3.72,
"learning_rate": 3.138509839328587e-05,
"loss": 4.0174,
"step": 181500
},
{
"epoch": 3.73,
"learning_rate": 3.13337984527938e-05,
"loss": 4.0375,
"step": 182000
},
{
"epoch": 3.74,
"learning_rate": 3.128249851230173e-05,
"loss": 4.0391,
"step": 182500
},
{
"epoch": 3.76,
"learning_rate": 3.123119857180966e-05,
"loss": 4.027,
"step": 183000
},
{
"epoch": 3.77,
"learning_rate": 3.118000123119858e-05,
"loss": 4.0097,
"step": 183500
},
{
"epoch": 3.78,
"learning_rate": 3.11287012907065e-05,
"loss": 4.0425,
"step": 184000
},
{
"epoch": 3.79,
"learning_rate": 3.1077401350214437e-05,
"loss": 4.0201,
"step": 184500
},
{
"epoch": 3.8,
"learning_rate": 3.1026101409722366e-05,
"loss": 3.997,
"step": 185000
},
{
"epoch": 3.81,
"learning_rate": 3.097490406911128e-05,
"loss": 4.0319,
"step": 185500
},
{
"epoch": 3.82,
"learning_rate": 3.092360412861921e-05,
"loss": 4.0165,
"step": 186000
},
{
"epoch": 3.83,
"learning_rate": 3.0872304188127146e-05,
"loss": 3.9969,
"step": 186500
},
{
"epoch": 3.84,
"learning_rate": 3.0821004247635076e-05,
"loss": 3.9924,
"step": 187000
},
{
"epoch": 3.85,
"learning_rate": 3.076980690702399e-05,
"loss": 4.0224,
"step": 187500
},
{
"epoch": 3.86,
"learning_rate": 3.071850696653192e-05,
"loss": 3.9941,
"step": 188000
},
{
"epoch": 3.87,
"learning_rate": 3.066720702603985e-05,
"loss": 4.0328,
"step": 188500
},
{
"epoch": 3.88,
"learning_rate": 3.0615907085547785e-05,
"loss": 4.0028,
"step": 189000
},
{
"epoch": 3.89,
"learning_rate": 3.0564607145055715e-05,
"loss": 3.9965,
"step": 189500
},
{
"epoch": 3.9,
"learning_rate": 3.0513307204563645e-05,
"loss": 4.023,
"step": 190000
},
{
"epoch": 3.91,
"learning_rate": 3.0462007264071574e-05,
"loss": 3.9735,
"step": 190500
},
{
"epoch": 3.92,
"learning_rate": 3.041080992346049e-05,
"loss": 4.0157,
"step": 191000
},
{
"epoch": 3.93,
"learning_rate": 3.035950998296842e-05,
"loss": 4.0167,
"step": 191500
},
{
"epoch": 3.94,
"learning_rate": 3.0308312642357334e-05,
"loss": 4.0053,
"step": 192000
},
{
"epoch": 3.95,
"learning_rate": 3.0257012701865267e-05,
"loss": 4.0017,
"step": 192500
},
{
"epoch": 3.96,
"learning_rate": 3.02057127613732e-05,
"loss": 4.0052,
"step": 193000
},
{
"epoch": 3.97,
"learning_rate": 3.0154412820881127e-05,
"loss": 3.9906,
"step": 193500
},
{
"epoch": 3.98,
"learning_rate": 3.010311288038906e-05,
"loss": 3.976,
"step": 194000
},
{
"epoch": 3.99,
"learning_rate": 3.005181293989699e-05,
"loss": 4.0274,
"step": 194500
},
{
"epoch": 4.0,
"learning_rate": 3.0000512999404923e-05,
"loss": 4.0102,
"step": 195000
},
{
"epoch": 4.01,
"learning_rate": 2.9949315658793836e-05,
"loss": 3.9998,
"step": 195500
},
{
"epoch": 4.02,
"learning_rate": 2.989801571830177e-05,
"loss": 4.0075,
"step": 196000
},
{
"epoch": 4.03,
"learning_rate": 2.9846715777809696e-05,
"loss": 4.0114,
"step": 196500
},
{
"epoch": 4.04,
"learning_rate": 2.9795415837317632e-05,
"loss": 3.9899,
"step": 197000
},
{
"epoch": 4.05,
"learning_rate": 2.974411589682556e-05,
"loss": 3.9896,
"step": 197500
},
{
"epoch": 4.06,
"learning_rate": 2.969281595633349e-05,
"loss": 4.0081,
"step": 198000
},
{
"epoch": 4.07,
"learning_rate": 2.9641516015841425e-05,
"loss": 3.9918,
"step": 198500
},
{
"epoch": 4.08,
"learning_rate": 2.9590216075349355e-05,
"loss": 3.9877,
"step": 199000
},
{
"epoch": 4.09,
"learning_rate": 2.9538916134857288e-05,
"loss": 3.9619,
"step": 199500
},
{
"epoch": 4.1,
"learning_rate": 2.9487616194365214e-05,
"loss": 3.999,
"step": 200000
},
{
"epoch": 4.11,
"learning_rate": 2.9436418853754134e-05,
"loss": 3.9985,
"step": 200500
},
{
"epoch": 4.12,
"learning_rate": 2.938511891326206e-05,
"loss": 3.9828,
"step": 201000
},
{
"epoch": 4.13,
"learning_rate": 2.933392157265098e-05,
"loss": 3.9799,
"step": 201500
},
{
"epoch": 4.15,
"learning_rate": 2.9282621632158907e-05,
"loss": 3.9574,
"step": 202000
},
{
"epoch": 4.16,
"learning_rate": 2.923132169166684e-05,
"loss": 3.9762,
"step": 202500
},
{
"epoch": 4.17,
"learning_rate": 2.918002175117477e-05,
"loss": 3.9344,
"step": 203000
},
{
"epoch": 4.18,
"learning_rate": 2.9128721810682703e-05,
"loss": 4.0109,
"step": 203500
},
{
"epoch": 4.19,
"learning_rate": 2.907742187019063e-05,
"loss": 3.9805,
"step": 204000
},
{
"epoch": 4.2,
"learning_rate": 2.9026121929698563e-05,
"loss": 3.9499,
"step": 204500
},
{
"epoch": 4.21,
"learning_rate": 2.8974821989206496e-05,
"loss": 3.965,
"step": 205000
},
{
"epoch": 4.22,
"learning_rate": 2.892362464859541e-05,
"loss": 3.9572,
"step": 205500
},
{
"epoch": 4.23,
"learning_rate": 2.887232470810334e-05,
"loss": 3.993,
"step": 206000
},
{
"epoch": 4.24,
"learning_rate": 2.8821024767611272e-05,
"loss": 3.9494,
"step": 206500
},
{
"epoch": 4.25,
"learning_rate": 2.8769724827119205e-05,
"loss": 3.9585,
"step": 207000
},
{
"epoch": 4.26,
"learning_rate": 2.871842488662713e-05,
"loss": 3.9766,
"step": 207500
},
{
"epoch": 4.27,
"learning_rate": 2.8667124946135064e-05,
"loss": 3.9779,
"step": 208000
},
{
"epoch": 4.28,
"learning_rate": 2.8615825005642994e-05,
"loss": 3.9486,
"step": 208500
},
{
"epoch": 4.29,
"learning_rate": 2.8564525065150927e-05,
"loss": 3.977,
"step": 209000
},
{
"epoch": 4.3,
"learning_rate": 2.851332772453984e-05,
"loss": 3.9675,
"step": 209500
},
{
"epoch": 4.31,
"learning_rate": 2.8462130383928754e-05,
"loss": 3.9637,
"step": 210000
},
{
"epoch": 4.32,
"learning_rate": 2.8410830443436687e-05,
"loss": 3.9209,
"step": 210500
},
{
"epoch": 4.33,
"learning_rate": 2.835953050294462e-05,
"loss": 3.9515,
"step": 211000
},
{
"epoch": 4.34,
"learning_rate": 2.8308230562452547e-05,
"loss": 3.9693,
"step": 211500
},
{
"epoch": 4.35,
"learning_rate": 2.825693062196048e-05,
"loss": 3.9517,
"step": 212000
},
{
"epoch": 4.36,
"learning_rate": 2.8205733281349393e-05,
"loss": 3.9347,
"step": 212500
},
{
"epoch": 4.37,
"learning_rate": 2.8154433340857326e-05,
"loss": 3.9444,
"step": 213000
},
{
"epoch": 4.38,
"learning_rate": 2.8103133400365256e-05,
"loss": 3.9793,
"step": 213500
},
{
"epoch": 4.39,
"learning_rate": 2.8051936059754173e-05,
"loss": 3.9519,
"step": 214000
},
{
"epoch": 4.4,
"learning_rate": 2.8000636119262103e-05,
"loss": 3.9411,
"step": 214500
},
{
"epoch": 4.41,
"learning_rate": 2.7949336178770036e-05,
"loss": 4.0059,
"step": 215000
},
{
"epoch": 4.42,
"learning_rate": 2.7898036238277962e-05,
"loss": 3.9071,
"step": 215500
},
{
"epoch": 4.43,
"learning_rate": 2.7846736297785895e-05,
"loss": 3.9323,
"step": 216000
},
{
"epoch": 4.44,
"learning_rate": 2.7795436357293825e-05,
"loss": 3.9026,
"step": 216500
},
{
"epoch": 4.45,
"learning_rate": 2.7744136416801758e-05,
"loss": 3.9355,
"step": 217000
},
{
"epoch": 4.46,
"learning_rate": 2.769293907619067e-05,
"loss": 3.9487,
"step": 217500
},
{
"epoch": 4.47,
"learning_rate": 2.7641639135698605e-05,
"loss": 3.9103,
"step": 218000
},
{
"epoch": 4.48,
"learning_rate": 2.759033919520653e-05,
"loss": 3.9136,
"step": 218500
},
{
"epoch": 4.49,
"learning_rate": 2.7539039254714468e-05,
"loss": 3.9358,
"step": 219000
},
{
"epoch": 4.5,
"learning_rate": 2.74877393142224e-05,
"loss": 3.9526,
"step": 219500
},
{
"epoch": 4.51,
"learning_rate": 2.7436439373730327e-05,
"loss": 3.9287,
"step": 220000
},
{
"epoch": 4.52,
"learning_rate": 2.738513943323826e-05,
"loss": 3.9084,
"step": 220500
},
{
"epoch": 4.53,
"learning_rate": 2.733383949274619e-05,
"loss": 3.9103,
"step": 221000
},
{
"epoch": 4.55,
"learning_rate": 2.7282539552254123e-05,
"loss": 3.9452,
"step": 221500
},
{
"epoch": 4.56,
"learning_rate": 2.723123961176205e-05,
"loss": 3.9149,
"step": 222000
},
{
"epoch": 4.57,
"learning_rate": 2.7179939671269982e-05,
"loss": 3.8862,
"step": 222500
},
{
"epoch": 4.58,
"learning_rate": 2.7128742330658896e-05,
"loss": 3.905,
"step": 223000
},
{
"epoch": 4.59,
"learning_rate": 2.707744239016683e-05,
"loss": 3.9089,
"step": 223500
},
{
"epoch": 4.6,
"learning_rate": 2.702614244967476e-05,
"loss": 3.9231,
"step": 224000
},
{
"epoch": 4.61,
"learning_rate": 2.6974842509182692e-05,
"loss": 3.9382,
"step": 224500
},
{
"epoch": 4.62,
"learning_rate": 2.6923542568690625e-05,
"loss": 3.9066,
"step": 225000
},
{
"epoch": 4.63,
"learning_rate": 2.687224262819855e-05,
"loss": 3.8783,
"step": 225500
},
{
"epoch": 4.64,
"learning_rate": 2.6820942687706484e-05,
"loss": 3.8863,
"step": 226000
},
{
"epoch": 4.65,
"learning_rate": 2.6769745347095398e-05,
"loss": 3.9364,
"step": 226500
},
{
"epoch": 4.66,
"learning_rate": 2.671844540660333e-05,
"loss": 3.9386,
"step": 227000
},
{
"epoch": 4.67,
"learning_rate": 2.666714546611126e-05,
"loss": 3.9075,
"step": 227500
},
{
"epoch": 4.68,
"learning_rate": 2.6615948125500174e-05,
"loss": 3.8976,
"step": 228000
},
{
"epoch": 4.69,
"learning_rate": 2.6564648185008107e-05,
"loss": 3.9058,
"step": 228500
},
{
"epoch": 4.7,
"learning_rate": 2.651334824451604e-05,
"loss": 3.909,
"step": 229000
},
{
"epoch": 4.71,
"learning_rate": 2.6462048304023967e-05,
"loss": 3.9192,
"step": 229500
},
{
"epoch": 4.72,
"learning_rate": 2.64107483635319e-05,
"loss": 3.9413,
"step": 230000
},
{
"epoch": 4.73,
"learning_rate": 2.635944842303983e-05,
"loss": 3.8911,
"step": 230500
},
{
"epoch": 4.74,
"learning_rate": 2.6308148482547763e-05,
"loss": 3.8748,
"step": 231000
},
{
"epoch": 4.75,
"learning_rate": 2.625684854205569e-05,
"loss": 3.8669,
"step": 231500
},
{
"epoch": 4.76,
"learning_rate": 2.6205548601563622e-05,
"loss": 3.9275,
"step": 232000
},
{
"epoch": 4.77,
"learning_rate": 2.6154248661071555e-05,
"loss": 3.8816,
"step": 232500
},
{
"epoch": 4.78,
"learning_rate": 2.610305132046047e-05,
"loss": 3.8978,
"step": 233000
},
{
"epoch": 4.79,
"learning_rate": 2.6051751379968402e-05,
"loss": 3.9131,
"step": 233500
},
{
"epoch": 4.8,
"learning_rate": 2.600045143947633e-05,
"loss": 3.923,
"step": 234000
},
{
"epoch": 4.81,
"learning_rate": 2.5949151498984265e-05,
"loss": 3.9078,
"step": 234500
},
{
"epoch": 4.82,
"learning_rate": 2.589785155849219e-05,
"loss": 3.8891,
"step": 235000
},
{
"epoch": 4.83,
"learning_rate": 2.584665421788111e-05,
"loss": 3.9043,
"step": 235500
},
{
"epoch": 4.84,
"learning_rate": 2.5795354277389038e-05,
"loss": 3.9208,
"step": 236000
},
{
"epoch": 4.85,
"learning_rate": 2.574405433689697e-05,
"loss": 3.9073,
"step": 236500
},
{
"epoch": 4.86,
"learning_rate": 2.56927543964049e-05,
"loss": 3.887,
"step": 237000
},
{
"epoch": 4.87,
"learning_rate": 2.5641454455912834e-05,
"loss": 3.9111,
"step": 237500
},
{
"epoch": 4.88,
"learning_rate": 2.559015451542076e-05,
"loss": 3.865,
"step": 238000
},
{
"epoch": 4.89,
"learning_rate": 2.5538854574928693e-05,
"loss": 3.9018,
"step": 238500
},
{
"epoch": 4.9,
"learning_rate": 2.548755463443663e-05,
"loss": 3.8742,
"step": 239000
},
{
"epoch": 4.91,
"learning_rate": 2.5436459893706527e-05,
"loss": 3.8782,
"step": 239500
},
{
"epoch": 4.92,
"learning_rate": 2.5385159953214453e-05,
"loss": 3.8779,
"step": 240000
},
{
"epoch": 4.94,
"learning_rate": 2.533386001272239e-05,
"loss": 3.903,
"step": 240500
},
{
"epoch": 4.95,
"learning_rate": 2.5282560072230316e-05,
"loss": 3.8616,
"step": 241000
},
{
"epoch": 4.96,
"learning_rate": 2.523126013173825e-05,
"loss": 3.8687,
"step": 241500
},
{
"epoch": 4.97,
"learning_rate": 2.5180062791127162e-05,
"loss": 3.8893,
"step": 242000
},
{
"epoch": 4.98,
"learning_rate": 2.5128762850635096e-05,
"loss": 3.8935,
"step": 242500
},
{
"epoch": 4.99,
"learning_rate": 2.5077462910143025e-05,
"loss": 3.8659,
"step": 243000
},
{
"epoch": 5.0,
"learning_rate": 2.502616296965096e-05,
"loss": 3.9473,
"step": 243500
},
{
"epoch": 5.01,
"learning_rate": 2.4974965629039872e-05,
"loss": 3.8634,
"step": 244000
},
{
"epoch": 5.02,
"learning_rate": 2.4923768288428785e-05,
"loss": 3.8574,
"step": 244500
},
{
"epoch": 5.03,
"learning_rate": 2.487246834793672e-05,
"loss": 3.8773,
"step": 245000
},
{
"epoch": 5.04,
"learning_rate": 2.4821271007325632e-05,
"loss": 3.843,
"step": 245500
},
{
"epoch": 5.05,
"learning_rate": 2.4769971066833565e-05,
"loss": 3.885,
"step": 246000
},
{
"epoch": 5.06,
"learning_rate": 2.4718671126341495e-05,
"loss": 3.8529,
"step": 246500
},
{
"epoch": 5.07,
"learning_rate": 2.4667371185849424e-05,
"loss": 3.8452,
"step": 247000
},
{
"epoch": 5.08,
"learning_rate": 2.4616071245357354e-05,
"loss": 3.8957,
"step": 247500
},
{
"epoch": 5.09,
"learning_rate": 2.4564771304865287e-05,
"loss": 3.8577,
"step": 248000
},
{
"epoch": 5.1,
"learning_rate": 2.451347136437322e-05,
"loss": 3.8598,
"step": 248500
},
{
"epoch": 5.11,
"learning_rate": 2.446217142388115e-05,
"loss": 3.8613,
"step": 249000
},
{
"epoch": 5.12,
"learning_rate": 2.441087148338908e-05,
"loss": 3.8774,
"step": 249500
},
{
"epoch": 5.13,
"learning_rate": 2.435957154289701e-05,
"loss": 3.8774,
"step": 250000
},
{
"epoch": 5.14,
"learning_rate": 2.4308271602404943e-05,
"loss": 3.8681,
"step": 250500
},
{
"epoch": 5.15,
"learning_rate": 2.4256971661912872e-05,
"loss": 3.8859,
"step": 251000
},
{
"epoch": 5.16,
"learning_rate": 2.4205671721420806e-05,
"loss": 3.862,
"step": 251500
},
{
"epoch": 5.17,
"learning_rate": 2.4154371780928735e-05,
"loss": 3.8659,
"step": 252000
},
{
"epoch": 5.18,
"learning_rate": 2.410307184043667e-05,
"loss": 3.8633,
"step": 252500
},
{
"epoch": 5.19,
"learning_rate": 2.4051874499825582e-05,
"loss": 3.838,
"step": 253000
},
{
"epoch": 5.2,
"learning_rate": 2.4000677159214495e-05,
"loss": 3.8674,
"step": 253500
},
{
"epoch": 5.21,
"learning_rate": 2.394937721872243e-05,
"loss": 3.8754,
"step": 254000
},
{
"epoch": 5.22,
"learning_rate": 2.3898077278230358e-05,
"loss": 3.8913,
"step": 254500
},
{
"epoch": 5.23,
"learning_rate": 2.3846777337738288e-05,
"loss": 3.8363,
"step": 255000
},
{
"epoch": 5.24,
"learning_rate": 2.379547739724622e-05,
"loss": 3.8495,
"step": 255500
},
{
"epoch": 5.25,
"learning_rate": 2.374417745675415e-05,
"loss": 3.8572,
"step": 256000
},
{
"epoch": 5.26,
"learning_rate": 2.3692877516262084e-05,
"loss": 3.8744,
"step": 256500
},
{
"epoch": 5.27,
"learning_rate": 2.3641577575770014e-05,
"loss": 3.8673,
"step": 257000
},
{
"epoch": 5.28,
"learning_rate": 2.3590277635277943e-05,
"loss": 3.8655,
"step": 257500
},
{
"epoch": 5.29,
"learning_rate": 2.353908029466686e-05,
"loss": 3.8382,
"step": 258000
},
{
"epoch": 5.3,
"learning_rate": 2.348778035417479e-05,
"loss": 3.8245,
"step": 258500
},
{
"epoch": 5.31,
"learning_rate": 2.343648041368272e-05,
"loss": 3.8232,
"step": 259000
},
{
"epoch": 5.32,
"learning_rate": 2.3385180473190653e-05,
"loss": 3.8636,
"step": 259500
},
{
"epoch": 5.34,
"learning_rate": 2.3333880532698582e-05,
"loss": 3.79,
"step": 260000
},
{
"epoch": 5.35,
"learning_rate": 2.3282580592206516e-05,
"loss": 3.8223,
"step": 260500
},
{
"epoch": 5.36,
"learning_rate": 2.3231280651714445e-05,
"loss": 3.8356,
"step": 261000
},
{
"epoch": 5.37,
"learning_rate": 2.3180185910984346e-05,
"loss": 3.8253,
"step": 261500
},
{
"epoch": 5.38,
"learning_rate": 2.3128885970492276e-05,
"loss": 3.8744,
"step": 262000
},
{
"epoch": 5.39,
"learning_rate": 2.3077586030000205e-05,
"loss": 3.8458,
"step": 262500
},
{
"epoch": 5.4,
"learning_rate": 2.302628608950814e-05,
"loss": 3.8637,
"step": 263000
},
{
"epoch": 5.41,
"learning_rate": 2.2974986149016068e-05,
"loss": 3.8397,
"step": 263500
},
{
"epoch": 5.42,
"learning_rate": 2.2923686208523998e-05,
"loss": 3.8044,
"step": 264000
},
{
"epoch": 5.43,
"learning_rate": 2.287238626803193e-05,
"loss": 3.8173,
"step": 264500
},
{
"epoch": 5.44,
"learning_rate": 2.2821188927420844e-05,
"loss": 3.8435,
"step": 265000
},
{
"epoch": 5.45,
"learning_rate": 2.2769888986928774e-05,
"loss": 3.8386,
"step": 265500
},
{
"epoch": 5.46,
"learning_rate": 2.2718589046436707e-05,
"loss": 3.835,
"step": 266000
},
{
"epoch": 5.47,
"learning_rate": 2.266728910594464e-05,
"loss": 3.8471,
"step": 266500
},
{
"epoch": 5.48,
"learning_rate": 2.261598916545257e-05,
"loss": 3.8465,
"step": 267000
},
{
"epoch": 5.49,
"learning_rate": 2.25646892249605e-05,
"loss": 3.8589,
"step": 267500
},
{
"epoch": 5.5,
"learning_rate": 2.251338928446843e-05,
"loss": 3.8235,
"step": 268000
},
{
"epoch": 5.51,
"learning_rate": 2.2462089343976363e-05,
"loss": 3.8023,
"step": 268500
},
{
"epoch": 5.52,
"learning_rate": 2.2410789403484292e-05,
"loss": 3.8117,
"step": 269000
},
{
"epoch": 5.53,
"learning_rate": 2.235969466275419e-05,
"loss": 3.8287,
"step": 269500
},
{
"epoch": 5.54,
"learning_rate": 2.2308394722262123e-05,
"loss": 3.8649,
"step": 270000
},
{
"epoch": 5.55,
"learning_rate": 2.2257094781770056e-05,
"loss": 3.8493,
"step": 270500
},
{
"epoch": 5.56,
"learning_rate": 2.2205794841277986e-05,
"loss": 3.8159,
"step": 271000
},
{
"epoch": 5.57,
"learning_rate": 2.2154494900785915e-05,
"loss": 3.8185,
"step": 271500
},
{
"epoch": 5.58,
"learning_rate": 2.2103194960293845e-05,
"loss": 3.789,
"step": 272000
},
{
"epoch": 5.59,
"learning_rate": 2.2051895019801778e-05,
"loss": 3.8325,
"step": 272500
},
{
"epoch": 5.6,
"learning_rate": 2.200059507930971e-05,
"loss": 3.8406,
"step": 273000
},
{
"epoch": 5.61,
"learning_rate": 2.194929513881764e-05,
"loss": 3.8304,
"step": 273500
},
{
"epoch": 5.62,
"learning_rate": 2.189799519832557e-05,
"loss": 3.828,
"step": 274000
},
{
"epoch": 5.63,
"learning_rate": 2.1846797857714484e-05,
"loss": 3.8338,
"step": 274500
},
{
"epoch": 5.64,
"learning_rate": 2.1795497917222417e-05,
"loss": 3.7988,
"step": 275000
},
{
"epoch": 5.65,
"learning_rate": 2.174419797673035e-05,
"loss": 3.8681,
"step": 275500
},
{
"epoch": 5.66,
"learning_rate": 2.169289803623828e-05,
"loss": 3.8073,
"step": 276000
},
{
"epoch": 5.67,
"learning_rate": 2.1641700695627194e-05,
"loss": 3.8385,
"step": 276500
},
{
"epoch": 5.68,
"learning_rate": 2.1590400755135127e-05,
"loss": 3.8167,
"step": 277000
},
{
"epoch": 5.69,
"learning_rate": 2.153920341452404e-05,
"loss": 3.8017,
"step": 277500
},
{
"epoch": 5.7,
"learning_rate": 2.148790347403197e-05,
"loss": 3.8083,
"step": 278000
},
{
"epoch": 5.71,
"learning_rate": 2.1436603533539903e-05,
"loss": 3.7946,
"step": 278500
},
{
"epoch": 5.73,
"learning_rate": 2.1385303593047833e-05,
"loss": 3.7821,
"step": 279000
},
{
"epoch": 5.74,
"learning_rate": 2.1334003652555766e-05,
"loss": 3.8217,
"step": 279500
},
{
"epoch": 5.75,
"learning_rate": 2.1282703712063695e-05,
"loss": 3.831,
"step": 280000
},
{
"epoch": 5.76,
"learning_rate": 2.1231403771571625e-05,
"loss": 3.8005,
"step": 280500
},
{
"epoch": 5.77,
"learning_rate": 2.1180103831079555e-05,
"loss": 3.8192,
"step": 281000
},
{
"epoch": 5.78,
"learning_rate": 2.1128803890587488e-05,
"loss": 3.8005,
"step": 281500
},
{
"epoch": 5.79,
"learning_rate": 2.10776065499764e-05,
"loss": 3.805,
"step": 282000
},
{
"epoch": 5.8,
"learning_rate": 2.1026306609484335e-05,
"loss": 3.7921,
"step": 282500
},
{
"epoch": 5.81,
"learning_rate": 2.0975006668992264e-05,
"loss": 3.7936,
"step": 283000
},
{
"epoch": 5.82,
"learning_rate": 2.0923706728500194e-05,
"loss": 3.8194,
"step": 283500
},
{
"epoch": 5.83,
"learning_rate": 2.087250938788911e-05,
"loss": 3.7764,
"step": 284000
},
{
"epoch": 5.84,
"learning_rate": 2.082120944739704e-05,
"loss": 3.7881,
"step": 284500
},
{
"epoch": 5.85,
"learning_rate": 2.0769909506904974e-05,
"loss": 3.7697,
"step": 285000
},
{
"epoch": 5.86,
"learning_rate": 2.0718609566412903e-05,
"loss": 3.8,
"step": 285500
},
{
"epoch": 5.87,
"learning_rate": 2.0667309625920837e-05,
"loss": 3.7786,
"step": 286000
},
{
"epoch": 5.88,
"learning_rate": 2.0616009685428766e-05,
"loss": 3.822,
"step": 286500
},
{
"epoch": 5.89,
"learning_rate": 2.0564709744936696e-05,
"loss": 3.7656,
"step": 287000
},
{
"epoch": 5.9,
"learning_rate": 2.0513512404325613e-05,
"loss": 3.796,
"step": 287500
},
{
"epoch": 5.91,
"learning_rate": 2.0462212463833543e-05,
"loss": 3.8181,
"step": 288000
},
{
"epoch": 5.92,
"learning_rate": 2.0410912523341476e-05,
"loss": 3.8336,
"step": 288500
},
{
"epoch": 5.93,
"learning_rate": 2.035971518273039e-05,
"loss": 3.7975,
"step": 289000
},
{
"epoch": 5.94,
"learning_rate": 2.0308415242238322e-05,
"loss": 3.7465,
"step": 289500
},
{
"epoch": 5.95,
"learning_rate": 2.0257115301746252e-05,
"loss": 3.7907,
"step": 290000
},
{
"epoch": 5.96,
"learning_rate": 2.0205815361254182e-05,
"loss": 3.7742,
"step": 290500
},
{
"epoch": 5.97,
"learning_rate": 2.015451542076211e-05,
"loss": 3.8057,
"step": 291000
},
{
"epoch": 5.98,
"learning_rate": 2.0103215480270045e-05,
"loss": 3.8435,
"step": 291500
},
{
"epoch": 5.99,
"learning_rate": 2.0051915539777974e-05,
"loss": 3.7912,
"step": 292000
},
{
"epoch": 6.0,
"learning_rate": 2.0000615599285904e-05,
"loss": 3.7894,
"step": 292500
},
{
"epoch": 6.01,
"learning_rate": 1.994941825867482e-05,
"loss": 3.7659,
"step": 293000
},
{
"epoch": 6.02,
"learning_rate": 1.989811831818275e-05,
"loss": 3.7678,
"step": 293500
},
{
"epoch": 6.03,
"learning_rate": 1.9846920977571667e-05,
"loss": 3.7787,
"step": 294000
},
{
"epoch": 6.04,
"learning_rate": 1.9795621037079597e-05,
"loss": 3.7889,
"step": 294500
},
{
"epoch": 6.05,
"learning_rate": 1.9744321096587527e-05,
"loss": 3.776,
"step": 295000
},
{
"epoch": 6.06,
"learning_rate": 1.969302115609546e-05,
"loss": 3.8081,
"step": 295500
},
{
"epoch": 6.07,
"learning_rate": 1.9641823815484377e-05,
"loss": 3.7669,
"step": 296000
},
{
"epoch": 6.08,
"learning_rate": 1.9590523874992307e-05,
"loss": 3.8278,
"step": 296500
},
{
"epoch": 6.09,
"learning_rate": 1.9539223934500236e-05,
"loss": 3.7803,
"step": 297000
},
{
"epoch": 6.1,
"learning_rate": 1.9487923994008166e-05,
"loss": 3.8018,
"step": 297500
},
{
"epoch": 6.11,
"learning_rate": 1.94366240535161e-05,
"loss": 3.7972,
"step": 298000
},
{
"epoch": 6.13,
"learning_rate": 1.9385324113024032e-05,
"loss": 3.7718,
"step": 298500
},
{
"epoch": 6.14,
"learning_rate": 1.9334024172531962e-05,
"loss": 3.766,
"step": 299000
},
{
"epoch": 6.15,
"learning_rate": 1.9282724232039892e-05,
"loss": 3.7857,
"step": 299500
},
{
"epoch": 6.16,
"learning_rate": 1.9231526891428805e-05,
"loss": 3.7604,
"step": 300000
},
{
"epoch": 6.17,
"learning_rate": 1.9180226950936738e-05,
"loss": 3.77,
"step": 300500
},
{
"epoch": 6.18,
"learning_rate": 1.9128927010444668e-05,
"loss": 3.7844,
"step": 301000
},
{
"epoch": 6.19,
"learning_rate": 1.90776270699526e-05,
"loss": 3.8153,
"step": 301500
},
{
"epoch": 6.2,
"learning_rate": 1.902632712946053e-05,
"loss": 3.7875,
"step": 302000
},
{
"epoch": 6.21,
"learning_rate": 1.897502718896846e-05,
"loss": 3.749,
"step": 302500
},
{
"epoch": 6.22,
"learning_rate": 1.892372724847639e-05,
"loss": 3.7781,
"step": 303000
},
{
"epoch": 6.23,
"learning_rate": 1.8872427307984323e-05,
"loss": 3.7711,
"step": 303500
},
{
"epoch": 6.24,
"learning_rate": 1.8821127367492257e-05,
"loss": 3.7755,
"step": 304000
},
{
"epoch": 6.25,
"learning_rate": 1.8769827427000186e-05,
"loss": 3.7479,
"step": 304500
},
{
"epoch": 6.26,
"learning_rate": 1.8718527486508116e-05,
"loss": 3.78,
"step": 305000
},
{
"epoch": 6.27,
"learning_rate": 1.8667227546016046e-05,
"loss": 3.7609,
"step": 305500
},
{
"epoch": 6.28,
"learning_rate": 1.8616030205404963e-05,
"loss": 3.7848,
"step": 306000
},
{
"epoch": 6.29,
"learning_rate": 1.8564832864793876e-05,
"loss": 3.7519,
"step": 306500
},
{
"epoch": 6.3,
"learning_rate": 1.8513532924301806e-05,
"loss": 3.7789,
"step": 307000
},
{
"epoch": 6.31,
"learning_rate": 1.8462232983809742e-05,
"loss": 3.759,
"step": 307500
},
{
"epoch": 6.32,
"learning_rate": 1.8411035643198656e-05,
"loss": 3.7669,
"step": 308000
},
{
"epoch": 6.33,
"learning_rate": 1.8359735702706585e-05,
"loss": 3.7612,
"step": 308500
},
{
"epoch": 6.34,
"learning_rate": 1.830843576221452e-05,
"loss": 3.7493,
"step": 309000
},
{
"epoch": 6.35,
"learning_rate": 1.8257135821722448e-05,
"loss": 3.7789,
"step": 309500
},
{
"epoch": 6.36,
"learning_rate": 1.8205835881230378e-05,
"loss": 3.7703,
"step": 310000
},
{
"epoch": 6.37,
"learning_rate": 1.815453594073831e-05,
"loss": 3.7513,
"step": 310500
},
{
"epoch": 6.38,
"learning_rate": 1.810323600024624e-05,
"loss": 3.745,
"step": 311000
},
{
"epoch": 6.39,
"learning_rate": 1.805193605975417e-05,
"loss": 3.743,
"step": 311500
},
{
"epoch": 6.4,
"learning_rate": 1.80006361192621e-05,
"loss": 3.7827,
"step": 312000
},
{
"epoch": 6.41,
"learning_rate": 1.7949438778651017e-05,
"loss": 3.766,
"step": 312500
},
{
"epoch": 6.42,
"learning_rate": 1.7898241438039934e-05,
"loss": 3.7877,
"step": 313000
},
{
"epoch": 6.43,
"learning_rate": 1.7846941497547864e-05,
"loss": 3.7391,
"step": 313500
},
{
"epoch": 6.44,
"learning_rate": 1.7795641557055793e-05,
"loss": 3.7594,
"step": 314000
},
{
"epoch": 6.45,
"learning_rate": 1.7744341616563727e-05,
"loss": 3.7925,
"step": 314500
},
{
"epoch": 6.46,
"learning_rate": 1.769314427595264e-05,
"loss": 3.7439,
"step": 315000
},
{
"epoch": 6.47,
"learning_rate": 1.7641844335460573e-05,
"loss": 3.7496,
"step": 315500
},
{
"epoch": 6.48,
"learning_rate": 1.7590544394968503e-05,
"loss": 3.7429,
"step": 316000
},
{
"epoch": 6.49,
"learning_rate": 1.7539244454476433e-05,
"loss": 3.7272,
"step": 316500
},
{
"epoch": 6.5,
"learning_rate": 1.7487944513984362e-05,
"loss": 3.7492,
"step": 317000
},
{
"epoch": 6.52,
"learning_rate": 1.7436644573492295e-05,
"loss": 3.7413,
"step": 317500
},
{
"epoch": 6.53,
"learning_rate": 1.738534463300023e-05,
"loss": 3.7537,
"step": 318000
},
{
"epoch": 6.54,
"learning_rate": 1.7334044692508158e-05,
"loss": 3.757,
"step": 318500
},
{
"epoch": 6.55,
"learning_rate": 1.7282744752016088e-05,
"loss": 3.763,
"step": 319000
},
{
"epoch": 6.56,
"learning_rate": 1.723144481152402e-05,
"loss": 3.7308,
"step": 319500
},
{
"epoch": 6.57,
"learning_rate": 1.718014487103195e-05,
"loss": 3.7588,
"step": 320000
},
{
"epoch": 6.58,
"learning_rate": 1.712884493053988e-05,
"loss": 3.7518,
"step": 320500
},
{
"epoch": 6.59,
"learning_rate": 1.7077647589928797e-05,
"loss": 3.7607,
"step": 321000
},
{
"epoch": 6.6,
"learning_rate": 1.7026347649436727e-05,
"loss": 3.7389,
"step": 321500
},
{
"epoch": 6.61,
"learning_rate": 1.6975047708944657e-05,
"loss": 3.7351,
"step": 322000
},
{
"epoch": 6.62,
"learning_rate": 1.692374776845259e-05,
"loss": 3.7515,
"step": 322500
},
{
"epoch": 6.63,
"learning_rate": 1.687244782796052e-05,
"loss": 3.7696,
"step": 323000
},
{
"epoch": 6.64,
"learning_rate": 1.6821250487349437e-05,
"loss": 3.7349,
"step": 323500
},
{
"epoch": 6.65,
"learning_rate": 1.6769950546857366e-05,
"loss": 3.7388,
"step": 324000
},
{
"epoch": 6.66,
"learning_rate": 1.6718650606365296e-05,
"loss": 3.7543,
"step": 324500
},
{
"epoch": 6.67,
"learning_rate": 1.6667350665873226e-05,
"loss": 3.7633,
"step": 325000
},
{
"epoch": 6.68,
"learning_rate": 1.6616153325262143e-05,
"loss": 3.7501,
"step": 325500
},
{
"epoch": 6.69,
"learning_rate": 1.6564853384770072e-05,
"loss": 3.7473,
"step": 326000
},
{
"epoch": 6.7,
"learning_rate": 1.6513553444278005e-05,
"loss": 3.7029,
"step": 326500
},
{
"epoch": 6.71,
"learning_rate": 1.646235610366692e-05,
"loss": 3.7224,
"step": 327000
},
{
"epoch": 6.72,
"learning_rate": 1.6411056163174852e-05,
"loss": 3.7497,
"step": 327500
},
{
"epoch": 6.73,
"learning_rate": 1.635975622268278e-05,
"loss": 3.7226,
"step": 328000
},
{
"epoch": 6.74,
"learning_rate": 1.630845628219071e-05,
"loss": 3.748,
"step": 328500
},
{
"epoch": 6.75,
"learning_rate": 1.6257156341698645e-05,
"loss": 3.7475,
"step": 329000
},
{
"epoch": 6.76,
"learning_rate": 1.6205856401206578e-05,
"loss": 3.7278,
"step": 329500
},
{
"epoch": 6.77,
"learning_rate": 1.6154556460714507e-05,
"loss": 3.6932,
"step": 330000
},
{
"epoch": 6.78,
"learning_rate": 1.610335912010342e-05,
"loss": 3.7137,
"step": 330500
},
{
"epoch": 6.79,
"learning_rate": 1.6052059179611354e-05,
"loss": 3.6953,
"step": 331000
},
{
"epoch": 6.8,
"learning_rate": 1.6000759239119284e-05,
"loss": 3.7323,
"step": 331500
},
{
"epoch": 6.81,
"learning_rate": 1.5949459298627213e-05,
"loss": 3.7612,
"step": 332000
},
{
"epoch": 6.82,
"learning_rate": 1.5898159358135146e-05,
"loss": 3.7452,
"step": 332500
},
{
"epoch": 6.83,
"learning_rate": 1.5846859417643076e-05,
"loss": 3.7379,
"step": 333000
},
{
"epoch": 6.84,
"learning_rate": 1.5795662077031993e-05,
"loss": 3.7193,
"step": 333500
},
{
"epoch": 6.85,
"learning_rate": 1.5744362136539923e-05,
"loss": 3.7023,
"step": 334000
},
{
"epoch": 6.86,
"learning_rate": 1.5693062196047853e-05,
"loss": 3.7158,
"step": 334500
},
{
"epoch": 6.87,
"learning_rate": 1.564186485543677e-05,
"loss": 3.7399,
"step": 335000
},
{
"epoch": 6.88,
"learning_rate": 1.55905649149447e-05,
"loss": 3.7258,
"step": 335500
},
{
"epoch": 6.89,
"learning_rate": 1.553926497445263e-05,
"loss": 3.7434,
"step": 336000
},
{
"epoch": 6.9,
"learning_rate": 1.5487965033960562e-05,
"loss": 3.7052,
"step": 336500
},
{
"epoch": 6.92,
"learning_rate": 1.5436767693349475e-05,
"loss": 3.7202,
"step": 337000
},
{
"epoch": 6.93,
"learning_rate": 1.538546775285741e-05,
"loss": 3.7165,
"step": 337500
},
{
"epoch": 6.94,
"learning_rate": 1.5334167812365338e-05,
"loss": 3.703,
"step": 338000
},
{
"epoch": 6.95,
"learning_rate": 1.5282867871873268e-05,
"loss": 3.698,
"step": 338500
},
{
"epoch": 6.96,
"learning_rate": 1.52315679313812e-05,
"loss": 3.7597,
"step": 339000
},
{
"epoch": 6.97,
"learning_rate": 1.5180267990889132e-05,
"loss": 3.7319,
"step": 339500
},
{
"epoch": 6.98,
"learning_rate": 1.5128968050397064e-05,
"loss": 3.7347,
"step": 340000
},
{
"epoch": 6.99,
"learning_rate": 1.5077668109904994e-05,
"loss": 3.7027,
"step": 340500
},
{
"epoch": 7.0,
"learning_rate": 1.5026470769293907e-05,
"loss": 3.7179,
"step": 341000
},
{
"epoch": 7.01,
"learning_rate": 1.497517082880184e-05,
"loss": 3.6961,
"step": 341500
},
{
"epoch": 7.02,
"learning_rate": 1.4923870888309772e-05,
"loss": 3.7059,
"step": 342000
},
{
"epoch": 7.03,
"learning_rate": 1.4872570947817701e-05,
"loss": 3.7243,
"step": 342500
},
{
"epoch": 7.04,
"learning_rate": 1.4821271007325633e-05,
"loss": 3.6775,
"step": 343000
},
{
"epoch": 7.05,
"learning_rate": 1.4769971066833562e-05,
"loss": 3.7294,
"step": 343500
},
{
"epoch": 7.06,
"learning_rate": 1.4718671126341494e-05,
"loss": 3.7296,
"step": 344000
},
{
"epoch": 7.07,
"learning_rate": 1.4667371185849424e-05,
"loss": 3.699,
"step": 344500
},
{
"epoch": 7.08,
"learning_rate": 1.4616071245357357e-05,
"loss": 3.6937,
"step": 345000
},
{
"epoch": 7.09,
"learning_rate": 1.456487390474627e-05,
"loss": 3.7385,
"step": 345500
},
{
"epoch": 7.1,
"learning_rate": 1.4513573964254202e-05,
"loss": 3.7215,
"step": 346000
},
{
"epoch": 7.11,
"learning_rate": 1.4462274023762131e-05,
"loss": 3.6759,
"step": 346500
},
{
"epoch": 7.12,
"learning_rate": 1.4410974083270064e-05,
"loss": 3.7289,
"step": 347000
},
{
"epoch": 7.13,
"learning_rate": 1.4359674142777996e-05,
"loss": 3.6964,
"step": 347500
},
{
"epoch": 7.14,
"learning_rate": 1.4308374202285926e-05,
"loss": 3.7005,
"step": 348000
},
{
"epoch": 7.15,
"learning_rate": 1.4257074261793857e-05,
"loss": 3.7368,
"step": 348500
},
{
"epoch": 7.16,
"learning_rate": 1.4205774321301787e-05,
"loss": 3.6993,
"step": 349000
},
{
"epoch": 7.17,
"learning_rate": 1.4154576980690704e-05,
"loss": 3.7237,
"step": 349500
},
{
"epoch": 7.18,
"learning_rate": 1.4103277040198633e-05,
"loss": 3.7022,
"step": 350000
},
{
"epoch": 7.19,
"learning_rate": 1.4051977099706565e-05,
"loss": 3.651,
"step": 350500
},
{
"epoch": 7.2,
"learning_rate": 1.4000677159214494e-05,
"loss": 3.7123,
"step": 351000
},
{
"epoch": 7.21,
"learning_rate": 1.3949377218722426e-05,
"loss": 3.7059,
"step": 351500
},
{
"epoch": 7.22,
"learning_rate": 1.3898179878111343e-05,
"loss": 3.7122,
"step": 352000
},
{
"epoch": 7.23,
"learning_rate": 1.3846879937619272e-05,
"loss": 3.7173,
"step": 352500
},
{
"epoch": 7.24,
"learning_rate": 1.379568259700819e-05,
"loss": 3.7134,
"step": 353000
},
{
"epoch": 7.25,
"learning_rate": 1.3744382656516119e-05,
"loss": 3.6982,
"step": 353500
},
{
"epoch": 7.26,
"learning_rate": 1.369308271602405e-05,
"loss": 3.7201,
"step": 354000
},
{
"epoch": 7.27,
"learning_rate": 1.364178277553198e-05,
"loss": 3.6949,
"step": 354500
},
{
"epoch": 7.28,
"learning_rate": 1.3590482835039912e-05,
"loss": 3.6863,
"step": 355000
},
{
"epoch": 7.29,
"learning_rate": 1.3539285494428827e-05,
"loss": 3.7027,
"step": 355500
},
{
"epoch": 7.31,
"learning_rate": 1.3487985553936758e-05,
"loss": 3.7029,
"step": 356000
},
{
"epoch": 7.32,
"learning_rate": 1.3436685613444688e-05,
"loss": 3.6962,
"step": 356500
},
{
"epoch": 7.33,
"learning_rate": 1.338538567295262e-05,
"loss": 3.7232,
"step": 357000
},
{
"epoch": 7.34,
"learning_rate": 1.3334085732460552e-05,
"loss": 3.7096,
"step": 357500
},
{
"epoch": 7.35,
"learning_rate": 1.3282785791968482e-05,
"loss": 3.7207,
"step": 358000
},
{
"epoch": 7.36,
"learning_rate": 1.3231485851476414e-05,
"loss": 3.7219,
"step": 358500
},
{
"epoch": 7.37,
"learning_rate": 1.3180288510865327e-05,
"loss": 3.7024,
"step": 359000
},
{
"epoch": 7.38,
"learning_rate": 1.312898857037326e-05,
"loss": 3.7225,
"step": 359500
},
{
"epoch": 7.39,
"learning_rate": 1.307768862988119e-05,
"loss": 3.6723,
"step": 360000
},
{
"epoch": 7.4,
"learning_rate": 1.3026388689389121e-05,
"loss": 3.7091,
"step": 360500
},
{
"epoch": 7.41,
"learning_rate": 1.2975088748897051e-05,
"loss": 3.6912,
"step": 361000
},
{
"epoch": 7.42,
"learning_rate": 1.2923891408285968e-05,
"loss": 3.6878,
"step": 361500
},
{
"epoch": 7.43,
"learning_rate": 1.28725914677939e-05,
"loss": 3.6944,
"step": 362000
},
{
"epoch": 7.44,
"learning_rate": 1.2821291527301829e-05,
"loss": 3.6658,
"step": 362500
},
{
"epoch": 7.45,
"learning_rate": 1.276999158680976e-05,
"loss": 3.7055,
"step": 363000
},
{
"epoch": 7.46,
"learning_rate": 1.271869164631769e-05,
"loss": 3.7103,
"step": 363500
},
{
"epoch": 7.47,
"learning_rate": 1.2667391705825622e-05,
"loss": 3.701,
"step": 364000
},
{
"epoch": 7.48,
"learning_rate": 1.2616091765333555e-05,
"loss": 3.6841,
"step": 364500
},
{
"epoch": 7.49,
"learning_rate": 1.2564894424722468e-05,
"loss": 3.6652,
"step": 365000
},
{
"epoch": 7.5,
"learning_rate": 1.2513594484230398e-05,
"loss": 3.7217,
"step": 365500
},
{
"epoch": 7.51,
"learning_rate": 1.246229454373833e-05,
"loss": 3.7033,
"step": 366000
},
{
"epoch": 7.52,
"learning_rate": 1.241099460324626e-05,
"loss": 3.7162,
"step": 366500
},
{
"epoch": 7.53,
"learning_rate": 1.235969466275419e-05,
"loss": 3.702,
"step": 367000
},
{
"epoch": 7.54,
"learning_rate": 1.2308394722262124e-05,
"loss": 3.6944,
"step": 367500
},
{
"epoch": 7.55,
"learning_rate": 1.2257197381651039e-05,
"loss": 3.7221,
"step": 368000
},
{
"epoch": 7.56,
"learning_rate": 1.2205897441158968e-05,
"loss": 3.6708,
"step": 368500
},
{
"epoch": 7.57,
"learning_rate": 1.21545975006669e-05,
"loss": 3.6831,
"step": 369000
},
{
"epoch": 7.58,
"learning_rate": 1.2103297560174831e-05,
"loss": 3.6817,
"step": 369500
},
{
"epoch": 7.59,
"learning_rate": 1.2051997619682761e-05,
"loss": 3.7204,
"step": 370000
},
{
"epoch": 7.6,
"learning_rate": 1.2000697679190692e-05,
"loss": 3.7261,
"step": 370500
},
{
"epoch": 7.61,
"learning_rate": 1.1949397738698624e-05,
"loss": 3.6543,
"step": 371000
},
{
"epoch": 7.62,
"learning_rate": 1.1898200398087539e-05,
"loss": 3.7125,
"step": 371500
},
{
"epoch": 7.63,
"learning_rate": 1.1846900457595469e-05,
"loss": 3.6821,
"step": 372000
},
{
"epoch": 7.64,
"learning_rate": 1.17956005171034e-05,
"loss": 3.6991,
"step": 372500
},
{
"epoch": 7.65,
"learning_rate": 1.1744300576611332e-05,
"loss": 3.6953,
"step": 373000
},
{
"epoch": 7.66,
"learning_rate": 1.1693000636119263e-05,
"loss": 3.7289,
"step": 373500
},
{
"epoch": 7.67,
"learning_rate": 1.1641700695627193e-05,
"loss": 3.6919,
"step": 374000
},
{
"epoch": 7.68,
"learning_rate": 1.1590400755135126e-05,
"loss": 3.7074,
"step": 374500
},
{
"epoch": 7.69,
"learning_rate": 1.1539100814643056e-05,
"loss": 3.7045,
"step": 375000
},
{
"epoch": 7.71,
"learning_rate": 1.148790347403197e-05,
"loss": 3.6862,
"step": 375500
},
{
"epoch": 7.72,
"learning_rate": 1.14366035335399e-05,
"loss": 3.661,
"step": 376000
},
{
"epoch": 7.73,
"learning_rate": 1.1385303593047834e-05,
"loss": 3.6478,
"step": 376500
},
{
"epoch": 7.74,
"learning_rate": 1.1334003652555763e-05,
"loss": 3.6391,
"step": 377000
},
{
"epoch": 7.75,
"learning_rate": 1.1282703712063695e-05,
"loss": 3.6802,
"step": 377500
},
{
"epoch": 7.76,
"learning_rate": 1.123150637145261e-05,
"loss": 3.67,
"step": 378000
},
{
"epoch": 7.77,
"learning_rate": 1.1180206430960541e-05,
"loss": 3.6543,
"step": 378500
},
{
"epoch": 7.78,
"learning_rate": 1.1129009090349456e-05,
"loss": 3.6902,
"step": 379000
},
{
"epoch": 7.79,
"learning_rate": 1.1077709149857386e-05,
"loss": 3.6943,
"step": 379500
},
{
"epoch": 7.8,
"learning_rate": 1.1026511809246303e-05,
"loss": 3.6582,
"step": 380000
},
{
"epoch": 7.81,
"learning_rate": 1.0975211868754233e-05,
"loss": 3.7017,
"step": 380500
},
{
"epoch": 7.82,
"learning_rate": 1.0923911928262164e-05,
"loss": 3.6806,
"step": 381000
},
{
"epoch": 7.83,
"learning_rate": 1.0872611987770094e-05,
"loss": 3.6638,
"step": 381500
},
{
"epoch": 7.84,
"learning_rate": 1.0821312047278025e-05,
"loss": 3.683,
"step": 382000
},
{
"epoch": 7.85,
"learning_rate": 1.0770012106785957e-05,
"loss": 3.699,
"step": 382500
},
{
"epoch": 7.86,
"learning_rate": 1.0718712166293886e-05,
"loss": 3.6675,
"step": 383000
},
{
"epoch": 7.87,
"learning_rate": 1.066741222580182e-05,
"loss": 3.6736,
"step": 383500
},
{
"epoch": 7.88,
"learning_rate": 1.061611228530975e-05,
"loss": 3.6253,
"step": 384000
},
{
"epoch": 7.89,
"learning_rate": 1.056481234481768e-05,
"loss": 3.6682,
"step": 384500
},
{
"epoch": 7.9,
"learning_rate": 1.051351240432561e-05,
"loss": 3.6791,
"step": 385000
},
{
"epoch": 7.91,
"learning_rate": 1.0462212463833543e-05,
"loss": 3.7102,
"step": 385500
},
{
"epoch": 7.92,
"learning_rate": 1.0411015123222457e-05,
"loss": 3.6935,
"step": 386000
},
{
"epoch": 7.93,
"learning_rate": 1.0359715182730388e-05,
"loss": 3.6438,
"step": 386500
},
{
"epoch": 7.94,
"learning_rate": 1.030841524223832e-05,
"loss": 3.6608,
"step": 387000
},
{
"epoch": 7.95,
"learning_rate": 1.0257217901627235e-05,
"loss": 3.6675,
"step": 387500
},
{
"epoch": 7.96,
"learning_rate": 1.0205917961135165e-05,
"loss": 3.6718,
"step": 388000
},
{
"epoch": 7.97,
"learning_rate": 1.0154618020643096e-05,
"loss": 3.668,
"step": 388500
},
{
"epoch": 7.98,
"learning_rate": 1.0103318080151028e-05,
"loss": 3.6562,
"step": 389000
},
{
"epoch": 7.99,
"learning_rate": 1.0052018139658959e-05,
"loss": 3.6583,
"step": 389500
},
{
"epoch": 8.0,
"learning_rate": 1.0000718199166889e-05,
"loss": 3.6691,
"step": 390000
},
{
"epoch": 8.01,
"learning_rate": 9.949418258674822e-06,
"loss": 3.6664,
"step": 390500
},
{
"epoch": 8.02,
"learning_rate": 9.898118318182751e-06,
"loss": 3.6405,
"step": 391000
},
{
"epoch": 8.03,
"learning_rate": 9.846920977571667e-06,
"loss": 3.6682,
"step": 391500
},
{
"epoch": 8.04,
"learning_rate": 9.795723636960582e-06,
"loss": 3.6599,
"step": 392000
},
{
"epoch": 8.05,
"learning_rate": 9.744423696468513e-06,
"loss": 3.6527,
"step": 392500
},
{
"epoch": 8.06,
"learning_rate": 9.693123755976443e-06,
"loss": 3.697,
"step": 393000
},
{
"epoch": 8.07,
"learning_rate": 9.641823815484374e-06,
"loss": 3.646,
"step": 393500
},
{
"epoch": 8.08,
"learning_rate": 9.590523874992304e-06,
"loss": 3.6486,
"step": 394000
},
{
"epoch": 8.1,
"learning_rate": 9.539223934500237e-06,
"loss": 3.643,
"step": 394500
},
{
"epoch": 8.11,
"learning_rate": 9.488026593889152e-06,
"loss": 3.6605,
"step": 395000
},
{
"epoch": 8.12,
"learning_rate": 9.436726653397082e-06,
"loss": 3.625,
"step": 395500
},
{
"epoch": 8.13,
"learning_rate": 9.385426712905013e-06,
"loss": 3.6781,
"step": 396000
},
{
"epoch": 8.14,
"learning_rate": 9.334126772412945e-06,
"loss": 3.684,
"step": 396500
},
{
"epoch": 8.15,
"learning_rate": 9.282826831920875e-06,
"loss": 3.6696,
"step": 397000
},
{
"epoch": 8.16,
"learning_rate": 9.231526891428806e-06,
"loss": 3.6563,
"step": 397500
},
{
"epoch": 8.17,
"learning_rate": 9.180226950936737e-06,
"loss": 3.6521,
"step": 398000
},
{
"epoch": 8.18,
"learning_rate": 9.128927010444669e-06,
"loss": 3.6741,
"step": 398500
},
{
"epoch": 8.19,
"learning_rate": 9.077627069952599e-06,
"loss": 3.6389,
"step": 399000
},
{
"epoch": 8.2,
"learning_rate": 9.026429729341514e-06,
"loss": 3.6597,
"step": 399500
},
{
"epoch": 8.21,
"learning_rate": 8.975129788849445e-06,
"loss": 3.669,
"step": 400000
},
{
"epoch": 8.22,
"learning_rate": 8.923829848357377e-06,
"loss": 3.6309,
"step": 400500
},
{
"epoch": 8.23,
"learning_rate": 8.872529907865306e-06,
"loss": 3.6264,
"step": 401000
},
{
"epoch": 8.24,
"learning_rate": 8.821332567254223e-06,
"loss": 3.6721,
"step": 401500
},
{
"epoch": 8.25,
"learning_rate": 8.770032626762153e-06,
"loss": 3.627,
"step": 402000
},
{
"epoch": 8.26,
"learning_rate": 8.718835286151068e-06,
"loss": 3.6375,
"step": 402500
},
{
"epoch": 8.27,
"learning_rate": 8.667535345659e-06,
"loss": 3.663,
"step": 403000
},
{
"epoch": 8.28,
"learning_rate": 8.616235405166931e-06,
"loss": 3.6765,
"step": 403500
},
{
"epoch": 8.29,
"learning_rate": 8.56493546467486e-06,
"loss": 3.6448,
"step": 404000
},
{
"epoch": 8.3,
"learning_rate": 8.513635524182792e-06,
"loss": 3.6436,
"step": 404500
},
{
"epoch": 8.31,
"learning_rate": 8.462335583690723e-06,
"loss": 3.664,
"step": 405000
},
{
"epoch": 8.32,
"learning_rate": 8.411035643198655e-06,
"loss": 3.658,
"step": 405500
},
{
"epoch": 8.33,
"learning_rate": 8.359735702706585e-06,
"loss": 3.642,
"step": 406000
},
{
"epoch": 8.34,
"learning_rate": 8.308435762214516e-06,
"loss": 3.6617,
"step": 406500
},
{
"epoch": 8.35,
"learning_rate": 8.257238421603431e-06,
"loss": 3.6868,
"step": 407000
},
{
"epoch": 8.36,
"learning_rate": 8.205938481111363e-06,
"loss": 3.6467,
"step": 407500
},
{
"epoch": 8.37,
"learning_rate": 8.154638540619292e-06,
"loss": 3.671,
"step": 408000
},
{
"epoch": 8.38,
"learning_rate": 8.103338600127225e-06,
"loss": 3.667,
"step": 408500
},
{
"epoch": 8.39,
"learning_rate": 8.052141259516139e-06,
"loss": 3.6418,
"step": 409000
},
{
"epoch": 8.4,
"learning_rate": 8.00084131902407e-06,
"loss": 3.6336,
"step": 409500
},
{
"epoch": 8.41,
"learning_rate": 7.949541378532e-06,
"loss": 3.6337,
"step": 410000
},
{
"epoch": 8.42,
"learning_rate": 7.8984466378019e-06,
"loss": 3.6312,
"step": 410500
},
{
"epoch": 8.43,
"learning_rate": 7.847146697309832e-06,
"loss": 3.6719,
"step": 411000
},
{
"epoch": 8.44,
"learning_rate": 7.795846756817762e-06,
"loss": 3.6541,
"step": 411500
},
{
"epoch": 8.45,
"learning_rate": 7.744546816325693e-06,
"loss": 3.6703,
"step": 412000
},
{
"epoch": 8.46,
"learning_rate": 7.693246875833625e-06,
"loss": 3.666,
"step": 412500
},
{
"epoch": 8.47,
"learning_rate": 7.641946935341556e-06,
"loss": 3.6529,
"step": 413000
},
{
"epoch": 8.49,
"learning_rate": 7.590646994849486e-06,
"loss": 3.6305,
"step": 413500
},
{
"epoch": 8.5,
"learning_rate": 7.539347054357418e-06,
"loss": 3.6171,
"step": 414000
},
{
"epoch": 8.51,
"learning_rate": 7.488149713746332e-06,
"loss": 3.6429,
"step": 414500
},
{
"epoch": 8.52,
"learning_rate": 7.436849773254263e-06,
"loss": 3.6609,
"step": 415000
},
{
"epoch": 8.53,
"learning_rate": 7.3855498327621935e-06,
"loss": 3.6829,
"step": 415500
},
{
"epoch": 8.54,
"learning_rate": 7.334249892270126e-06,
"loss": 3.6737,
"step": 416000
},
{
"epoch": 8.55,
"learning_rate": 7.282949951778056e-06,
"loss": 3.6503,
"step": 416500
},
{
"epoch": 8.56,
"learning_rate": 7.231650011285987e-06,
"loss": 3.5959,
"step": 417000
},
{
"epoch": 8.57,
"learning_rate": 7.180350070793919e-06,
"loss": 3.653,
"step": 417500
},
{
"epoch": 8.58,
"learning_rate": 7.12905013030185e-06,
"loss": 3.6283,
"step": 418000
},
{
"epoch": 8.59,
"learning_rate": 7.077852789690764e-06,
"loss": 3.6632,
"step": 418500
},
{
"epoch": 8.6,
"learning_rate": 7.026552849198695e-06,
"loss": 3.6649,
"step": 419000
},
{
"epoch": 8.61,
"learning_rate": 6.975252908706627e-06,
"loss": 3.6467,
"step": 419500
},
{
"epoch": 8.62,
"learning_rate": 6.9239529682145574e-06,
"loss": 3.672,
"step": 420000
},
{
"epoch": 8.63,
"learning_rate": 6.872653027722488e-06,
"loss": 3.6617,
"step": 420500
},
{
"epoch": 8.64,
"learning_rate": 6.821455687111402e-06,
"loss": 3.6675,
"step": 421000
},
{
"epoch": 8.65,
"learning_rate": 6.770155746619335e-06,
"loss": 3.6744,
"step": 421500
},
{
"epoch": 8.66,
"learning_rate": 6.718855806127265e-06,
"loss": 3.642,
"step": 422000
},
{
"epoch": 8.67,
"learning_rate": 6.667555865635196e-06,
"loss": 3.6462,
"step": 422500
},
{
"epoch": 8.68,
"learning_rate": 6.616358525024112e-06,
"loss": 3.6384,
"step": 423000
},
{
"epoch": 8.69,
"learning_rate": 6.565058584532042e-06,
"loss": 3.6566,
"step": 423500
},
{
"epoch": 8.7,
"learning_rate": 6.513758644039973e-06,
"loss": 3.627,
"step": 424000
},
{
"epoch": 8.71,
"learning_rate": 6.4624587035479034e-06,
"loss": 3.6055,
"step": 424500
},
{
"epoch": 8.72,
"learning_rate": 6.411158763055836e-06,
"loss": 3.6213,
"step": 425000
},
{
"epoch": 8.73,
"learning_rate": 6.359858822563766e-06,
"loss": 3.6507,
"step": 425500
},
{
"epoch": 8.74,
"learning_rate": 6.308661481952681e-06,
"loss": 3.6337,
"step": 426000
},
{
"epoch": 8.75,
"learning_rate": 6.257361541460611e-06,
"loss": 3.6383,
"step": 426500
},
{
"epoch": 8.76,
"learning_rate": 6.2060616009685434e-06,
"loss": 3.6197,
"step": 427000
},
{
"epoch": 8.77,
"learning_rate": 6.154761660476474e-06,
"loss": 3.6605,
"step": 427500
},
{
"epoch": 8.78,
"learning_rate": 6.103461719984405e-06,
"loss": 3.6235,
"step": 428000
},
{
"epoch": 8.79,
"learning_rate": 6.052161779492336e-06,
"loss": 3.6859,
"step": 428500
},
{
"epoch": 8.8,
"learning_rate": 6.000861839000267e-06,
"loss": 3.6256,
"step": 429000
},
{
"epoch": 8.81,
"learning_rate": 5.949561898508198e-06,
"loss": 3.6361,
"step": 429500
},
{
"epoch": 8.82,
"learning_rate": 5.898261958016129e-06,
"loss": 3.6645,
"step": 430000
},
{
"epoch": 8.83,
"learning_rate": 5.84696201752406e-06,
"loss": 3.6387,
"step": 430500
},
{
"epoch": 8.84,
"learning_rate": 5.7956620770319905e-06,
"loss": 3.6499,
"step": 431000
},
{
"epoch": 8.85,
"learning_rate": 5.744362136539922e-06,
"loss": 3.6571,
"step": 431500
},
{
"epoch": 8.86,
"learning_rate": 5.693164795928837e-06,
"loss": 3.6678,
"step": 432000
},
{
"epoch": 8.87,
"learning_rate": 5.6418648554367685e-06,
"loss": 3.62,
"step": 432500
},
{
"epoch": 8.89,
"learning_rate": 5.590564914944699e-06,
"loss": 3.602,
"step": 433000
},
{
"epoch": 8.9,
"learning_rate": 5.539367574333614e-06,
"loss": 3.6465,
"step": 433500
},
{
"epoch": 8.91,
"learning_rate": 5.488067633841545e-06,
"loss": 3.6149,
"step": 434000
},
{
"epoch": 8.92,
"learning_rate": 5.436767693349476e-06,
"loss": 3.6184,
"step": 434500
},
{
"epoch": 8.93,
"learning_rate": 5.385467752857408e-06,
"loss": 3.6606,
"step": 435000
},
{
"epoch": 8.94,
"learning_rate": 5.334167812365338e-06,
"loss": 3.6463,
"step": 435500
},
{
"epoch": 8.95,
"learning_rate": 5.282970471754253e-06,
"loss": 3.6765,
"step": 436000
},
{
"epoch": 8.96,
"learning_rate": 5.231670531262184e-06,
"loss": 3.6274,
"step": 436500
},
{
"epoch": 8.97,
"learning_rate": 5.180370590770115e-06,
"loss": 3.608,
"step": 437000
},
{
"epoch": 8.98,
"learning_rate": 5.129070650278046e-06,
"loss": 3.6366,
"step": 437500
},
{
"epoch": 8.99,
"learning_rate": 5.077770709785977e-06,
"loss": 3.6295,
"step": 438000
},
{
"epoch": 9.0,
"learning_rate": 5.026573369174892e-06,
"loss": 3.61,
"step": 438500
},
{
"epoch": 9.01,
"learning_rate": 4.975273428682823e-06,
"loss": 3.623,
"step": 439000
},
{
"epoch": 9.02,
"learning_rate": 4.9239734881907545e-06,
"loss": 3.6587,
"step": 439500
},
{
"epoch": 9.03,
"learning_rate": 4.872673547698685e-06,
"loss": 3.6401,
"step": 440000
},
{
"epoch": 9.04,
"learning_rate": 4.821373607206616e-06,
"loss": 3.6167,
"step": 440500
},
{
"epoch": 9.05,
"learning_rate": 4.770073666714546e-06,
"loss": 3.6441,
"step": 441000
},
{
"epoch": 9.06,
"learning_rate": 4.718773726222478e-06,
"loss": 3.6181,
"step": 441500
},
{
"epoch": 9.07,
"learning_rate": 4.667473785730408e-06,
"loss": 3.644,
"step": 442000
},
{
"epoch": 9.08,
"learning_rate": 4.616379045000308e-06,
"loss": 3.6274,
"step": 442500
},
{
"epoch": 9.09,
"learning_rate": 4.5650791045082385e-06,
"loss": 3.6162,
"step": 443000
},
{
"epoch": 9.1,
"learning_rate": 4.5138817638971545e-06,
"loss": 3.6448,
"step": 443500
},
{
"epoch": 9.11,
"learning_rate": 4.462581823405085e-06,
"loss": 3.6333,
"step": 444000
},
{
"epoch": 9.12,
"learning_rate": 4.411281882913016e-06,
"loss": 3.638,
"step": 444500
},
{
"epoch": 9.13,
"learning_rate": 4.359981942420947e-06,
"loss": 3.603,
"step": 445000
},
{
"epoch": 9.14,
"learning_rate": 4.308682001928878e-06,
"loss": 3.5923,
"step": 445500
},
{
"epoch": 9.15,
"learning_rate": 4.257382061436809e-06,
"loss": 3.6077,
"step": 446000
},
{
"epoch": 9.16,
"learning_rate": 4.206184720825724e-06,
"loss": 3.6197,
"step": 446500
},
{
"epoch": 9.17,
"learning_rate": 4.154884780333655e-06,
"loss": 3.6563,
"step": 447000
},
{
"epoch": 9.18,
"learning_rate": 4.103584839841585e-06,
"loss": 3.5986,
"step": 447500
},
{
"epoch": 9.19,
"learning_rate": 4.052284899349517e-06,
"loss": 3.6303,
"step": 448000
},
{
"epoch": 9.2,
"learning_rate": 4.000984958857447e-06,
"loss": 3.611,
"step": 448500
},
{
"epoch": 9.21,
"learning_rate": 3.949685018365379e-06,
"loss": 3.5932,
"step": 449000
},
{
"epoch": 9.22,
"learning_rate": 3.89838507787331e-06,
"loss": 3.6529,
"step": 449500
},
{
"epoch": 9.23,
"learning_rate": 3.847085137381241e-06,
"loss": 3.6276,
"step": 450000
},
{
"epoch": 9.24,
"learning_rate": 3.795785196889172e-06,
"loss": 3.6366,
"step": 450500
},
{
"epoch": 9.25,
"learning_rate": 3.7445878562780865e-06,
"loss": 3.635,
"step": 451000
},
{
"epoch": 9.26,
"learning_rate": 3.693287915786018e-06,
"loss": 3.6248,
"step": 451500
},
{
"epoch": 9.28,
"learning_rate": 3.6419879752939485e-06,
"loss": 3.6092,
"step": 452000
},
{
"epoch": 9.29,
"learning_rate": 3.59068803480188e-06,
"loss": 3.6406,
"step": 452500
},
{
"epoch": 9.3,
"learning_rate": 3.539388094309811e-06,
"loss": 3.6074,
"step": 453000
},
{
"epoch": 9.31,
"learning_rate": 3.4880881538177415e-06,
"loss": 3.6201,
"step": 453500
},
{
"epoch": 9.32,
"learning_rate": 3.436890813206657e-06,
"loss": 3.5992,
"step": 454000
},
{
"epoch": 9.33,
"learning_rate": 3.3855908727145877e-06,
"loss": 3.6067,
"step": 454500
},
{
"epoch": 9.34,
"learning_rate": 3.334290932222519e-06,
"loss": 3.6521,
"step": 455000
},
{
"epoch": 9.35,
"learning_rate": 3.2829909917304496e-06,
"loss": 3.618,
"step": 455500
},
{
"epoch": 9.36,
"learning_rate": 3.2316910512383806e-06,
"loss": 3.5911,
"step": 456000
},
{
"epoch": 9.37,
"learning_rate": 3.1803911107463112e-06,
"loss": 3.6177,
"step": 456500
},
{
"epoch": 9.38,
"learning_rate": 3.1290911702542426e-06,
"loss": 3.6153,
"step": 457000
},
{
"epoch": 9.39,
"learning_rate": 3.0777912297621736e-06,
"loss": 3.6284,
"step": 457500
},
{
"epoch": 9.4,
"learning_rate": 3.0265938891510888e-06,
"loss": 3.6215,
"step": 458000
},
{
"epoch": 9.41,
"learning_rate": 2.9752939486590198e-06,
"loss": 3.6314,
"step": 458500
},
{
"epoch": 9.42,
"learning_rate": 2.9239940081669508e-06,
"loss": 3.6157,
"step": 459000
},
{
"epoch": 9.43,
"learning_rate": 2.8726940676748818e-06,
"loss": 3.6239,
"step": 459500
},
{
"epoch": 9.44,
"learning_rate": 2.8213941271828128e-06,
"loss": 3.6236,
"step": 460000
},
{
"epoch": 9.45,
"learning_rate": 2.7700941866907438e-06,
"loss": 3.6384,
"step": 460500
},
{
"epoch": 9.46,
"learning_rate": 2.7187942461986743e-06,
"loss": 3.6349,
"step": 461000
},
{
"epoch": 9.47,
"learning_rate": 2.6675969055875895e-06,
"loss": 3.6125,
"step": 461500
},
{
"epoch": 9.48,
"learning_rate": 2.6162969650955205e-06,
"loss": 3.622,
"step": 462000
},
{
"epoch": 9.49,
"learning_rate": 2.5649970246034515e-06,
"loss": 3.6161,
"step": 462500
},
{
"epoch": 9.5,
"learning_rate": 2.513697084111383e-06,
"loss": 3.6401,
"step": 463000
},
{
"epoch": 9.51,
"learning_rate": 2.4623971436193135e-06,
"loss": 3.637,
"step": 463500
},
{
"epoch": 9.52,
"learning_rate": 2.4110972031272445e-06,
"loss": 3.6228,
"step": 464000
},
{
"epoch": 9.53,
"learning_rate": 2.3600024623971438e-06,
"loss": 3.6225,
"step": 464500
},
{
"epoch": 9.54,
"learning_rate": 2.3087025219050748e-06,
"loss": 3.638,
"step": 465000
},
{
"epoch": 9.55,
"learning_rate": 2.2574025814130058e-06,
"loss": 3.6288,
"step": 465500
},
{
"epoch": 9.56,
"learning_rate": 2.2061026409209368e-06,
"loss": 3.6212,
"step": 466000
},
{
"epoch": 9.57,
"learning_rate": 2.1548027004288673e-06,
"loss": 3.6176,
"step": 466500
},
{
"epoch": 9.58,
"learning_rate": 2.1035027599367983e-06,
"loss": 3.6476,
"step": 467000
},
{
"epoch": 9.59,
"learning_rate": 2.0522028194447293e-06,
"loss": 3.633,
"step": 467500
},
{
"epoch": 9.6,
"learning_rate": 2.0009028789526607e-06,
"loss": 3.6141,
"step": 468000
},
{
"epoch": 9.61,
"learning_rate": 1.9496029384605917e-06,
"loss": 3.6138,
"step": 468500
},
{
"epoch": 9.62,
"learning_rate": 1.8983029979685225e-06,
"loss": 3.5803,
"step": 469000
},
{
"epoch": 9.63,
"learning_rate": 1.8471056573574377e-06,
"loss": 3.6189,
"step": 469500
},
{
"epoch": 9.64,
"learning_rate": 1.7958057168653685e-06,
"loss": 3.5963,
"step": 470000
},
{
"epoch": 9.65,
"learning_rate": 1.7445057763732995e-06,
"loss": 3.6222,
"step": 470500
},
{
"epoch": 9.66,
"learning_rate": 1.6933084357622146e-06,
"loss": 3.6233,
"step": 471000
},
{
"epoch": 9.68,
"learning_rate": 1.6420084952701454e-06,
"loss": 3.6486,
"step": 471500
},
{
"epoch": 9.69,
"learning_rate": 1.5907085547780764e-06,
"loss": 3.5778,
"step": 472000
},
{
"epoch": 9.7,
"learning_rate": 1.5394086142860074e-06,
"loss": 3.6264,
"step": 472500
},
{
"epoch": 9.71,
"learning_rate": 1.4882112736749225e-06,
"loss": 3.6235,
"step": 473000
},
{
"epoch": 9.72,
"learning_rate": 1.4369113331828535e-06,
"loss": 3.6006,
"step": 473500
},
{
"epoch": 9.73,
"learning_rate": 1.3856113926907845e-06,
"loss": 3.5967,
"step": 474000
},
{
"epoch": 9.74,
"learning_rate": 1.3343114521987155e-06,
"loss": 3.6154,
"step": 474500
},
{
"epoch": 9.75,
"learning_rate": 1.2830115117066465e-06,
"loss": 3.6179,
"step": 475000
},
{
"epoch": 9.76,
"learning_rate": 1.2317115712145775e-06,
"loss": 3.6382,
"step": 475500
},
{
"epoch": 9.77,
"learning_rate": 1.1804116307225085e-06,
"loss": 3.5936,
"step": 476000
},
{
"epoch": 9.78,
"learning_rate": 1.1292142901114235e-06,
"loss": 3.652,
"step": 476500
},
{
"epoch": 9.79,
"learning_rate": 1.0779143496193545e-06,
"loss": 3.5949,
"step": 477000
},
{
"epoch": 9.8,
"learning_rate": 1.0266144091272855e-06,
"loss": 3.6016,
"step": 477500
},
{
"epoch": 9.81,
"learning_rate": 9.753144686352164e-07,
"loss": 3.5945,
"step": 478000
},
{
"epoch": 9.82,
"learning_rate": 9.241171280241316e-07,
"loss": 3.608,
"step": 478500
},
{
"epoch": 9.83,
"learning_rate": 8.728171875320625e-07,
"loss": 3.6227,
"step": 479000
},
{
"epoch": 9.84,
"learning_rate": 8.215172470399935e-07,
"loss": 3.6483,
"step": 479500
},
{
"epoch": 9.85,
"learning_rate": 7.702173065479245e-07,
"loss": 3.6404,
"step": 480000
},
{
"epoch": 9.86,
"learning_rate": 7.189173660558555e-07,
"loss": 3.5914,
"step": 480500
},
{
"epoch": 9.87,
"learning_rate": 6.676174255637864e-07,
"loss": 3.622,
"step": 481000
},
{
"epoch": 9.88,
"learning_rate": 6.163174850717174e-07,
"loss": 3.5945,
"step": 481500
},
{
"epoch": 9.89,
"learning_rate": 5.650175445796483e-07,
"loss": 3.598,
"step": 482000
},
{
"epoch": 9.9,
"learning_rate": 5.137176040875793e-07,
"loss": 3.6166,
"step": 482500
},
{
"epoch": 9.91,
"learning_rate": 4.624176635955103e-07,
"loss": 3.5921,
"step": 483000
},
{
"epoch": 9.92,
"learning_rate": 4.111177231034412e-07,
"loss": 3.6098,
"step": 483500
},
{
"epoch": 9.93,
"learning_rate": 3.5992038249235634e-07,
"loss": 3.6096,
"step": 484000
},
{
"epoch": 9.94,
"learning_rate": 3.086204420002873e-07,
"loss": 3.6214,
"step": 484500
},
{
"epoch": 9.95,
"learning_rate": 2.5732050150821823e-07,
"loss": 3.6553,
"step": 485000
},
{
"epoch": 9.96,
"learning_rate": 2.0602056101614925e-07,
"loss": 3.5965,
"step": 485500
},
{
"epoch": 9.97,
"learning_rate": 1.547206205240802e-07,
"loss": 3.618,
"step": 486000
},
{
"epoch": 9.98,
"learning_rate": 1.0342068003201116e-07,
"loss": 3.6041,
"step": 486500
},
{
"epoch": 9.99,
"learning_rate": 5.2223339420926276e-08,
"loss": 3.6081,
"step": 487000
},
{
"epoch": 10.0,
"step": 487330,
"total_flos": 1.5431436054208512e+18,
"train_loss": 4.106789583444884,
"train_runtime": 101886.156,
"train_samples_per_second": 57.397,
"train_steps_per_second": 4.783
}
],
"logging_steps": 500,
"max_steps": 487330,
"num_train_epochs": 10,
"save_steps": 100000,
"total_flos": 1.5431436054208512e+18,
"trial_name": null,
"trial_params": null
}