intern2b / trainer_state.json
bokufa's picture
Upload 4 files
fb113e9 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.9664951741145456,
"eval_steps": 500,
"global_step": 185000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 5e-06,
"loss": 3.6198,
"step": 100
},
{
"epoch": 0.0,
"learning_rate": 1e-05,
"loss": 3.5109,
"step": 200
},
{
"epoch": 0.0,
"learning_rate": 1.5e-05,
"loss": 3.4948,
"step": 300
},
{
"epoch": 0.0,
"learning_rate": 2e-05,
"loss": 3.5552,
"step": 400
},
{
"epoch": 0.01,
"learning_rate": 2.5e-05,
"loss": 3.5109,
"step": 500
},
{
"epoch": 0.01,
"learning_rate": 3e-05,
"loss": 3.513,
"step": 600
},
{
"epoch": 0.01,
"learning_rate": 3.5000000000000004e-05,
"loss": 3.5135,
"step": 700
},
{
"epoch": 0.01,
"learning_rate": 4e-05,
"loss": 3.5552,
"step": 800
},
{
"epoch": 0.01,
"learning_rate": 4.4999999999999996e-05,
"loss": 3.5392,
"step": 900
},
{
"epoch": 0.01,
"learning_rate": 5e-05,
"loss": 3.5638,
"step": 1000
},
{
"epoch": 0.01,
"learning_rate": 5.5e-05,
"loss": 3.5414,
"step": 1100
},
{
"epoch": 0.01,
"learning_rate": 6e-05,
"loss": 3.5545,
"step": 1200
},
{
"epoch": 0.01,
"learning_rate": 6.500000000000001e-05,
"loss": 3.4934,
"step": 1300
},
{
"epoch": 0.01,
"learning_rate": 7.000000000000001e-05,
"loss": 3.5182,
"step": 1400
},
{
"epoch": 0.02,
"learning_rate": 7.5e-05,
"loss": 3.5703,
"step": 1500
},
{
"epoch": 0.02,
"learning_rate": 8e-05,
"loss": 3.5242,
"step": 1600
},
{
"epoch": 0.02,
"learning_rate": 8.5e-05,
"loss": 3.4979,
"step": 1700
},
{
"epoch": 0.02,
"learning_rate": 8.999999999999999e-05,
"loss": 3.502,
"step": 1800
},
{
"epoch": 0.02,
"learning_rate": 9.5e-05,
"loss": 3.5335,
"step": 1900
},
{
"epoch": 0.02,
"learning_rate": 0.0001,
"loss": 3.5493,
"step": 2000
},
{
"epoch": 0.02,
"learning_rate": 0.000105,
"loss": 3.5539,
"step": 2100
},
{
"epoch": 0.02,
"learning_rate": 0.00011,
"loss": 3.5152,
"step": 2200
},
{
"epoch": 0.02,
"learning_rate": 0.000115,
"loss": 3.5788,
"step": 2300
},
{
"epoch": 0.03,
"learning_rate": 0.00012,
"loss": 3.5338,
"step": 2400
},
{
"epoch": 0.03,
"learning_rate": 0.000125,
"loss": 3.5295,
"step": 2500
},
{
"epoch": 0.03,
"learning_rate": 0.00013000000000000002,
"loss": 3.5262,
"step": 2600
},
{
"epoch": 0.03,
"learning_rate": 0.000135,
"loss": 3.5277,
"step": 2700
},
{
"epoch": 0.03,
"learning_rate": 0.00014000000000000001,
"loss": 3.5143,
"step": 2800
},
{
"epoch": 0.03,
"learning_rate": 0.000145,
"loss": 3.5899,
"step": 2900
},
{
"epoch": 0.03,
"learning_rate": 0.00015,
"loss": 3.5366,
"step": 3000
},
{
"epoch": 0.03,
"learning_rate": 0.000155,
"loss": 3.4522,
"step": 3100
},
{
"epoch": 0.03,
"learning_rate": 0.00016,
"loss": 3.531,
"step": 3200
},
{
"epoch": 0.04,
"learning_rate": 0.000165,
"loss": 3.5378,
"step": 3300
},
{
"epoch": 0.04,
"learning_rate": 0.00017,
"loss": 3.5002,
"step": 3400
},
{
"epoch": 0.04,
"learning_rate": 0.000175,
"loss": 3.4772,
"step": 3500
},
{
"epoch": 0.04,
"learning_rate": 0.00017999999999999998,
"loss": 3.5223,
"step": 3600
},
{
"epoch": 0.04,
"learning_rate": 0.000185,
"loss": 3.5212,
"step": 3700
},
{
"epoch": 0.04,
"learning_rate": 0.00019,
"loss": 3.5397,
"step": 3800
},
{
"epoch": 0.04,
"learning_rate": 0.00019500000000000002,
"loss": 3.5471,
"step": 3900
},
{
"epoch": 0.04,
"learning_rate": 0.0002,
"loss": 3.5082,
"step": 4000
},
{
"epoch": 0.04,
"learning_rate": 0.000205,
"loss": 3.5092,
"step": 4100
},
{
"epoch": 0.04,
"learning_rate": 0.00021,
"loss": 3.4302,
"step": 4200
},
{
"epoch": 0.05,
"learning_rate": 0.000215,
"loss": 3.5378,
"step": 4300
},
{
"epoch": 0.05,
"learning_rate": 0.00022,
"loss": 3.4796,
"step": 4400
},
{
"epoch": 0.05,
"learning_rate": 0.00022500000000000002,
"loss": 3.5122,
"step": 4500
},
{
"epoch": 0.05,
"learning_rate": 0.00023,
"loss": 3.5079,
"step": 4600
},
{
"epoch": 0.05,
"learning_rate": 0.000235,
"loss": 3.484,
"step": 4700
},
{
"epoch": 0.05,
"learning_rate": 0.00024,
"loss": 3.5695,
"step": 4800
},
{
"epoch": 0.05,
"learning_rate": 0.000245,
"loss": 3.5049,
"step": 4900
},
{
"epoch": 0.05,
"learning_rate": 0.00025,
"loss": 3.4858,
"step": 5000
},
{
"epoch": 0.05,
"learning_rate": 0.000255,
"loss": 3.4795,
"step": 5100
},
{
"epoch": 0.06,
"learning_rate": 0.00026000000000000003,
"loss": 3.5687,
"step": 5200
},
{
"epoch": 0.06,
"learning_rate": 0.00026500000000000004,
"loss": 3.4746,
"step": 5300
},
{
"epoch": 0.06,
"learning_rate": 0.00027,
"loss": 3.5224,
"step": 5400
},
{
"epoch": 0.06,
"learning_rate": 0.000275,
"loss": 3.4772,
"step": 5500
},
{
"epoch": 0.06,
"learning_rate": 0.00028000000000000003,
"loss": 3.5545,
"step": 5600
},
{
"epoch": 0.06,
"learning_rate": 0.000285,
"loss": 3.5292,
"step": 5700
},
{
"epoch": 0.06,
"learning_rate": 0.00029,
"loss": 3.5156,
"step": 5800
},
{
"epoch": 0.06,
"learning_rate": 0.000295,
"loss": 3.5265,
"step": 5900
},
{
"epoch": 0.06,
"learning_rate": 0.0003,
"loss": 3.5554,
"step": 6000
},
{
"epoch": 0.06,
"learning_rate": 0.000305,
"loss": 3.5095,
"step": 6100
},
{
"epoch": 0.07,
"learning_rate": 0.00031,
"loss": 3.5129,
"step": 6200
},
{
"epoch": 0.07,
"learning_rate": 0.000315,
"loss": 3.546,
"step": 6300
},
{
"epoch": 0.07,
"learning_rate": 0.00032,
"loss": 3.5059,
"step": 6400
},
{
"epoch": 0.07,
"learning_rate": 0.00032500000000000004,
"loss": 3.4791,
"step": 6500
},
{
"epoch": 0.07,
"learning_rate": 0.00033,
"loss": 3.4911,
"step": 6600
},
{
"epoch": 0.07,
"learning_rate": 0.000335,
"loss": 3.5105,
"step": 6700
},
{
"epoch": 0.07,
"learning_rate": 0.00034,
"loss": 3.4258,
"step": 6800
},
{
"epoch": 0.07,
"learning_rate": 0.000345,
"loss": 3.5187,
"step": 6900
},
{
"epoch": 0.07,
"learning_rate": 0.00035,
"loss": 3.5052,
"step": 7000
},
{
"epoch": 0.08,
"learning_rate": 0.000355,
"loss": 3.4961,
"step": 7100
},
{
"epoch": 0.08,
"learning_rate": 0.00035999999999999997,
"loss": 3.5155,
"step": 7200
},
{
"epoch": 0.08,
"learning_rate": 0.000365,
"loss": 3.537,
"step": 7300
},
{
"epoch": 0.08,
"learning_rate": 0.00037,
"loss": 3.4744,
"step": 7400
},
{
"epoch": 0.08,
"learning_rate": 0.000375,
"loss": 3.4898,
"step": 7500
},
{
"epoch": 0.08,
"learning_rate": 0.00038,
"loss": 3.4827,
"step": 7600
},
{
"epoch": 0.08,
"learning_rate": 0.00038500000000000003,
"loss": 3.5292,
"step": 7700
},
{
"epoch": 0.08,
"learning_rate": 0.00039000000000000005,
"loss": 3.5189,
"step": 7800
},
{
"epoch": 0.08,
"learning_rate": 0.000395,
"loss": 3.4855,
"step": 7900
},
{
"epoch": 0.09,
"learning_rate": 0.0004,
"loss": 3.5686,
"step": 8000
},
{
"epoch": 0.09,
"learning_rate": 0.00040500000000000003,
"loss": 3.5008,
"step": 8100
},
{
"epoch": 0.09,
"learning_rate": 0.00041,
"loss": 3.5276,
"step": 8200
},
{
"epoch": 0.09,
"learning_rate": 0.000415,
"loss": 3.491,
"step": 8300
},
{
"epoch": 0.09,
"learning_rate": 0.00042,
"loss": 3.5629,
"step": 8400
},
{
"epoch": 0.09,
"learning_rate": 0.000425,
"loss": 3.5554,
"step": 8500
},
{
"epoch": 0.09,
"learning_rate": 0.00043,
"loss": 3.5069,
"step": 8600
},
{
"epoch": 0.09,
"learning_rate": 0.000435,
"loss": 3.5329,
"step": 8700
},
{
"epoch": 0.09,
"learning_rate": 0.00044,
"loss": 3.5153,
"step": 8800
},
{
"epoch": 0.09,
"learning_rate": 0.00044500000000000003,
"loss": 3.5705,
"step": 8900
},
{
"epoch": 0.1,
"learning_rate": 0.00045000000000000004,
"loss": 3.4461,
"step": 9000
},
{
"epoch": 0.1,
"learning_rate": 0.000455,
"loss": 3.5421,
"step": 9100
},
{
"epoch": 0.1,
"learning_rate": 0.00046,
"loss": 3.5423,
"step": 9200
},
{
"epoch": 0.1,
"learning_rate": 0.000465,
"loss": 3.5614,
"step": 9300
},
{
"epoch": 0.1,
"learning_rate": 0.00047,
"loss": 3.4959,
"step": 9400
},
{
"epoch": 0.1,
"learning_rate": 0.000475,
"loss": 3.5324,
"step": 9500
},
{
"epoch": 0.1,
"learning_rate": 0.00048,
"loss": 3.4574,
"step": 9600
},
{
"epoch": 0.1,
"learning_rate": 0.00048499999999999997,
"loss": 3.5164,
"step": 9700
},
{
"epoch": 0.1,
"learning_rate": 0.00049,
"loss": 3.5273,
"step": 9800
},
{
"epoch": 0.11,
"learning_rate": 0.000495,
"loss": 3.5234,
"step": 9900
},
{
"epoch": 0.11,
"learning_rate": 0.0005,
"loss": 3.5284,
"step": 10000
},
{
"epoch": 0.11,
"learning_rate": 0.0004999996112877375,
"loss": 3.5233,
"step": 10100
},
{
"epoch": 0.11,
"learning_rate": 0.0004999984451521587,
"loss": 3.4993,
"step": 10200
},
{
"epoch": 0.11,
"learning_rate": 0.0004999965015968901,
"loss": 3.5276,
"step": 10300
},
{
"epoch": 0.11,
"learning_rate": 0.0004999937806279752,
"loss": 3.5053,
"step": 10400
},
{
"epoch": 0.11,
"learning_rate": 0.0004999902822538758,
"loss": 3.486,
"step": 10500
},
{
"epoch": 0.11,
"learning_rate": 0.0004999860064854707,
"loss": 3.5093,
"step": 10600
},
{
"epoch": 0.11,
"learning_rate": 0.0004999809533360561,
"loss": 3.531,
"step": 10700
},
{
"epoch": 0.11,
"learning_rate": 0.0004999751228213458,
"loss": 3.5188,
"step": 10800
},
{
"epoch": 0.12,
"learning_rate": 0.000499968514959471,
"loss": 3.4891,
"step": 10900
},
{
"epoch": 0.12,
"learning_rate": 0.00049996112977098,
"loss": 3.5318,
"step": 11000
},
{
"epoch": 0.12,
"learning_rate": 0.0004999529672788389,
"loss": 3.538,
"step": 11100
},
{
"epoch": 0.12,
"learning_rate": 0.0004999440275084302,
"loss": 3.5255,
"step": 11200
},
{
"epoch": 0.12,
"learning_rate": 0.000499934310487554,
"loss": 3.563,
"step": 11300
},
{
"epoch": 0.12,
"learning_rate": 0.0004999238162464273,
"loss": 3.5304,
"step": 11400
},
{
"epoch": 0.12,
"learning_rate": 0.0004999125448176843,
"loss": 3.5348,
"step": 11500
},
{
"epoch": 0.12,
"learning_rate": 0.0004999004962363751,
"loss": 3.4948,
"step": 11600
},
{
"epoch": 0.12,
"learning_rate": 0.0004998876705399677,
"loss": 3.5647,
"step": 11700
},
{
"epoch": 0.13,
"learning_rate": 0.000499874067768346,
"loss": 3.5225,
"step": 11800
},
{
"epoch": 0.13,
"learning_rate": 0.0004998596879638106,
"loss": 3.4625,
"step": 11900
},
{
"epoch": 0.13,
"learning_rate": 0.000499844531171078,
"loss": 3.466,
"step": 12000
},
{
"epoch": 0.13,
"learning_rate": 0.0004998285974372816,
"loss": 3.5515,
"step": 12100
},
{
"epoch": 0.13,
"learning_rate": 0.0004998118868119704,
"loss": 3.6037,
"step": 12200
},
{
"epoch": 0.13,
"learning_rate": 0.0004997943993471093,
"loss": 3.5103,
"step": 12300
},
{
"epoch": 0.13,
"learning_rate": 0.0004997761350970793,
"loss": 3.4917,
"step": 12400
},
{
"epoch": 0.13,
"learning_rate": 0.0004997570941186764,
"loss": 3.5306,
"step": 12500
},
{
"epoch": 0.13,
"learning_rate": 0.0004997372764711125,
"loss": 3.4867,
"step": 12600
},
{
"epoch": 0.13,
"learning_rate": 0.0004997166822160145,
"loss": 3.449,
"step": 12700
},
{
"epoch": 0.14,
"learning_rate": 0.0004996953114174239,
"loss": 3.5224,
"step": 12800
},
{
"epoch": 0.14,
"learning_rate": 0.0004996731641417981,
"loss": 3.5221,
"step": 12900
},
{
"epoch": 0.14,
"learning_rate": 0.000499650240458008,
"loss": 3.5179,
"step": 13000
},
{
"epoch": 0.14,
"learning_rate": 0.0004996265404373395,
"loss": 3.485,
"step": 13100
},
{
"epoch": 0.14,
"learning_rate": 0.0004996020641534924,
"loss": 3.5237,
"step": 13200
},
{
"epoch": 0.14,
"learning_rate": 0.0004995768116825806,
"loss": 3.51,
"step": 13300
},
{
"epoch": 0.14,
"learning_rate": 0.0004995507831031317,
"loss": 3.5003,
"step": 13400
},
{
"epoch": 0.14,
"learning_rate": 0.0004995239784960868,
"loss": 3.5773,
"step": 13500
},
{
"epoch": 0.14,
"learning_rate": 0.0004994963979447999,
"loss": 3.5475,
"step": 13600
},
{
"epoch": 0.15,
"learning_rate": 0.0004994680415350384,
"loss": 3.5233,
"step": 13700
},
{
"epoch": 0.15,
"learning_rate": 0.000499438909354982,
"loss": 3.5136,
"step": 13800
},
{
"epoch": 0.15,
"learning_rate": 0.0004994090014952231,
"loss": 3.5589,
"step": 13900
},
{
"epoch": 0.15,
"learning_rate": 0.0004993783180487662,
"loss": 3.4992,
"step": 14000
},
{
"epoch": 0.15,
"learning_rate": 0.0004993468591110274,
"loss": 3.5307,
"step": 14100
},
{
"epoch": 0.15,
"learning_rate": 0.0004993146247798345,
"loss": 3.5067,
"step": 14200
},
{
"epoch": 0.15,
"learning_rate": 0.0004992816151554267,
"loss": 3.5534,
"step": 14300
},
{
"epoch": 0.15,
"learning_rate": 0.0004992478303404537,
"loss": 3.4614,
"step": 14400
},
{
"epoch": 0.15,
"learning_rate": 0.0004992132704399764,
"loss": 3.4507,
"step": 14500
},
{
"epoch": 0.16,
"learning_rate": 0.0004991779355614653,
"loss": 3.5012,
"step": 14600
},
{
"epoch": 0.16,
"learning_rate": 0.0004991418258148015,
"loss": 3.5332,
"step": 14700
},
{
"epoch": 0.16,
"learning_rate": 0.0004991049413122752,
"loss": 3.5121,
"step": 14800
},
{
"epoch": 0.16,
"learning_rate": 0.0004990672821685863,
"loss": 3.4945,
"step": 14900
},
{
"epoch": 0.16,
"learning_rate": 0.0004990288485008431,
"loss": 3.5502,
"step": 15000
},
{
"epoch": 0.16,
"learning_rate": 0.000498989640428563,
"loss": 3.5384,
"step": 15100
},
{
"epoch": 0.16,
"learning_rate": 0.000498949658073671,
"loss": 3.5698,
"step": 15200
},
{
"epoch": 0.16,
"learning_rate": 0.0004989089015605002,
"loss": 3.4467,
"step": 15300
},
{
"epoch": 0.16,
"learning_rate": 0.0004988673710157913,
"loss": 3.5147,
"step": 15400
},
{
"epoch": 0.16,
"learning_rate": 0.0004988250665686915,
"loss": 3.4889,
"step": 15500
},
{
"epoch": 0.17,
"learning_rate": 0.0004987819883507549,
"loss": 3.4569,
"step": 15600
},
{
"epoch": 0.17,
"learning_rate": 0.0004987381364959417,
"loss": 3.4654,
"step": 15700
},
{
"epoch": 0.17,
"learning_rate": 0.000498693511140618,
"loss": 3.5525,
"step": 15800
},
{
"epoch": 0.17,
"learning_rate": 0.0004986481124235554,
"loss": 3.4769,
"step": 15900
},
{
"epoch": 0.17,
"learning_rate": 0.0004986019404859298,
"loss": 3.4723,
"step": 16000
},
{
"epoch": 0.17,
"learning_rate": 0.0004985549954713222,
"loss": 3.5086,
"step": 16100
},
{
"epoch": 0.17,
"learning_rate": 0.0004985072775257175,
"loss": 3.537,
"step": 16200
},
{
"epoch": 0.17,
"learning_rate": 0.0004984587867975039,
"loss": 3.4993,
"step": 16300
},
{
"epoch": 0.17,
"learning_rate": 0.0004984095234374732,
"loss": 3.4976,
"step": 16400
},
{
"epoch": 0.18,
"learning_rate": 0.0004983594875988193,
"loss": 3.5206,
"step": 16500
},
{
"epoch": 0.18,
"learning_rate": 0.0004983086794371385,
"loss": 3.4844,
"step": 16600
},
{
"epoch": 0.18,
"learning_rate": 0.0004982570991104293,
"loss": 3.4813,
"step": 16700
},
{
"epoch": 0.18,
"learning_rate": 0.0004982047467790904,
"loss": 3.5309,
"step": 16800
},
{
"epoch": 0.18,
"learning_rate": 0.0004981516226059222,
"loss": 3.5115,
"step": 16900
},
{
"epoch": 0.18,
"learning_rate": 0.0004980977267561245,
"loss": 3.4775,
"step": 17000
},
{
"epoch": 0.18,
"learning_rate": 0.0004980430593972974,
"loss": 3.4671,
"step": 17100
},
{
"epoch": 0.18,
"learning_rate": 0.0004979876206994396,
"loss": 3.4842,
"step": 17200
},
{
"epoch": 0.18,
"learning_rate": 0.0004979314108349489,
"loss": 3.462,
"step": 17300
},
{
"epoch": 0.18,
"learning_rate": 0.000497874429978621,
"loss": 3.4734,
"step": 17400
},
{
"epoch": 0.19,
"learning_rate": 0.0004978166783076492,
"loss": 3.4945,
"step": 17500
},
{
"epoch": 0.19,
"learning_rate": 0.0004977581560016236,
"loss": 3.49,
"step": 17600
},
{
"epoch": 0.19,
"learning_rate": 0.0004976988632425309,
"loss": 3.5377,
"step": 17700
},
{
"epoch": 0.19,
"learning_rate": 0.0004976388002147538,
"loss": 3.5332,
"step": 17800
},
{
"epoch": 0.19,
"learning_rate": 0.0004975779671050702,
"loss": 3.4532,
"step": 17900
},
{
"epoch": 0.19,
"learning_rate": 0.0004975163641026527,
"loss": 3.5116,
"step": 18000
},
{
"epoch": 0.19,
"learning_rate": 0.000497453991399068,
"loss": 3.4816,
"step": 18100
},
{
"epoch": 0.19,
"learning_rate": 0.0004973908491882763,
"loss": 3.4983,
"step": 18200
},
{
"epoch": 0.19,
"learning_rate": 0.000497326937666631,
"loss": 3.4687,
"step": 18300
},
{
"epoch": 0.2,
"learning_rate": 0.0004972622570328775,
"loss": 3.5534,
"step": 18400
},
{
"epoch": 0.2,
"learning_rate": 0.0004971968074881528,
"loss": 3.4703,
"step": 18500
},
{
"epoch": 0.2,
"learning_rate": 0.0004971305892359858,
"loss": 3.5117,
"step": 18600
},
{
"epoch": 0.2,
"learning_rate": 0.0004970636024822949,
"loss": 3.4832,
"step": 18700
},
{
"epoch": 0.2,
"learning_rate": 0.0004969958474353888,
"loss": 3.554,
"step": 18800
},
{
"epoch": 0.2,
"learning_rate": 0.0004969273243059651,
"loss": 3.4567,
"step": 18900
},
{
"epoch": 0.2,
"learning_rate": 0.0004968580333071101,
"loss": 3.5089,
"step": 19000
},
{
"epoch": 0.2,
"learning_rate": 0.0004967879746542981,
"loss": 3.5264,
"step": 19100
},
{
"epoch": 0.2,
"learning_rate": 0.00049671714856539,
"loss": 3.546,
"step": 19200
},
{
"epoch": 0.21,
"learning_rate": 0.0004966455552606338,
"loss": 3.5111,
"step": 19300
},
{
"epoch": 0.21,
"learning_rate": 0.0004965731949626629,
"loss": 3.4773,
"step": 19400
},
{
"epoch": 0.21,
"learning_rate": 0.0004965000678964962,
"loss": 3.4718,
"step": 19500
},
{
"epoch": 0.21,
"learning_rate": 0.0004964261742895367,
"loss": 3.4829,
"step": 19600
},
{
"epoch": 0.21,
"learning_rate": 0.0004963515143715711,
"loss": 3.5101,
"step": 19700
},
{
"epoch": 0.21,
"learning_rate": 0.0004962760883747694,
"loss": 3.4787,
"step": 19800
},
{
"epoch": 0.21,
"learning_rate": 0.0004961998965336835,
"loss": 3.5048,
"step": 19900
},
{
"epoch": 0.21,
"learning_rate": 0.0004961229390852471,
"loss": 3.5439,
"step": 20000
},
{
"epoch": 0.21,
"learning_rate": 0.0004960452162687747,
"loss": 3.4837,
"step": 20100
},
{
"epoch": 0.21,
"learning_rate": 0.0004959667283259607,
"loss": 3.4976,
"step": 20200
},
{
"epoch": 0.22,
"learning_rate": 0.0004958874755008788,
"loss": 3.4594,
"step": 20300
},
{
"epoch": 0.22,
"learning_rate": 0.0004958074580399816,
"loss": 3.4317,
"step": 20400
},
{
"epoch": 0.22,
"learning_rate": 0.0004957266761920991,
"loss": 3.4615,
"step": 20500
},
{
"epoch": 0.22,
"learning_rate": 0.0004956451302084385,
"loss": 3.5461,
"step": 20600
},
{
"epoch": 0.22,
"learning_rate": 0.0004955628203425832,
"loss": 3.5074,
"step": 20700
},
{
"epoch": 0.22,
"learning_rate": 0.000495479746850492,
"loss": 3.5118,
"step": 20800
},
{
"epoch": 0.22,
"learning_rate": 0.0004953959099904985,
"loss": 3.4543,
"step": 20900
},
{
"epoch": 0.22,
"learning_rate": 0.0004953113100233098,
"loss": 3.5353,
"step": 21000
},
{
"epoch": 0.22,
"learning_rate": 0.0004952259472120064,
"loss": 3.541,
"step": 21100
},
{
"epoch": 0.23,
"learning_rate": 0.0004951398218220408,
"loss": 3.5633,
"step": 21200
},
{
"epoch": 0.23,
"learning_rate": 0.0004950529341212371,
"loss": 3.4821,
"step": 21300
},
{
"epoch": 0.23,
"learning_rate": 0.0004949652843797897,
"loss": 3.5209,
"step": 21400
},
{
"epoch": 0.23,
"learning_rate": 0.0004948768728702628,
"loss": 3.5295,
"step": 21500
},
{
"epoch": 0.23,
"learning_rate": 0.0004947876998675897,
"loss": 3.4903,
"step": 21600
},
{
"epoch": 0.23,
"learning_rate": 0.0004946977656490713,
"loss": 3.5398,
"step": 21700
},
{
"epoch": 0.23,
"learning_rate": 0.0004946070704943761,
"loss": 3.5016,
"step": 21800
},
{
"epoch": 0.23,
"learning_rate": 0.0004945156146855383,
"loss": 3.4882,
"step": 21900
},
{
"epoch": 0.23,
"learning_rate": 0.0004944233985069581,
"loss": 3.4632,
"step": 22000
},
{
"epoch": 0.23,
"learning_rate": 0.0004943304222454001,
"loss": 3.4802,
"step": 22100
},
{
"epoch": 0.24,
"learning_rate": 0.0004942366861899921,
"loss": 3.4686,
"step": 22200
},
{
"epoch": 0.24,
"learning_rate": 0.000494142190632225,
"loss": 3.4896,
"step": 22300
},
{
"epoch": 0.24,
"learning_rate": 0.0004940469358659516,
"loss": 3.4763,
"step": 22400
},
{
"epoch": 0.24,
"learning_rate": 0.0004939509221873854,
"loss": 3.5467,
"step": 22500
},
{
"epoch": 0.24,
"learning_rate": 0.0004938541498951,
"loss": 3.5006,
"step": 22600
},
{
"epoch": 0.24,
"learning_rate": 0.0004937566192900279,
"loss": 3.4922,
"step": 22700
},
{
"epoch": 0.24,
"learning_rate": 0.00049365833067546,
"loss": 3.4747,
"step": 22800
},
{
"epoch": 0.24,
"learning_rate": 0.000493559284357044,
"loss": 3.4843,
"step": 22900
},
{
"epoch": 0.24,
"learning_rate": 0.0004934594806427843,
"loss": 3.5593,
"step": 23000
},
{
"epoch": 0.25,
"learning_rate": 0.00049335891984304,
"loss": 3.5082,
"step": 23100
},
{
"epoch": 0.25,
"learning_rate": 0.0004932576022705252,
"loss": 3.4836,
"step": 23200
},
{
"epoch": 0.25,
"learning_rate": 0.0004931555282403066,
"loss": 3.472,
"step": 23300
},
{
"epoch": 0.25,
"learning_rate": 0.0004930526980698039,
"loss": 3.4998,
"step": 23400
},
{
"epoch": 0.25,
"learning_rate": 0.0004929491120787878,
"loss": 3.4831,
"step": 23500
},
{
"epoch": 0.25,
"learning_rate": 0.0004928447705893794,
"loss": 3.4745,
"step": 23600
},
{
"epoch": 0.25,
"learning_rate": 0.0004927396739260493,
"loss": 3.469,
"step": 23700
},
{
"epoch": 0.25,
"learning_rate": 0.0004926338224156163,
"loss": 3.5138,
"step": 23800
},
{
"epoch": 0.25,
"learning_rate": 0.0004925272163872468,
"loss": 3.4742,
"step": 23900
},
{
"epoch": 0.26,
"learning_rate": 0.0004924198561724532,
"loss": 3.4287,
"step": 24000
},
{
"epoch": 0.26,
"learning_rate": 0.0004923117421050934,
"loss": 3.4663,
"step": 24100
},
{
"epoch": 0.26,
"learning_rate": 0.0004922028745213696,
"loss": 3.4808,
"step": 24200
},
{
"epoch": 0.26,
"learning_rate": 0.0004920932537598269,
"loss": 3.5508,
"step": 24300
},
{
"epoch": 0.26,
"learning_rate": 0.0004919828801613532,
"loss": 3.5053,
"step": 24400
},
{
"epoch": 0.26,
"learning_rate": 0.0004918717540691766,
"loss": 3.5131,
"step": 24500
},
{
"epoch": 0.26,
"learning_rate": 0.000491759875828866,
"loss": 3.4786,
"step": 24600
},
{
"epoch": 0.26,
"learning_rate": 0.0004916472457883287,
"loss": 3.5234,
"step": 24700
},
{
"epoch": 0.26,
"learning_rate": 0.0004915338642978103,
"loss": 3.5251,
"step": 24800
},
{
"epoch": 0.26,
"learning_rate": 0.0004914197317098931,
"loss": 3.4751,
"step": 24900
},
{
"epoch": 0.27,
"learning_rate": 0.0004913048483794948,
"loss": 3.5188,
"step": 25000
},
{
"epoch": 0.27,
"learning_rate": 0.000491189214663868,
"loss": 3.4953,
"step": 25100
},
{
"epoch": 0.27,
"learning_rate": 0.0004910728309225985,
"loss": 3.4985,
"step": 25200
},
{
"epoch": 0.27,
"learning_rate": 0.0004909556975176047,
"loss": 3.4845,
"step": 25300
},
{
"epoch": 0.27,
"learning_rate": 0.0004908378148131362,
"loss": 3.6007,
"step": 25400
},
{
"epoch": 0.27,
"learning_rate": 0.0004907191831757724,
"loss": 3.481,
"step": 25500
},
{
"epoch": 0.27,
"learning_rate": 0.0004905998029744222,
"loss": 3.5303,
"step": 25600
},
{
"epoch": 0.27,
"learning_rate": 0.0004904796745803217,
"loss": 3.5077,
"step": 25700
},
{
"epoch": 0.27,
"learning_rate": 0.0004903587983670339,
"loss": 3.481,
"step": 25800
},
{
"epoch": 0.28,
"learning_rate": 0.0004902371747104476,
"loss": 3.451,
"step": 25900
},
{
"epoch": 0.28,
"learning_rate": 0.0004901148039887756,
"loss": 3.4953,
"step": 26000
},
{
"epoch": 0.28,
"learning_rate": 0.0004899916865825537,
"loss": 3.458,
"step": 26100
},
{
"epoch": 0.28,
"learning_rate": 0.00048986782287464,
"loss": 3.4587,
"step": 26200
},
{
"epoch": 0.28,
"learning_rate": 0.0004897432132502132,
"loss": 3.491,
"step": 26300
},
{
"epoch": 0.28,
"learning_rate": 0.0004896178580967717,
"loss": 3.4245,
"step": 26400
},
{
"epoch": 0.28,
"learning_rate": 0.0004894917578041322,
"loss": 3.5217,
"step": 26500
},
{
"epoch": 0.28,
"learning_rate": 0.0004893649127644283,
"loss": 3.5185,
"step": 26600
},
{
"epoch": 0.28,
"learning_rate": 0.00048923732337211,
"loss": 3.505,
"step": 26700
},
{
"epoch": 0.28,
"learning_rate": 0.0004891089900239418,
"loss": 3.4743,
"step": 26800
},
{
"epoch": 0.29,
"learning_rate": 0.0004889799131190015,
"loss": 3.4964,
"step": 26900
},
{
"epoch": 0.29,
"learning_rate": 0.0004888500930586793,
"loss": 3.5558,
"step": 27000
},
{
"epoch": 0.29,
"learning_rate": 0.0004887195302466767,
"loss": 3.442,
"step": 27100
},
{
"epoch": 0.29,
"learning_rate": 0.0004885882250890044,
"loss": 3.5129,
"step": 27200
},
{
"epoch": 0.29,
"learning_rate": 0.0004884561779939817,
"loss": 3.5072,
"step": 27300
},
{
"epoch": 0.29,
"learning_rate": 0.0004883233893722354,
"loss": 3.4971,
"step": 27400
},
{
"epoch": 0.29,
"learning_rate": 0.000488189859636698,
"loss": 3.47,
"step": 27500
},
{
"epoch": 0.29,
"learning_rate": 0.0004880555892026066,
"loss": 3.5349,
"step": 27600
},
{
"epoch": 0.29,
"learning_rate": 0.0004879205784875017,
"loss": 3.4856,
"step": 27700
},
{
"epoch": 0.3,
"learning_rate": 0.0004877848279112259,
"loss": 3.5072,
"step": 27800
},
{
"epoch": 0.3,
"learning_rate": 0.00048764833789592254,
"loss": 3.4752,
"step": 27900
},
{
"epoch": 0.3,
"learning_rate": 0.0004875111088660343,
"loss": 3.4654,
"step": 28000
},
{
"epoch": 0.3,
"learning_rate": 0.000487373141248302,
"loss": 3.5321,
"step": 28100
},
{
"epoch": 0.3,
"learning_rate": 0.0004872344354717634,
"loss": 3.4574,
"step": 28200
},
{
"epoch": 0.3,
"learning_rate": 0.0004870949919677515,
"loss": 3.4621,
"step": 28300
},
{
"epoch": 0.3,
"learning_rate": 0.00048695481116989357,
"loss": 3.532,
"step": 28400
},
{
"epoch": 0.3,
"learning_rate": 0.00048681389351410955,
"loss": 3.5071,
"step": 28500
},
{
"epoch": 0.3,
"learning_rate": 0.0004866722394386107,
"loss": 3.5263,
"step": 28600
},
{
"epoch": 0.31,
"learning_rate": 0.00048652984938389853,
"loss": 3.5198,
"step": 28700
},
{
"epoch": 0.31,
"learning_rate": 0.00048638672379276314,
"loss": 3.4399,
"step": 28800
},
{
"epoch": 0.31,
"learning_rate": 0.0004862428631102819,
"loss": 3.4343,
"step": 28900
},
{
"epoch": 0.31,
"learning_rate": 0.000486098267783818,
"loss": 3.4477,
"step": 29000
},
{
"epoch": 0.31,
"learning_rate": 0.00048595293826301936,
"loss": 3.4616,
"step": 29100
},
{
"epoch": 0.31,
"learning_rate": 0.0004858068749998169,
"loss": 3.498,
"step": 29200
},
{
"epoch": 0.31,
"learning_rate": 0.0004856600784484232,
"loss": 3.5423,
"step": 29300
},
{
"epoch": 0.31,
"learning_rate": 0.00048551254906533135,
"loss": 3.5488,
"step": 29400
},
{
"epoch": 0.31,
"learning_rate": 0.00048536428730931307,
"loss": 3.4823,
"step": 29500
},
{
"epoch": 0.31,
"learning_rate": 0.00048521529364141776,
"loss": 3.4666,
"step": 29600
},
{
"epoch": 0.32,
"learning_rate": 0.0004850655685249706,
"loss": 3.4553,
"step": 29700
},
{
"epoch": 0.32,
"learning_rate": 0.0004849151124255716,
"loss": 3.4764,
"step": 29800
},
{
"epoch": 0.32,
"learning_rate": 0.0004847639258110939,
"loss": 3.504,
"step": 29900
},
{
"epoch": 0.32,
"learning_rate": 0.000484612009151682,
"loss": 3.5053,
"step": 30000
},
{
"epoch": 0.32,
"learning_rate": 0.0004844593629197511,
"loss": 3.4995,
"step": 30100
},
{
"epoch": 0.32,
"learning_rate": 0.00048430598758998465,
"loss": 3.5613,
"step": 30200
},
{
"epoch": 0.32,
"learning_rate": 0.00048415188363933384,
"loss": 3.4437,
"step": 30300
},
{
"epoch": 0.32,
"learning_rate": 0.0004839970515470153,
"loss": 3.5437,
"step": 30400
},
{
"epoch": 0.32,
"learning_rate": 0.0004838414917945101,
"loss": 3.4199,
"step": 30500
},
{
"epoch": 0.33,
"learning_rate": 0.00048368520486556215,
"loss": 3.5321,
"step": 30600
},
{
"epoch": 0.33,
"learning_rate": 0.00048352819124617666,
"loss": 3.5139,
"step": 30700
},
{
"epoch": 0.33,
"learning_rate": 0.00048337045142461845,
"loss": 3.5193,
"step": 30800
},
{
"epoch": 0.33,
"learning_rate": 0.0004832119858914108,
"loss": 3.4716,
"step": 30900
},
{
"epoch": 0.33,
"learning_rate": 0.00048305279513933375,
"loss": 3.4594,
"step": 31000
},
{
"epoch": 0.33,
"learning_rate": 0.0004828928796634224,
"loss": 3.5184,
"step": 31100
},
{
"epoch": 0.33,
"learning_rate": 0.0004827322399609656,
"loss": 3.4635,
"step": 31200
},
{
"epoch": 0.33,
"learning_rate": 0.0004825708765315044,
"loss": 3.5565,
"step": 31300
},
{
"epoch": 0.33,
"learning_rate": 0.00048240878987683037,
"loss": 3.4901,
"step": 31400
},
{
"epoch": 0.33,
"learning_rate": 0.0004822459805009839,
"loss": 3.5068,
"step": 31500
},
{
"epoch": 0.34,
"learning_rate": 0.0004820824489102531,
"loss": 3.4434,
"step": 31600
},
{
"epoch": 0.34,
"learning_rate": 0.00048191819561317184,
"loss": 3.5135,
"step": 31700
},
{
"epoch": 0.34,
"learning_rate": 0.0004817532211205184,
"loss": 3.5205,
"step": 31800
},
{
"epoch": 0.34,
"learning_rate": 0.00048158752594531346,
"loss": 3.5106,
"step": 31900
},
{
"epoch": 0.34,
"learning_rate": 0.0004814211106028191,
"loss": 3.5037,
"step": 32000
},
{
"epoch": 0.34,
"learning_rate": 0.00048125397561053676,
"loss": 3.5225,
"step": 32100
},
{
"epoch": 0.34,
"learning_rate": 0.0004810861214882058,
"loss": 3.5481,
"step": 32200
},
{
"epoch": 0.34,
"learning_rate": 0.0004809175487578019,
"loss": 3.5227,
"step": 32300
},
{
"epoch": 0.34,
"learning_rate": 0.0004807482579435353,
"loss": 3.5191,
"step": 32400
},
{
"epoch": 0.35,
"learning_rate": 0.0004805782495718494,
"loss": 3.4667,
"step": 32500
},
{
"epoch": 0.35,
"learning_rate": 0.0004804075241714189,
"loss": 3.5535,
"step": 32600
},
{
"epoch": 0.35,
"learning_rate": 0.0004802360822731482,
"loss": 3.5245,
"step": 32700
},
{
"epoch": 0.35,
"learning_rate": 0.00048006392441016986,
"loss": 3.4818,
"step": 32800
},
{
"epoch": 0.35,
"learning_rate": 0.0004798910511178429,
"loss": 3.5417,
"step": 32900
},
{
"epoch": 0.35,
"learning_rate": 0.00047971746293375107,
"loss": 3.4636,
"step": 33000
},
{
"epoch": 0.35,
"learning_rate": 0.0004795431603977011,
"loss": 3.4918,
"step": 33100
},
{
"epoch": 0.35,
"learning_rate": 0.00047936814405172143,
"loss": 3.4898,
"step": 33200
},
{
"epoch": 0.35,
"learning_rate": 0.0004791924144400599,
"loss": 3.5102,
"step": 33300
},
{
"epoch": 0.36,
"learning_rate": 0.0004790159721091827,
"loss": 3.5459,
"step": 33400
},
{
"epoch": 0.36,
"learning_rate": 0.00047883881760777205,
"loss": 3.4848,
"step": 33500
},
{
"epoch": 0.36,
"learning_rate": 0.0004786609514867251,
"loss": 3.534,
"step": 33600
},
{
"epoch": 0.36,
"learning_rate": 0.00047848237429915175,
"loss": 3.5235,
"step": 33700
},
{
"epoch": 0.36,
"learning_rate": 0.00047830308660037305,
"loss": 3.5164,
"step": 33800
},
{
"epoch": 0.36,
"learning_rate": 0.0004781230889479198,
"loss": 3.5117,
"step": 33900
},
{
"epoch": 0.36,
"learning_rate": 0.0004779423819015302,
"loss": 3.5032,
"step": 34000
},
{
"epoch": 0.36,
"learning_rate": 0.0004777609660231486,
"loss": 3.4958,
"step": 34100
},
{
"epoch": 0.36,
"learning_rate": 0.00047757884187692374,
"loss": 3.5169,
"step": 34200
},
{
"epoch": 0.36,
"learning_rate": 0.0004773960100292066,
"loss": 3.5191,
"step": 34300
},
{
"epoch": 0.37,
"learning_rate": 0.0004772124710485492,
"loss": 3.4945,
"step": 34400
},
{
"epoch": 0.37,
"learning_rate": 0.0004770282255057022,
"loss": 3.5127,
"step": 34500
},
{
"epoch": 0.37,
"learning_rate": 0.0004768432739736137,
"loss": 3.4561,
"step": 34600
},
{
"epoch": 0.37,
"learning_rate": 0.00047665761702742705,
"loss": 3.558,
"step": 34700
},
{
"epoch": 0.37,
"learning_rate": 0.0004764712552444794,
"loss": 3.5893,
"step": 34800
},
{
"epoch": 0.37,
"learning_rate": 0.0004762841892042995,
"loss": 3.5003,
"step": 34900
},
{
"epoch": 0.37,
"learning_rate": 0.00047609641948860636,
"loss": 3.4897,
"step": 35000
},
{
"epoch": 0.37,
"learning_rate": 0.0004759079466813072,
"loss": 3.4982,
"step": 35100
},
{
"epoch": 0.37,
"learning_rate": 0.00047571877136849537,
"loss": 3.524,
"step": 35200
},
{
"epoch": 0.38,
"learning_rate": 0.000475528894138449,
"loss": 3.4562,
"step": 35300
},
{
"epoch": 0.38,
"learning_rate": 0.0004753383155816291,
"loss": 3.5271,
"step": 35400
},
{
"epoch": 0.38,
"learning_rate": 0.00047514703629067726,
"loss": 3.4422,
"step": 35500
},
{
"epoch": 0.38,
"learning_rate": 0.0004749550568604145,
"loss": 3.4279,
"step": 35600
},
{
"epoch": 0.38,
"learning_rate": 0.0004747623778878387,
"loss": 3.4677,
"step": 35700
},
{
"epoch": 0.38,
"learning_rate": 0.0004745689999721234,
"loss": 3.4405,
"step": 35800
},
{
"epoch": 0.38,
"learning_rate": 0.00047437492371461566,
"loss": 3.4902,
"step": 35900
},
{
"epoch": 0.38,
"learning_rate": 0.0004741801497188339,
"loss": 3.4773,
"step": 36000
},
{
"epoch": 0.38,
"learning_rate": 0.0004739846785904664,
"loss": 3.5532,
"step": 36100
},
{
"epoch": 0.38,
"learning_rate": 0.00047378851093736945,
"loss": 3.4676,
"step": 36200
},
{
"epoch": 0.39,
"learning_rate": 0.0004735916473695653,
"loss": 3.4511,
"step": 36300
},
{
"epoch": 0.39,
"learning_rate": 0.00047339408849924,
"loss": 3.5473,
"step": 36400
},
{
"epoch": 0.39,
"learning_rate": 0.0004731958349407421,
"loss": 3.5044,
"step": 36500
},
{
"epoch": 0.39,
"learning_rate": 0.0004729968873105804,
"loss": 3.5104,
"step": 36600
},
{
"epoch": 0.39,
"learning_rate": 0.0004727972462274219,
"loss": 3.4658,
"step": 36700
},
{
"epoch": 0.39,
"learning_rate": 0.00047259691231209006,
"loss": 3.4728,
"step": 36800
},
{
"epoch": 0.39,
"learning_rate": 0.0004723958861875629,
"loss": 3.4425,
"step": 36900
},
{
"epoch": 0.39,
"learning_rate": 0.000472194168478971,
"loss": 3.4615,
"step": 37000
},
{
"epoch": 0.39,
"learning_rate": 0.00047199175981359556,
"loss": 3.5654,
"step": 37100
},
{
"epoch": 0.4,
"learning_rate": 0.00047178866082086635,
"loss": 3.5196,
"step": 37200
},
{
"epoch": 0.4,
"learning_rate": 0.0004715848721323599,
"loss": 3.4618,
"step": 37300
},
{
"epoch": 0.4,
"learning_rate": 0.00047138039438179765,
"loss": 3.4837,
"step": 37400
},
{
"epoch": 0.4,
"learning_rate": 0.00047117522820504357,
"loss": 3.5105,
"step": 37500
},
{
"epoch": 0.4,
"learning_rate": 0.00047096937424010246,
"loss": 3.5053,
"step": 37600
},
{
"epoch": 0.4,
"learning_rate": 0.0004707628331271182,
"loss": 3.5327,
"step": 37700
},
{
"epoch": 0.4,
"learning_rate": 0.0004705556055083711,
"loss": 3.4411,
"step": 37800
},
{
"epoch": 0.4,
"learning_rate": 0.0004703476920282766,
"loss": 3.5237,
"step": 37900
},
{
"epoch": 0.4,
"learning_rate": 0.0004701390933333829,
"loss": 3.4413,
"step": 38000
},
{
"epoch": 0.4,
"learning_rate": 0.0004699298100723688,
"loss": 3.5936,
"step": 38100
},
{
"epoch": 0.41,
"learning_rate": 0.0004697198428960422,
"loss": 3.5068,
"step": 38200
},
{
"epoch": 0.41,
"learning_rate": 0.00046950919245733756,
"loss": 3.5562,
"step": 38300
},
{
"epoch": 0.41,
"learning_rate": 0.0004692978594113142,
"loss": 3.471,
"step": 38400
},
{
"epoch": 0.41,
"learning_rate": 0.000469085844415154,
"loss": 3.4787,
"step": 38500
},
{
"epoch": 0.41,
"learning_rate": 0.0004688731481281597,
"loss": 3.4779,
"step": 38600
},
{
"epoch": 0.41,
"learning_rate": 0.00046865977121175257,
"loss": 3.4712,
"step": 38700
},
{
"epoch": 0.41,
"learning_rate": 0.00046844571432947025,
"loss": 3.4615,
"step": 38800
},
{
"epoch": 0.41,
"learning_rate": 0.00046823097814696515,
"loss": 3.4683,
"step": 38900
},
{
"epoch": 0.41,
"learning_rate": 0.0004680155633320019,
"loss": 3.482,
"step": 39000
},
{
"epoch": 0.42,
"learning_rate": 0.0004677994705544555,
"loss": 3.4946,
"step": 39100
},
{
"epoch": 0.42,
"learning_rate": 0.00046758270048630933,
"loss": 3.4712,
"step": 39200
},
{
"epoch": 0.42,
"learning_rate": 0.00046736525380165284,
"loss": 3.4971,
"step": 39300
},
{
"epoch": 0.42,
"learning_rate": 0.0004671471311766796,
"loss": 3.4925,
"step": 39400
},
{
"epoch": 0.42,
"learning_rate": 0.0004669283332896851,
"loss": 3.4788,
"step": 39500
},
{
"epoch": 0.42,
"learning_rate": 0.0004667088608210647,
"loss": 3.4891,
"step": 39600
},
{
"epoch": 0.42,
"learning_rate": 0.00046648871445331144,
"loss": 3.5367,
"step": 39700
},
{
"epoch": 0.42,
"learning_rate": 0.0004662678948710142,
"loss": 3.4932,
"step": 39800
},
{
"epoch": 0.42,
"learning_rate": 0.0004660464027608552,
"loss": 3.5584,
"step": 39900
},
{
"epoch": 0.43,
"learning_rate": 0.00046582423881160796,
"loss": 3.4568,
"step": 40000
},
{
"epoch": 0.43,
"learning_rate": 0.00046560140371413526,
"loss": 3.4778,
"step": 40100
},
{
"epoch": 0.43,
"learning_rate": 0.0004653778981613871,
"loss": 3.4778,
"step": 40200
},
{
"epoch": 0.43,
"learning_rate": 0.0004651537228483983,
"loss": 3.4521,
"step": 40300
},
{
"epoch": 0.43,
"learning_rate": 0.0004649288784722862,
"loss": 3.4848,
"step": 40400
},
{
"epoch": 0.43,
"learning_rate": 0.00046470336573224913,
"loss": 3.4623,
"step": 40500
},
{
"epoch": 0.43,
"learning_rate": 0.0004644771853295635,
"loss": 3.5117,
"step": 40600
},
{
"epoch": 0.43,
"learning_rate": 0.00046425033796758207,
"loss": 3.4535,
"step": 40700
},
{
"epoch": 0.43,
"learning_rate": 0.0004640228243517318,
"loss": 3.4737,
"step": 40800
},
{
"epoch": 0.43,
"learning_rate": 0.0004637946451895113,
"loss": 3.4991,
"step": 40900
},
{
"epoch": 0.44,
"learning_rate": 0.0004635658011904887,
"loss": 3.517,
"step": 41000
},
{
"epoch": 0.44,
"learning_rate": 0.00046333629306629997,
"loss": 3.4767,
"step": 41100
},
{
"epoch": 0.44,
"learning_rate": 0.00046310612153064603,
"loss": 3.4917,
"step": 41200
},
{
"epoch": 0.44,
"learning_rate": 0.0004628752872992909,
"loss": 3.4907,
"step": 41300
},
{
"epoch": 0.44,
"learning_rate": 0.0004626437910900591,
"loss": 3.5002,
"step": 41400
},
{
"epoch": 0.44,
"learning_rate": 0.00046241163362283424,
"loss": 3.4782,
"step": 41500
},
{
"epoch": 0.44,
"learning_rate": 0.0004621788156195559,
"loss": 3.48,
"step": 41600
},
{
"epoch": 0.44,
"learning_rate": 0.00046194533780421766,
"loss": 3.5048,
"step": 41700
},
{
"epoch": 0.44,
"learning_rate": 0.00046171120090286516,
"loss": 3.4651,
"step": 41800
},
{
"epoch": 0.45,
"learning_rate": 0.0004614764056435934,
"loss": 3.5113,
"step": 41900
},
{
"epoch": 0.45,
"learning_rate": 0.00046124095275654485,
"loss": 3.4631,
"step": 42000
},
{
"epoch": 0.45,
"learning_rate": 0.00046100484297390676,
"loss": 3.4961,
"step": 42100
},
{
"epoch": 0.45,
"learning_rate": 0.00046076807702990943,
"loss": 3.4688,
"step": 42200
},
{
"epoch": 0.45,
"learning_rate": 0.00046053065566082344,
"loss": 3.4649,
"step": 42300
},
{
"epoch": 0.45,
"learning_rate": 0.0004602925796049574,
"loss": 3.5527,
"step": 42400
},
{
"epoch": 0.45,
"learning_rate": 0.00046005384960265617,
"loss": 3.5142,
"step": 42500
},
{
"epoch": 0.45,
"learning_rate": 0.0004598144663962979,
"loss": 3.4609,
"step": 42600
},
{
"epoch": 0.45,
"learning_rate": 0.000459574430730292,
"loss": 3.5237,
"step": 42700
},
{
"epoch": 0.45,
"learning_rate": 0.0004593337433510771,
"loss": 3.4829,
"step": 42800
},
{
"epoch": 0.46,
"learning_rate": 0.0004590924050071182,
"loss": 3.5192,
"step": 42900
},
{
"epoch": 0.46,
"learning_rate": 0.00045885041644890467,
"loss": 3.4881,
"step": 43000
},
{
"epoch": 0.46,
"learning_rate": 0.00045860777842894796,
"loss": 3.5034,
"step": 43100
},
{
"epoch": 0.46,
"learning_rate": 0.00045836449170177896,
"loss": 3.486,
"step": 43200
},
{
"epoch": 0.46,
"learning_rate": 0.00045812055702394597,
"loss": 3.4628,
"step": 43300
},
{
"epoch": 0.46,
"learning_rate": 0.00045787597515401223,
"loss": 3.4878,
"step": 43400
},
{
"epoch": 0.46,
"learning_rate": 0.0004576307468525535,
"loss": 3.4622,
"step": 43500
},
{
"epoch": 0.46,
"learning_rate": 0.0004573848728821557,
"loss": 3.5293,
"step": 43600
},
{
"epoch": 0.46,
"learning_rate": 0.00045713835400741274,
"loss": 3.4451,
"step": 43700
},
{
"epoch": 0.47,
"learning_rate": 0.00045689119099492383,
"loss": 3.5038,
"step": 43800
},
{
"epoch": 0.47,
"learning_rate": 0.00045664338461329137,
"loss": 3.4633,
"step": 43900
},
{
"epoch": 0.47,
"learning_rate": 0.0004563949356331184,
"loss": 3.4536,
"step": 44000
},
{
"epoch": 0.47,
"learning_rate": 0.0004561458448270062,
"loss": 3.5222,
"step": 44100
},
{
"epoch": 0.47,
"learning_rate": 0.0004558961129695519,
"loss": 3.4848,
"step": 44200
},
{
"epoch": 0.47,
"learning_rate": 0.0004556457408373464,
"loss": 3.5302,
"step": 44300
},
{
"epoch": 0.47,
"learning_rate": 0.0004553947292089713,
"loss": 3.5057,
"step": 44400
},
{
"epoch": 0.47,
"learning_rate": 0.000455143078864997,
"loss": 3.537,
"step": 44500
},
{
"epoch": 0.47,
"learning_rate": 0.00045489079058798,
"loss": 3.4947,
"step": 44600
},
{
"epoch": 0.48,
"learning_rate": 0.00045463786516246086,
"loss": 3.546,
"step": 44700
},
{
"epoch": 0.48,
"learning_rate": 0.00045438430337496117,
"loss": 3.5723,
"step": 44800
},
{
"epoch": 0.48,
"learning_rate": 0.00045413010601398163,
"loss": 3.4514,
"step": 44900
},
{
"epoch": 0.48,
"learning_rate": 0.0004538752738699992,
"loss": 3.4672,
"step": 45000
},
{
"epoch": 0.48,
"learning_rate": 0.000453619807735465,
"loss": 3.4934,
"step": 45100
},
{
"epoch": 0.48,
"learning_rate": 0.00045336370840480143,
"loss": 3.4974,
"step": 45200
},
{
"epoch": 0.48,
"learning_rate": 0.00045310697667440026,
"loss": 3.4252,
"step": 45300
},
{
"epoch": 0.48,
"learning_rate": 0.00045284961334261965,
"loss": 3.5194,
"step": 45400
},
{
"epoch": 0.48,
"learning_rate": 0.0004525916192097818,
"loss": 3.4608,
"step": 45500
},
{
"epoch": 0.48,
"learning_rate": 0.0004523329950781705,
"loss": 3.5033,
"step": 45600
},
{
"epoch": 0.49,
"learning_rate": 0.0004520737417520289,
"loss": 3.4757,
"step": 45700
},
{
"epoch": 0.49,
"learning_rate": 0.0004518138600375565,
"loss": 3.4889,
"step": 45800
},
{
"epoch": 0.49,
"learning_rate": 0.0004515533507429069,
"loss": 3.4402,
"step": 45900
},
{
"epoch": 0.49,
"learning_rate": 0.00045129221467818544,
"loss": 3.5092,
"step": 46000
},
{
"epoch": 0.49,
"learning_rate": 0.0004510304526554464,
"loss": 3.5367,
"step": 46100
},
{
"epoch": 0.49,
"learning_rate": 0.0004507680654886907,
"loss": 3.4865,
"step": 46200
},
{
"epoch": 0.49,
"learning_rate": 0.0004505050539938632,
"loss": 3.5106,
"step": 46300
},
{
"epoch": 0.49,
"learning_rate": 0.00045024141898885017,
"loss": 3.4592,
"step": 46400
},
{
"epoch": 0.49,
"learning_rate": 0.000449977161293477,
"loss": 3.4813,
"step": 46500
},
{
"epoch": 0.5,
"learning_rate": 0.0004497122817295053,
"loss": 3.4957,
"step": 46600
},
{
"epoch": 0.5,
"learning_rate": 0.00044944678112063046,
"loss": 3.4612,
"step": 46700
},
{
"epoch": 0.5,
"learning_rate": 0.00044918066029247936,
"loss": 3.5378,
"step": 46800
},
{
"epoch": 0.5,
"learning_rate": 0.00044891392007260735,
"loss": 3.5104,
"step": 46900
},
{
"epoch": 0.5,
"learning_rate": 0.000448646561290496,
"loss": 3.4465,
"step": 47000
},
{
"epoch": 0.5,
"learning_rate": 0.0004483785847775503,
"loss": 3.4633,
"step": 47100
},
{
"epoch": 0.5,
"learning_rate": 0.0004481099913670965,
"loss": 3.4771,
"step": 47200
},
{
"epoch": 0.5,
"learning_rate": 0.0004478407818943789,
"loss": 3.5111,
"step": 47300
},
{
"epoch": 0.5,
"learning_rate": 0.0004475709571965578,
"loss": 3.4932,
"step": 47400
},
{
"epoch": 0.5,
"learning_rate": 0.00044730051811270647,
"loss": 3.4843,
"step": 47500
},
{
"epoch": 0.51,
"learning_rate": 0.0004470294654838087,
"loss": 3.4771,
"step": 47600
},
{
"epoch": 0.51,
"learning_rate": 0.0004467578001527565,
"loss": 3.5115,
"step": 47700
},
{
"epoch": 0.51,
"learning_rate": 0.00044648552296434695,
"loss": 3.5195,
"step": 47800
},
{
"epoch": 0.51,
"learning_rate": 0.00044621263476528003,
"loss": 3.4532,
"step": 47900
},
{
"epoch": 0.51,
"learning_rate": 0.00044593913640415545,
"loss": 3.4574,
"step": 48000
},
{
"epoch": 0.51,
"learning_rate": 0.0004456650287314707,
"loss": 3.5016,
"step": 48100
},
{
"epoch": 0.51,
"learning_rate": 0.00044539031259961784,
"loss": 3.4765,
"step": 48200
},
{
"epoch": 0.51,
"learning_rate": 0.00044511498886288105,
"loss": 3.465,
"step": 48300
},
{
"epoch": 0.51,
"learning_rate": 0.00044483905837743417,
"loss": 3.5028,
"step": 48400
},
{
"epoch": 0.52,
"learning_rate": 0.00044456252200133757,
"loss": 3.5421,
"step": 48500
},
{
"epoch": 0.52,
"learning_rate": 0.0004442853805945359,
"loss": 3.4714,
"step": 48600
},
{
"epoch": 0.52,
"learning_rate": 0.00044400763501885543,
"loss": 3.4677,
"step": 48700
},
{
"epoch": 0.52,
"learning_rate": 0.0004437292861380009,
"loss": 3.535,
"step": 48800
},
{
"epoch": 0.52,
"learning_rate": 0.00044345033481755326,
"loss": 3.4449,
"step": 48900
},
{
"epoch": 0.52,
"learning_rate": 0.000443170781924967,
"loss": 3.4953,
"step": 49000
},
{
"epoch": 0.52,
"learning_rate": 0.0004428906283295672,
"loss": 3.4682,
"step": 49100
},
{
"epoch": 0.52,
"learning_rate": 0.00044260987490254695,
"loss": 3.4276,
"step": 49200
},
{
"epoch": 0.52,
"learning_rate": 0.00044232852251696467,
"loss": 3.5311,
"step": 49300
},
{
"epoch": 0.53,
"learning_rate": 0.00044204657204774124,
"loss": 3.4406,
"step": 49400
},
{
"epoch": 0.53,
"learning_rate": 0.0004417640243716576,
"loss": 3.5214,
"step": 49500
},
{
"epoch": 0.53,
"learning_rate": 0.0004414808803673518,
"loss": 3.4624,
"step": 49600
},
{
"epoch": 0.53,
"learning_rate": 0.000441197140915316,
"loss": 3.4879,
"step": 49700
},
{
"epoch": 0.53,
"learning_rate": 0.0004409128068978944,
"loss": 3.52,
"step": 49800
},
{
"epoch": 0.53,
"learning_rate": 0.0004406278791992798,
"loss": 3.5174,
"step": 49900
},
{
"epoch": 0.53,
"learning_rate": 0.00044034235870551156,
"loss": 3.4889,
"step": 50000
},
{
"epoch": 0.53,
"learning_rate": 0.000440056246304472,
"loss": 3.4301,
"step": 50100
},
{
"epoch": 0.53,
"learning_rate": 0.0004397695428858844,
"loss": 3.4761,
"step": 50200
},
{
"epoch": 0.53,
"learning_rate": 0.00043948224934130985,
"loss": 3.4547,
"step": 50300
},
{
"epoch": 0.54,
"learning_rate": 0.00043919436656414445,
"loss": 3.4262,
"step": 50400
},
{
"epoch": 0.54,
"learning_rate": 0.0004389058954496169,
"loss": 3.4494,
"step": 50500
},
{
"epoch": 0.54,
"learning_rate": 0.0004386168368947851,
"loss": 3.5187,
"step": 50600
},
{
"epoch": 0.54,
"learning_rate": 0.000438327191798534,
"loss": 3.4925,
"step": 50700
},
{
"epoch": 0.54,
"learning_rate": 0.0004380369610615722,
"loss": 3.4945,
"step": 50800
},
{
"epoch": 0.54,
"learning_rate": 0.00043774614558643,
"loss": 3.4728,
"step": 50900
},
{
"epoch": 0.54,
"learning_rate": 0.0004374547462774555,
"loss": 3.5043,
"step": 51000
},
{
"epoch": 0.54,
"learning_rate": 0.00043716276404081266,
"loss": 3.465,
"step": 51100
},
{
"epoch": 0.54,
"learning_rate": 0.0004368701997844781,
"loss": 3.455,
"step": 51200
},
{
"epoch": 0.55,
"learning_rate": 0.00043657705441823826,
"loss": 3.4398,
"step": 51300
},
{
"epoch": 0.55,
"learning_rate": 0.0004362833288536867,
"loss": 3.4834,
"step": 51400
},
{
"epoch": 0.55,
"learning_rate": 0.0004359890240042214,
"loss": 3.5072,
"step": 51500
},
{
"epoch": 0.55,
"learning_rate": 0.00043569414078504154,
"loss": 3.4757,
"step": 51600
},
{
"epoch": 0.55,
"learning_rate": 0.0004353986801131448,
"loss": 3.4697,
"step": 51700
},
{
"epoch": 0.55,
"learning_rate": 0.00043510264290732474,
"loss": 3.5054,
"step": 51800
},
{
"epoch": 0.55,
"learning_rate": 0.0004348060300881678,
"loss": 3.507,
"step": 51900
},
{
"epoch": 0.55,
"learning_rate": 0.00043450884257805014,
"loss": 3.504,
"step": 52000
},
{
"epoch": 0.55,
"learning_rate": 0.0004342110813011352,
"loss": 3.5152,
"step": 52100
},
{
"epoch": 0.55,
"learning_rate": 0.00043391274718337084,
"loss": 3.4792,
"step": 52200
},
{
"epoch": 0.56,
"learning_rate": 0.00043361384115248584,
"loss": 3.521,
"step": 52300
},
{
"epoch": 0.56,
"learning_rate": 0.0004333143641379879,
"loss": 3.4768,
"step": 52400
},
{
"epoch": 0.56,
"learning_rate": 0.00043301431707116014,
"loss": 3.4492,
"step": 52500
},
{
"epoch": 0.56,
"learning_rate": 0.0004327137008850582,
"loss": 3.4987,
"step": 52600
},
{
"epoch": 0.56,
"learning_rate": 0.0004324125165145077,
"loss": 3.4817,
"step": 52700
},
{
"epoch": 0.56,
"learning_rate": 0.00043211076489610135,
"loss": 3.4574,
"step": 52800
},
{
"epoch": 0.56,
"learning_rate": 0.0004318084469681952,
"loss": 3.4127,
"step": 52900
},
{
"epoch": 0.56,
"learning_rate": 0.00043150556367090704,
"loss": 3.463,
"step": 53000
},
{
"epoch": 0.56,
"learning_rate": 0.00043120211594611235,
"loss": 3.5262,
"step": 53100
},
{
"epoch": 0.57,
"learning_rate": 0.00043089810473744195,
"loss": 3.5513,
"step": 53200
},
{
"epoch": 0.57,
"learning_rate": 0.0004305935309902789,
"loss": 3.4956,
"step": 53300
},
{
"epoch": 0.57,
"learning_rate": 0.00043028839565175563,
"loss": 3.5124,
"step": 53400
},
{
"epoch": 0.57,
"learning_rate": 0.000429982699670751,
"loss": 3.4991,
"step": 53500
},
{
"epoch": 0.57,
"learning_rate": 0.0004296764439978871,
"loss": 3.5046,
"step": 53600
},
{
"epoch": 0.57,
"learning_rate": 0.0004293696295855266,
"loss": 3.5114,
"step": 53700
},
{
"epoch": 0.57,
"learning_rate": 0.0004290622573877698,
"loss": 3.5114,
"step": 53800
},
{
"epoch": 0.57,
"learning_rate": 0.00042875432836045145,
"loss": 3.5104,
"step": 53900
},
{
"epoch": 0.57,
"learning_rate": 0.0004284458434611378,
"loss": 3.4757,
"step": 54000
},
{
"epoch": 0.58,
"learning_rate": 0.0004281368036491237,
"loss": 3.4637,
"step": 54100
},
{
"epoch": 0.58,
"learning_rate": 0.00042782720988542976,
"loss": 3.4404,
"step": 54200
},
{
"epoch": 0.58,
"learning_rate": 0.0004275170631327991,
"loss": 3.5348,
"step": 54300
},
{
"epoch": 0.58,
"learning_rate": 0.0004272063643556945,
"loss": 3.5154,
"step": 54400
},
{
"epoch": 0.58,
"learning_rate": 0.00042689511452029526,
"loss": 3.528,
"step": 54500
},
{
"epoch": 0.58,
"learning_rate": 0.0004265833145944945,
"loss": 3.5509,
"step": 54600
},
{
"epoch": 0.58,
"learning_rate": 0.00042627096554789584,
"loss": 3.5283,
"step": 54700
},
{
"epoch": 0.58,
"learning_rate": 0.0004259580683518105,
"loss": 3.4573,
"step": 54800
},
{
"epoch": 0.58,
"learning_rate": 0.0004256446239792543,
"loss": 3.5586,
"step": 54900
},
{
"epoch": 0.58,
"learning_rate": 0.0004253306334049446,
"loss": 3.5568,
"step": 55000
},
{
"epoch": 0.59,
"learning_rate": 0.00042501609760529734,
"loss": 3.5057,
"step": 55100
},
{
"epoch": 0.59,
"learning_rate": 0.0004247010175584239,
"loss": 3.5694,
"step": 55200
},
{
"epoch": 0.59,
"learning_rate": 0.000424385394244128,
"loss": 3.4765,
"step": 55300
},
{
"epoch": 0.59,
"learning_rate": 0.000424069228643903,
"loss": 3.5685,
"step": 55400
},
{
"epoch": 0.59,
"learning_rate": 0.00042375252174092824,
"loss": 3.5035,
"step": 55500
},
{
"epoch": 0.59,
"learning_rate": 0.0004234352745200669,
"loss": 3.4847,
"step": 55600
},
{
"epoch": 0.59,
"learning_rate": 0.00042311748796786174,
"loss": 3.4716,
"step": 55700
},
{
"epoch": 0.59,
"learning_rate": 0.0004227991630725333,
"loss": 3.4406,
"step": 55800
},
{
"epoch": 0.59,
"learning_rate": 0.0004224803008239757,
"loss": 3.4917,
"step": 55900
},
{
"epoch": 0.6,
"learning_rate": 0.00042216090221375426,
"loss": 3.5315,
"step": 56000
},
{
"epoch": 0.6,
"learning_rate": 0.0004218409682351023,
"loss": 3.4636,
"step": 56100
},
{
"epoch": 0.6,
"learning_rate": 0.000421520499882918,
"loss": 3.4861,
"step": 56200
},
{
"epoch": 0.6,
"learning_rate": 0.0004211994981537609,
"loss": 3.5376,
"step": 56300
},
{
"epoch": 0.6,
"learning_rate": 0.00042087796404584977,
"loss": 3.5678,
"step": 56400
},
{
"epoch": 0.6,
"learning_rate": 0.00042055589855905846,
"loss": 3.5243,
"step": 56500
},
{
"epoch": 0.6,
"learning_rate": 0.00042023330269491346,
"loss": 3.5343,
"step": 56600
},
{
"epoch": 0.6,
"learning_rate": 0.0004199101774565905,
"loss": 3.541,
"step": 56700
},
{
"epoch": 0.6,
"learning_rate": 0.00041958652384891146,
"loss": 3.4849,
"step": 56800
},
{
"epoch": 0.6,
"learning_rate": 0.00041926234287834144,
"loss": 3.525,
"step": 56900
},
{
"epoch": 0.61,
"learning_rate": 0.00041893763555298527,
"loss": 3.5095,
"step": 57000
},
{
"epoch": 0.61,
"learning_rate": 0.00041861240288258483,
"loss": 3.4635,
"step": 57100
},
{
"epoch": 0.61,
"learning_rate": 0.0004182866458785155,
"loss": 3.5302,
"step": 57200
},
{
"epoch": 0.61,
"learning_rate": 0.00041796036555378325,
"loss": 3.4834,
"step": 57300
},
{
"epoch": 0.61,
"learning_rate": 0.0004176335629230213,
"loss": 3.4757,
"step": 57400
},
{
"epoch": 0.61,
"learning_rate": 0.00041730623900248717,
"loss": 3.5522,
"step": 57500
},
{
"epoch": 0.61,
"learning_rate": 0.0004169783948100595,
"loss": 3.4441,
"step": 57600
},
{
"epoch": 0.61,
"learning_rate": 0.0004166500313652347,
"loss": 3.5152,
"step": 57700
},
{
"epoch": 0.61,
"learning_rate": 0.00041632114968912404,
"loss": 3.4957,
"step": 57800
},
{
"epoch": 0.62,
"learning_rate": 0.0004159917508044502,
"loss": 3.4784,
"step": 57900
},
{
"epoch": 0.62,
"learning_rate": 0.0004156618357355442,
"loss": 3.4779,
"step": 58000
},
{
"epoch": 0.62,
"learning_rate": 0.00041533140550834225,
"loss": 3.4575,
"step": 58100
},
{
"epoch": 0.62,
"learning_rate": 0.0004150004611503828,
"loss": 3.5332,
"step": 58200
},
{
"epoch": 0.62,
"learning_rate": 0.0004146690036908028,
"loss": 3.5279,
"step": 58300
},
{
"epoch": 0.62,
"learning_rate": 0.00041433703416033485,
"loss": 3.4968,
"step": 58400
},
{
"epoch": 0.62,
"learning_rate": 0.00041400455359130397,
"loss": 3.5489,
"step": 58500
},
{
"epoch": 0.62,
"learning_rate": 0.00041367156301762444,
"loss": 3.5178,
"step": 58600
},
{
"epoch": 0.62,
"learning_rate": 0.0004133380634747963,
"loss": 3.4902,
"step": 58700
},
{
"epoch": 0.63,
"learning_rate": 0.0004130040559999025,
"loss": 3.4997,
"step": 58800
},
{
"epoch": 0.63,
"learning_rate": 0.0004126695416316054,
"loss": 3.4763,
"step": 58900
},
{
"epoch": 0.63,
"learning_rate": 0.0004123345214101438,
"loss": 3.4574,
"step": 59000
},
{
"epoch": 0.63,
"learning_rate": 0.00041199899637732934,
"loss": 3.4456,
"step": 59100
},
{
"epoch": 0.63,
"learning_rate": 0.00041166296757654366,
"loss": 3.4863,
"step": 59200
},
{
"epoch": 0.63,
"learning_rate": 0.0004113264360527348,
"loss": 3.4747,
"step": 59300
},
{
"epoch": 0.63,
"learning_rate": 0.0004109894028524143,
"loss": 3.4851,
"step": 59400
},
{
"epoch": 0.63,
"learning_rate": 0.0004106518690236536,
"loss": 3.4395,
"step": 59500
},
{
"epoch": 0.63,
"learning_rate": 0.000410313835616081,
"loss": 3.4848,
"step": 59600
},
{
"epoch": 0.63,
"learning_rate": 0.0004099753036808783,
"loss": 3.4535,
"step": 59700
},
{
"epoch": 0.64,
"learning_rate": 0.00040963627427077775,
"loss": 3.4992,
"step": 59800
},
{
"epoch": 0.64,
"learning_rate": 0.00040929674844005843,
"loss": 3.4552,
"step": 59900
},
{
"epoch": 0.64,
"learning_rate": 0.00040895672724454305,
"loss": 3.5318,
"step": 60000
},
{
"epoch": 0.64,
"learning_rate": 0.00040861621174159495,
"loss": 3.5415,
"step": 60100
},
{
"epoch": 0.64,
"learning_rate": 0.0004082752029901146,
"loss": 3.5494,
"step": 60200
},
{
"epoch": 0.64,
"learning_rate": 0.0004079337020505362,
"loss": 3.4648,
"step": 60300
},
{
"epoch": 0.64,
"learning_rate": 0.0004075917099848245,
"loss": 3.5292,
"step": 60400
},
{
"epoch": 0.64,
"learning_rate": 0.0004072492278564718,
"loss": 3.5001,
"step": 60500
},
{
"epoch": 0.64,
"learning_rate": 0.0004069062567304939,
"loss": 3.4884,
"step": 60600
},
{
"epoch": 0.65,
"learning_rate": 0.00040656279767342765,
"loss": 3.4814,
"step": 60700
},
{
"epoch": 0.65,
"learning_rate": 0.0004062188517533268,
"loss": 3.4705,
"step": 60800
},
{
"epoch": 0.65,
"learning_rate": 0.0004058744200397595,
"loss": 3.4994,
"step": 60900
},
{
"epoch": 0.65,
"learning_rate": 0.00040552950360380434,
"loss": 3.4953,
"step": 61000
},
{
"epoch": 0.65,
"learning_rate": 0.0004051841035180472,
"loss": 3.521,
"step": 61100
},
{
"epoch": 0.65,
"learning_rate": 0.0004048382208565784,
"loss": 3.5164,
"step": 61200
},
{
"epoch": 0.65,
"learning_rate": 0.0004044918566949882,
"loss": 3.507,
"step": 61300
},
{
"epoch": 0.65,
"learning_rate": 0.00040414501211036486,
"loss": 3.4945,
"step": 61400
},
{
"epoch": 0.65,
"learning_rate": 0.0004037976881812901,
"loss": 3.4484,
"step": 61500
},
{
"epoch": 0.65,
"learning_rate": 0.0004034498859878367,
"loss": 3.4555,
"step": 61600
},
{
"epoch": 0.66,
"learning_rate": 0.00040310160661156437,
"loss": 3.4847,
"step": 61700
},
{
"epoch": 0.66,
"learning_rate": 0.00040275285113551676,
"loss": 3.4943,
"step": 61800
},
{
"epoch": 0.66,
"learning_rate": 0.0004024036206442182,
"loss": 3.5139,
"step": 61900
},
{
"epoch": 0.66,
"learning_rate": 0.00040205391622367016,
"loss": 3.4618,
"step": 62000
},
{
"epoch": 0.66,
"learning_rate": 0.0004017037389613476,
"loss": 3.4801,
"step": 62100
},
{
"epoch": 0.66,
"learning_rate": 0.0004013530899461963,
"loss": 3.5376,
"step": 62200
},
{
"epoch": 0.66,
"learning_rate": 0.00040100197026862874,
"loss": 3.5269,
"step": 62300
},
{
"epoch": 0.66,
"learning_rate": 0.0004006503810205211,
"loss": 3.4552,
"step": 62400
},
{
"epoch": 0.66,
"learning_rate": 0.00040029832329520977,
"loss": 3.5446,
"step": 62500
},
{
"epoch": 0.67,
"learning_rate": 0.0003999457981874881,
"loss": 3.5022,
"step": 62600
},
{
"epoch": 0.67,
"learning_rate": 0.0003995928067936027,
"loss": 3.5138,
"step": 62700
},
{
"epoch": 0.67,
"learning_rate": 0.00039923935021125015,
"loss": 3.4631,
"step": 62800
},
{
"epoch": 0.67,
"learning_rate": 0.00039888542953957375,
"loss": 3.444,
"step": 62900
},
{
"epoch": 0.67,
"learning_rate": 0.00039853104587916005,
"loss": 3.5034,
"step": 63000
},
{
"epoch": 0.67,
"learning_rate": 0.0003981762003320351,
"loss": 3.4181,
"step": 63100
},
{
"epoch": 0.67,
"learning_rate": 0.00039782089400166155,
"loss": 3.515,
"step": 63200
},
{
"epoch": 0.67,
"learning_rate": 0.0003974651279929348,
"loss": 3.5136,
"step": 63300
},
{
"epoch": 0.67,
"learning_rate": 0.00039710890341217967,
"loss": 3.489,
"step": 63400
},
{
"epoch": 0.67,
"learning_rate": 0.00039675222136714705,
"loss": 3.4723,
"step": 63500
},
{
"epoch": 0.68,
"learning_rate": 0.00039639508296701045,
"loss": 3.4656,
"step": 63600
},
{
"epoch": 0.68,
"learning_rate": 0.0003960374893223625,
"loss": 3.5267,
"step": 63700
},
{
"epoch": 0.68,
"learning_rate": 0.0003956794415452115,
"loss": 3.4808,
"step": 63800
},
{
"epoch": 0.68,
"learning_rate": 0.00039532094074897785,
"loss": 3.5218,
"step": 63900
},
{
"epoch": 0.68,
"learning_rate": 0.00039496198804849083,
"loss": 3.4797,
"step": 64000
},
{
"epoch": 0.68,
"learning_rate": 0.00039460258455998497,
"loss": 3.4921,
"step": 64100
},
{
"epoch": 0.68,
"learning_rate": 0.0003942427314010967,
"loss": 3.4503,
"step": 64200
},
{
"epoch": 0.68,
"learning_rate": 0.00039388242969086066,
"loss": 3.4776,
"step": 64300
},
{
"epoch": 0.68,
"learning_rate": 0.0003935216805497063,
"loss": 3.4785,
"step": 64400
},
{
"epoch": 0.69,
"learning_rate": 0.00039316048509945457,
"loss": 3.4773,
"step": 64500
},
{
"epoch": 0.69,
"learning_rate": 0.00039279884446331436,
"loss": 3.4693,
"step": 64600
},
{
"epoch": 0.69,
"learning_rate": 0.00039243675976587876,
"loss": 3.4932,
"step": 64700
},
{
"epoch": 0.69,
"learning_rate": 0.00039207423213312204,
"loss": 3.4971,
"step": 64800
},
{
"epoch": 0.69,
"learning_rate": 0.00039171126269239555,
"loss": 3.4892,
"step": 64900
},
{
"epoch": 0.69,
"learning_rate": 0.00039134785257242467,
"loss": 3.527,
"step": 65000
},
{
"epoch": 0.69,
"learning_rate": 0.0003909840029033052,
"loss": 3.4535,
"step": 65100
},
{
"epoch": 0.69,
"learning_rate": 0.0003906197148164997,
"loss": 3.5352,
"step": 65200
},
{
"epoch": 0.69,
"learning_rate": 0.0003902549894448342,
"loss": 3.4764,
"step": 65300
},
{
"epoch": 0.7,
"learning_rate": 0.00038988982792249454,
"loss": 3.4837,
"step": 65400
},
{
"epoch": 0.7,
"learning_rate": 0.0003895242313850228,
"loss": 3.4606,
"step": 65500
},
{
"epoch": 0.7,
"learning_rate": 0.00038915820096931364,
"loss": 3.4713,
"step": 65600
},
{
"epoch": 0.7,
"learning_rate": 0.00038879173781361146,
"loss": 3.4764,
"step": 65700
},
{
"epoch": 0.7,
"learning_rate": 0.00038842484305750587,
"loss": 3.4697,
"step": 65800
},
{
"epoch": 0.7,
"learning_rate": 0.00038805751784192876,
"loss": 3.4771,
"step": 65900
},
{
"epoch": 0.7,
"learning_rate": 0.00038768976330915073,
"loss": 3.542,
"step": 66000
},
{
"epoch": 0.7,
"learning_rate": 0.0003873215806027773,
"loss": 3.4594,
"step": 66100
},
{
"epoch": 0.7,
"learning_rate": 0.0003869529708677456,
"loss": 3.5125,
"step": 66200
},
{
"epoch": 0.7,
"learning_rate": 0.0003865839352503206,
"loss": 3.4936,
"step": 66300
},
{
"epoch": 0.71,
"learning_rate": 0.0003862144748980917,
"loss": 3.4315,
"step": 66400
},
{
"epoch": 0.71,
"learning_rate": 0.00038584459095996883,
"loss": 3.5041,
"step": 66500
},
{
"epoch": 0.71,
"learning_rate": 0.0003854742845861796,
"loss": 3.4987,
"step": 66600
},
{
"epoch": 0.71,
"learning_rate": 0.00038510355692826504,
"loss": 3.4537,
"step": 66700
},
{
"epoch": 0.71,
"learning_rate": 0.0003847324091390761,
"loss": 3.4199,
"step": 66800
},
{
"epoch": 0.71,
"learning_rate": 0.0003843608423727706,
"loss": 3.545,
"step": 66900
},
{
"epoch": 0.71,
"learning_rate": 0.0003839888577848086,
"loss": 3.4893,
"step": 67000
},
{
"epoch": 0.71,
"learning_rate": 0.00038361645653195025,
"loss": 3.5127,
"step": 67100
},
{
"epoch": 0.71,
"learning_rate": 0.0003832436397722509,
"loss": 3.4984,
"step": 67200
},
{
"epoch": 0.72,
"learning_rate": 0.00038287040866505806,
"loss": 3.4773,
"step": 67300
},
{
"epoch": 0.72,
"learning_rate": 0.00038249676437100775,
"loss": 3.5147,
"step": 67400
},
{
"epoch": 0.72,
"learning_rate": 0.00038212270805202113,
"loss": 3.4999,
"step": 67500
},
{
"epoch": 0.72,
"learning_rate": 0.00038174824087130023,
"loss": 3.4605,
"step": 67600
},
{
"epoch": 0.72,
"learning_rate": 0.000381373363993325,
"loss": 3.5478,
"step": 67700
},
{
"epoch": 0.72,
"learning_rate": 0.00038099807858384935,
"loss": 3.5431,
"step": 67800
},
{
"epoch": 0.72,
"learning_rate": 0.0003806223858098976,
"loss": 3.5196,
"step": 67900
},
{
"epoch": 0.72,
"learning_rate": 0.0003802462868397609,
"loss": 3.4565,
"step": 68000
},
{
"epoch": 0.72,
"learning_rate": 0.00037986978284299346,
"loss": 3.479,
"step": 68100
},
{
"epoch": 0.72,
"learning_rate": 0.00037949287499040895,
"loss": 3.4619,
"step": 68200
},
{
"epoch": 0.73,
"learning_rate": 0.00037911556445407725,
"loss": 3.5283,
"step": 68300
},
{
"epoch": 0.73,
"learning_rate": 0.00037873785240731994,
"loss": 3.4817,
"step": 68400
},
{
"epoch": 0.73,
"learning_rate": 0.0003783597400247077,
"loss": 3.4796,
"step": 68500
},
{
"epoch": 0.73,
"learning_rate": 0.00037798122848205576,
"loss": 3.4814,
"step": 68600
},
{
"epoch": 0.73,
"learning_rate": 0.0003776023189564206,
"loss": 3.5202,
"step": 68700
},
{
"epoch": 0.73,
"learning_rate": 0.0003772230126260968,
"loss": 3.4558,
"step": 68800
},
{
"epoch": 0.73,
"learning_rate": 0.00037684331067061225,
"loss": 3.5382,
"step": 68900
},
{
"epoch": 0.73,
"learning_rate": 0.0003764632142707255,
"loss": 3.4725,
"step": 69000
},
{
"epoch": 0.73,
"learning_rate": 0.0003760827246084216,
"loss": 3.5022,
"step": 69100
},
{
"epoch": 0.74,
"learning_rate": 0.0003757018428669086,
"loss": 3.4569,
"step": 69200
},
{
"epoch": 0.74,
"learning_rate": 0.0003753205702306135,
"loss": 3.4843,
"step": 69300
},
{
"epoch": 0.74,
"learning_rate": 0.00037493890788517937,
"loss": 3.5093,
"step": 69400
},
{
"epoch": 0.74,
"learning_rate": 0.0003745568570174607,
"loss": 3.439,
"step": 69500
},
{
"epoch": 0.74,
"learning_rate": 0.00037417441881552036,
"loss": 3.5336,
"step": 69600
},
{
"epoch": 0.74,
"learning_rate": 0.0003737915944686258,
"loss": 3.4631,
"step": 69700
},
{
"epoch": 0.74,
"learning_rate": 0.00037340838516724514,
"loss": 3.4551,
"step": 69800
},
{
"epoch": 0.74,
"learning_rate": 0.0003730247921030436,
"loss": 3.4739,
"step": 69900
},
{
"epoch": 0.74,
"learning_rate": 0.0003726408164688797,
"loss": 3.448,
"step": 70000
},
{
"epoch": 0.75,
"learning_rate": 0.00037225645945880196,
"loss": 3.4937,
"step": 70100
},
{
"epoch": 0.75,
"learning_rate": 0.00037187172226804433,
"loss": 3.473,
"step": 70200
},
{
"epoch": 0.75,
"learning_rate": 0.00037148660609302367,
"loss": 3.5094,
"step": 70300
},
{
"epoch": 0.75,
"learning_rate": 0.00037110111213133475,
"loss": 3.5025,
"step": 70400
},
{
"epoch": 0.75,
"learning_rate": 0.0003707152415817476,
"loss": 3.51,
"step": 70500
},
{
"epoch": 0.75,
"learning_rate": 0.000370328995644203,
"loss": 3.5018,
"step": 70600
},
{
"epoch": 0.75,
"learning_rate": 0.0003699423755198092,
"loss": 3.5345,
"step": 70700
},
{
"epoch": 0.75,
"learning_rate": 0.0003695553824108381,
"loss": 3.5203,
"step": 70800
},
{
"epoch": 0.75,
"learning_rate": 0.00036916801752072154,
"loss": 3.4331,
"step": 70900
},
{
"epoch": 0.75,
"learning_rate": 0.0003687802820540473,
"loss": 3.4551,
"step": 71000
},
{
"epoch": 0.76,
"learning_rate": 0.0003683921772165556,
"loss": 3.4541,
"step": 71100
},
{
"epoch": 0.76,
"learning_rate": 0.0003680037042151353,
"loss": 3.4983,
"step": 71200
},
{
"epoch": 0.76,
"learning_rate": 0.00036761486425782025,
"loss": 3.4602,
"step": 71300
},
{
"epoch": 0.76,
"learning_rate": 0.00036722565855378534,
"loss": 3.5578,
"step": 71400
},
{
"epoch": 0.76,
"learning_rate": 0.0003668360883133426,
"loss": 3.4924,
"step": 71500
},
{
"epoch": 0.76,
"learning_rate": 0.0003664461547479381,
"loss": 3.5228,
"step": 71600
},
{
"epoch": 0.76,
"learning_rate": 0.00036605585907014727,
"loss": 3.4527,
"step": 71700
},
{
"epoch": 0.76,
"learning_rate": 0.00036566520249367216,
"loss": 3.5135,
"step": 71800
},
{
"epoch": 0.76,
"learning_rate": 0.00036527418623333655,
"loss": 3.4777,
"step": 71900
},
{
"epoch": 0.77,
"learning_rate": 0.00036488281150508293,
"loss": 3.4908,
"step": 72000
},
{
"epoch": 0.77,
"learning_rate": 0.0003644910795259687,
"loss": 3.4532,
"step": 72100
},
{
"epoch": 0.77,
"learning_rate": 0.00036409899151416194,
"loss": 3.4572,
"step": 72200
},
{
"epoch": 0.77,
"learning_rate": 0.00036370654868893813,
"loss": 3.5133,
"step": 72300
},
{
"epoch": 0.77,
"learning_rate": 0.0003633137522706758,
"loss": 3.528,
"step": 72400
},
{
"epoch": 0.77,
"learning_rate": 0.0003629206034808534,
"loss": 3.5001,
"step": 72500
},
{
"epoch": 0.77,
"learning_rate": 0.00036252710354204486,
"loss": 3.5106,
"step": 72600
},
{
"epoch": 0.77,
"learning_rate": 0.0003621332536779162,
"loss": 3.5027,
"step": 72700
},
{
"epoch": 0.77,
"learning_rate": 0.0003617390551132216,
"loss": 3.4608,
"step": 72800
},
{
"epoch": 0.77,
"learning_rate": 0.00036134450907379965,
"loss": 3.4917,
"step": 72900
},
{
"epoch": 0.78,
"learning_rate": 0.00036094961678656936,
"loss": 3.4839,
"step": 73000
},
{
"epoch": 0.78,
"learning_rate": 0.00036055437947952654,
"loss": 3.5027,
"step": 73100
},
{
"epoch": 0.78,
"learning_rate": 0.00036015879838173986,
"loss": 3.4551,
"step": 73200
},
{
"epoch": 0.78,
"learning_rate": 0.00035976287472334716,
"loss": 3.4427,
"step": 73300
},
{
"epoch": 0.78,
"learning_rate": 0.00035936660973555145,
"loss": 3.4986,
"step": 73400
},
{
"epoch": 0.78,
"learning_rate": 0.00035897000465061725,
"loss": 3.449,
"step": 73500
},
{
"epoch": 0.78,
"learning_rate": 0.0003585730607018667,
"loss": 3.4778,
"step": 73600
},
{
"epoch": 0.78,
"learning_rate": 0.00035817577912367537,
"loss": 3.5685,
"step": 73700
},
{
"epoch": 0.78,
"learning_rate": 0.0003577781611514694,
"loss": 3.5013,
"step": 73800
},
{
"epoch": 0.79,
"learning_rate": 0.0003573802080217203,
"loss": 3.4755,
"step": 73900
},
{
"epoch": 0.79,
"learning_rate": 0.0003569819209719425,
"loss": 3.4971,
"step": 74000
},
{
"epoch": 0.79,
"learning_rate": 0.00035658330124068815,
"loss": 3.5367,
"step": 74100
},
{
"epoch": 0.79,
"learning_rate": 0.0003561843500675445,
"loss": 3.4988,
"step": 74200
},
{
"epoch": 0.79,
"learning_rate": 0.0003557850686931292,
"loss": 3.4971,
"step": 74300
},
{
"epoch": 0.79,
"learning_rate": 0.00035538545835908674,
"loss": 3.471,
"step": 74400
},
{
"epoch": 0.79,
"learning_rate": 0.00035498552030808464,
"loss": 3.4602,
"step": 74500
},
{
"epoch": 0.79,
"learning_rate": 0.0003545852557838095,
"loss": 3.4899,
"step": 74600
},
{
"epoch": 0.79,
"learning_rate": 0.0003541846660309631,
"loss": 3.4727,
"step": 74700
},
{
"epoch": 0.8,
"learning_rate": 0.0003537837522952587,
"loss": 3.4743,
"step": 74800
},
{
"epoch": 0.8,
"learning_rate": 0.00035338251582341703,
"loss": 3.4798,
"step": 74900
},
{
"epoch": 0.8,
"learning_rate": 0.0003529809578631622,
"loss": 3.4574,
"step": 75000
},
{
"epoch": 0.8,
"learning_rate": 0.00035257907966321846,
"loss": 3.4465,
"step": 75100
},
{
"epoch": 0.8,
"learning_rate": 0.00035217688247330553,
"loss": 3.4908,
"step": 75200
},
{
"epoch": 0.8,
"learning_rate": 0.0003517743675441353,
"loss": 3.4411,
"step": 75300
},
{
"epoch": 0.8,
"learning_rate": 0.00035137153612740767,
"loss": 3.5287,
"step": 75400
},
{
"epoch": 0.8,
"learning_rate": 0.0003509683894758068,
"loss": 3.4939,
"step": 75500
},
{
"epoch": 0.8,
"learning_rate": 0.0003505649288429969,
"loss": 3.4693,
"step": 75600
},
{
"epoch": 0.8,
"learning_rate": 0.00035016115548361886,
"loss": 3.5589,
"step": 75700
},
{
"epoch": 0.81,
"learning_rate": 0.0003497570706532859,
"loss": 3.5431,
"step": 75800
},
{
"epoch": 0.81,
"learning_rate": 0.0003493526756085799,
"loss": 3.5008,
"step": 75900
},
{
"epoch": 0.81,
"learning_rate": 0.00034894797160704737,
"loss": 3.4913,
"step": 76000
},
{
"epoch": 0.81,
"learning_rate": 0.0003485429599071954,
"loss": 3.5414,
"step": 76100
},
{
"epoch": 0.81,
"learning_rate": 0.00034813764176848833,
"loss": 3.4656,
"step": 76200
},
{
"epoch": 0.81,
"learning_rate": 0.000347732018451343,
"loss": 3.5164,
"step": 76300
},
{
"epoch": 0.81,
"learning_rate": 0.00034732609121712566,
"loss": 3.5187,
"step": 76400
},
{
"epoch": 0.81,
"learning_rate": 0.00034691986132814737,
"loss": 3.4221,
"step": 76500
},
{
"epoch": 0.81,
"learning_rate": 0.0003465133300476604,
"loss": 3.4573,
"step": 76600
},
{
"epoch": 0.82,
"learning_rate": 0.00034610649863985434,
"loss": 3.4997,
"step": 76700
},
{
"epoch": 0.82,
"learning_rate": 0.0003456993683698521,
"loss": 3.4785,
"step": 76800
},
{
"epoch": 0.82,
"learning_rate": 0.0003452919405037057,
"loss": 3.461,
"step": 76900
},
{
"epoch": 0.82,
"learning_rate": 0.00034488421630839307,
"loss": 3.4799,
"step": 77000
},
{
"epoch": 0.82,
"learning_rate": 0.0003444761970518133,
"loss": 3.4866,
"step": 77100
},
{
"epoch": 0.82,
"learning_rate": 0.000344067884002783,
"loss": 3.4616,
"step": 77200
},
{
"epoch": 0.82,
"learning_rate": 0.0003436592784310325,
"loss": 3.5271,
"step": 77300
},
{
"epoch": 0.82,
"learning_rate": 0.00034325038160720186,
"loss": 3.5399,
"step": 77400
},
{
"epoch": 0.82,
"learning_rate": 0.0003428411948028367,
"loss": 3.4931,
"step": 77500
},
{
"epoch": 0.82,
"learning_rate": 0.0003424317192903844,
"loss": 3.481,
"step": 77600
},
{
"epoch": 0.83,
"learning_rate": 0.00034202195634319026,
"loss": 3.4759,
"step": 77700
},
{
"epoch": 0.83,
"learning_rate": 0.0003416119072354933,
"loss": 3.4805,
"step": 77800
},
{
"epoch": 0.83,
"learning_rate": 0.0003412015732424225,
"loss": 3.4803,
"step": 77900
},
{
"epoch": 0.83,
"learning_rate": 0.00034079095563999264,
"loss": 3.4571,
"step": 78000
},
{
"epoch": 0.83,
"learning_rate": 0.00034038005570510046,
"loss": 3.5024,
"step": 78100
},
{
"epoch": 0.83,
"learning_rate": 0.00033996887471552084,
"loss": 3.512,
"step": 78200
},
{
"epoch": 0.83,
"learning_rate": 0.00033955741394990234,
"loss": 3.4331,
"step": 78300
},
{
"epoch": 0.83,
"learning_rate": 0.00033914567468776394,
"loss": 3.4274,
"step": 78400
},
{
"epoch": 0.83,
"learning_rate": 0.00033873365820949025,
"loss": 3.5295,
"step": 78500
},
{
"epoch": 0.84,
"learning_rate": 0.00033832136579632833,
"loss": 3.4613,
"step": 78600
},
{
"epoch": 0.84,
"learning_rate": 0.0003379087987303829,
"loss": 3.459,
"step": 78700
},
{
"epoch": 0.84,
"learning_rate": 0.00033749595829461304,
"loss": 3.4423,
"step": 78800
},
{
"epoch": 0.84,
"learning_rate": 0.00033708284577282796,
"loss": 3.5483,
"step": 78900
},
{
"epoch": 0.84,
"learning_rate": 0.0003366694624496828,
"loss": 3.4994,
"step": 79000
},
{
"epoch": 0.84,
"learning_rate": 0.0003362558096106749,
"loss": 3.4706,
"step": 79100
},
{
"epoch": 0.84,
"learning_rate": 0.00033584188854213974,
"loss": 3.5044,
"step": 79200
},
{
"epoch": 0.84,
"learning_rate": 0.00033542770053124696,
"loss": 3.455,
"step": 79300
},
{
"epoch": 0.84,
"learning_rate": 0.000335013246865996,
"loss": 3.5131,
"step": 79400
},
{
"epoch": 0.85,
"learning_rate": 0.0003345985288352129,
"loss": 3.5119,
"step": 79500
},
{
"epoch": 0.85,
"learning_rate": 0.0003341835477285453,
"loss": 3.5121,
"step": 79600
},
{
"epoch": 0.85,
"learning_rate": 0.00033376830483645937,
"loss": 3.4693,
"step": 79700
},
{
"epoch": 0.85,
"learning_rate": 0.00033335280145023493,
"loss": 3.4531,
"step": 79800
},
{
"epoch": 0.85,
"learning_rate": 0.00033293703886196226,
"loss": 3.4548,
"step": 79900
},
{
"epoch": 0.85,
"learning_rate": 0.00033252101836453733,
"loss": 3.5033,
"step": 80000
},
{
"epoch": 0.85,
"learning_rate": 0.00033210474125165853,
"loss": 3.4889,
"step": 80100
},
{
"epoch": 0.85,
"learning_rate": 0.0003316882088178217,
"loss": 3.4725,
"step": 80200
},
{
"epoch": 0.85,
"learning_rate": 0.00033127142235831716,
"loss": 3.4618,
"step": 80300
},
{
"epoch": 0.85,
"learning_rate": 0.0003308543831692249,
"loss": 3.4913,
"step": 80400
},
{
"epoch": 0.86,
"learning_rate": 0.0003304370925474109,
"loss": 3.4637,
"step": 80500
},
{
"epoch": 0.86,
"learning_rate": 0.0003300195517905231,
"loss": 3.4736,
"step": 80600
},
{
"epoch": 0.86,
"learning_rate": 0.000329601762196987,
"loss": 3.4883,
"step": 80700
},
{
"epoch": 0.86,
"learning_rate": 0.0003291837250660023,
"loss": 3.4966,
"step": 80800
},
{
"epoch": 0.86,
"learning_rate": 0.0003287654416975382,
"loss": 3.4885,
"step": 80900
},
{
"epoch": 0.86,
"learning_rate": 0.0003283469133923297,
"loss": 3.4405,
"step": 81000
},
{
"epoch": 0.86,
"learning_rate": 0.00032792814145187344,
"loss": 3.5012,
"step": 81100
},
{
"epoch": 0.86,
"learning_rate": 0.00032750912717842385,
"loss": 3.4704,
"step": 81200
},
{
"epoch": 0.86,
"learning_rate": 0.0003270898718749886,
"loss": 3.5264,
"step": 81300
},
{
"epoch": 0.87,
"learning_rate": 0.0003266703768453253,
"loss": 3.4999,
"step": 81400
},
{
"epoch": 0.87,
"learning_rate": 0.00032625064339393686,
"loss": 3.5004,
"step": 81500
},
{
"epoch": 0.87,
"learning_rate": 0.0003258306728260674,
"loss": 3.4246,
"step": 81600
},
{
"epoch": 0.87,
"learning_rate": 0.00032541046644769876,
"loss": 3.4751,
"step": 81700
},
{
"epoch": 0.87,
"learning_rate": 0.0003249900255655459,
"loss": 3.5038,
"step": 81800
},
{
"epoch": 0.87,
"learning_rate": 0.00032456935148705303,
"loss": 3.5146,
"step": 81900
},
{
"epoch": 0.87,
"learning_rate": 0.0003241484455203895,
"loss": 3.4961,
"step": 82000
},
{
"epoch": 0.87,
"learning_rate": 0.0003237273089744458,
"loss": 3.4722,
"step": 82100
},
{
"epoch": 0.87,
"learning_rate": 0.00032330594315882943,
"loss": 3.4567,
"step": 82200
},
{
"epoch": 0.87,
"learning_rate": 0.000322884349383861,
"loss": 3.5115,
"step": 82300
},
{
"epoch": 0.88,
"learning_rate": 0.0003224625289605696,
"loss": 3.4695,
"step": 82400
},
{
"epoch": 0.88,
"learning_rate": 0.00032204048320068964,
"loss": 3.4923,
"step": 82500
},
{
"epoch": 0.88,
"learning_rate": 0.0003216182134166559,
"loss": 3.5214,
"step": 82600
},
{
"epoch": 0.88,
"learning_rate": 0.00032119572092160006,
"loss": 3.4668,
"step": 82700
},
{
"epoch": 0.88,
"learning_rate": 0.00032077300702934607,
"loss": 3.4876,
"step": 82800
},
{
"epoch": 0.88,
"learning_rate": 0.00032035007305440655,
"loss": 3.5109,
"step": 82900
},
{
"epoch": 0.88,
"learning_rate": 0.00031992692031197853,
"loss": 3.4911,
"step": 83000
},
{
"epoch": 0.88,
"learning_rate": 0.0003195035501179392,
"loss": 3.4561,
"step": 83100
},
{
"epoch": 0.88,
"learning_rate": 0.0003190799637888423,
"loss": 3.4836,
"step": 83200
},
{
"epoch": 0.89,
"learning_rate": 0.00031865616264191313,
"loss": 3.51,
"step": 83300
},
{
"epoch": 0.89,
"learning_rate": 0.0003182321479950454,
"loss": 3.5249,
"step": 83400
},
{
"epoch": 0.89,
"learning_rate": 0.0003178079211667967,
"loss": 3.4827,
"step": 83500
},
{
"epoch": 0.89,
"learning_rate": 0.00031738348347638444,
"loss": 3.4877,
"step": 83600
},
{
"epoch": 0.89,
"learning_rate": 0.0003169588362436816,
"loss": 3.438,
"step": 83700
},
{
"epoch": 0.89,
"learning_rate": 0.0003165339807892129,
"loss": 3.412,
"step": 83800
},
{
"epoch": 0.89,
"learning_rate": 0.00031610891843415046,
"loss": 3.4433,
"step": 83900
},
{
"epoch": 0.89,
"learning_rate": 0.0003156836505003101,
"loss": 3.4302,
"step": 84000
},
{
"epoch": 0.89,
"learning_rate": 0.0003152581783101465,
"loss": 3.44,
"step": 84100
},
{
"epoch": 0.9,
"learning_rate": 0.0003148325031867498,
"loss": 3.4679,
"step": 84200
},
{
"epoch": 0.9,
"learning_rate": 0.00031440662645384115,
"loss": 3.5349,
"step": 84300
},
{
"epoch": 0.9,
"learning_rate": 0.0003139805494357685,
"loss": 3.4902,
"step": 84400
},
{
"epoch": 0.9,
"learning_rate": 0.00031355427345750286,
"loss": 3.4661,
"step": 84500
},
{
"epoch": 0.9,
"learning_rate": 0.0003131277998446338,
"loss": 3.5095,
"step": 84600
},
{
"epoch": 0.9,
"learning_rate": 0.0003127011299233656,
"loss": 3.4287,
"step": 84700
},
{
"epoch": 0.9,
"learning_rate": 0.00031227426502051267,
"loss": 3.5032,
"step": 84800
},
{
"epoch": 0.9,
"learning_rate": 0.0003118472064634961,
"loss": 3.4561,
"step": 84900
},
{
"epoch": 0.9,
"learning_rate": 0.00031141995558033915,
"loss": 3.4984,
"step": 85000
},
{
"epoch": 0.9,
"learning_rate": 0.0003109925136996631,
"loss": 3.4991,
"step": 85100
},
{
"epoch": 0.91,
"learning_rate": 0.00031056488215068295,
"loss": 3.4838,
"step": 85200
},
{
"epoch": 0.91,
"learning_rate": 0.00031013706226320386,
"loss": 3.5089,
"step": 85300
},
{
"epoch": 0.91,
"learning_rate": 0.0003097090553676165,
"loss": 3.5005,
"step": 85400
},
{
"epoch": 0.91,
"learning_rate": 0.0003092808627948931,
"loss": 3.4652,
"step": 85500
},
{
"epoch": 0.91,
"learning_rate": 0.00030885248587658336,
"loss": 3.4901,
"step": 85600
},
{
"epoch": 0.91,
"learning_rate": 0.0003084239259448101,
"loss": 3.5131,
"step": 85700
},
{
"epoch": 0.91,
"learning_rate": 0.0003079951843322653,
"loss": 3.4778,
"step": 85800
},
{
"epoch": 0.91,
"learning_rate": 0.0003075662623722059,
"loss": 3.4706,
"step": 85900
},
{
"epoch": 0.91,
"learning_rate": 0.0003071371613984498,
"loss": 3.4887,
"step": 86000
},
{
"epoch": 0.92,
"learning_rate": 0.0003067078827453715,
"loss": 3.4765,
"step": 86100
},
{
"epoch": 0.92,
"learning_rate": 0.00030627842774789797,
"loss": 3.5069,
"step": 86200
},
{
"epoch": 0.92,
"learning_rate": 0.0003058487977415046,
"loss": 3.4825,
"step": 86300
},
{
"epoch": 0.92,
"learning_rate": 0.0003054189940622109,
"loss": 3.45,
"step": 86400
},
{
"epoch": 0.92,
"learning_rate": 0.00030498901804657674,
"loss": 3.4677,
"step": 86500
},
{
"epoch": 0.92,
"learning_rate": 0.0003045588710316976,
"loss": 3.4663,
"step": 86600
},
{
"epoch": 0.92,
"learning_rate": 0.00030412855435520093,
"loss": 3.507,
"step": 86700
},
{
"epoch": 0.92,
"learning_rate": 0.0003036980693552415,
"loss": 3.461,
"step": 86800
},
{
"epoch": 0.92,
"learning_rate": 0.0003032674173704979,
"loss": 3.4856,
"step": 86900
},
{
"epoch": 0.92,
"learning_rate": 0.00030283659974016764,
"loss": 3.554,
"step": 87000
},
{
"epoch": 0.93,
"learning_rate": 0.00030240561780396364,
"loss": 3.5279,
"step": 87100
},
{
"epoch": 0.93,
"learning_rate": 0.00030197447290210945,
"loss": 3.4643,
"step": 87200
},
{
"epoch": 0.93,
"learning_rate": 0.0003015431663753357,
"loss": 3.5135,
"step": 87300
},
{
"epoch": 0.93,
"learning_rate": 0.0003011116995648753,
"loss": 3.5021,
"step": 87400
},
{
"epoch": 0.93,
"learning_rate": 0.00030068007381245994,
"loss": 3.5225,
"step": 87500
},
{
"epoch": 0.93,
"learning_rate": 0.0003002482904603153,
"loss": 3.4466,
"step": 87600
},
{
"epoch": 0.93,
"learning_rate": 0.00029981635085115727,
"loss": 3.4411,
"step": 87700
},
{
"epoch": 0.93,
"learning_rate": 0.00029938425632818766,
"loss": 3.4759,
"step": 87800
},
{
"epoch": 0.93,
"learning_rate": 0.00029895200823508997,
"loss": 3.4777,
"step": 87900
},
{
"epoch": 0.94,
"learning_rate": 0.0002985196079160252,
"loss": 3.4094,
"step": 88000
},
{
"epoch": 0.94,
"learning_rate": 0.00029808705671562796,
"loss": 3.4614,
"step": 88100
},
{
"epoch": 0.94,
"learning_rate": 0.00029765435597900187,
"loss": 3.4643,
"step": 88200
},
{
"epoch": 0.94,
"learning_rate": 0.0002972215070517154,
"loss": 3.4988,
"step": 88300
},
{
"epoch": 0.94,
"learning_rate": 0.00029678851127979826,
"loss": 3.4302,
"step": 88400
},
{
"epoch": 0.94,
"learning_rate": 0.0002963553700097364,
"loss": 3.505,
"step": 88500
},
{
"epoch": 0.94,
"learning_rate": 0.0002959220845884686,
"loss": 3.4842,
"step": 88600
},
{
"epoch": 0.94,
"learning_rate": 0.0002954886563633815,
"loss": 3.4964,
"step": 88700
},
{
"epoch": 0.94,
"learning_rate": 0.0002950550866823062,
"loss": 3.4869,
"step": 88800
},
{
"epoch": 0.94,
"learning_rate": 0.00029462137689351337,
"loss": 3.4682,
"step": 88900
},
{
"epoch": 0.95,
"learning_rate": 0.0002941875283457096,
"loss": 3.4562,
"step": 89000
},
{
"epoch": 0.95,
"learning_rate": 0.00029375354238803293,
"loss": 3.4402,
"step": 89100
},
{
"epoch": 0.95,
"learning_rate": 0.00029331942037004856,
"loss": 3.4809,
"step": 89200
},
{
"epoch": 0.95,
"learning_rate": 0.00029288516364174506,
"loss": 3.4746,
"step": 89300
},
{
"epoch": 0.95,
"learning_rate": 0.0002924507735535296,
"loss": 3.4674,
"step": 89400
},
{
"epoch": 0.95,
"learning_rate": 0.0002920162514562243,
"loss": 3.5435,
"step": 89500
},
{
"epoch": 0.95,
"learning_rate": 0.0002915815987010616,
"loss": 3.5372,
"step": 89600
},
{
"epoch": 0.95,
"learning_rate": 0.0002911468166396805,
"loss": 3.4674,
"step": 89700
},
{
"epoch": 0.95,
"learning_rate": 0.00029071190662412183,
"loss": 3.4573,
"step": 89800
},
{
"epoch": 0.96,
"learning_rate": 0.0002902768700068245,
"loss": 3.4869,
"step": 89900
},
{
"epoch": 0.96,
"learning_rate": 0.000289841708140621,
"loss": 3.521,
"step": 90000
},
{
"epoch": 0.96,
"learning_rate": 0.0002894064223787334,
"loss": 3.4807,
"step": 90100
},
{
"epoch": 0.96,
"learning_rate": 0.000288971014074769,
"loss": 3.4656,
"step": 90200
},
{
"epoch": 0.96,
"learning_rate": 0.00028853548458271616,
"loss": 3.4716,
"step": 90300
},
{
"epoch": 0.96,
"learning_rate": 0.00028809983525694016,
"loss": 3.5101,
"step": 90400
},
{
"epoch": 0.96,
"learning_rate": 0.00028766406745217875,
"loss": 3.4879,
"step": 90500
},
{
"epoch": 0.96,
"learning_rate": 0.0002872281825235385,
"loss": 3.5152,
"step": 90600
},
{
"epoch": 0.96,
"learning_rate": 0.0002867921818264897,
"loss": 3.4802,
"step": 90700
},
{
"epoch": 0.97,
"learning_rate": 0.00028635606671686297,
"loss": 3.4585,
"step": 90800
},
{
"epoch": 0.97,
"learning_rate": 0.0002859198385508447,
"loss": 3.4754,
"step": 90900
},
{
"epoch": 0.97,
"learning_rate": 0.00028548349868497266,
"loss": 3.4861,
"step": 91000
},
{
"epoch": 0.97,
"learning_rate": 0.00028504704847613215,
"loss": 3.4643,
"step": 91100
},
{
"epoch": 0.97,
"learning_rate": 0.00028461048928155166,
"loss": 3.4961,
"step": 91200
},
{
"epoch": 0.97,
"learning_rate": 0.00028417382245879836,
"loss": 3.5433,
"step": 91300
},
{
"epoch": 0.97,
"learning_rate": 0.00028373704936577427,
"loss": 3.5039,
"step": 91400
},
{
"epoch": 0.97,
"learning_rate": 0.0002833001713607119,
"loss": 3.5384,
"step": 91500
},
{
"epoch": 0.97,
"learning_rate": 0.00028286318980216986,
"loss": 3.4986,
"step": 91600
},
{
"epoch": 0.97,
"learning_rate": 0.000282426106049029,
"loss": 3.4655,
"step": 91700
},
{
"epoch": 0.98,
"learning_rate": 0.0002819889214604877,
"loss": 3.4605,
"step": 91800
},
{
"epoch": 0.98,
"learning_rate": 0.0002815516373960582,
"loss": 3.5204,
"step": 91900
},
{
"epoch": 0.98,
"learning_rate": 0.00028111425521556174,
"loss": 3.4644,
"step": 92000
},
{
"epoch": 0.98,
"learning_rate": 0.000280676776279125,
"loss": 3.4554,
"step": 92100
},
{
"epoch": 0.98,
"learning_rate": 0.00028023920194717534,
"loss": 3.4838,
"step": 92200
},
{
"epoch": 0.98,
"learning_rate": 0.0002798015335804369,
"loss": 3.4925,
"step": 92300
},
{
"epoch": 0.98,
"learning_rate": 0.00027936377253992594,
"loss": 3.5146,
"step": 92400
},
{
"epoch": 0.98,
"learning_rate": 0.0002789259201869474,
"loss": 3.4192,
"step": 92500
},
{
"epoch": 0.98,
"learning_rate": 0.00027848797788308983,
"loss": 3.4699,
"step": 92600
},
{
"epoch": 0.99,
"learning_rate": 0.00027804994699022153,
"loss": 3.5011,
"step": 92700
},
{
"epoch": 0.99,
"learning_rate": 0.00027761182887048633,
"loss": 3.4562,
"step": 92800
},
{
"epoch": 0.99,
"learning_rate": 0.0002771736248862994,
"loss": 3.5495,
"step": 92900
},
{
"epoch": 0.99,
"learning_rate": 0.00027673533640034276,
"loss": 3.4774,
"step": 93000
},
{
"epoch": 0.99,
"learning_rate": 0.00027629696477556135,
"loss": 3.4738,
"step": 93100
},
{
"epoch": 0.99,
"learning_rate": 0.00027585851137515855,
"loss": 3.4803,
"step": 93200
},
{
"epoch": 0.99,
"learning_rate": 0.00027541997756259196,
"loss": 3.4923,
"step": 93300
},
{
"epoch": 0.99,
"learning_rate": 0.00027498136470156955,
"loss": 3.4801,
"step": 93400
},
{
"epoch": 0.99,
"learning_rate": 0.00027454267415604464,
"loss": 3.456,
"step": 93500
},
{
"epoch": 0.99,
"learning_rate": 0.00027410390729021273,
"loss": 3.5121,
"step": 93600
},
{
"epoch": 1.0,
"learning_rate": 0.000273665065468506,
"loss": 3.4838,
"step": 93700
},
{
"epoch": 1.0,
"learning_rate": 0.0002732261500555901,
"loss": 3.4894,
"step": 93800
},
{
"epoch": 1.0,
"learning_rate": 0.0002727871624163596,
"loss": 3.4759,
"step": 93900
},
{
"epoch": 1.0,
"learning_rate": 0.0002723481039159334,
"loss": 3.541,
"step": 94000
},
{
"epoch": 1.0,
"learning_rate": 0.0002719089759196509,
"loss": 3.467,
"step": 94100
},
{
"epoch": 1.0,
"learning_rate": 0.0002714697797930675,
"loss": 3.4861,
"step": 94200
},
{
"epoch": 1.0,
"learning_rate": 0.00027103051690195083,
"loss": 3.4222,
"step": 94300
},
{
"epoch": 1.0,
"learning_rate": 0.0002705911886122757,
"loss": 3.4357,
"step": 94400
},
{
"epoch": 1.0,
"learning_rate": 0.00027015179629022034,
"loss": 3.4971,
"step": 94500
},
{
"epoch": 1.01,
"learning_rate": 0.0002697123413021624,
"loss": 3.4512,
"step": 94600
},
{
"epoch": 1.01,
"learning_rate": 0.00026927282501467423,
"loss": 3.4607,
"step": 94700
},
{
"epoch": 1.01,
"learning_rate": 0.00026883324879451863,
"loss": 3.4858,
"step": 94800
},
{
"epoch": 1.01,
"learning_rate": 0.00026839361400864505,
"loss": 3.495,
"step": 94900
},
{
"epoch": 1.01,
"learning_rate": 0.000267953922024185,
"loss": 3.4637,
"step": 95000
},
{
"epoch": 1.01,
"learning_rate": 0.0002675141742084477,
"loss": 3.4934,
"step": 95100
},
{
"epoch": 1.01,
"learning_rate": 0.0002670743719289161,
"loss": 3.4604,
"step": 95200
},
{
"epoch": 1.01,
"learning_rate": 0.00026663451655324253,
"loss": 3.5176,
"step": 95300
},
{
"epoch": 1.01,
"learning_rate": 0.0002661946094492446,
"loss": 3.5227,
"step": 95400
},
{
"epoch": 1.02,
"learning_rate": 0.0002657546519849003,
"loss": 3.5155,
"step": 95500
},
{
"epoch": 1.02,
"learning_rate": 0.00026531464552834465,
"loss": 3.5119,
"step": 95600
},
{
"epoch": 1.02,
"learning_rate": 0.0002648745914478649,
"loss": 3.5198,
"step": 95700
},
{
"epoch": 1.02,
"learning_rate": 0.0002644344911118965,
"loss": 3.4797,
"step": 95800
},
{
"epoch": 1.02,
"learning_rate": 0.00026399434588901836,
"loss": 3.4992,
"step": 95900
},
{
"epoch": 1.02,
"learning_rate": 0.00026355415714794954,
"loss": 3.544,
"step": 96000
},
{
"epoch": 1.02,
"learning_rate": 0.00026311392625754385,
"loss": 3.5146,
"step": 96100
},
{
"epoch": 1.02,
"learning_rate": 0.0002626736545867867,
"loss": 3.4587,
"step": 96200
},
{
"epoch": 1.02,
"learning_rate": 0.0002622333435047899,
"loss": 3.4642,
"step": 96300
},
{
"epoch": 1.02,
"learning_rate": 0.000261792994380788,
"loss": 3.5103,
"step": 96400
},
{
"epoch": 1.03,
"learning_rate": 0.000261352608584134,
"loss": 3.4636,
"step": 96500
},
{
"epoch": 1.03,
"learning_rate": 0.0002609121874842945,
"loss": 3.4958,
"step": 96600
},
{
"epoch": 1.03,
"learning_rate": 0.0002604717324508464,
"loss": 3.4457,
"step": 96700
},
{
"epoch": 1.03,
"learning_rate": 0.00026003124485347184,
"loss": 3.48,
"step": 96800
},
{
"epoch": 1.03,
"learning_rate": 0.00025959072606195424,
"loss": 3.5188,
"step": 96900
},
{
"epoch": 1.03,
"learning_rate": 0.0002591501774461739,
"loss": 3.4497,
"step": 97000
},
{
"epoch": 1.03,
"learning_rate": 0.00025870960037610417,
"loss": 3.5086,
"step": 97100
},
{
"epoch": 1.03,
"learning_rate": 0.00025826899622180674,
"loss": 3.5139,
"step": 97200
},
{
"epoch": 1.03,
"learning_rate": 0.0002578283663534275,
"loss": 3.4254,
"step": 97300
},
{
"epoch": 1.04,
"learning_rate": 0.00025738771214119224,
"loss": 3.5688,
"step": 97400
},
{
"epoch": 1.04,
"learning_rate": 0.00025694703495540255,
"loss": 3.4497,
"step": 97500
},
{
"epoch": 1.04,
"learning_rate": 0.00025650633616643143,
"loss": 3.4947,
"step": 97600
},
{
"epoch": 1.04,
"learning_rate": 0.00025606561714471915,
"loss": 3.4521,
"step": 97700
},
{
"epoch": 1.04,
"learning_rate": 0.00025562487926076877,
"loss": 3.4697,
"step": 97800
},
{
"epoch": 1.04,
"learning_rate": 0.000255184123885142,
"loss": 3.4379,
"step": 97900
},
{
"epoch": 1.04,
"learning_rate": 0.0002547433523884551,
"loss": 3.528,
"step": 98000
},
{
"epoch": 1.04,
"learning_rate": 0.0002543025661413742,
"loss": 3.4807,
"step": 98100
},
{
"epoch": 1.04,
"learning_rate": 0.00025386176651461163,
"loss": 3.4575,
"step": 98200
},
{
"epoch": 1.04,
"learning_rate": 0.00025342095487892097,
"loss": 3.4028,
"step": 98300
},
{
"epoch": 1.05,
"learning_rate": 0.0002529801326050935,
"loss": 3.4966,
"step": 98400
},
{
"epoch": 1.05,
"learning_rate": 0.00025253930106395337,
"loss": 3.4547,
"step": 98500
},
{
"epoch": 1.05,
"learning_rate": 0.00025209846162635343,
"loss": 3.4785,
"step": 98600
},
{
"epoch": 1.05,
"learning_rate": 0.00025165761566317134,
"loss": 3.4614,
"step": 98700
},
{
"epoch": 1.05,
"learning_rate": 0.00025121676454530506,
"loss": 3.4976,
"step": 98800
},
{
"epoch": 1.05,
"learning_rate": 0.0002507759096436684,
"loss": 3.49,
"step": 98900
},
{
"epoch": 1.05,
"learning_rate": 0.00025033505232918696,
"loss": 3.4812,
"step": 99000
},
{
"epoch": 1.05,
"learning_rate": 0.0002498941939727939,
"loss": 3.4858,
"step": 99100
},
{
"epoch": 1.05,
"learning_rate": 0.0002494533359454257,
"loss": 3.4524,
"step": 99200
},
{
"epoch": 1.06,
"learning_rate": 0.00024901247961801767,
"loss": 3.4197,
"step": 99300
},
{
"epoch": 1.06,
"learning_rate": 0.00024857162636149983,
"loss": 3.4782,
"step": 99400
},
{
"epoch": 1.06,
"learning_rate": 0.00024813077754679285,
"loss": 3.5059,
"step": 99500
},
{
"epoch": 1.06,
"learning_rate": 0.00024768993454480335,
"loss": 3.5099,
"step": 99600
},
{
"epoch": 1.06,
"learning_rate": 0.00024724909872642,
"loss": 3.4788,
"step": 99700
},
{
"epoch": 1.06,
"learning_rate": 0.00024680827146250915,
"loss": 3.4652,
"step": 99800
},
{
"epoch": 1.06,
"learning_rate": 0.0002463674541239104,
"loss": 3.4577,
"step": 99900
},
{
"epoch": 1.06,
"learning_rate": 0.00024592664808143264,
"loss": 3.4207,
"step": 100000
},
{
"epoch": 1.06,
"learning_rate": 0.0002454858547058497,
"loss": 3.4953,
"step": 100100
},
{
"epoch": 1.07,
"learning_rate": 0.00024504507536789573,
"loss": 3.5048,
"step": 100200
},
{
"epoch": 1.07,
"learning_rate": 0.0002446043114382615,
"loss": 3.4709,
"step": 100300
},
{
"epoch": 1.07,
"learning_rate": 0.00024416356428758984,
"loss": 3.4563,
"step": 100400
},
{
"epoch": 1.07,
"learning_rate": 0.0002437228352864711,
"loss": 3.4406,
"step": 100500
},
{
"epoch": 1.07,
"learning_rate": 0.00024328212580543963,
"loss": 3.4617,
"step": 100600
},
{
"epoch": 1.07,
"learning_rate": 0.0002428414372149687,
"loss": 3.5065,
"step": 100700
},
{
"epoch": 1.07,
"learning_rate": 0.00024240077088546688,
"loss": 3.445,
"step": 100800
},
{
"epoch": 1.07,
"learning_rate": 0.00024196012818727334,
"loss": 3.469,
"step": 100900
},
{
"epoch": 1.07,
"learning_rate": 0.00024151951049065402,
"loss": 3.5099,
"step": 101000
},
{
"epoch": 1.07,
"learning_rate": 0.00024107891916579674,
"loss": 3.5347,
"step": 101100
},
{
"epoch": 1.08,
"learning_rate": 0.00024063835558280766,
"loss": 3.4266,
"step": 101200
},
{
"epoch": 1.08,
"learning_rate": 0.00024019782111170637,
"loss": 3.51,
"step": 101300
},
{
"epoch": 1.08,
"learning_rate": 0.00023975731712242216,
"loss": 3.5066,
"step": 101400
},
{
"epoch": 1.08,
"learning_rate": 0.00023931684498478947,
"loss": 3.484,
"step": 101500
},
{
"epoch": 1.08,
"learning_rate": 0.0002388764060685436,
"loss": 3.4547,
"step": 101600
},
{
"epoch": 1.08,
"learning_rate": 0.0002384360017433167,
"loss": 3.4793,
"step": 101700
},
{
"epoch": 1.08,
"learning_rate": 0.00023799563337863314,
"loss": 3.4553,
"step": 101800
},
{
"epoch": 1.08,
"learning_rate": 0.0002375553023439056,
"loss": 3.498,
"step": 101900
},
{
"epoch": 1.08,
"learning_rate": 0.00023711501000843078,
"loss": 3.4997,
"step": 102000
},
{
"epoch": 1.09,
"learning_rate": 0.00023667475774138491,
"loss": 3.4864,
"step": 102100
},
{
"epoch": 1.09,
"learning_rate": 0.0002362345469118195,
"loss": 3.4955,
"step": 102200
},
{
"epoch": 1.09,
"learning_rate": 0.00023579437888865748,
"loss": 3.4623,
"step": 102300
},
{
"epoch": 1.09,
"learning_rate": 0.0002353542550406884,
"loss": 3.491,
"step": 102400
},
{
"epoch": 1.09,
"learning_rate": 0.00023491417673656456,
"loss": 3.4934,
"step": 102500
},
{
"epoch": 1.09,
"learning_rate": 0.00023447414534479675,
"loss": 3.4448,
"step": 102600
},
{
"epoch": 1.09,
"learning_rate": 0.0002340341622337496,
"loss": 3.5072,
"step": 102700
},
{
"epoch": 1.09,
"learning_rate": 0.0002335942287716379,
"loss": 3.4392,
"step": 102800
},
{
"epoch": 1.09,
"learning_rate": 0.00023315434632652162,
"loss": 3.5145,
"step": 102900
},
{
"epoch": 1.09,
"learning_rate": 0.0002327145162663027,
"loss": 3.5082,
"step": 103000
},
{
"epoch": 1.1,
"learning_rate": 0.0002322747399587197,
"loss": 3.4582,
"step": 103100
},
{
"epoch": 1.1,
"learning_rate": 0.0002318350187713442,
"loss": 3.4514,
"step": 103200
},
{
"epoch": 1.1,
"learning_rate": 0.0002313953540715763,
"loss": 3.4805,
"step": 103300
},
{
"epoch": 1.1,
"learning_rate": 0.00023095574722664053,
"loss": 3.464,
"step": 103400
},
{
"epoch": 1.1,
"learning_rate": 0.00023051619960358136,
"loss": 3.5032,
"step": 103500
},
{
"epoch": 1.1,
"learning_rate": 0.00023007671256925928,
"loss": 3.4814,
"step": 103600
},
{
"epoch": 1.1,
"learning_rate": 0.00022963728749034632,
"loss": 3.4803,
"step": 103700
},
{
"epoch": 1.1,
"learning_rate": 0.0002291979257333217,
"loss": 3.5372,
"step": 103800
},
{
"epoch": 1.1,
"learning_rate": 0.0002287586286644679,
"loss": 3.4892,
"step": 103900
},
{
"epoch": 1.11,
"learning_rate": 0.0002283193976498662,
"loss": 3.4769,
"step": 104000
},
{
"epoch": 1.11,
"learning_rate": 0.0002278802340553925,
"loss": 3.4673,
"step": 104100
},
{
"epoch": 1.11,
"learning_rate": 0.00022744113924671287,
"loss": 3.4254,
"step": 104200
},
{
"epoch": 1.11,
"learning_rate": 0.0002270021145892797,
"loss": 3.4444,
"step": 104300
},
{
"epoch": 1.11,
"learning_rate": 0.00022656316144832708,
"loss": 3.4923,
"step": 104400
},
{
"epoch": 1.11,
"learning_rate": 0.00022612428118886683,
"loss": 3.5175,
"step": 104500
},
{
"epoch": 1.11,
"learning_rate": 0.00022568547517568395,
"loss": 3.4738,
"step": 104600
},
{
"epoch": 1.11,
"learning_rate": 0.0002252467447733327,
"loss": 3.4461,
"step": 104700
},
{
"epoch": 1.11,
"learning_rate": 0.00022480809134613227,
"loss": 3.4521,
"step": 104800
},
{
"epoch": 1.12,
"learning_rate": 0.00022436951625816228,
"loss": 3.4423,
"step": 104900
},
{
"epoch": 1.12,
"learning_rate": 0.00022393102087325884,
"loss": 3.5038,
"step": 105000
},
{
"epoch": 1.12,
"learning_rate": 0.0002234926065550103,
"loss": 3.5472,
"step": 105100
},
{
"epoch": 1.12,
"learning_rate": 0.0002230542746667528,
"loss": 3.4334,
"step": 105200
},
{
"epoch": 1.12,
"learning_rate": 0.0002226160265715662,
"loss": 3.4684,
"step": 105300
},
{
"epoch": 1.12,
"learning_rate": 0.00022217786363226978,
"loss": 3.4708,
"step": 105400
},
{
"epoch": 1.12,
"learning_rate": 0.0002217397872114179,
"loss": 3.4876,
"step": 105500
},
{
"epoch": 1.12,
"learning_rate": 0.00022130179867129606,
"loss": 3.4399,
"step": 105600
},
{
"epoch": 1.12,
"learning_rate": 0.00022086389937391634,
"loss": 3.4705,
"step": 105700
},
{
"epoch": 1.12,
"learning_rate": 0.00022042609068101342,
"loss": 3.4645,
"step": 105800
},
{
"epoch": 1.13,
"learning_rate": 0.00021998837395404013,
"loss": 3.4623,
"step": 105900
},
{
"epoch": 1.13,
"learning_rate": 0.00021955075055416322,
"loss": 3.4495,
"step": 106000
},
{
"epoch": 1.13,
"learning_rate": 0.00021911322184225957,
"loss": 3.4787,
"step": 106100
},
{
"epoch": 1.13,
"learning_rate": 0.00021867578917891128,
"loss": 3.4702,
"step": 106200
},
{
"epoch": 1.13,
"learning_rate": 0.00021823845392440183,
"loss": 3.4817,
"step": 106300
},
{
"epoch": 1.13,
"learning_rate": 0.0002178012174387119,
"loss": 3.5035,
"step": 106400
},
{
"epoch": 1.13,
"learning_rate": 0.00021736408108151496,
"loss": 3.4766,
"step": 106500
},
{
"epoch": 1.13,
"learning_rate": 0.00021692704621217298,
"loss": 3.5066,
"step": 106600
},
{
"epoch": 1.13,
"learning_rate": 0.00021649011418973266,
"loss": 3.5211,
"step": 106700
},
{
"epoch": 1.14,
"learning_rate": 0.0002160532863729205,
"loss": 3.4887,
"step": 106800
},
{
"epoch": 1.14,
"learning_rate": 0.00021561656412013924,
"loss": 3.516,
"step": 106900
},
{
"epoch": 1.14,
"learning_rate": 0.00021517994878946314,
"loss": 3.4538,
"step": 107000
},
{
"epoch": 1.14,
"learning_rate": 0.00021474344173863424,
"loss": 3.4798,
"step": 107100
},
{
"epoch": 1.14,
"learning_rate": 0.00021430704432505755,
"loss": 3.497,
"step": 107200
},
{
"epoch": 1.14,
"learning_rate": 0.00021387075790579735,
"loss": 3.5249,
"step": 107300
},
{
"epoch": 1.14,
"learning_rate": 0.0002134345838375726,
"loss": 3.5129,
"step": 107400
},
{
"epoch": 1.14,
"learning_rate": 0.00021299852347675302,
"loss": 3.5284,
"step": 107500
},
{
"epoch": 1.14,
"learning_rate": 0.00021256257817935475,
"loss": 3.423,
"step": 107600
},
{
"epoch": 1.14,
"learning_rate": 0.0002121267493010359,
"loss": 3.4436,
"step": 107700
},
{
"epoch": 1.15,
"learning_rate": 0.0002116910381970929,
"loss": 3.4876,
"step": 107800
},
{
"epoch": 1.15,
"learning_rate": 0.00021125544622245553,
"loss": 3.4873,
"step": 107900
},
{
"epoch": 1.15,
"learning_rate": 0.00021081997473168344,
"loss": 3.4589,
"step": 108000
},
{
"epoch": 1.15,
"learning_rate": 0.0002103846250789615,
"loss": 3.4876,
"step": 108100
},
{
"epoch": 1.15,
"learning_rate": 0.00020994939861809574,
"loss": 3.5278,
"step": 108200
},
{
"epoch": 1.15,
"learning_rate": 0.00020951429670250897,
"loss": 3.4383,
"step": 108300
},
{
"epoch": 1.15,
"learning_rate": 0.0002090793206852369,
"loss": 3.4604,
"step": 108400
},
{
"epoch": 1.15,
"learning_rate": 0.00020864447191892344,
"loss": 3.4604,
"step": 108500
},
{
"epoch": 1.15,
"learning_rate": 0.0002082097517558171,
"loss": 3.4143,
"step": 108600
},
{
"epoch": 1.16,
"learning_rate": 0.0002077751615477664,
"loss": 3.4799,
"step": 108700
},
{
"epoch": 1.16,
"learning_rate": 0.00020734070264621557,
"loss": 3.5034,
"step": 108800
},
{
"epoch": 1.16,
"learning_rate": 0.00020690637640220072,
"loss": 3.4856,
"step": 108900
},
{
"epoch": 1.16,
"learning_rate": 0.00020647218416634512,
"loss": 3.4974,
"step": 109000
},
{
"epoch": 1.16,
"learning_rate": 0.0002060381272888559,
"loss": 3.4974,
"step": 109100
},
{
"epoch": 1.16,
"learning_rate": 0.00020560420711951866,
"loss": 3.4761,
"step": 109200
},
{
"epoch": 1.16,
"learning_rate": 0.00020517042500769426,
"loss": 3.4425,
"step": 109300
},
{
"epoch": 1.16,
"learning_rate": 0.00020473678230231398,
"loss": 3.4528,
"step": 109400
},
{
"epoch": 1.16,
"learning_rate": 0.00020430328035187585,
"loss": 3.443,
"step": 109500
},
{
"epoch": 1.17,
"learning_rate": 0.00020386992050443992,
"loss": 3.4533,
"step": 109600
},
{
"epoch": 1.17,
"learning_rate": 0.00020343670410762456,
"loss": 3.4981,
"step": 109700
},
{
"epoch": 1.17,
"learning_rate": 0.00020300363250860205,
"loss": 3.4316,
"step": 109800
},
{
"epoch": 1.17,
"learning_rate": 0.00020257070705409415,
"loss": 3.4337,
"step": 109900
},
{
"epoch": 1.17,
"learning_rate": 0.0002021379290903684,
"loss": 3.49,
"step": 110000
},
{
"epoch": 1.17,
"learning_rate": 0.00020170529996323366,
"loss": 3.4601,
"step": 110100
},
{
"epoch": 1.17,
"learning_rate": 0.00020127282101803593,
"loss": 3.4827,
"step": 110200
},
{
"epoch": 1.17,
"learning_rate": 0.00020084049359965402,
"loss": 3.4761,
"step": 110300
},
{
"epoch": 1.17,
"learning_rate": 0.00020040831905249586,
"loss": 3.4625,
"step": 110400
},
{
"epoch": 1.17,
"learning_rate": 0.00019997629872049366,
"loss": 3.4822,
"step": 110500
},
{
"epoch": 1.18,
"learning_rate": 0.00019954443394710035,
"loss": 3.5124,
"step": 110600
},
{
"epoch": 1.18,
"learning_rate": 0.00019911272607528484,
"loss": 3.4623,
"step": 110700
},
{
"epoch": 1.18,
"learning_rate": 0.0001986811764475284,
"loss": 3.4704,
"step": 110800
},
{
"epoch": 1.18,
"learning_rate": 0.00019824978640582013,
"loss": 3.5546,
"step": 110900
},
{
"epoch": 1.18,
"learning_rate": 0.00019781855729165265,
"loss": 3.4981,
"step": 111000
},
{
"epoch": 1.18,
"learning_rate": 0.00019738749044601847,
"loss": 3.463,
"step": 111100
},
{
"epoch": 1.18,
"learning_rate": 0.0001969565872094053,
"loss": 3.4255,
"step": 111200
},
{
"epoch": 1.18,
"learning_rate": 0.00019652584892179215,
"loss": 3.4782,
"step": 111300
},
{
"epoch": 1.18,
"learning_rate": 0.00019609527692264497,
"loss": 3.4629,
"step": 111400
},
{
"epoch": 1.19,
"learning_rate": 0.00019566487255091278,
"loss": 3.4329,
"step": 111500
},
{
"epoch": 1.19,
"learning_rate": 0.00019523463714502315,
"loss": 3.4519,
"step": 111600
},
{
"epoch": 1.19,
"learning_rate": 0.00019480457204287837,
"loss": 3.4992,
"step": 111700
},
{
"epoch": 1.19,
"learning_rate": 0.000194374678581851,
"loss": 3.5083,
"step": 111800
},
{
"epoch": 1.19,
"learning_rate": 0.00019394495809877996,
"loss": 3.5005,
"step": 111900
},
{
"epoch": 1.19,
"learning_rate": 0.0001935154119299663,
"loss": 3.5134,
"step": 112000
},
{
"epoch": 1.19,
"learning_rate": 0.0001930860414111687,
"loss": 3.4555,
"step": 112100
},
{
"epoch": 1.19,
"learning_rate": 0.00019265684787760006,
"loss": 3.4719,
"step": 112200
},
{
"epoch": 1.19,
"learning_rate": 0.00019222783266392265,
"loss": 3.5005,
"step": 112300
},
{
"epoch": 1.19,
"learning_rate": 0.0001917989971042443,
"loss": 3.4918,
"step": 112400
},
{
"epoch": 1.2,
"learning_rate": 0.00019137034253211403,
"loss": 3.4612,
"step": 112500
},
{
"epoch": 1.2,
"learning_rate": 0.00019094187028051825,
"loss": 3.4352,
"step": 112600
},
{
"epoch": 1.2,
"learning_rate": 0.00019051358168187618,
"loss": 3.4624,
"step": 112700
},
{
"epoch": 1.2,
"learning_rate": 0.00019008547806803622,
"loss": 3.4301,
"step": 112800
},
{
"epoch": 1.2,
"learning_rate": 0.00018965756077027118,
"loss": 3.4142,
"step": 112900
},
{
"epoch": 1.2,
"learning_rate": 0.00018922983111927484,
"loss": 3.4817,
"step": 113000
},
{
"epoch": 1.2,
"learning_rate": 0.00018880229044515711,
"loss": 3.4376,
"step": 113100
},
{
"epoch": 1.2,
"learning_rate": 0.00018837494007744064,
"loss": 3.4768,
"step": 113200
},
{
"epoch": 1.2,
"learning_rate": 0.00018794778134505587,
"loss": 3.4391,
"step": 113300
},
{
"epoch": 1.21,
"learning_rate": 0.00018752081557633755,
"loss": 3.4482,
"step": 113400
},
{
"epoch": 1.21,
"learning_rate": 0.00018709404409902042,
"loss": 3.4682,
"step": 113500
},
{
"epoch": 1.21,
"learning_rate": 0.00018666746824023476,
"loss": 3.4296,
"step": 113600
},
{
"epoch": 1.21,
"learning_rate": 0.00018624108932650287,
"loss": 3.4653,
"step": 113700
},
{
"epoch": 1.21,
"learning_rate": 0.00018581490868373426,
"loss": 3.4661,
"step": 113800
},
{
"epoch": 1.21,
"learning_rate": 0.00018538892763722225,
"loss": 3.497,
"step": 113900
},
{
"epoch": 1.21,
"learning_rate": 0.00018496314751163917,
"loss": 3.4991,
"step": 114000
},
{
"epoch": 1.21,
"learning_rate": 0.0001845375696310327,
"loss": 3.4538,
"step": 114100
},
{
"epoch": 1.21,
"learning_rate": 0.00018411219531882156,
"loss": 3.5194,
"step": 114200
},
{
"epoch": 1.21,
"learning_rate": 0.00018368702589779154,
"loss": 3.472,
"step": 114300
},
{
"epoch": 1.22,
"learning_rate": 0.00018326206269009106,
"loss": 3.4382,
"step": 114400
},
{
"epoch": 1.22,
"learning_rate": 0.0001828373070172275,
"loss": 3.4555,
"step": 114500
},
{
"epoch": 1.22,
"learning_rate": 0.0001824127602000626,
"loss": 3.4512,
"step": 114600
},
{
"epoch": 1.22,
"learning_rate": 0.00018198842355880896,
"loss": 3.4754,
"step": 114700
},
{
"epoch": 1.22,
"learning_rate": 0.0001815642984130254,
"loss": 3.424,
"step": 114800
},
{
"epoch": 1.22,
"learning_rate": 0.00018114038608161299,
"loss": 3.4234,
"step": 114900
},
{
"epoch": 1.22,
"learning_rate": 0.00018071668788281121,
"loss": 3.4654,
"step": 115000
},
{
"epoch": 1.22,
"learning_rate": 0.00018029320513419334,
"loss": 3.4795,
"step": 115100
},
{
"epoch": 1.22,
"learning_rate": 0.00017986993915266314,
"loss": 3.4652,
"step": 115200
},
{
"epoch": 1.23,
"learning_rate": 0.00017944689125444992,
"loss": 3.4731,
"step": 115300
},
{
"epoch": 1.23,
"learning_rate": 0.00017902406275510497,
"loss": 3.4734,
"step": 115400
},
{
"epoch": 1.23,
"learning_rate": 0.00017860145496949718,
"loss": 3.5131,
"step": 115500
},
{
"epoch": 1.23,
"learning_rate": 0.00017817906921180926,
"loss": 3.4612,
"step": 115600
},
{
"epoch": 1.23,
"learning_rate": 0.0001777569067955333,
"loss": 3.466,
"step": 115700
},
{
"epoch": 1.23,
"learning_rate": 0.00017733496903346704,
"loss": 3.4542,
"step": 115800
},
{
"epoch": 1.23,
"learning_rate": 0.00017691325723770957,
"loss": 3.4701,
"step": 115900
},
{
"epoch": 1.23,
"learning_rate": 0.00017649177271965717,
"loss": 3.488,
"step": 116000
},
{
"epoch": 1.23,
"learning_rate": 0.00017607051678999945,
"loss": 3.5502,
"step": 116100
},
{
"epoch": 1.24,
"learning_rate": 0.0001756494907587152,
"loss": 3.4421,
"step": 116200
},
{
"epoch": 1.24,
"learning_rate": 0.0001752286959350684,
"loss": 3.4264,
"step": 116300
},
{
"epoch": 1.24,
"learning_rate": 0.00017480813362760373,
"loss": 3.4817,
"step": 116400
},
{
"epoch": 1.24,
"learning_rate": 0.00017438780514414308,
"loss": 3.4756,
"step": 116500
},
{
"epoch": 1.24,
"learning_rate": 0.0001739677117917811,
"loss": 3.4992,
"step": 116600
},
{
"epoch": 1.24,
"learning_rate": 0.00017354785487688139,
"loss": 3.4952,
"step": 116700
},
{
"epoch": 1.24,
"learning_rate": 0.000173128235705072,
"loss": 3.4887,
"step": 116800
},
{
"epoch": 1.24,
"learning_rate": 0.00017270885558124204,
"loss": 3.4912,
"step": 116900
},
{
"epoch": 1.24,
"learning_rate": 0.00017228971580953708,
"loss": 3.4863,
"step": 117000
},
{
"epoch": 1.24,
"learning_rate": 0.0001718708176933551,
"loss": 3.4861,
"step": 117100
},
{
"epoch": 1.25,
"learning_rate": 0.000171452162535343,
"loss": 3.5107,
"step": 117200
},
{
"epoch": 1.25,
"learning_rate": 0.00017103375163739183,
"loss": 3.4879,
"step": 117300
},
{
"epoch": 1.25,
"learning_rate": 0.00017061558630063317,
"loss": 3.4812,
"step": 117400
},
{
"epoch": 1.25,
"learning_rate": 0.00017019766782543495,
"loss": 3.4672,
"step": 117500
},
{
"epoch": 1.25,
"learning_rate": 0.00016977999751139754,
"loss": 3.4528,
"step": 117600
},
{
"epoch": 1.25,
"learning_rate": 0.0001693625766573494,
"loss": 3.5106,
"step": 117700
},
{
"epoch": 1.25,
"learning_rate": 0.00016894540656134345,
"loss": 3.4909,
"step": 117800
},
{
"epoch": 1.25,
"learning_rate": 0.00016852848852065273,
"loss": 3.5071,
"step": 117900
},
{
"epoch": 1.25,
"learning_rate": 0.00016811182383176643,
"loss": 3.5112,
"step": 118000
},
{
"epoch": 1.26,
"learning_rate": 0.00016769541379038595,
"loss": 3.4903,
"step": 118100
},
{
"epoch": 1.26,
"learning_rate": 0.0001672792596914209,
"loss": 3.4839,
"step": 118200
},
{
"epoch": 1.26,
"learning_rate": 0.00016686336282898485,
"loss": 3.4802,
"step": 118300
},
{
"epoch": 1.26,
"learning_rate": 0.0001664477244963914,
"loss": 3.4494,
"step": 118400
},
{
"epoch": 1.26,
"learning_rate": 0.0001660323459861504,
"loss": 3.4921,
"step": 118500
},
{
"epoch": 1.26,
"learning_rate": 0.00016561722858996354,
"loss": 3.5153,
"step": 118600
},
{
"epoch": 1.26,
"learning_rate": 0.00016520237359872068,
"loss": 3.4712,
"step": 118700
},
{
"epoch": 1.26,
"learning_rate": 0.00016478778230249544,
"loss": 3.4254,
"step": 118800
},
{
"epoch": 1.26,
"learning_rate": 0.00016437345599054176,
"loss": 3.4655,
"step": 118900
},
{
"epoch": 1.26,
"learning_rate": 0.00016395939595128926,
"loss": 3.4751,
"step": 119000
},
{
"epoch": 1.27,
"learning_rate": 0.00016354560347233972,
"loss": 3.4887,
"step": 119100
},
{
"epoch": 1.27,
"learning_rate": 0.0001631320798404627,
"loss": 3.4719,
"step": 119200
},
{
"epoch": 1.27,
"learning_rate": 0.0001627188263415921,
"loss": 3.4599,
"step": 119300
},
{
"epoch": 1.27,
"learning_rate": 0.00016230584426082134,
"loss": 3.4529,
"step": 119400
},
{
"epoch": 1.27,
"learning_rate": 0.0001618931348824001,
"loss": 3.4712,
"step": 119500
},
{
"epoch": 1.27,
"learning_rate": 0.00016148069948972995,
"loss": 3.4147,
"step": 119600
},
{
"epoch": 1.27,
"learning_rate": 0.0001610685393653604,
"loss": 3.4697,
"step": 119700
},
{
"epoch": 1.27,
"learning_rate": 0.00016065665579098503,
"loss": 3.522,
"step": 119800
},
{
"epoch": 1.27,
"learning_rate": 0.0001602450500474374,
"loss": 3.5019,
"step": 119900
},
{
"epoch": 1.28,
"learning_rate": 0.00015983372341468716,
"loss": 3.5104,
"step": 120000
},
{
"epoch": 1.28,
"learning_rate": 0.00015942267717183588,
"loss": 3.48,
"step": 120100
},
{
"epoch": 1.28,
"learning_rate": 0.00015901191259711322,
"loss": 3.4844,
"step": 120200
},
{
"epoch": 1.28,
"learning_rate": 0.00015860143096787317,
"loss": 3.4789,
"step": 120300
},
{
"epoch": 1.28,
"learning_rate": 0.00015819123356058961,
"loss": 3.4535,
"step": 120400
},
{
"epoch": 1.28,
"learning_rate": 0.00015778132165085264,
"loss": 3.4639,
"step": 120500
},
{
"epoch": 1.28,
"learning_rate": 0.00015737169651336446,
"loss": 3.5013,
"step": 120600
},
{
"epoch": 1.28,
"learning_rate": 0.0001569623594219357,
"loss": 3.4551,
"step": 120700
},
{
"epoch": 1.28,
"learning_rate": 0.00015655331164948107,
"loss": 3.4434,
"step": 120800
},
{
"epoch": 1.29,
"learning_rate": 0.00015614455446801573,
"loss": 3.4543,
"step": 120900
},
{
"epoch": 1.29,
"learning_rate": 0.000155736089148651,
"loss": 3.5137,
"step": 121000
},
{
"epoch": 1.29,
"learning_rate": 0.00015532791696159078,
"loss": 3.5108,
"step": 121100
},
{
"epoch": 1.29,
"learning_rate": 0.00015492003917612715,
"loss": 3.5088,
"step": 121200
},
{
"epoch": 1.29,
"learning_rate": 0.0001545124570606372,
"loss": 3.4808,
"step": 121300
},
{
"epoch": 1.29,
"learning_rate": 0.0001541051718825781,
"loss": 3.5011,
"step": 121400
},
{
"epoch": 1.29,
"learning_rate": 0.00015369818490848386,
"loss": 3.4427,
"step": 121500
},
{
"epoch": 1.29,
"learning_rate": 0.00015329149740396102,
"loss": 3.4482,
"step": 121600
},
{
"epoch": 1.29,
"learning_rate": 0.00015288511063368498,
"loss": 3.4824,
"step": 121700
},
{
"epoch": 1.29,
"learning_rate": 0.00015247902586139583,
"loss": 3.4745,
"step": 121800
},
{
"epoch": 1.3,
"learning_rate": 0.00015207324434989472,
"loss": 3.5294,
"step": 121900
},
{
"epoch": 1.3,
"learning_rate": 0.00015166776736103964,
"loss": 3.4532,
"step": 122000
},
{
"epoch": 1.3,
"learning_rate": 0.00015126259615574157,
"loss": 3.4671,
"step": 122100
},
{
"epoch": 1.3,
"learning_rate": 0.00015085773199396054,
"loss": 3.4737,
"step": 122200
},
{
"epoch": 1.3,
"learning_rate": 0.00015045317613470206,
"loss": 3.4988,
"step": 122300
},
{
"epoch": 1.3,
"learning_rate": 0.00015004892983601264,
"loss": 3.4739,
"step": 122400
},
{
"epoch": 1.3,
"learning_rate": 0.0001496449943549762,
"loss": 3.4934,
"step": 122500
},
{
"epoch": 1.3,
"learning_rate": 0.00014924137094771017,
"loss": 3.5086,
"step": 122600
},
{
"epoch": 1.3,
"learning_rate": 0.00014883806086936146,
"loss": 3.4656,
"step": 122700
},
{
"epoch": 1.31,
"learning_rate": 0.00014843506537410274,
"loss": 3.4651,
"step": 122800
},
{
"epoch": 1.31,
"learning_rate": 0.00014803238571512817,
"loss": 3.4964,
"step": 122900
},
{
"epoch": 1.31,
"learning_rate": 0.00014763002314465,
"loss": 3.4865,
"step": 123000
},
{
"epoch": 1.31,
"learning_rate": 0.00014722797891389444,
"loss": 3.5156,
"step": 123100
},
{
"epoch": 1.31,
"learning_rate": 0.00014682625427309753,
"loss": 3.4795,
"step": 123200
},
{
"epoch": 1.31,
"learning_rate": 0.00014642485047150171,
"loss": 3.5341,
"step": 123300
},
{
"epoch": 1.31,
"learning_rate": 0.00014602376875735162,
"loss": 3.5358,
"step": 123400
},
{
"epoch": 1.31,
"learning_rate": 0.00014562301037789028,
"loss": 3.4616,
"step": 123500
},
{
"epoch": 1.31,
"learning_rate": 0.00014522257657935533,
"loss": 3.4804,
"step": 123600
},
{
"epoch": 1.31,
"learning_rate": 0.00014482246860697486,
"loss": 3.4295,
"step": 123700
},
{
"epoch": 1.32,
"learning_rate": 0.00014442268770496392,
"loss": 3.5029,
"step": 123800
},
{
"epoch": 1.32,
"learning_rate": 0.00014402323511652045,
"loss": 3.4647,
"step": 123900
},
{
"epoch": 1.32,
"learning_rate": 0.0001436241120838214,
"loss": 3.4547,
"step": 124000
},
{
"epoch": 1.32,
"learning_rate": 0.000143225319848019,
"loss": 3.4493,
"step": 124100
},
{
"epoch": 1.32,
"learning_rate": 0.00014282685964923643,
"loss": 3.469,
"step": 124200
},
{
"epoch": 1.32,
"learning_rate": 0.00014242873272656486,
"loss": 3.4242,
"step": 124300
},
{
"epoch": 1.32,
"learning_rate": 0.0001420309403180589,
"loss": 3.4473,
"step": 124400
},
{
"epoch": 1.32,
"learning_rate": 0.0001416334836607326,
"loss": 3.5084,
"step": 124500
},
{
"epoch": 1.32,
"learning_rate": 0.00014123636399055622,
"loss": 3.4462,
"step": 124600
},
{
"epoch": 1.33,
"learning_rate": 0.00014083958254245215,
"loss": 3.5331,
"step": 124700
},
{
"epoch": 1.33,
"learning_rate": 0.00014044314055029083,
"loss": 3.5069,
"step": 124800
},
{
"epoch": 1.33,
"learning_rate": 0.00014004703924688734,
"loss": 3.4798,
"step": 124900
},
{
"epoch": 1.33,
"learning_rate": 0.00013965127986399688,
"loss": 3.468,
"step": 125000
},
{
"epoch": 1.33,
"learning_rate": 0.0001392558636323118,
"loss": 3.4743,
"step": 125100
},
{
"epoch": 1.33,
"learning_rate": 0.00013886079178145717,
"loss": 3.5451,
"step": 125200
},
{
"epoch": 1.33,
"learning_rate": 0.00013846606553998716,
"loss": 3.4554,
"step": 125300
},
{
"epoch": 1.33,
"learning_rate": 0.00013807168613538129,
"loss": 3.4819,
"step": 125400
},
{
"epoch": 1.33,
"learning_rate": 0.00013767765479404036,
"loss": 3.4847,
"step": 125500
},
{
"epoch": 1.34,
"learning_rate": 0.00013728397274128293,
"loss": 3.4506,
"step": 125600
},
{
"epoch": 1.34,
"learning_rate": 0.00013689064120134132,
"loss": 3.4751,
"step": 125700
},
{
"epoch": 1.34,
"learning_rate": 0.000136497661397358,
"loss": 3.5131,
"step": 125800
},
{
"epoch": 1.34,
"learning_rate": 0.00013610503455138123,
"loss": 3.4578,
"step": 125900
},
{
"epoch": 1.34,
"learning_rate": 0.00013571276188436215,
"loss": 3.4376,
"step": 126000
},
{
"epoch": 1.34,
"learning_rate": 0.00013532084461615035,
"loss": 3.4903,
"step": 126100
},
{
"epoch": 1.34,
"learning_rate": 0.00013492928396549014,
"loss": 3.4624,
"step": 126200
},
{
"epoch": 1.34,
"learning_rate": 0.00013453808115001698,
"loss": 3.468,
"step": 126300
},
{
"epoch": 1.34,
"learning_rate": 0.00013414723738625352,
"loss": 3.4748,
"step": 126400
},
{
"epoch": 1.34,
"learning_rate": 0.0001337567538896058,
"loss": 3.4878,
"step": 126500
},
{
"epoch": 1.35,
"learning_rate": 0.0001333666318743598,
"loss": 3.4782,
"step": 126600
},
{
"epoch": 1.35,
"learning_rate": 0.00013297687255367697,
"loss": 3.5124,
"step": 126700
},
{
"epoch": 1.35,
"learning_rate": 0.00013258747713959125,
"loss": 3.4443,
"step": 126800
},
{
"epoch": 1.35,
"learning_rate": 0.00013219844684300475,
"loss": 3.4409,
"step": 126900
},
{
"epoch": 1.35,
"learning_rate": 0.00013180978287368435,
"loss": 3.4815,
"step": 127000
},
{
"epoch": 1.35,
"learning_rate": 0.00013142148644025765,
"loss": 3.4525,
"step": 127100
},
{
"epoch": 1.35,
"learning_rate": 0.00013103355875020923,
"loss": 3.4578,
"step": 127200
},
{
"epoch": 1.35,
"learning_rate": 0.000130646001009877,
"loss": 3.4926,
"step": 127300
},
{
"epoch": 1.35,
"learning_rate": 0.00013025881442444882,
"loss": 3.4594,
"step": 127400
},
{
"epoch": 1.36,
"learning_rate": 0.00012987200019795798,
"loss": 3.441,
"step": 127500
},
{
"epoch": 1.36,
"learning_rate": 0.00012948555953327983,
"loss": 3.45,
"step": 127600
},
{
"epoch": 1.36,
"learning_rate": 0.00012909949363212823,
"loss": 3.4696,
"step": 127700
},
{
"epoch": 1.36,
"learning_rate": 0.0001287138036950516,
"loss": 3.5126,
"step": 127800
},
{
"epoch": 1.36,
"learning_rate": 0.00012832849092142917,
"loss": 3.4852,
"step": 127900
},
{
"epoch": 1.36,
"learning_rate": 0.0001279435565094675,
"loss": 3.4656,
"step": 128000
},
{
"epoch": 1.36,
"learning_rate": 0.0001275590016561961,
"loss": 3.4703,
"step": 128100
},
{
"epoch": 1.36,
"learning_rate": 0.00012717482755746467,
"loss": 3.469,
"step": 128200
},
{
"epoch": 1.36,
"learning_rate": 0.00012679103540793864,
"loss": 3.4687,
"step": 128300
},
{
"epoch": 1.36,
"learning_rate": 0.0001264076264010957,
"loss": 3.5109,
"step": 128400
},
{
"epoch": 1.37,
"learning_rate": 0.00012602460172922214,
"loss": 3.4951,
"step": 128500
},
{
"epoch": 1.37,
"learning_rate": 0.00012564196258340904,
"loss": 3.5326,
"step": 128600
},
{
"epoch": 1.37,
"learning_rate": 0.00012525971015354864,
"loss": 3.4683,
"step": 128700
},
{
"epoch": 1.37,
"learning_rate": 0.00012487784562833067,
"loss": 3.4843,
"step": 128800
},
{
"epoch": 1.37,
"learning_rate": 0.00012449637019523832,
"loss": 3.5223,
"step": 128900
},
{
"epoch": 1.37,
"learning_rate": 0.00012411528504054518,
"loss": 3.4825,
"step": 129000
},
{
"epoch": 1.37,
"learning_rate": 0.00012373459134931095,
"loss": 3.4544,
"step": 129100
},
{
"epoch": 1.37,
"learning_rate": 0.00012335429030537812,
"loss": 3.5195,
"step": 129200
},
{
"epoch": 1.37,
"learning_rate": 0.00012297438309136812,
"loss": 3.4476,
"step": 129300
},
{
"epoch": 1.38,
"learning_rate": 0.0001225948708886777,
"loss": 3.5221,
"step": 129400
},
{
"epoch": 1.38,
"learning_rate": 0.0001222157548774752,
"loss": 3.4944,
"step": 129500
},
{
"epoch": 1.38,
"learning_rate": 0.00012183703623669704,
"loss": 3.554,
"step": 129600
},
{
"epoch": 1.38,
"learning_rate": 0.00012145871614404383,
"loss": 3.4857,
"step": 129700
},
{
"epoch": 1.38,
"learning_rate": 0.00012108079577597674,
"loss": 3.4504,
"step": 129800
},
{
"epoch": 1.38,
"learning_rate": 0.00012070327630771414,
"loss": 3.4739,
"step": 129900
},
{
"epoch": 1.38,
"learning_rate": 0.0001203261589132275,
"loss": 3.4466,
"step": 130000
},
{
"epoch": 1.38,
"learning_rate": 0.00011994944476523817,
"loss": 3.4815,
"step": 130100
},
{
"epoch": 1.38,
"learning_rate": 0.00011957313503521344,
"loss": 3.5155,
"step": 130200
},
{
"epoch": 1.39,
"learning_rate": 0.0001191972308933627,
"loss": 3.4678,
"step": 130300
},
{
"epoch": 1.39,
"learning_rate": 0.00011882173350863468,
"loss": 3.468,
"step": 130400
},
{
"epoch": 1.39,
"learning_rate": 0.00011844664404871281,
"loss": 3.4707,
"step": 130500
},
{
"epoch": 1.39,
"learning_rate": 0.00011807196368001192,
"loss": 3.52,
"step": 130600
},
{
"epoch": 1.39,
"learning_rate": 0.0001176976935676749,
"loss": 3.5055,
"step": 130700
},
{
"epoch": 1.39,
"learning_rate": 0.00011732383487556888,
"loss": 3.4498,
"step": 130800
},
{
"epoch": 1.39,
"learning_rate": 0.00011695038876628145,
"loss": 3.4206,
"step": 130900
},
{
"epoch": 1.39,
"learning_rate": 0.00011657735640111742,
"loss": 3.4776,
"step": 131000
},
{
"epoch": 1.39,
"learning_rate": 0.0001162047389400946,
"loss": 3.4434,
"step": 131100
},
{
"epoch": 1.39,
"learning_rate": 0.00011583253754194088,
"loss": 3.484,
"step": 131200
},
{
"epoch": 1.4,
"learning_rate": 0.00011546075336409018,
"loss": 3.4894,
"step": 131300
},
{
"epoch": 1.4,
"learning_rate": 0.00011508938756267933,
"loss": 3.483,
"step": 131400
},
{
"epoch": 1.4,
"learning_rate": 0.00011471844129254359,
"loss": 3.4863,
"step": 131500
},
{
"epoch": 1.4,
"learning_rate": 0.00011434791570721392,
"loss": 3.4921,
"step": 131600
},
{
"epoch": 1.4,
"learning_rate": 0.00011397781195891308,
"loss": 3.4906,
"step": 131700
},
{
"epoch": 1.4,
"learning_rate": 0.00011360813119855193,
"loss": 3.5254,
"step": 131800
},
{
"epoch": 1.4,
"learning_rate": 0.00011323887457572619,
"loss": 3.4589,
"step": 131900
},
{
"epoch": 1.4,
"learning_rate": 0.00011287004323871225,
"loss": 3.4672,
"step": 132000
},
{
"epoch": 1.4,
"learning_rate": 0.00011250163833446433,
"loss": 3.4552,
"step": 132100
},
{
"epoch": 1.41,
"learning_rate": 0.00011213366100861044,
"loss": 3.4868,
"step": 132200
},
{
"epoch": 1.41,
"learning_rate": 0.00011176611240544899,
"loss": 3.471,
"step": 132300
},
{
"epoch": 1.41,
"learning_rate": 0.00011139899366794517,
"loss": 3.4567,
"step": 132400
},
{
"epoch": 1.41,
"learning_rate": 0.0001110323059377274,
"loss": 3.4666,
"step": 132500
},
{
"epoch": 1.41,
"learning_rate": 0.00011066605035508385,
"loss": 3.4836,
"step": 132600
},
{
"epoch": 1.41,
"learning_rate": 0.0001103002280589589,
"loss": 3.4704,
"step": 132700
},
{
"epoch": 1.41,
"learning_rate": 0.00010993484018694921,
"loss": 3.4604,
"step": 132800
},
{
"epoch": 1.41,
"learning_rate": 0.00010956988787530092,
"loss": 3.4447,
"step": 132900
},
{
"epoch": 1.41,
"learning_rate": 0.0001092053722589055,
"loss": 3.4769,
"step": 133000
},
{
"epoch": 1.41,
"learning_rate": 0.00010884129447129648,
"loss": 3.4205,
"step": 133100
},
{
"epoch": 1.42,
"learning_rate": 0.00010847765564464593,
"loss": 3.4452,
"step": 133200
},
{
"epoch": 1.42,
"learning_rate": 0.00010811445690976068,
"loss": 3.4749,
"step": 133300
},
{
"epoch": 1.42,
"learning_rate": 0.00010775169939607913,
"loss": 3.4909,
"step": 133400
},
{
"epoch": 1.42,
"learning_rate": 0.00010738938423166778,
"loss": 3.4493,
"step": 133500
},
{
"epoch": 1.42,
"learning_rate": 0.00010702751254321744,
"loss": 3.4452,
"step": 133600
},
{
"epoch": 1.42,
"learning_rate": 0.00010666608545603962,
"loss": 3.5166,
"step": 133700
},
{
"epoch": 1.42,
"learning_rate": 0.00010630510409406355,
"loss": 3.513,
"step": 133800
},
{
"epoch": 1.42,
"learning_rate": 0.00010594456957983229,
"loss": 3.4369,
"step": 133900
},
{
"epoch": 1.42,
"learning_rate": 0.0001055844830344993,
"loss": 3.4093,
"step": 134000
},
{
"epoch": 1.43,
"learning_rate": 0.00010522484557782513,
"loss": 3.4877,
"step": 134100
},
{
"epoch": 1.43,
"learning_rate": 0.00010486565832817354,
"loss": 3.5383,
"step": 134200
},
{
"epoch": 1.43,
"learning_rate": 0.00010450692240250853,
"loss": 3.4348,
"step": 134300
},
{
"epoch": 1.43,
"learning_rate": 0.0001041486389163904,
"loss": 3.5446,
"step": 134400
},
{
"epoch": 1.43,
"learning_rate": 0.00010379080898397289,
"loss": 3.4961,
"step": 134500
},
{
"epoch": 1.43,
"learning_rate": 0.00010343343371799885,
"loss": 3.4675,
"step": 134600
},
{
"epoch": 1.43,
"learning_rate": 0.0001030765142297975,
"loss": 3.4036,
"step": 134700
},
{
"epoch": 1.43,
"learning_rate": 0.00010272005162928072,
"loss": 3.4214,
"step": 134800
},
{
"epoch": 1.43,
"learning_rate": 0.00010236404702493967,
"loss": 3.5284,
"step": 134900
},
{
"epoch": 1.44,
"learning_rate": 0.000102008501523841,
"loss": 3.4838,
"step": 135000
},
{
"epoch": 1.44,
"learning_rate": 0.000101653416231624,
"loss": 3.4514,
"step": 135100
},
{
"epoch": 1.44,
"learning_rate": 0.00010129879225249666,
"loss": 3.45,
"step": 135200
},
{
"epoch": 1.44,
"learning_rate": 0.00010094463068923257,
"loss": 3.4556,
"step": 135300
},
{
"epoch": 1.44,
"learning_rate": 0.00010059093264316724,
"loss": 3.4745,
"step": 135400
},
{
"epoch": 1.44,
"learning_rate": 0.00010023769921419481,
"loss": 3.4483,
"step": 135500
},
{
"epoch": 1.44,
"learning_rate": 9.98849315007646e-05,
"loss": 3.4857,
"step": 135600
},
{
"epoch": 1.44,
"learning_rate": 9.953263059987772e-05,
"loss": 3.429,
"step": 135700
},
{
"epoch": 1.44,
"learning_rate": 9.918079760708365e-05,
"loss": 3.4677,
"step": 135800
},
{
"epoch": 1.44,
"learning_rate": 9.882943361647667e-05,
"loss": 3.516,
"step": 135900
},
{
"epoch": 1.45,
"learning_rate": 9.847853972069277e-05,
"loss": 3.4223,
"step": 136000
},
{
"epoch": 1.45,
"learning_rate": 9.812811701090599e-05,
"loss": 3.4708,
"step": 136100
},
{
"epoch": 1.45,
"learning_rate": 9.777816657682523e-05,
"loss": 3.4884,
"step": 136200
},
{
"epoch": 1.45,
"learning_rate": 9.742868950669076e-05,
"loss": 3.4627,
"step": 136300
},
{
"epoch": 1.45,
"learning_rate": 9.707968688727047e-05,
"loss": 3.4592,
"step": 136400
},
{
"epoch": 1.45,
"learning_rate": 9.673115980385744e-05,
"loss": 3.5064,
"step": 136500
},
{
"epoch": 1.45,
"learning_rate": 9.638310934026567e-05,
"loss": 3.5205,
"step": 136600
},
{
"epoch": 1.45,
"learning_rate": 9.603553657882686e-05,
"loss": 3.4942,
"step": 136700
},
{
"epoch": 1.45,
"learning_rate": 9.56884426003874e-05,
"loss": 3.4722,
"step": 136800
},
{
"epoch": 1.46,
"learning_rate": 9.534182848430484e-05,
"loss": 3.4912,
"step": 136900
},
{
"epoch": 1.46,
"learning_rate": 9.49956953084444e-05,
"loss": 3.4864,
"step": 137000
},
{
"epoch": 1.46,
"learning_rate": 9.465004414917586e-05,
"loss": 3.4623,
"step": 137100
},
{
"epoch": 1.46,
"learning_rate": 9.430487608136981e-05,
"loss": 3.4253,
"step": 137200
},
{
"epoch": 1.46,
"learning_rate": 9.39601921783948e-05,
"loss": 3.4765,
"step": 137300
},
{
"epoch": 1.46,
"learning_rate": 9.36159935121136e-05,
"loss": 3.5379,
"step": 137400
},
{
"epoch": 1.46,
"learning_rate": 9.32722811528805e-05,
"loss": 3.5132,
"step": 137500
},
{
"epoch": 1.46,
"learning_rate": 9.292905616953681e-05,
"loss": 3.4339,
"step": 137600
},
{
"epoch": 1.46,
"learning_rate": 9.258631962940875e-05,
"loss": 3.481,
"step": 137700
},
{
"epoch": 1.46,
"learning_rate": 9.224407259830347e-05,
"loss": 3.425,
"step": 137800
},
{
"epoch": 1.47,
"learning_rate": 9.190231614050592e-05,
"loss": 3.4525,
"step": 137900
},
{
"epoch": 1.47,
"learning_rate": 9.156105131877559e-05,
"loss": 3.4889,
"step": 138000
},
{
"epoch": 1.47,
"learning_rate": 9.122027919434287e-05,
"loss": 3.4895,
"step": 138100
},
{
"epoch": 1.47,
"learning_rate": 9.088000082690629e-05,
"loss": 3.4183,
"step": 138200
},
{
"epoch": 1.47,
"learning_rate": 9.05402172746288e-05,
"loss": 3.4894,
"step": 138300
},
{
"epoch": 1.47,
"learning_rate": 9.020092959413473e-05,
"loss": 3.4553,
"step": 138400
},
{
"epoch": 1.47,
"learning_rate": 8.986213884050629e-05,
"loss": 3.4826,
"step": 138500
},
{
"epoch": 1.47,
"learning_rate": 8.952384606728045e-05,
"loss": 3.4792,
"step": 138600
},
{
"epoch": 1.47,
"learning_rate": 8.918605232644564e-05,
"loss": 3.4568,
"step": 138700
},
{
"epoch": 1.48,
"learning_rate": 8.884875866843844e-05,
"loss": 3.4889,
"step": 138800
},
{
"epoch": 1.48,
"learning_rate": 8.851196614214016e-05,
"loss": 3.4883,
"step": 138900
},
{
"epoch": 1.48,
"learning_rate": 8.817567579487399e-05,
"loss": 3.499,
"step": 139000
},
{
"epoch": 1.48,
"learning_rate": 8.783988867240133e-05,
"loss": 3.4862,
"step": 139100
},
{
"epoch": 1.48,
"learning_rate": 8.750460581891877e-05,
"loss": 3.4511,
"step": 139200
},
{
"epoch": 1.48,
"learning_rate": 8.716982827705489e-05,
"loss": 3.4998,
"step": 139300
},
{
"epoch": 1.48,
"learning_rate": 8.683555708786658e-05,
"loss": 3.4064,
"step": 139400
},
{
"epoch": 1.48,
"learning_rate": 8.650179329083629e-05,
"loss": 3.4684,
"step": 139500
},
{
"epoch": 1.48,
"learning_rate": 8.616853792386889e-05,
"loss": 3.4596,
"step": 139600
},
{
"epoch": 1.48,
"learning_rate": 8.583579202328792e-05,
"loss": 3.4669,
"step": 139700
},
{
"epoch": 1.49,
"learning_rate": 8.550355662383253e-05,
"loss": 3.5372,
"step": 139800
},
{
"epoch": 1.49,
"learning_rate": 8.517183275865456e-05,
"loss": 3.5077,
"step": 139900
},
{
"epoch": 1.49,
"learning_rate": 8.484062145931514e-05,
"loss": 3.4921,
"step": 140000
},
{
"epoch": 1.49,
"learning_rate": 8.450992375578137e-05,
"loss": 3.4327,
"step": 140100
},
{
"epoch": 1.49,
"learning_rate": 8.417974067642336e-05,
"loss": 3.5026,
"step": 140200
},
{
"epoch": 1.49,
"learning_rate": 8.385007324801066e-05,
"loss": 3.4435,
"step": 140300
},
{
"epoch": 1.49,
"learning_rate": 8.352092249570953e-05,
"loss": 3.4548,
"step": 140400
},
{
"epoch": 1.49,
"learning_rate": 8.319228944307933e-05,
"loss": 3.4522,
"step": 140500
},
{
"epoch": 1.49,
"learning_rate": 8.286417511206992e-05,
"loss": 3.4525,
"step": 140600
},
{
"epoch": 1.5,
"learning_rate": 8.253658052301751e-05,
"loss": 3.4803,
"step": 140700
},
{
"epoch": 1.5,
"learning_rate": 8.220950669464253e-05,
"loss": 3.4681,
"step": 140800
},
{
"epoch": 1.5,
"learning_rate": 8.188295464404577e-05,
"loss": 3.5165,
"step": 140900
},
{
"epoch": 1.5,
"learning_rate": 8.155692538670568e-05,
"loss": 3.4869,
"step": 141000
},
{
"epoch": 1.5,
"learning_rate": 8.123141993647456e-05,
"loss": 3.4181,
"step": 141100
},
{
"epoch": 1.5,
"learning_rate": 8.090643930557625e-05,
"loss": 3.4428,
"step": 141200
},
{
"epoch": 1.5,
"learning_rate": 8.058198450460239e-05,
"loss": 3.4768,
"step": 141300
},
{
"epoch": 1.5,
"learning_rate": 8.025805654250942e-05,
"loss": 3.4992,
"step": 141400
},
{
"epoch": 1.5,
"learning_rate": 7.993465642661555e-05,
"loss": 3.4324,
"step": 141500
},
{
"epoch": 1.51,
"learning_rate": 7.961178516259745e-05,
"loss": 3.4736,
"step": 141600
},
{
"epoch": 1.51,
"learning_rate": 7.928944375448734e-05,
"loss": 3.4966,
"step": 141700
},
{
"epoch": 1.51,
"learning_rate": 7.896763320466968e-05,
"loss": 3.4269,
"step": 141800
},
{
"epoch": 1.51,
"learning_rate": 7.864635451387817e-05,
"loss": 3.5562,
"step": 141900
},
{
"epoch": 1.51,
"learning_rate": 7.83256086811924e-05,
"loss": 3.4452,
"step": 142000
},
{
"epoch": 1.51,
"learning_rate": 7.800539670403514e-05,
"loss": 3.465,
"step": 142100
},
{
"epoch": 1.51,
"learning_rate": 7.7685719578169e-05,
"loss": 3.4184,
"step": 142200
},
{
"epoch": 1.51,
"learning_rate": 7.73665782976933e-05,
"loss": 3.4772,
"step": 142300
},
{
"epoch": 1.51,
"learning_rate": 7.704797385504117e-05,
"loss": 3.467,
"step": 142400
},
{
"epoch": 1.51,
"learning_rate": 7.6729907240976e-05,
"loss": 3.4636,
"step": 142500
},
{
"epoch": 1.52,
"learning_rate": 7.641237944458918e-05,
"loss": 3.4184,
"step": 142600
},
{
"epoch": 1.52,
"learning_rate": 7.609539145329622e-05,
"loss": 3.4818,
"step": 142700
},
{
"epoch": 1.52,
"learning_rate": 7.57789442528341e-05,
"loss": 3.4073,
"step": 142800
},
{
"epoch": 1.52,
"learning_rate": 7.5463038827258e-05,
"loss": 3.4903,
"step": 142900
},
{
"epoch": 1.52,
"learning_rate": 7.514767615893844e-05,
"loss": 3.482,
"step": 143000
},
{
"epoch": 1.52,
"learning_rate": 7.483285722855815e-05,
"loss": 3.4642,
"step": 143100
},
{
"epoch": 1.52,
"learning_rate": 7.4518583015109e-05,
"loss": 3.4445,
"step": 143200
},
{
"epoch": 1.52,
"learning_rate": 7.420485449588878e-05,
"loss": 3.4651,
"step": 143300
},
{
"epoch": 1.52,
"learning_rate": 7.389167264649855e-05,
"loss": 3.4787,
"step": 143400
},
{
"epoch": 1.53,
"learning_rate": 7.357903844083924e-05,
"loss": 3.4666,
"step": 143500
},
{
"epoch": 1.53,
"learning_rate": 7.326695285110906e-05,
"loss": 3.5029,
"step": 143600
},
{
"epoch": 1.53,
"learning_rate": 7.295541684779975e-05,
"loss": 3.4557,
"step": 143700
},
{
"epoch": 1.53,
"learning_rate": 7.264443139969432e-05,
"loss": 3.4807,
"step": 143800
},
{
"epoch": 1.53,
"learning_rate": 7.23339974738636e-05,
"loss": 3.4719,
"step": 143900
},
{
"epoch": 1.53,
"learning_rate": 7.202411603566339e-05,
"loss": 3.4985,
"step": 144000
},
{
"epoch": 1.53,
"learning_rate": 7.171478804873152e-05,
"loss": 3.4518,
"step": 144100
},
{
"epoch": 1.53,
"learning_rate": 7.140601447498443e-05,
"loss": 3.5169,
"step": 144200
},
{
"epoch": 1.53,
"learning_rate": 7.109779627461482e-05,
"loss": 3.4492,
"step": 144300
},
{
"epoch": 1.53,
"learning_rate": 7.079013440608827e-05,
"loss": 3.4762,
"step": 144400
},
{
"epoch": 1.54,
"learning_rate": 7.048302982614026e-05,
"loss": 3.4617,
"step": 144500
},
{
"epoch": 1.54,
"learning_rate": 7.017648348977335e-05,
"loss": 3.4682,
"step": 144600
},
{
"epoch": 1.54,
"learning_rate": 6.98704963502541e-05,
"loss": 3.4709,
"step": 144700
},
{
"epoch": 1.54,
"learning_rate": 6.95650693591101e-05,
"loss": 3.4152,
"step": 144800
},
{
"epoch": 1.54,
"learning_rate": 6.926020346612722e-05,
"loss": 3.496,
"step": 144900
},
{
"epoch": 1.54,
"learning_rate": 6.895589961934615e-05,
"loss": 3.4792,
"step": 145000
},
{
"epoch": 1.54,
"learning_rate": 6.865215876506006e-05,
"loss": 3.4939,
"step": 145100
},
{
"epoch": 1.54,
"learning_rate": 6.834898184781135e-05,
"loss": 3.4709,
"step": 145200
},
{
"epoch": 1.54,
"learning_rate": 6.804636981038867e-05,
"loss": 3.4498,
"step": 145300
},
{
"epoch": 1.55,
"learning_rate": 6.774432359382415e-05,
"loss": 3.4552,
"step": 145400
},
{
"epoch": 1.55,
"learning_rate": 6.744284413739025e-05,
"loss": 3.3939,
"step": 145500
},
{
"epoch": 1.55,
"learning_rate": 6.7141932378597e-05,
"loss": 3.449,
"step": 145600
},
{
"epoch": 1.55,
"learning_rate": 6.684158925318931e-05,
"loss": 3.4837,
"step": 145700
},
{
"epoch": 1.55,
"learning_rate": 6.654181569514362e-05,
"loss": 3.4736,
"step": 145800
},
{
"epoch": 1.55,
"learning_rate": 6.624261263666504e-05,
"loss": 3.4677,
"step": 145900
},
{
"epoch": 1.55,
"learning_rate": 6.594398100818483e-05,
"loss": 3.4708,
"step": 146000
},
{
"epoch": 1.55,
"learning_rate": 6.564592173835718e-05,
"loss": 3.4526,
"step": 146100
},
{
"epoch": 1.55,
"learning_rate": 6.534843575405647e-05,
"loss": 3.5345,
"step": 146200
},
{
"epoch": 1.56,
"learning_rate": 6.505152398037433e-05,
"loss": 3.4945,
"step": 146300
},
{
"epoch": 1.56,
"learning_rate": 6.475518734061667e-05,
"loss": 3.4661,
"step": 146400
},
{
"epoch": 1.56,
"learning_rate": 6.445942675630095e-05,
"loss": 3.4289,
"step": 146500
},
{
"epoch": 1.56,
"learning_rate": 6.416424314715327e-05,
"loss": 3.4531,
"step": 146600
},
{
"epoch": 1.56,
"learning_rate": 6.38696374311058e-05,
"loss": 3.4917,
"step": 146700
},
{
"epoch": 1.56,
"learning_rate": 6.357561052429305e-05,
"loss": 3.3964,
"step": 146800
},
{
"epoch": 1.56,
"learning_rate": 6.328216334105014e-05,
"loss": 3.4736,
"step": 146900
},
{
"epoch": 1.56,
"learning_rate": 6.29892967939091e-05,
"loss": 3.5101,
"step": 147000
},
{
"epoch": 1.56,
"learning_rate": 6.269701179359663e-05,
"loss": 3.4323,
"step": 147100
},
{
"epoch": 1.56,
"learning_rate": 6.240530924903065e-05,
"loss": 3.4316,
"step": 147200
},
{
"epoch": 1.57,
"learning_rate": 6.211419006731808e-05,
"loss": 3.4749,
"step": 147300
},
{
"epoch": 1.57,
"learning_rate": 6.182365515375172e-05,
"loss": 3.4766,
"step": 147400
},
{
"epoch": 1.57,
"learning_rate": 6.153370541180739e-05,
"loss": 3.4461,
"step": 147500
},
{
"epoch": 1.57,
"learning_rate": 6.124434174314131e-05,
"loss": 3.5406,
"step": 147600
},
{
"epoch": 1.57,
"learning_rate": 6.0955565047587064e-05,
"loss": 3.5152,
"step": 147700
},
{
"epoch": 1.57,
"learning_rate": 6.0667376223153075e-05,
"loss": 3.504,
"step": 147800
},
{
"epoch": 1.57,
"learning_rate": 6.03797761660195e-05,
"loss": 3.466,
"step": 147900
},
{
"epoch": 1.57,
"learning_rate": 6.009276577053582e-05,
"loss": 3.4596,
"step": 148000
},
{
"epoch": 1.57,
"learning_rate": 5.9806345929217546e-05,
"loss": 3.4806,
"step": 148100
},
{
"epoch": 1.58,
"learning_rate": 5.9520517532744015e-05,
"loss": 3.4958,
"step": 148200
},
{
"epoch": 1.58,
"learning_rate": 5.923528146995519e-05,
"loss": 3.4599,
"step": 148300
},
{
"epoch": 1.58,
"learning_rate": 5.895063862784916e-05,
"loss": 3.4378,
"step": 148400
},
{
"epoch": 1.58,
"learning_rate": 5.8666589891579306e-05,
"loss": 3.4499,
"step": 148500
},
{
"epoch": 1.58,
"learning_rate": 5.83831361444512e-05,
"loss": 3.4667,
"step": 148600
},
{
"epoch": 1.58,
"learning_rate": 5.8100278267920665e-05,
"loss": 3.4887,
"step": 148700
},
{
"epoch": 1.58,
"learning_rate": 5.781801714159021e-05,
"loss": 3.4164,
"step": 148800
},
{
"epoch": 1.58,
"learning_rate": 5.7536353643206806e-05,
"loss": 3.4847,
"step": 148900
},
{
"epoch": 1.58,
"learning_rate": 5.7255288648658754e-05,
"loss": 3.43,
"step": 149000
},
{
"epoch": 1.58,
"learning_rate": 5.6974823031973405e-05,
"loss": 3.5193,
"step": 149100
},
{
"epoch": 1.59,
"learning_rate": 5.669495766531413e-05,
"loss": 3.4507,
"step": 149200
},
{
"epoch": 1.59,
"learning_rate": 5.6415693418977844e-05,
"loss": 3.494,
"step": 149300
},
{
"epoch": 1.59,
"learning_rate": 5.613703116139185e-05,
"loss": 3.5268,
"step": 149400
},
{
"epoch": 1.59,
"learning_rate": 5.5858971759111756e-05,
"loss": 3.4471,
"step": 149500
},
{
"epoch": 1.59,
"learning_rate": 5.558151607681827e-05,
"loss": 3.4171,
"step": 149600
},
{
"epoch": 1.59,
"learning_rate": 5.530466497731501e-05,
"loss": 3.4843,
"step": 149700
},
{
"epoch": 1.59,
"learning_rate": 5.502841932152511e-05,
"loss": 3.4613,
"step": 149800
},
{
"epoch": 1.59,
"learning_rate": 5.475277996848921e-05,
"loss": 3.5356,
"step": 149900
},
{
"epoch": 1.59,
"learning_rate": 5.447774777536249e-05,
"loss": 3.4039,
"step": 150000
},
{
"epoch": 1.6,
"learning_rate": 5.4203323597412066e-05,
"loss": 3.4498,
"step": 150100
},
{
"epoch": 1.6,
"learning_rate": 5.392950828801435e-05,
"loss": 3.4847,
"step": 150200
},
{
"epoch": 1.6,
"learning_rate": 5.3656302698652096e-05,
"loss": 3.4883,
"step": 150300
},
{
"epoch": 1.6,
"learning_rate": 5.3383707678912345e-05,
"loss": 3.4473,
"step": 150400
},
{
"epoch": 1.6,
"learning_rate": 5.311172407648327e-05,
"loss": 3.4113,
"step": 150500
},
{
"epoch": 1.6,
"learning_rate": 5.2840352737151766e-05,
"loss": 3.4673,
"step": 150600
},
{
"epoch": 1.6,
"learning_rate": 5.256959450480078e-05,
"loss": 3.4446,
"step": 150700
},
{
"epoch": 1.6,
"learning_rate": 5.229945022140667e-05,
"loss": 3.4601,
"step": 150800
},
{
"epoch": 1.6,
"learning_rate": 5.2029920727036605e-05,
"loss": 3.4191,
"step": 150900
},
{
"epoch": 1.61,
"learning_rate": 5.1761006859846e-05,
"loss": 3.47,
"step": 151000
},
{
"epoch": 1.61,
"learning_rate": 5.1492709456075675e-05,
"loss": 3.5108,
"step": 151100
},
{
"epoch": 1.61,
"learning_rate": 5.1225029350049604e-05,
"loss": 3.4592,
"step": 151200
},
{
"epoch": 1.61,
"learning_rate": 5.0957967374172134e-05,
"loss": 3.4277,
"step": 151300
},
{
"epoch": 1.61,
"learning_rate": 5.069152435892535e-05,
"loss": 3.4706,
"step": 151400
},
{
"epoch": 1.61,
"learning_rate": 5.042570113286668e-05,
"loss": 3.4567,
"step": 151500
},
{
"epoch": 1.61,
"learning_rate": 5.016049852262591e-05,
"loss": 3.4528,
"step": 151600
},
{
"epoch": 1.61,
"learning_rate": 4.989591735290328e-05,
"loss": 3.4114,
"step": 151700
},
{
"epoch": 1.61,
"learning_rate": 4.9631958446466256e-05,
"loss": 3.5006,
"step": 151800
},
{
"epoch": 1.61,
"learning_rate": 4.936862262414748e-05,
"loss": 3.4768,
"step": 151900
},
{
"epoch": 1.62,
"learning_rate": 4.910591070484169e-05,
"loss": 3.4693,
"step": 152000
},
{
"epoch": 1.62,
"learning_rate": 4.884382350550368e-05,
"loss": 3.447,
"step": 152100
},
{
"epoch": 1.62,
"learning_rate": 4.8582361841145564e-05,
"loss": 3.4081,
"step": 152200
},
{
"epoch": 1.62,
"learning_rate": 4.8321526524834156e-05,
"loss": 3.4792,
"step": 152300
},
{
"epoch": 1.62,
"learning_rate": 4.806131836768862e-05,
"loss": 3.4483,
"step": 152400
},
{
"epoch": 1.62,
"learning_rate": 4.780173817887765e-05,
"loss": 3.5346,
"step": 152500
},
{
"epoch": 1.62,
"learning_rate": 4.7542786765617296e-05,
"loss": 3.4657,
"step": 152600
},
{
"epoch": 1.62,
"learning_rate": 4.7284464933168235e-05,
"loss": 3.4891,
"step": 152700
},
{
"epoch": 1.62,
"learning_rate": 4.702677348483358e-05,
"loss": 3.4926,
"step": 152800
},
{
"epoch": 1.63,
"learning_rate": 4.6769713221955723e-05,
"loss": 3.4253,
"step": 152900
},
{
"epoch": 1.63,
"learning_rate": 4.6513284943914547e-05,
"loss": 3.4591,
"step": 153000
},
{
"epoch": 1.63,
"learning_rate": 4.625748944812458e-05,
"loss": 3.5055,
"step": 153100
},
{
"epoch": 1.63,
"learning_rate": 4.600232753003267e-05,
"loss": 3.4436,
"step": 153200
},
{
"epoch": 1.63,
"learning_rate": 4.574779998311518e-05,
"loss": 3.4526,
"step": 153300
},
{
"epoch": 1.63,
"learning_rate": 4.549390759887606e-05,
"loss": 3.4926,
"step": 153400
},
{
"epoch": 1.63,
"learning_rate": 4.524065116684392e-05,
"loss": 3.4372,
"step": 153500
},
{
"epoch": 1.63,
"learning_rate": 4.498803147456987e-05,
"loss": 3.4741,
"step": 153600
},
{
"epoch": 1.63,
"learning_rate": 4.4736049307624817e-05,
"loss": 3.4904,
"step": 153700
},
{
"epoch": 1.63,
"learning_rate": 4.448470544959726e-05,
"loss": 3.4679,
"step": 153800
},
{
"epoch": 1.64,
"learning_rate": 4.423400068209071e-05,
"loss": 3.4663,
"step": 153900
},
{
"epoch": 1.64,
"learning_rate": 4.398393578472132e-05,
"loss": 3.5239,
"step": 154000
},
{
"epoch": 1.64,
"learning_rate": 4.3734511535115486e-05,
"loss": 3.4588,
"step": 154100
},
{
"epoch": 1.64,
"learning_rate": 4.348572870890718e-05,
"loss": 3.5164,
"step": 154200
},
{
"epoch": 1.64,
"learning_rate": 4.323758807973596e-05,
"loss": 3.4214,
"step": 154300
},
{
"epoch": 1.64,
"learning_rate": 4.299009041924426e-05,
"loss": 3.5172,
"step": 154400
},
{
"epoch": 1.64,
"learning_rate": 4.274323649707509e-05,
"loss": 3.5102,
"step": 154500
},
{
"epoch": 1.64,
"learning_rate": 4.249702708086972e-05,
"loss": 3.4565,
"step": 154600
},
{
"epoch": 1.64,
"learning_rate": 4.225146293626486e-05,
"loss": 3.5431,
"step": 154700
},
{
"epoch": 1.65,
"learning_rate": 4.2006544826891065e-05,
"loss": 3.4823,
"step": 154800
},
{
"epoch": 1.65,
"learning_rate": 4.176227351436967e-05,
"loss": 3.4112,
"step": 154900
},
{
"epoch": 1.65,
"learning_rate": 4.1518649758310766e-05,
"loss": 3.468,
"step": 155000
},
{
"epoch": 1.65,
"learning_rate": 4.1275674316310574e-05,
"loss": 3.4689,
"step": 155100
},
{
"epoch": 1.65,
"learning_rate": 4.103334794394939e-05,
"loss": 3.4525,
"step": 155200
},
{
"epoch": 1.65,
"learning_rate": 4.079167139478909e-05,
"loss": 3.4909,
"step": 155300
},
{
"epoch": 1.65,
"learning_rate": 4.055064542037087e-05,
"loss": 3.5342,
"step": 155400
},
{
"epoch": 1.65,
"learning_rate": 4.03102707702126e-05,
"loss": 3.4856,
"step": 155500
},
{
"epoch": 1.65,
"learning_rate": 4.007054819180692e-05,
"loss": 3.4777,
"step": 155600
},
{
"epoch": 1.66,
"learning_rate": 3.983147843061863e-05,
"loss": 3.5254,
"step": 155700
},
{
"epoch": 1.66,
"learning_rate": 3.9593062230082685e-05,
"loss": 3.4694,
"step": 155800
},
{
"epoch": 1.66,
"learning_rate": 3.935530033160134e-05,
"loss": 3.4937,
"step": 155900
},
{
"epoch": 1.66,
"learning_rate": 3.911819347454234e-05,
"loss": 3.4633,
"step": 156000
},
{
"epoch": 1.66,
"learning_rate": 3.8881742396236455e-05,
"loss": 3.4595,
"step": 156100
},
{
"epoch": 1.66,
"learning_rate": 3.8645947831975145e-05,
"loss": 3.4702,
"step": 156200
},
{
"epoch": 1.66,
"learning_rate": 3.841081051500836e-05,
"loss": 3.4524,
"step": 156300
},
{
"epoch": 1.66,
"learning_rate": 3.817633117654207e-05,
"loss": 3.4967,
"step": 156400
},
{
"epoch": 1.66,
"learning_rate": 3.79425105457363e-05,
"loss": 3.4595,
"step": 156500
},
{
"epoch": 1.66,
"learning_rate": 3.77093493497026e-05,
"loss": 3.4135,
"step": 156600
},
{
"epoch": 1.67,
"learning_rate": 3.74768483135019e-05,
"loss": 3.4355,
"step": 156700
},
{
"epoch": 1.67,
"learning_rate": 3.724500816014223e-05,
"loss": 3.4815,
"step": 156800
},
{
"epoch": 1.67,
"learning_rate": 3.701382961057648e-05,
"loss": 3.4318,
"step": 156900
},
{
"epoch": 1.67,
"learning_rate": 3.678331338370014e-05,
"loss": 3.5005,
"step": 157000
},
{
"epoch": 1.67,
"learning_rate": 3.655346019634909e-05,
"loss": 3.4748,
"step": 157100
},
{
"epoch": 1.67,
"learning_rate": 3.632427076329739e-05,
"loss": 3.4054,
"step": 157200
},
{
"epoch": 1.67,
"learning_rate": 3.609574579725491e-05,
"loss": 3.4565,
"step": 157300
},
{
"epoch": 1.67,
"learning_rate": 3.5867886008865315e-05,
"loss": 3.4485,
"step": 157400
},
{
"epoch": 1.67,
"learning_rate": 3.564069210670379e-05,
"loss": 3.4623,
"step": 157500
},
{
"epoch": 1.68,
"learning_rate": 3.541416479727483e-05,
"loss": 3.4946,
"step": 157600
},
{
"epoch": 1.68,
"learning_rate": 3.518830478500978e-05,
"loss": 3.4598,
"step": 157700
},
{
"epoch": 1.68,
"learning_rate": 3.49631127722653e-05,
"loss": 3.4863,
"step": 157800
},
{
"epoch": 1.68,
"learning_rate": 3.473858945932046e-05,
"loss": 3.475,
"step": 157900
},
{
"epoch": 1.68,
"learning_rate": 3.451473554437509e-05,
"loss": 3.4652,
"step": 158000
},
{
"epoch": 1.68,
"learning_rate": 3.4291551723547146e-05,
"loss": 3.4687,
"step": 158100
},
{
"epoch": 1.68,
"learning_rate": 3.4069038690871e-05,
"loss": 3.4325,
"step": 158200
},
{
"epoch": 1.68,
"learning_rate": 3.384719713829498e-05,
"loss": 3.4336,
"step": 158300
},
{
"epoch": 1.68,
"learning_rate": 3.362602775567935e-05,
"loss": 3.4393,
"step": 158400
},
{
"epoch": 1.68,
"learning_rate": 3.340553123079421e-05,
"loss": 3.5204,
"step": 158500
},
{
"epoch": 1.69,
"learning_rate": 3.3185708249317045e-05,
"loss": 3.4819,
"step": 158600
},
{
"epoch": 1.69,
"learning_rate": 3.2966559494830934e-05,
"loss": 3.4414,
"step": 158700
},
{
"epoch": 1.69,
"learning_rate": 3.2748085648822506e-05,
"loss": 3.4852,
"step": 158800
},
{
"epoch": 1.69,
"learning_rate": 3.2530287390679426e-05,
"loss": 3.4863,
"step": 158900
},
{
"epoch": 1.69,
"learning_rate": 3.2313165397688445e-05,
"loss": 3.4531,
"step": 159000
},
{
"epoch": 1.69,
"learning_rate": 3.2096720345033445e-05,
"loss": 3.4858,
"step": 159100
},
{
"epoch": 1.69,
"learning_rate": 3.188095290579318e-05,
"loss": 3.4303,
"step": 159200
},
{
"epoch": 1.69,
"learning_rate": 3.166586375093935e-05,
"loss": 3.4155,
"step": 159300
},
{
"epoch": 1.69,
"learning_rate": 3.145145354933415e-05,
"loss": 3.5033,
"step": 159400
},
{
"epoch": 1.7,
"learning_rate": 3.123772296772862e-05,
"loss": 3.4965,
"step": 159500
},
{
"epoch": 1.7,
"learning_rate": 3.102467267076037e-05,
"loss": 3.499,
"step": 159600
},
{
"epoch": 1.7,
"learning_rate": 3.0812303320951475e-05,
"loss": 3.4457,
"step": 159700
},
{
"epoch": 1.7,
"learning_rate": 3.0600615578706524e-05,
"loss": 3.4652,
"step": 159800
},
{
"epoch": 1.7,
"learning_rate": 3.038961010231048e-05,
"loss": 3.4304,
"step": 159900
},
{
"epoch": 1.7,
"learning_rate": 3.0179287547926676e-05,
"loss": 3.4779,
"step": 160000
},
{
"epoch": 1.7,
"learning_rate": 2.996964856959475e-05,
"loss": 3.4483,
"step": 160100
},
{
"epoch": 1.7,
"learning_rate": 2.976069381922869e-05,
"loss": 3.4724,
"step": 160200
},
{
"epoch": 1.7,
"learning_rate": 2.955242394661456e-05,
"loss": 3.4281,
"step": 160300
},
{
"epoch": 1.71,
"learning_rate": 2.9344839599408897e-05,
"loss": 3.5002,
"step": 160400
},
{
"epoch": 1.71,
"learning_rate": 2.9137941423136305e-05,
"loss": 3.4879,
"step": 160500
},
{
"epoch": 1.71,
"learning_rate": 2.8931730061187656e-05,
"loss": 3.4045,
"step": 160600
},
{
"epoch": 1.71,
"learning_rate": 2.872620615481808e-05,
"loss": 3.4483,
"step": 160700
},
{
"epoch": 1.71,
"learning_rate": 2.8521370343144752e-05,
"loss": 3.4465,
"step": 160800
},
{
"epoch": 1.71,
"learning_rate": 2.8317223263145313e-05,
"loss": 3.4971,
"step": 160900
},
{
"epoch": 1.71,
"learning_rate": 2.811376554965553e-05,
"loss": 3.4879,
"step": 161000
},
{
"epoch": 1.71,
"learning_rate": 2.7910997835367548e-05,
"loss": 3.4802,
"step": 161100
},
{
"epoch": 1.71,
"learning_rate": 2.7708920750827565e-05,
"loss": 3.4915,
"step": 161200
},
{
"epoch": 1.71,
"learning_rate": 2.750753492443442e-05,
"loss": 3.4362,
"step": 161300
},
{
"epoch": 1.72,
"learning_rate": 2.7306840982437215e-05,
"loss": 3.4877,
"step": 161400
},
{
"epoch": 1.72,
"learning_rate": 2.710683954893356e-05,
"loss": 3.5149,
"step": 161500
},
{
"epoch": 1.72,
"learning_rate": 2.690753124586745e-05,
"loss": 3.4533,
"step": 161600
},
{
"epoch": 1.72,
"learning_rate": 2.6708916693027553e-05,
"loss": 3.4888,
"step": 161700
},
{
"epoch": 1.72,
"learning_rate": 2.651099650804517e-05,
"loss": 3.4699,
"step": 161800
},
{
"epoch": 1.72,
"learning_rate": 2.6313771306392453e-05,
"loss": 3.4967,
"step": 161900
},
{
"epoch": 1.72,
"learning_rate": 2.6117241701380052e-05,
"loss": 3.4676,
"step": 162000
},
{
"epoch": 1.72,
"learning_rate": 2.592140830415579e-05,
"loss": 3.484,
"step": 162100
},
{
"epoch": 1.72,
"learning_rate": 2.5726271723702428e-05,
"loss": 3.4728,
"step": 162200
},
{
"epoch": 1.73,
"learning_rate": 2.553183256683578e-05,
"loss": 3.4194,
"step": 162300
},
{
"epoch": 1.73,
"learning_rate": 2.533809143820298e-05,
"loss": 3.482,
"step": 162400
},
{
"epoch": 1.73,
"learning_rate": 2.5145048940280384e-05,
"loss": 3.4789,
"step": 162500
},
{
"epoch": 1.73,
"learning_rate": 2.4952705673371877e-05,
"loss": 3.5145,
"step": 162600
},
{
"epoch": 1.73,
"learning_rate": 2.4761062235607007e-05,
"loss": 3.4865,
"step": 162700
},
{
"epoch": 1.73,
"learning_rate": 2.4570119222938945e-05,
"loss": 3.4171,
"step": 162800
},
{
"epoch": 1.73,
"learning_rate": 2.4379877229142867e-05,
"loss": 3.4954,
"step": 162900
},
{
"epoch": 1.73,
"learning_rate": 2.4190336845813928e-05,
"loss": 3.4762,
"step": 163000
},
{
"epoch": 1.73,
"learning_rate": 2.400149866236548e-05,
"loss": 3.457,
"step": 163100
},
{
"epoch": 1.73,
"learning_rate": 2.3813363266027262e-05,
"loss": 3.4829,
"step": 163200
},
{
"epoch": 1.74,
"learning_rate": 2.3625931241843646e-05,
"loss": 3.443,
"step": 163300
},
{
"epoch": 1.74,
"learning_rate": 2.3439203172671507e-05,
"loss": 3.4499,
"step": 163400
},
{
"epoch": 1.74,
"learning_rate": 2.3253179639178806e-05,
"loss": 3.4199,
"step": 163500
},
{
"epoch": 1.74,
"learning_rate": 2.306786121984261e-05,
"loss": 3.4471,
"step": 163600
},
{
"epoch": 1.74,
"learning_rate": 2.2883248490947306e-05,
"loss": 3.434,
"step": 163700
},
{
"epoch": 1.74,
"learning_rate": 2.2699342026582553e-05,
"loss": 3.4878,
"step": 163800
},
{
"epoch": 1.74,
"learning_rate": 2.251614239864211e-05,
"loss": 3.5202,
"step": 163900
},
{
"epoch": 1.74,
"learning_rate": 2.2333650176821485e-05,
"loss": 3.4719,
"step": 164000
},
{
"epoch": 1.74,
"learning_rate": 2.21518659286164e-05,
"loss": 3.4925,
"step": 164100
},
{
"epoch": 1.75,
"learning_rate": 2.1970790219320885e-05,
"loss": 3.4056,
"step": 164200
},
{
"epoch": 1.75,
"learning_rate": 2.179042361202582e-05,
"loss": 3.48,
"step": 164300
},
{
"epoch": 1.75,
"learning_rate": 2.161076666761688e-05,
"loss": 3.4169,
"step": 164400
},
{
"epoch": 1.75,
"learning_rate": 2.1431819944772945e-05,
"loss": 3.4857,
"step": 164500
},
{
"epoch": 1.75,
"learning_rate": 2.1253583999964298e-05,
"loss": 3.4628,
"step": 164600
},
{
"epoch": 1.75,
"learning_rate": 2.107605938745086e-05,
"loss": 3.5136,
"step": 164700
},
{
"epoch": 1.75,
"learning_rate": 2.0899246659280584e-05,
"loss": 3.3885,
"step": 164800
},
{
"epoch": 1.75,
"learning_rate": 2.0723146365287743e-05,
"loss": 3.4974,
"step": 164900
},
{
"epoch": 1.75,
"learning_rate": 2.0547759053091088e-05,
"loss": 3.4751,
"step": 165000
},
{
"epoch": 1.75,
"learning_rate": 2.0373085268092144e-05,
"loss": 3.4691,
"step": 165100
},
{
"epoch": 1.76,
"learning_rate": 2.0199125553473696e-05,
"loss": 3.4679,
"step": 165200
},
{
"epoch": 1.76,
"learning_rate": 2.0025880450197902e-05,
"loss": 3.4301,
"step": 165300
},
{
"epoch": 1.76,
"learning_rate": 1.9853350497004763e-05,
"loss": 3.4768,
"step": 165400
},
{
"epoch": 1.76,
"learning_rate": 1.9681536230410386e-05,
"loss": 3.4745,
"step": 165500
},
{
"epoch": 1.76,
"learning_rate": 1.951043818470516e-05,
"loss": 3.4939,
"step": 165600
},
{
"epoch": 1.76,
"learning_rate": 1.9340056891952396e-05,
"loss": 3.4786,
"step": 165700
},
{
"epoch": 1.76,
"learning_rate": 1.917039288198652e-05,
"loss": 3.4544,
"step": 165800
},
{
"epoch": 1.76,
"learning_rate": 1.9001446682411355e-05,
"loss": 3.4916,
"step": 165900
},
{
"epoch": 1.76,
"learning_rate": 1.8833218818598563e-05,
"loss": 3.4662,
"step": 166000
},
{
"epoch": 1.77,
"learning_rate": 1.866570981368601e-05,
"loss": 3.4395,
"step": 166100
},
{
"epoch": 1.77,
"learning_rate": 1.8498920188576187e-05,
"loss": 3.4923,
"step": 166200
},
{
"epoch": 1.77,
"learning_rate": 1.833285046193442e-05,
"loss": 3.4653,
"step": 166300
},
{
"epoch": 1.77,
"learning_rate": 1.816750115018742e-05,
"loss": 3.4962,
"step": 166400
},
{
"epoch": 1.77,
"learning_rate": 1.800287276752166e-05,
"loss": 3.4816,
"step": 166500
},
{
"epoch": 1.77,
"learning_rate": 1.783896582588168e-05,
"loss": 3.3854,
"step": 166600
},
{
"epoch": 1.77,
"learning_rate": 1.767578083496857e-05,
"loss": 3.465,
"step": 166700
},
{
"epoch": 1.77,
"learning_rate": 1.7513318302238486e-05,
"loss": 3.4746,
"step": 166800
},
{
"epoch": 1.77,
"learning_rate": 1.735157873290069e-05,
"loss": 3.4732,
"step": 166900
},
{
"epoch": 1.78,
"learning_rate": 1.7190562629916557e-05,
"loss": 3.4785,
"step": 167000
},
{
"epoch": 1.78,
"learning_rate": 1.7030270493997495e-05,
"loss": 3.4685,
"step": 167100
},
{
"epoch": 1.78,
"learning_rate": 1.68707028236037e-05,
"loss": 3.4655,
"step": 167200
},
{
"epoch": 1.78,
"learning_rate": 1.6711860114942383e-05,
"loss": 3.4566,
"step": 167300
},
{
"epoch": 1.78,
"learning_rate": 1.6553742861966452e-05,
"loss": 3.4388,
"step": 167400
},
{
"epoch": 1.78,
"learning_rate": 1.639635155637284e-05,
"loss": 3.502,
"step": 167500
},
{
"epoch": 1.78,
"learning_rate": 1.623968668760101e-05,
"loss": 3.4313,
"step": 167600
},
{
"epoch": 1.78,
"learning_rate": 1.6083748742831304e-05,
"loss": 3.4205,
"step": 167700
},
{
"epoch": 1.78,
"learning_rate": 1.5928538206983755e-05,
"loss": 3.4836,
"step": 167800
},
{
"epoch": 1.78,
"learning_rate": 1.577405556271619e-05,
"loss": 3.4634,
"step": 167900
},
{
"epoch": 1.79,
"learning_rate": 1.5620301290423135e-05,
"loss": 3.4655,
"step": 168000
},
{
"epoch": 1.79,
"learning_rate": 1.546727586823382e-05,
"loss": 3.4441,
"step": 168100
},
{
"epoch": 1.79,
"learning_rate": 1.5314979772011155e-05,
"loss": 3.4556,
"step": 168200
},
{
"epoch": 1.79,
"learning_rate": 1.5163413475350025e-05,
"loss": 3.5384,
"step": 168300
},
{
"epoch": 1.79,
"learning_rate": 1.5012577449575848e-05,
"loss": 3.4572,
"step": 168400
},
{
"epoch": 1.79,
"learning_rate": 1.4862472163743146e-05,
"loss": 3.4505,
"step": 168500
},
{
"epoch": 1.79,
"learning_rate": 1.471309808463403e-05,
"loss": 3.4776,
"step": 168600
},
{
"epoch": 1.79,
"learning_rate": 1.4564455676756766e-05,
"loss": 3.5107,
"step": 168700
},
{
"epoch": 1.79,
"learning_rate": 1.4416545402344383e-05,
"loss": 3.5006,
"step": 168800
},
{
"epoch": 1.8,
"learning_rate": 1.4269367721353205e-05,
"loss": 3.519,
"step": 168900
},
{
"epoch": 1.8,
"learning_rate": 1.4122923091461348e-05,
"loss": 3.4775,
"step": 169000
},
{
"epoch": 1.8,
"learning_rate": 1.3977211968067422e-05,
"loss": 3.4108,
"step": 169100
},
{
"epoch": 1.8,
"learning_rate": 1.3832234804289023e-05,
"loss": 3.498,
"step": 169200
},
{
"epoch": 1.8,
"learning_rate": 1.3687992050961356e-05,
"loss": 3.4988,
"step": 169300
},
{
"epoch": 1.8,
"learning_rate": 1.3544484156635923e-05,
"loss": 3.4937,
"step": 169400
},
{
"epoch": 1.8,
"learning_rate": 1.34017115675788e-05,
"loss": 3.47,
"step": 169500
},
{
"epoch": 1.8,
"learning_rate": 1.3259674727769732e-05,
"loss": 3.4696,
"step": 169600
},
{
"epoch": 1.8,
"learning_rate": 1.3118374078900376e-05,
"loss": 3.4722,
"step": 169700
},
{
"epoch": 1.8,
"learning_rate": 1.297781006037313e-05,
"loss": 3.4794,
"step": 169800
},
{
"epoch": 1.81,
"learning_rate": 1.2837983109299566e-05,
"loss": 3.5015,
"step": 169900
},
{
"epoch": 1.81,
"learning_rate": 1.2698893660499394e-05,
"loss": 3.4986,
"step": 170000
},
{
"epoch": 1.81,
"learning_rate": 1.2560542146498766e-05,
"loss": 3.4245,
"step": 170100
},
{
"epoch": 1.81,
"learning_rate": 1.2422928997529142e-05,
"loss": 3.4625,
"step": 170200
},
{
"epoch": 1.81,
"learning_rate": 1.2286054641525824e-05,
"loss": 3.4951,
"step": 170300
},
{
"epoch": 1.81,
"learning_rate": 1.214991950412675e-05,
"loss": 3.4903,
"step": 170400
},
{
"epoch": 1.81,
"learning_rate": 1.2014524008671118e-05,
"loss": 3.4828,
"step": 170500
},
{
"epoch": 1.81,
"learning_rate": 1.1879868576198049e-05,
"loss": 3.4202,
"step": 170600
},
{
"epoch": 1.81,
"learning_rate": 1.1745953625445283e-05,
"loss": 3.457,
"step": 170700
},
{
"epoch": 1.82,
"learning_rate": 1.161277957284787e-05,
"loss": 3.4711,
"step": 170800
},
{
"epoch": 1.82,
"learning_rate": 1.1480346832536847e-05,
"loss": 3.4668,
"step": 170900
},
{
"epoch": 1.82,
"learning_rate": 1.1348655816338176e-05,
"loss": 3.4941,
"step": 171000
},
{
"epoch": 1.82,
"learning_rate": 1.1217706933771165e-05,
"loss": 3.4354,
"step": 171100
},
{
"epoch": 1.82,
"learning_rate": 1.1087500592047189e-05,
"loss": 3.4694,
"step": 171200
},
{
"epoch": 1.82,
"learning_rate": 1.0958037196068693e-05,
"loss": 3.4025,
"step": 171300
},
{
"epoch": 1.82,
"learning_rate": 1.0829317148427831e-05,
"loss": 3.5235,
"step": 171400
},
{
"epoch": 1.82,
"learning_rate": 1.0701340849404995e-05,
"loss": 3.4876,
"step": 171500
},
{
"epoch": 1.82,
"learning_rate": 1.0574108696967955e-05,
"loss": 3.5077,
"step": 171600
},
{
"epoch": 1.83,
"learning_rate": 1.0447621086770164e-05,
"loss": 3.4487,
"step": 171700
},
{
"epoch": 1.83,
"learning_rate": 1.0321878412149959e-05,
"loss": 3.5235,
"step": 171800
},
{
"epoch": 1.83,
"learning_rate": 1.019688106412911e-05,
"loss": 3.5167,
"step": 171900
},
{
"epoch": 1.83,
"learning_rate": 1.0072629431411629e-05,
"loss": 3.5023,
"step": 172000
},
{
"epoch": 1.83,
"learning_rate": 9.949123900382578e-06,
"loss": 3.4944,
"step": 172100
},
{
"epoch": 1.83,
"learning_rate": 9.826364855106851e-06,
"loss": 3.4764,
"step": 172200
},
{
"epoch": 1.83,
"learning_rate": 9.704352677328055e-06,
"loss": 3.4143,
"step": 172300
},
{
"epoch": 1.83,
"learning_rate": 9.583087746467212e-06,
"loss": 3.478,
"step": 172400
},
{
"epoch": 1.83,
"learning_rate": 9.46257043962162e-06,
"loss": 3.4686,
"step": 172500
},
{
"epoch": 1.83,
"learning_rate": 9.342801131563772e-06,
"loss": 3.4824,
"step": 172600
},
{
"epoch": 1.84,
"learning_rate": 9.22378019473999e-06,
"loss": 3.456,
"step": 172700
},
{
"epoch": 1.84,
"learning_rate": 9.105507999269513e-06,
"loss": 3.4789,
"step": 172800
},
{
"epoch": 1.84,
"learning_rate": 8.98798491294317e-06,
"loss": 3.4921,
"step": 172900
},
{
"epoch": 1.84,
"learning_rate": 8.871211301222205e-06,
"loss": 3.4355,
"step": 173000
},
{
"epoch": 1.84,
"learning_rate": 8.755187527237362e-06,
"loss": 3.4613,
"step": 173100
},
{
"epoch": 1.84,
"learning_rate": 8.639913951787537e-06,
"loss": 3.4982,
"step": 173200
},
{
"epoch": 1.84,
"learning_rate": 8.525390933338761e-06,
"loss": 3.498,
"step": 173300
},
{
"epoch": 1.84,
"learning_rate": 8.411618828022994e-06,
"loss": 3.4502,
"step": 173400
},
{
"epoch": 1.84,
"learning_rate": 8.298597989637197e-06,
"loss": 3.4695,
"step": 173500
},
{
"epoch": 1.85,
"learning_rate": 8.18632876964201e-06,
"loss": 3.4537,
"step": 173600
},
{
"epoch": 1.85,
"learning_rate": 8.074811517160885e-06,
"loss": 3.476,
"step": 173700
},
{
"epoch": 1.85,
"learning_rate": 7.964046578978701e-06,
"loss": 3.418,
"step": 173800
},
{
"epoch": 1.85,
"learning_rate": 7.854034299541068e-06,
"loss": 3.4262,
"step": 173900
},
{
"epoch": 1.85,
"learning_rate": 7.744775020952887e-06,
"loss": 3.4676,
"step": 174000
},
{
"epoch": 1.85,
"learning_rate": 7.636269082977626e-06,
"loss": 3.44,
"step": 174100
},
{
"epoch": 1.85,
"learning_rate": 7.528516823035902e-06,
"loss": 3.4154,
"step": 174200
},
{
"epoch": 1.85,
"learning_rate": 7.4215185762047385e-06,
"loss": 3.4926,
"step": 174300
},
{
"epoch": 1.85,
"learning_rate": 7.315274675216421e-06,
"loss": 3.4514,
"step": 174400
},
{
"epoch": 1.85,
"learning_rate": 7.2097854504573626e-06,
"loss": 3.5559,
"step": 174500
},
{
"epoch": 1.86,
"learning_rate": 7.105051229967241e-06,
"loss": 3.4448,
"step": 174600
},
{
"epoch": 1.86,
"learning_rate": 7.001072339437836e-06,
"loss": 3.4527,
"step": 174700
},
{
"epoch": 1.86,
"learning_rate": 6.897849102212083e-06,
"loss": 3.4917,
"step": 174800
},
{
"epoch": 1.86,
"learning_rate": 6.795381839283133e-06,
"loss": 3.4435,
"step": 174900
},
{
"epoch": 1.86,
"learning_rate": 6.693670869293206e-06,
"loss": 3.5062,
"step": 175000
},
{
"epoch": 1.86,
"learning_rate": 6.592716508532742e-06,
"loss": 3.4505,
"step": 175100
},
{
"epoch": 1.86,
"learning_rate": 6.492519070939312e-06,
"loss": 3.4699,
"step": 175200
},
{
"epoch": 1.86,
"learning_rate": 6.393078868096674e-06,
"loss": 3.5317,
"step": 175300
},
{
"epoch": 1.86,
"learning_rate": 6.294396209233888e-06,
"loss": 3.5148,
"step": 175400
},
{
"epoch": 1.87,
"learning_rate": 6.196471401224202e-06,
"loss": 3.4562,
"step": 175500
},
{
"epoch": 1.87,
"learning_rate": 6.0993047485841944e-06,
"loss": 3.4535,
"step": 175600
},
{
"epoch": 1.87,
"learning_rate": 6.002896553472831e-06,
"loss": 3.4372,
"step": 175700
},
{
"epoch": 1.87,
"learning_rate": 5.907247115690489e-06,
"loss": 3.4692,
"step": 175800
},
{
"epoch": 1.87,
"learning_rate": 5.812356732678076e-06,
"loss": 3.4198,
"step": 175900
},
{
"epoch": 1.87,
"learning_rate": 5.718225699515939e-06,
"loss": 3.4826,
"step": 176000
},
{
"epoch": 1.87,
"learning_rate": 5.624854308923289e-06,
"loss": 3.4803,
"step": 176100
},
{
"epoch": 1.87,
"learning_rate": 5.532242851256891e-06,
"loss": 3.4432,
"step": 176200
},
{
"epoch": 1.87,
"learning_rate": 5.440391614510487e-06,
"loss": 3.4248,
"step": 176300
},
{
"epoch": 1.88,
"learning_rate": 5.349300884313624e-06,
"loss": 3.4578,
"step": 176400
},
{
"epoch": 1.88,
"learning_rate": 5.258970943930991e-06,
"loss": 3.4432,
"step": 176500
},
{
"epoch": 1.88,
"learning_rate": 5.1694020742614474e-06,
"loss": 3.4696,
"step": 176600
},
{
"epoch": 1.88,
"learning_rate": 5.080594553837109e-06,
"loss": 3.4922,
"step": 176700
},
{
"epoch": 1.88,
"learning_rate": 4.992548658822593e-06,
"loss": 3.4488,
"step": 176800
},
{
"epoch": 1.88,
"learning_rate": 4.905264663014025e-06,
"loss": 3.4413,
"step": 176900
},
{
"epoch": 1.88,
"learning_rate": 4.818742837838258e-06,
"loss": 3.4904,
"step": 177000
},
{
"epoch": 1.88,
"learning_rate": 4.732983452352096e-06,
"loss": 3.4597,
"step": 177100
},
{
"epoch": 1.88,
"learning_rate": 4.647986773241353e-06,
"loss": 3.3803,
"step": 177200
},
{
"epoch": 1.88,
"learning_rate": 4.563753064819959e-06,
"loss": 3.4865,
"step": 177300
},
{
"epoch": 1.89,
"learning_rate": 4.480282589029383e-06,
"loss": 3.4853,
"step": 177400
},
{
"epoch": 1.89,
"learning_rate": 4.397575605437576e-06,
"loss": 3.4592,
"step": 177500
},
{
"epoch": 1.89,
"learning_rate": 4.315632371238304e-06,
"loss": 3.4212,
"step": 177600
},
{
"epoch": 1.89,
"learning_rate": 4.234453141250288e-06,
"loss": 3.4781,
"step": 177700
},
{
"epoch": 1.89,
"learning_rate": 4.154038167916402e-06,
"loss": 3.5055,
"step": 177800
},
{
"epoch": 1.89,
"learning_rate": 4.074387701302973e-06,
"loss": 3.4421,
"step": 177900
},
{
"epoch": 1.89,
"learning_rate": 3.995501989098843e-06,
"loss": 3.4785,
"step": 178000
},
{
"epoch": 1.89,
"learning_rate": 3.9173812766148394e-06,
"loss": 3.4931,
"step": 178100
},
{
"epoch": 1.89,
"learning_rate": 3.840025806782721e-06,
"loss": 3.4689,
"step": 178200
},
{
"epoch": 1.9,
"learning_rate": 3.7634358201547035e-06,
"loss": 3.4653,
"step": 178300
},
{
"epoch": 1.9,
"learning_rate": 3.6876115549024923e-06,
"loss": 3.4776,
"step": 178400
},
{
"epoch": 1.9,
"learning_rate": 3.612553246816669e-06,
"loss": 3.4824,
"step": 178500
},
{
"epoch": 1.9,
"learning_rate": 3.538261129305914e-06,
"loss": 3.4846,
"step": 178600
},
{
"epoch": 1.9,
"learning_rate": 3.464735433396288e-06,
"loss": 3.4451,
"step": 178700
},
{
"epoch": 1.9,
"learning_rate": 3.3919763877304777e-06,
"loss": 3.4596,
"step": 178800
},
{
"epoch": 1.9,
"learning_rate": 3.3199842185671903e-06,
"loss": 3.4577,
"step": 178900
},
{
"epoch": 1.9,
"learning_rate": 3.248759149780317e-06,
"loss": 3.457,
"step": 179000
},
{
"epoch": 1.9,
"learning_rate": 3.1783014028582967e-06,
"loss": 3.4635,
"step": 179100
},
{
"epoch": 1.9,
"learning_rate": 3.1086111969035048e-06,
"loss": 3.5187,
"step": 179200
},
{
"epoch": 1.91,
"learning_rate": 3.0396887486313916e-06,
"loss": 3.4766,
"step": 179300
},
{
"epoch": 1.91,
"learning_rate": 2.9715342723700133e-06,
"loss": 3.4628,
"step": 179400
},
{
"epoch": 1.91,
"learning_rate": 2.9041479800591685e-06,
"loss": 3.4998,
"step": 179500
},
{
"epoch": 1.91,
"learning_rate": 2.8375300812499007e-06,
"loss": 3.4767,
"step": 179600
},
{
"epoch": 1.91,
"learning_rate": 2.771680783103747e-06,
"loss": 3.4474,
"step": 179700
},
{
"epoch": 1.91,
"learning_rate": 2.706600290392186e-06,
"loss": 3.4676,
"step": 179800
},
{
"epoch": 1.91,
"learning_rate": 2.642288805495885e-06,
"loss": 3.4556,
"step": 179900
},
{
"epoch": 1.91,
"learning_rate": 2.5787465284041188e-06,
"loss": 3.4032,
"step": 180000
},
{
"epoch": 1.91,
"learning_rate": 2.5159736567141876e-06,
"loss": 3.4813,
"step": 180100
},
{
"epoch": 1.92,
"learning_rate": 2.4539703856308326e-06,
"loss": 3.4864,
"step": 180200
},
{
"epoch": 1.92,
"learning_rate": 2.3927369079654313e-06,
"loss": 3.4783,
"step": 180300
},
{
"epoch": 1.92,
"learning_rate": 2.332273414135666e-06,
"loss": 3.4934,
"step": 180400
},
{
"epoch": 1.92,
"learning_rate": 2.2725800921647164e-06,
"loss": 3.4395,
"step": 180500
},
{
"epoch": 1.92,
"learning_rate": 2.213657127680818e-06,
"loss": 3.4599,
"step": 180600
},
{
"epoch": 1.92,
"learning_rate": 2.1555047039165944e-06,
"loss": 3.482,
"step": 180700
},
{
"epoch": 1.92,
"learning_rate": 2.0981230017085017e-06,
"loss": 3.4425,
"step": 180800
},
{
"epoch": 1.92,
"learning_rate": 2.0415121994963314e-06,
"loss": 3.4422,
"step": 180900
},
{
"epoch": 1.92,
"learning_rate": 1.9856724733225695e-06,
"loss": 3.4536,
"step": 181000
},
{
"epoch": 1.93,
"learning_rate": 1.9306039968319535e-06,
"loss": 3.5386,
"step": 181100
},
{
"epoch": 1.93,
"learning_rate": 1.8763069412707778e-06,
"loss": 3.5005,
"step": 181200
},
{
"epoch": 1.93,
"learning_rate": 1.822781475486507e-06,
"loss": 3.4209,
"step": 181300
},
{
"epoch": 1.93,
"learning_rate": 1.7700277659271625e-06,
"loss": 3.4426,
"step": 181400
},
{
"epoch": 1.93,
"learning_rate": 1.7180459766408806e-06,
"loss": 3.4718,
"step": 181500
},
{
"epoch": 1.93,
"learning_rate": 1.6668362692753569e-06,
"loss": 3.4696,
"step": 181600
},
{
"epoch": 1.93,
"learning_rate": 1.6163988030772347e-06,
"loss": 3.4379,
"step": 181700
},
{
"epoch": 1.93,
"learning_rate": 1.5667337348918841e-06,
"loss": 3.4895,
"step": 181800
},
{
"epoch": 1.93,
"learning_rate": 1.5178412191626524e-06,
"loss": 3.4887,
"step": 181900
},
{
"epoch": 1.93,
"learning_rate": 1.469721407930502e-06,
"loss": 3.4375,
"step": 182000
},
{
"epoch": 1.94,
"learning_rate": 1.4223744508334857e-06,
"loss": 3.4431,
"step": 182100
},
{
"epoch": 1.94,
"learning_rate": 1.375800495106383e-06,
"loss": 3.4643,
"step": 182200
},
{
"epoch": 1.94,
"learning_rate": 1.3299996855801189e-06,
"loss": 3.4656,
"step": 182300
},
{
"epoch": 1.94,
"learning_rate": 1.2849721646814306e-06,
"loss": 3.4747,
"step": 182400
},
{
"epoch": 1.94,
"learning_rate": 1.2407180724322565e-06,
"loss": 3.3942,
"step": 182500
},
{
"epoch": 1.94,
"learning_rate": 1.1972375464494867e-06,
"loss": 3.475,
"step": 182600
},
{
"epoch": 1.94,
"learning_rate": 1.154530721944408e-06,
"loss": 3.513,
"step": 182700
},
{
"epoch": 1.94,
"learning_rate": 1.11259773172237e-06,
"loss": 3.3924,
"step": 182800
},
{
"epoch": 1.94,
"learning_rate": 1.0714387061823427e-06,
"loss": 3.4743,
"step": 182900
},
{
"epoch": 1.95,
"learning_rate": 1.031053773316415e-06,
"loss": 3.5169,
"step": 183000
},
{
"epoch": 1.95,
"learning_rate": 9.914430587095735e-07,
"loss": 3.4972,
"step": 183100
},
{
"epoch": 1.95,
"learning_rate": 9.52606685539148e-07,
"loss": 3.456,
"step": 183200
},
{
"epoch": 1.95,
"learning_rate": 9.145447745745883e-07,
"loss": 3.4078,
"step": 183300
},
{
"epoch": 1.95,
"learning_rate": 8.772574441768821e-07,
"loss": 3.4546,
"step": 183400
},
{
"epoch": 1.95,
"learning_rate": 8.407448102984161e-07,
"loss": 3.4795,
"step": 183500
},
{
"epoch": 1.95,
"learning_rate": 8.050069864824483e-07,
"loss": 3.4461,
"step": 183600
},
{
"epoch": 1.95,
"learning_rate": 7.700440838628031e-07,
"loss": 3.4308,
"step": 183700
},
{
"epoch": 1.95,
"learning_rate": 7.358562111635936e-07,
"loss": 3.4372,
"step": 183800
},
{
"epoch": 1.95,
"learning_rate": 7.024434746987218e-07,
"loss": 3.4851,
"step": 183900
},
{
"epoch": 1.96,
"learning_rate": 6.698059783717681e-07,
"loss": 3.4375,
"step": 184000
},
{
"epoch": 1.96,
"learning_rate": 6.379438236754354e-07,
"loss": 3.4974,
"step": 184100
},
{
"epoch": 1.96,
"learning_rate": 6.068571096914666e-07,
"loss": 3.4536,
"step": 184200
},
{
"epoch": 1.96,
"learning_rate": 5.765459330901446e-07,
"loss": 3.4817,
"step": 184300
},
{
"epoch": 1.96,
"learning_rate": 5.470103881300425e-07,
"loss": 3.4776,
"step": 184400
},
{
"epoch": 1.96,
"learning_rate": 5.182505666578019e-07,
"loss": 3.44,
"step": 184500
},
{
"epoch": 1.96,
"learning_rate": 4.902665581077991e-07,
"loss": 3.4449,
"step": 184600
},
{
"epoch": 1.96,
"learning_rate": 4.630584495018408e-07,
"loss": 3.5173,
"step": 184700
},
{
"epoch": 1.96,
"learning_rate": 4.366263254489411e-07,
"loss": 3.4473,
"step": 184800
},
{
"epoch": 1.97,
"learning_rate": 4.109702681450167e-07,
"loss": 3.442,
"step": 184900
},
{
"epoch": 1.97,
"learning_rate": 3.8609035737266486e-07,
"loss": 3.436,
"step": 185000
}
],
"logging_steps": 100,
"max_steps": 188152,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 5000,
"total_flos": 3.0232783618595517e+24,
"train_batch_size": 6,
"trial_name": null,
"trial_params": null
}