gpt-for-est-base / trainer_state.json
mphi's picture
Newly created GPT for Estonian, base version.
44b40ec
raw
history blame
396 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"global_step": 1598244,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 0.0002999061469963284,
"loss": 9.0229,
"step": 500
},
{
"epoch": 0.0,
"learning_rate": 0.00029981229399265693,
"loss": 8.3193,
"step": 1000
},
{
"epoch": 0.0,
"learning_rate": 0.0002997184409889854,
"loss": 8.0421,
"step": 1500
},
{
"epoch": 0.0,
"learning_rate": 0.00029962458798531383,
"loss": 7.8282,
"step": 2000
},
{
"epoch": 0.0,
"learning_rate": 0.00029953073498164233,
"loss": 7.6413,
"step": 2500
},
{
"epoch": 0.01,
"learning_rate": 0.0002994368819779708,
"loss": 7.4768,
"step": 3000
},
{
"epoch": 0.01,
"learning_rate": 0.00029934302897429923,
"loss": 7.3366,
"step": 3500
},
{
"epoch": 0.01,
"learning_rate": 0.00029924917597062774,
"loss": 7.1968,
"step": 4000
},
{
"epoch": 0.01,
"learning_rate": 0.0002991553229669562,
"loss": 7.0463,
"step": 4500
},
{
"epoch": 0.01,
"learning_rate": 0.0002990614699632847,
"loss": 6.9269,
"step": 5000
},
{
"epoch": 0.01,
"learning_rate": 0.00029896761695961314,
"loss": 6.796,
"step": 5500
},
{
"epoch": 0.01,
"learning_rate": 0.00029887376395594165,
"loss": 6.7006,
"step": 6000
},
{
"epoch": 0.01,
"learning_rate": 0.0002987799109522701,
"loss": 6.5999,
"step": 6500
},
{
"epoch": 0.01,
"learning_rate": 0.00029868605794859855,
"loss": 6.516,
"step": 7000
},
{
"epoch": 0.01,
"learning_rate": 0.00029859220494492705,
"loss": 6.4314,
"step": 7500
},
{
"epoch": 0.02,
"learning_rate": 0.0002984983519412555,
"loss": 6.3386,
"step": 8000
},
{
"epoch": 0.02,
"learning_rate": 0.00029840449893758395,
"loss": 6.2853,
"step": 8500
},
{
"epoch": 0.02,
"learning_rate": 0.00029831064593391246,
"loss": 6.2242,
"step": 9000
},
{
"epoch": 0.02,
"learning_rate": 0.0002982167929302409,
"loss": 6.1438,
"step": 9500
},
{
"epoch": 0.02,
"learning_rate": 0.00029812293992656936,
"loss": 6.1042,
"step": 10000
},
{
"epoch": 0.02,
"learning_rate": 0.00029802908692289786,
"loss": 6.0451,
"step": 10500
},
{
"epoch": 0.02,
"learning_rate": 0.0002979352339192263,
"loss": 5.9868,
"step": 11000
},
{
"epoch": 0.02,
"learning_rate": 0.00029784138091555476,
"loss": 5.9375,
"step": 11500
},
{
"epoch": 0.02,
"learning_rate": 0.00029774752791188327,
"loss": 5.896,
"step": 12000
},
{
"epoch": 0.02,
"learning_rate": 0.0002976536749082117,
"loss": 5.8403,
"step": 12500
},
{
"epoch": 0.02,
"learning_rate": 0.00029755982190454017,
"loss": 5.8008,
"step": 13000
},
{
"epoch": 0.03,
"learning_rate": 0.0002974659689008687,
"loss": 5.7554,
"step": 13500
},
{
"epoch": 0.03,
"learning_rate": 0.0002973721158971971,
"loss": 5.726,
"step": 14000
},
{
"epoch": 0.03,
"learning_rate": 0.00029727826289352563,
"loss": 5.6831,
"step": 14500
},
{
"epoch": 0.03,
"learning_rate": 0.0002971844098898541,
"loss": 5.6498,
"step": 15000
},
{
"epoch": 0.03,
"learning_rate": 0.0002970905568861826,
"loss": 5.6124,
"step": 15500
},
{
"epoch": 0.03,
"learning_rate": 0.00029699670388251103,
"loss": 5.5981,
"step": 16000
},
{
"epoch": 0.03,
"learning_rate": 0.00029690285087883954,
"loss": 5.5783,
"step": 16500
},
{
"epoch": 0.03,
"learning_rate": 0.000296808997875168,
"loss": 5.543,
"step": 17000
},
{
"epoch": 0.03,
"learning_rate": 0.00029671514487149644,
"loss": 5.5149,
"step": 17500
},
{
"epoch": 0.03,
"learning_rate": 0.00029662129186782494,
"loss": 5.4884,
"step": 18000
},
{
"epoch": 0.03,
"learning_rate": 0.0002965274388641534,
"loss": 5.4705,
"step": 18500
},
{
"epoch": 0.04,
"learning_rate": 0.00029643358586048184,
"loss": 5.455,
"step": 19000
},
{
"epoch": 0.04,
"learning_rate": 0.0002963397328568103,
"loss": 5.4311,
"step": 19500
},
{
"epoch": 0.04,
"learning_rate": 0.0002962458798531388,
"loss": 5.4137,
"step": 20000
},
{
"epoch": 0.04,
"learning_rate": 0.00029615202684946725,
"loss": 5.3846,
"step": 20500
},
{
"epoch": 0.04,
"learning_rate": 0.0002960581738457957,
"loss": 5.3621,
"step": 21000
},
{
"epoch": 0.04,
"learning_rate": 0.0002959643208421242,
"loss": 5.3677,
"step": 21500
},
{
"epoch": 0.04,
"learning_rate": 0.00029587046783845265,
"loss": 5.3455,
"step": 22000
},
{
"epoch": 0.04,
"learning_rate": 0.00029577661483478116,
"loss": 5.3252,
"step": 22500
},
{
"epoch": 0.04,
"learning_rate": 0.0002956827618311096,
"loss": 5.3115,
"step": 23000
},
{
"epoch": 0.04,
"learning_rate": 0.00029558890882743806,
"loss": 5.3063,
"step": 23500
},
{
"epoch": 0.05,
"learning_rate": 0.00029549505582376656,
"loss": 5.2829,
"step": 24000
},
{
"epoch": 0.05,
"learning_rate": 0.000295401202820095,
"loss": 5.2593,
"step": 24500
},
{
"epoch": 0.05,
"learning_rate": 0.0002953073498164235,
"loss": 5.2641,
"step": 25000
},
{
"epoch": 0.05,
"learning_rate": 0.00029521349681275197,
"loss": 5.2502,
"step": 25500
},
{
"epoch": 0.05,
"learning_rate": 0.0002951196438090805,
"loss": 5.2255,
"step": 26000
},
{
"epoch": 0.05,
"learning_rate": 0.0002950257908054089,
"loss": 5.2052,
"step": 26500
},
{
"epoch": 0.05,
"learning_rate": 0.0002949319378017374,
"loss": 5.1995,
"step": 27000
},
{
"epoch": 0.05,
"learning_rate": 0.0002948380847980659,
"loss": 5.1872,
"step": 27500
},
{
"epoch": 0.05,
"learning_rate": 0.00029474423179439433,
"loss": 5.2049,
"step": 28000
},
{
"epoch": 0.05,
"learning_rate": 0.0002946503787907228,
"loss": 5.1725,
"step": 28500
},
{
"epoch": 0.05,
"learning_rate": 0.0002945565257870513,
"loss": 5.176,
"step": 29000
},
{
"epoch": 0.06,
"learning_rate": 0.00029446267278337973,
"loss": 5.1581,
"step": 29500
},
{
"epoch": 0.06,
"learning_rate": 0.0002943688197797082,
"loss": 5.1469,
"step": 30000
},
{
"epoch": 0.06,
"learning_rate": 0.00029427496677603664,
"loss": 5.1353,
"step": 30500
},
{
"epoch": 0.06,
"learning_rate": 0.00029418111377236514,
"loss": 5.1279,
"step": 31000
},
{
"epoch": 0.06,
"learning_rate": 0.0002940872607686936,
"loss": 5.1085,
"step": 31500
},
{
"epoch": 0.06,
"learning_rate": 0.0002939934077650221,
"loss": 5.1187,
"step": 32000
},
{
"epoch": 0.06,
"learning_rate": 0.00029389955476135055,
"loss": 5.0909,
"step": 32500
},
{
"epoch": 0.06,
"learning_rate": 0.00029380570175767905,
"loss": 5.0936,
"step": 33000
},
{
"epoch": 0.06,
"learning_rate": 0.0002937118487540075,
"loss": 5.0821,
"step": 33500
},
{
"epoch": 0.06,
"learning_rate": 0.00029361799575033595,
"loss": 5.0849,
"step": 34000
},
{
"epoch": 0.06,
"learning_rate": 0.00029352414274666445,
"loss": 5.0655,
"step": 34500
},
{
"epoch": 0.07,
"learning_rate": 0.0002934302897429929,
"loss": 5.0643,
"step": 35000
},
{
"epoch": 0.07,
"learning_rate": 0.0002933364367393214,
"loss": 5.0693,
"step": 35500
},
{
"epoch": 0.07,
"learning_rate": 0.00029324258373564986,
"loss": 5.0493,
"step": 36000
},
{
"epoch": 0.07,
"learning_rate": 0.0002931487307319783,
"loss": 5.0408,
"step": 36500
},
{
"epoch": 0.07,
"learning_rate": 0.0002930548777283068,
"loss": 5.0339,
"step": 37000
},
{
"epoch": 0.07,
"learning_rate": 0.00029296102472463526,
"loss": 5.0291,
"step": 37500
},
{
"epoch": 0.07,
"learning_rate": 0.0002928671717209637,
"loss": 5.0077,
"step": 38000
},
{
"epoch": 0.07,
"learning_rate": 0.0002927733187172922,
"loss": 5.0201,
"step": 38500
},
{
"epoch": 0.07,
"learning_rate": 0.00029267946571362067,
"loss": 5.0042,
"step": 39000
},
{
"epoch": 0.07,
"learning_rate": 0.0002925856127099491,
"loss": 4.9954,
"step": 39500
},
{
"epoch": 0.08,
"learning_rate": 0.0002924917597062776,
"loss": 5.0108,
"step": 40000
},
{
"epoch": 0.08,
"learning_rate": 0.0002923979067026061,
"loss": 4.9867,
"step": 40500
},
{
"epoch": 0.08,
"learning_rate": 0.0002923040536989345,
"loss": 4.9779,
"step": 41000
},
{
"epoch": 0.08,
"learning_rate": 0.00029221020069526303,
"loss": 4.9828,
"step": 41500
},
{
"epoch": 0.08,
"learning_rate": 0.0002921163476915915,
"loss": 4.9588,
"step": 42000
},
{
"epoch": 0.08,
"learning_rate": 0.00029202249468792,
"loss": 4.9711,
"step": 42500
},
{
"epoch": 0.08,
"learning_rate": 0.00029192864168424844,
"loss": 4.9616,
"step": 43000
},
{
"epoch": 0.08,
"learning_rate": 0.00029183478868057694,
"loss": 4.9597,
"step": 43500
},
{
"epoch": 0.08,
"learning_rate": 0.0002917409356769054,
"loss": 4.9501,
"step": 44000
},
{
"epoch": 0.08,
"learning_rate": 0.00029164708267323384,
"loss": 4.9507,
"step": 44500
},
{
"epoch": 0.08,
"learning_rate": 0.00029155322966956234,
"loss": 4.9347,
"step": 45000
},
{
"epoch": 0.09,
"learning_rate": 0.0002914593766658908,
"loss": 4.9387,
"step": 45500
},
{
"epoch": 0.09,
"learning_rate": 0.00029136552366221925,
"loss": 4.9312,
"step": 46000
},
{
"epoch": 0.09,
"learning_rate": 0.00029127167065854775,
"loss": 4.9258,
"step": 46500
},
{
"epoch": 0.09,
"learning_rate": 0.0002911778176548762,
"loss": 4.9162,
"step": 47000
},
{
"epoch": 0.09,
"learning_rate": 0.00029108396465120465,
"loss": 4.9197,
"step": 47500
},
{
"epoch": 0.09,
"learning_rate": 0.00029099011164753316,
"loss": 4.8996,
"step": 48000
},
{
"epoch": 0.09,
"learning_rate": 0.0002908962586438616,
"loss": 4.8886,
"step": 48500
},
{
"epoch": 0.09,
"learning_rate": 0.00029080240564019006,
"loss": 4.9008,
"step": 49000
},
{
"epoch": 0.09,
"learning_rate": 0.00029070855263651856,
"loss": 4.9134,
"step": 49500
},
{
"epoch": 0.09,
"learning_rate": 0.000290614699632847,
"loss": 4.8817,
"step": 50000
},
{
"epoch": 0.09,
"learning_rate": 0.00029052084662917546,
"loss": 4.8882,
"step": 50500
},
{
"epoch": 0.1,
"learning_rate": 0.00029042699362550397,
"loss": 4.8738,
"step": 51000
},
{
"epoch": 0.1,
"learning_rate": 0.0002903331406218324,
"loss": 4.8696,
"step": 51500
},
{
"epoch": 0.1,
"learning_rate": 0.0002902392876181609,
"loss": 4.8936,
"step": 52000
},
{
"epoch": 0.1,
"learning_rate": 0.00029014543461448937,
"loss": 4.8812,
"step": 52500
},
{
"epoch": 0.1,
"learning_rate": 0.0002900515816108179,
"loss": 4.8556,
"step": 53000
},
{
"epoch": 0.1,
"learning_rate": 0.0002899577286071463,
"loss": 4.8639,
"step": 53500
},
{
"epoch": 0.1,
"learning_rate": 0.00028986387560347483,
"loss": 4.8762,
"step": 54000
},
{
"epoch": 0.1,
"learning_rate": 0.0002897700225998033,
"loss": 4.8768,
"step": 54500
},
{
"epoch": 0.1,
"learning_rate": 0.00028967616959613173,
"loss": 4.8507,
"step": 55000
},
{
"epoch": 0.1,
"learning_rate": 0.0002895823165924602,
"loss": 4.8578,
"step": 55500
},
{
"epoch": 0.11,
"learning_rate": 0.0002894884635887887,
"loss": 4.8623,
"step": 56000
},
{
"epoch": 0.11,
"learning_rate": 0.00028939461058511714,
"loss": 4.8503,
"step": 56500
},
{
"epoch": 0.11,
"learning_rate": 0.0002893007575814456,
"loss": 4.83,
"step": 57000
},
{
"epoch": 0.11,
"learning_rate": 0.0002892069045777741,
"loss": 4.8259,
"step": 57500
},
{
"epoch": 0.11,
"learning_rate": 0.00028911305157410254,
"loss": 4.8262,
"step": 58000
},
{
"epoch": 0.11,
"learning_rate": 0.000289019198570431,
"loss": 4.8296,
"step": 58500
},
{
"epoch": 0.11,
"learning_rate": 0.0002889253455667595,
"loss": 4.8132,
"step": 59000
},
{
"epoch": 0.11,
"learning_rate": 0.00028883149256308795,
"loss": 4.8128,
"step": 59500
},
{
"epoch": 0.11,
"learning_rate": 0.0002887376395594164,
"loss": 4.8257,
"step": 60000
},
{
"epoch": 0.11,
"learning_rate": 0.0002886437865557449,
"loss": 4.8056,
"step": 60500
},
{
"epoch": 0.11,
"learning_rate": 0.00028854993355207335,
"loss": 4.8002,
"step": 61000
},
{
"epoch": 0.12,
"learning_rate": 0.00028845608054840186,
"loss": 4.8176,
"step": 61500
},
{
"epoch": 0.12,
"learning_rate": 0.0002883622275447303,
"loss": 4.8031,
"step": 62000
},
{
"epoch": 0.12,
"learning_rate": 0.0002882683745410588,
"loss": 4.7931,
"step": 62500
},
{
"epoch": 0.12,
"learning_rate": 0.00028817452153738726,
"loss": 4.7996,
"step": 63000
},
{
"epoch": 0.12,
"learning_rate": 0.00028808066853371577,
"loss": 4.7932,
"step": 63500
},
{
"epoch": 0.12,
"learning_rate": 0.0002879868155300442,
"loss": 4.7942,
"step": 64000
},
{
"epoch": 0.12,
"learning_rate": 0.00028789296252637267,
"loss": 4.7629,
"step": 64500
},
{
"epoch": 0.12,
"learning_rate": 0.00028779910952270117,
"loss": 4.7946,
"step": 65000
},
{
"epoch": 0.12,
"learning_rate": 0.0002877052565190296,
"loss": 4.7972,
"step": 65500
},
{
"epoch": 0.12,
"learning_rate": 0.00028761140351535807,
"loss": 4.7747,
"step": 66000
},
{
"epoch": 0.12,
"learning_rate": 0.0002875175505116866,
"loss": 4.7656,
"step": 66500
},
{
"epoch": 0.13,
"learning_rate": 0.000287423697508015,
"loss": 4.7701,
"step": 67000
},
{
"epoch": 0.13,
"learning_rate": 0.0002873298445043435,
"loss": 4.7587,
"step": 67500
},
{
"epoch": 0.13,
"learning_rate": 0.0002872359915006719,
"loss": 4.7727,
"step": 68000
},
{
"epoch": 0.13,
"learning_rate": 0.00028714213849700043,
"loss": 4.7734,
"step": 68500
},
{
"epoch": 0.13,
"learning_rate": 0.0002870482854933289,
"loss": 4.7839,
"step": 69000
},
{
"epoch": 0.13,
"learning_rate": 0.0002869544324896574,
"loss": 4.7712,
"step": 69500
},
{
"epoch": 0.13,
"learning_rate": 0.00028686057948598584,
"loss": 4.7754,
"step": 70000
},
{
"epoch": 0.13,
"learning_rate": 0.0002867667264823143,
"loss": 4.7686,
"step": 70500
},
{
"epoch": 0.13,
"learning_rate": 0.0002866728734786428,
"loss": 4.7434,
"step": 71000
},
{
"epoch": 0.13,
"learning_rate": 0.00028657902047497124,
"loss": 4.7537,
"step": 71500
},
{
"epoch": 0.14,
"learning_rate": 0.00028648516747129975,
"loss": 4.7555,
"step": 72000
},
{
"epoch": 0.14,
"learning_rate": 0.0002863913144676282,
"loss": 4.7473,
"step": 72500
},
{
"epoch": 0.14,
"learning_rate": 0.0002862974614639567,
"loss": 4.7394,
"step": 73000
},
{
"epoch": 0.14,
"learning_rate": 0.00028620360846028515,
"loss": 4.7185,
"step": 73500
},
{
"epoch": 0.14,
"learning_rate": 0.0002861097554566136,
"loss": 4.7267,
"step": 74000
},
{
"epoch": 0.14,
"learning_rate": 0.0002860159024529421,
"loss": 4.7256,
"step": 74500
},
{
"epoch": 0.14,
"learning_rate": 0.00028592204944927056,
"loss": 4.7324,
"step": 75000
},
{
"epoch": 0.14,
"learning_rate": 0.000285828196445599,
"loss": 4.7389,
"step": 75500
},
{
"epoch": 0.14,
"learning_rate": 0.0002857343434419275,
"loss": 4.7295,
"step": 76000
},
{
"epoch": 0.14,
"learning_rate": 0.00028564049043825596,
"loss": 4.7122,
"step": 76500
},
{
"epoch": 0.14,
"learning_rate": 0.0002855466374345844,
"loss": 4.7236,
"step": 77000
},
{
"epoch": 0.15,
"learning_rate": 0.0002854527844309129,
"loss": 4.7286,
"step": 77500
},
{
"epoch": 0.15,
"learning_rate": 0.00028535893142724137,
"loss": 4.7192,
"step": 78000
},
{
"epoch": 0.15,
"learning_rate": 0.0002852650784235698,
"loss": 4.7253,
"step": 78500
},
{
"epoch": 0.15,
"learning_rate": 0.0002851712254198983,
"loss": 4.7103,
"step": 79000
},
{
"epoch": 0.15,
"learning_rate": 0.00028507737241622677,
"loss": 4.7163,
"step": 79500
},
{
"epoch": 0.15,
"learning_rate": 0.0002849835194125553,
"loss": 4.7043,
"step": 80000
},
{
"epoch": 0.15,
"learning_rate": 0.0002848896664088837,
"loss": 4.6942,
"step": 80500
},
{
"epoch": 0.15,
"learning_rate": 0.0002847958134052122,
"loss": 4.7068,
"step": 81000
},
{
"epoch": 0.15,
"learning_rate": 0.0002847019604015407,
"loss": 4.7219,
"step": 81500
},
{
"epoch": 0.15,
"learning_rate": 0.00028460810739786913,
"loss": 4.6885,
"step": 82000
},
{
"epoch": 0.15,
"learning_rate": 0.00028451425439419764,
"loss": 4.7104,
"step": 82500
},
{
"epoch": 0.16,
"learning_rate": 0.0002844204013905261,
"loss": 4.7067,
"step": 83000
},
{
"epoch": 0.16,
"learning_rate": 0.00028432654838685454,
"loss": 4.7005,
"step": 83500
},
{
"epoch": 0.16,
"learning_rate": 0.00028423269538318304,
"loss": 4.6733,
"step": 84000
},
{
"epoch": 0.16,
"learning_rate": 0.0002841388423795115,
"loss": 4.6877,
"step": 84500
},
{
"epoch": 0.16,
"learning_rate": 0.00028404498937583994,
"loss": 4.6818,
"step": 85000
},
{
"epoch": 0.16,
"learning_rate": 0.00028395113637216845,
"loss": 4.6662,
"step": 85500
},
{
"epoch": 0.16,
"learning_rate": 0.0002838572833684969,
"loss": 4.6931,
"step": 86000
},
{
"epoch": 0.16,
"learning_rate": 0.00028376343036482535,
"loss": 4.6788,
"step": 86500
},
{
"epoch": 0.16,
"learning_rate": 0.00028366957736115385,
"loss": 4.6833,
"step": 87000
},
{
"epoch": 0.16,
"learning_rate": 0.0002835757243574823,
"loss": 4.6715,
"step": 87500
},
{
"epoch": 0.17,
"learning_rate": 0.00028348187135381075,
"loss": 4.6749,
"step": 88000
},
{
"epoch": 0.17,
"learning_rate": 0.00028338801835013926,
"loss": 4.6802,
"step": 88500
},
{
"epoch": 0.17,
"learning_rate": 0.0002832941653464677,
"loss": 4.6582,
"step": 89000
},
{
"epoch": 0.17,
"learning_rate": 0.0002832003123427962,
"loss": 4.6841,
"step": 89500
},
{
"epoch": 0.17,
"learning_rate": 0.00028310645933912466,
"loss": 4.6766,
"step": 90000
},
{
"epoch": 0.17,
"learning_rate": 0.00028301260633545317,
"loss": 4.6729,
"step": 90500
},
{
"epoch": 0.17,
"learning_rate": 0.0002829187533317816,
"loss": 4.6545,
"step": 91000
},
{
"epoch": 0.17,
"learning_rate": 0.00028282490032811007,
"loss": 4.668,
"step": 91500
},
{
"epoch": 0.17,
"learning_rate": 0.00028273104732443857,
"loss": 4.6747,
"step": 92000
},
{
"epoch": 0.17,
"learning_rate": 0.000282637194320767,
"loss": 4.6719,
"step": 92500
},
{
"epoch": 0.17,
"learning_rate": 0.00028254334131709547,
"loss": 4.642,
"step": 93000
},
{
"epoch": 0.18,
"learning_rate": 0.000282449488313424,
"loss": 4.6562,
"step": 93500
},
{
"epoch": 0.18,
"learning_rate": 0.0002823556353097524,
"loss": 4.6628,
"step": 94000
},
{
"epoch": 0.18,
"learning_rate": 0.0002822617823060809,
"loss": 4.6466,
"step": 94500
},
{
"epoch": 0.18,
"learning_rate": 0.0002821679293024094,
"loss": 4.6609,
"step": 95000
},
{
"epoch": 0.18,
"learning_rate": 0.00028207407629873783,
"loss": 4.6409,
"step": 95500
},
{
"epoch": 0.18,
"learning_rate": 0.0002819802232950663,
"loss": 4.6591,
"step": 96000
},
{
"epoch": 0.18,
"learning_rate": 0.0002818863702913948,
"loss": 4.6383,
"step": 96500
},
{
"epoch": 0.18,
"learning_rate": 0.00028179251728772324,
"loss": 4.6406,
"step": 97000
},
{
"epoch": 0.18,
"learning_rate": 0.0002816986642840517,
"loss": 4.6608,
"step": 97500
},
{
"epoch": 0.18,
"learning_rate": 0.0002816048112803802,
"loss": 4.6444,
"step": 98000
},
{
"epoch": 0.18,
"learning_rate": 0.00028151095827670864,
"loss": 4.6573,
"step": 98500
},
{
"epoch": 0.19,
"learning_rate": 0.00028141710527303715,
"loss": 4.646,
"step": 99000
},
{
"epoch": 0.19,
"learning_rate": 0.0002813232522693656,
"loss": 4.647,
"step": 99500
},
{
"epoch": 0.19,
"learning_rate": 0.0002812293992656941,
"loss": 4.6531,
"step": 100000
},
{
"epoch": 0.19,
"learning_rate": 0.00028113554626202255,
"loss": 4.6351,
"step": 100500
},
{
"epoch": 0.19,
"learning_rate": 0.00028104169325835106,
"loss": 4.6135,
"step": 101000
},
{
"epoch": 0.19,
"learning_rate": 0.0002809478402546795,
"loss": 4.6253,
"step": 101500
},
{
"epoch": 0.19,
"learning_rate": 0.00028085398725100796,
"loss": 4.6441,
"step": 102000
},
{
"epoch": 0.19,
"learning_rate": 0.00028076013424733646,
"loss": 4.6277,
"step": 102500
},
{
"epoch": 0.19,
"learning_rate": 0.0002806662812436649,
"loss": 4.622,
"step": 103000
},
{
"epoch": 0.19,
"learning_rate": 0.00028057242823999336,
"loss": 4.6247,
"step": 103500
},
{
"epoch": 0.2,
"learning_rate": 0.0002804785752363218,
"loss": 4.6346,
"step": 104000
},
{
"epoch": 0.2,
"learning_rate": 0.0002803847222326503,
"loss": 4.6262,
"step": 104500
},
{
"epoch": 0.2,
"learning_rate": 0.00028029086922897877,
"loss": 4.6191,
"step": 105000
},
{
"epoch": 0.2,
"learning_rate": 0.0002801970162253072,
"loss": 4.6266,
"step": 105500
},
{
"epoch": 0.2,
"learning_rate": 0.0002801031632216357,
"loss": 4.6181,
"step": 106000
},
{
"epoch": 0.2,
"learning_rate": 0.00028000931021796417,
"loss": 4.6199,
"step": 106500
},
{
"epoch": 0.2,
"learning_rate": 0.0002799154572142926,
"loss": 4.6123,
"step": 107000
},
{
"epoch": 0.2,
"learning_rate": 0.00027982160421062113,
"loss": 4.6061,
"step": 107500
},
{
"epoch": 0.2,
"learning_rate": 0.0002797277512069496,
"loss": 4.6127,
"step": 108000
},
{
"epoch": 0.2,
"learning_rate": 0.0002796338982032781,
"loss": 4.6098,
"step": 108500
},
{
"epoch": 0.2,
"learning_rate": 0.00027954004519960653,
"loss": 4.5959,
"step": 109000
},
{
"epoch": 0.21,
"learning_rate": 0.00027944619219593504,
"loss": 4.6217,
"step": 109500
},
{
"epoch": 0.21,
"learning_rate": 0.0002793523391922635,
"loss": 4.6196,
"step": 110000
},
{
"epoch": 0.21,
"learning_rate": 0.000279258486188592,
"loss": 4.613,
"step": 110500
},
{
"epoch": 0.21,
"learning_rate": 0.00027916463318492044,
"loss": 4.5969,
"step": 111000
},
{
"epoch": 0.21,
"learning_rate": 0.0002790707801812489,
"loss": 4.5849,
"step": 111500
},
{
"epoch": 0.21,
"learning_rate": 0.0002789769271775774,
"loss": 4.5936,
"step": 112000
},
{
"epoch": 0.21,
"learning_rate": 0.00027888307417390585,
"loss": 4.5909,
"step": 112500
},
{
"epoch": 0.21,
"learning_rate": 0.0002787892211702343,
"loss": 4.5804,
"step": 113000
},
{
"epoch": 0.21,
"learning_rate": 0.0002786953681665628,
"loss": 4.5991,
"step": 113500
},
{
"epoch": 0.21,
"learning_rate": 0.00027860151516289125,
"loss": 4.5796,
"step": 114000
},
{
"epoch": 0.21,
"learning_rate": 0.0002785076621592197,
"loss": 4.6,
"step": 114500
},
{
"epoch": 0.22,
"learning_rate": 0.00027841380915554815,
"loss": 4.5904,
"step": 115000
},
{
"epoch": 0.22,
"learning_rate": 0.00027831995615187666,
"loss": 4.6135,
"step": 115500
},
{
"epoch": 0.22,
"learning_rate": 0.0002782261031482051,
"loss": 4.5849,
"step": 116000
},
{
"epoch": 0.22,
"learning_rate": 0.0002781322501445336,
"loss": 4.5934,
"step": 116500
},
{
"epoch": 0.22,
"learning_rate": 0.00027803839714086206,
"loss": 4.5789,
"step": 117000
},
{
"epoch": 0.22,
"learning_rate": 0.0002779445441371905,
"loss": 4.6019,
"step": 117500
},
{
"epoch": 0.22,
"learning_rate": 0.000277850691133519,
"loss": 4.5784,
"step": 118000
},
{
"epoch": 0.22,
"learning_rate": 0.00027775683812984747,
"loss": 4.5894,
"step": 118500
},
{
"epoch": 0.22,
"learning_rate": 0.00027766298512617597,
"loss": 4.5661,
"step": 119000
},
{
"epoch": 0.22,
"learning_rate": 0.0002775691321225044,
"loss": 4.5731,
"step": 119500
},
{
"epoch": 0.23,
"learning_rate": 0.00027747527911883293,
"loss": 4.5879,
"step": 120000
},
{
"epoch": 0.23,
"learning_rate": 0.0002773814261151614,
"loss": 4.5668,
"step": 120500
},
{
"epoch": 0.23,
"learning_rate": 0.00027728757311148983,
"loss": 4.5854,
"step": 121000
},
{
"epoch": 0.23,
"learning_rate": 0.00027719372010781833,
"loss": 4.5802,
"step": 121500
},
{
"epoch": 0.23,
"learning_rate": 0.0002770998671041468,
"loss": 4.5791,
"step": 122000
},
{
"epoch": 0.23,
"learning_rate": 0.00027700601410047523,
"loss": 4.5802,
"step": 122500
},
{
"epoch": 0.23,
"learning_rate": 0.00027691216109680374,
"loss": 4.5772,
"step": 123000
},
{
"epoch": 0.23,
"learning_rate": 0.0002768183080931322,
"loss": 4.5704,
"step": 123500
},
{
"epoch": 0.23,
"learning_rate": 0.00027672445508946064,
"loss": 4.5865,
"step": 124000
},
{
"epoch": 0.23,
"learning_rate": 0.00027663060208578914,
"loss": 4.5698,
"step": 124500
},
{
"epoch": 0.23,
"learning_rate": 0.0002765367490821176,
"loss": 4.58,
"step": 125000
},
{
"epoch": 0.24,
"learning_rate": 0.00027644289607844604,
"loss": 4.5589,
"step": 125500
},
{
"epoch": 0.24,
"learning_rate": 0.00027634904307477455,
"loss": 4.557,
"step": 126000
},
{
"epoch": 0.24,
"learning_rate": 0.000276255190071103,
"loss": 4.5452,
"step": 126500
},
{
"epoch": 0.24,
"learning_rate": 0.0002761613370674315,
"loss": 4.5473,
"step": 127000
},
{
"epoch": 0.24,
"learning_rate": 0.00027606748406375995,
"loss": 4.5498,
"step": 127500
},
{
"epoch": 0.24,
"learning_rate": 0.0002759736310600884,
"loss": 4.5575,
"step": 128000
},
{
"epoch": 0.24,
"learning_rate": 0.0002758797780564169,
"loss": 4.5452,
"step": 128500
},
{
"epoch": 0.24,
"learning_rate": 0.00027578592505274536,
"loss": 4.5502,
"step": 129000
},
{
"epoch": 0.24,
"learning_rate": 0.00027569207204907386,
"loss": 4.5673,
"step": 129500
},
{
"epoch": 0.24,
"learning_rate": 0.0002755982190454023,
"loss": 4.5635,
"step": 130000
},
{
"epoch": 0.24,
"learning_rate": 0.00027550436604173076,
"loss": 4.556,
"step": 130500
},
{
"epoch": 0.25,
"learning_rate": 0.00027541051303805927,
"loss": 4.5591,
"step": 131000
},
{
"epoch": 0.25,
"learning_rate": 0.0002753166600343877,
"loss": 4.5233,
"step": 131500
},
{
"epoch": 0.25,
"learning_rate": 0.00027522280703071617,
"loss": 4.5415,
"step": 132000
},
{
"epoch": 0.25,
"learning_rate": 0.0002751289540270447,
"loss": 4.5755,
"step": 132500
},
{
"epoch": 0.25,
"learning_rate": 0.0002750351010233731,
"loss": 4.5587,
"step": 133000
},
{
"epoch": 0.25,
"learning_rate": 0.0002749412480197016,
"loss": 4.5436,
"step": 133500
},
{
"epoch": 0.25,
"learning_rate": 0.0002748473950160301,
"loss": 4.5312,
"step": 134000
},
{
"epoch": 0.25,
"learning_rate": 0.00027475354201235853,
"loss": 4.562,
"step": 134500
},
{
"epoch": 0.25,
"learning_rate": 0.000274659689008687,
"loss": 4.5392,
"step": 135000
},
{
"epoch": 0.25,
"learning_rate": 0.0002745658360050155,
"loss": 4.5207,
"step": 135500
},
{
"epoch": 0.26,
"learning_rate": 0.00027447198300134393,
"loss": 4.5521,
"step": 136000
},
{
"epoch": 0.26,
"learning_rate": 0.00027437812999767244,
"loss": 4.5322,
"step": 136500
},
{
"epoch": 0.26,
"learning_rate": 0.0002742842769940009,
"loss": 4.5305,
"step": 137000
},
{
"epoch": 0.26,
"learning_rate": 0.0002741904239903294,
"loss": 4.5166,
"step": 137500
},
{
"epoch": 0.26,
"learning_rate": 0.00027409657098665784,
"loss": 4.5248,
"step": 138000
},
{
"epoch": 0.26,
"learning_rate": 0.0002740027179829863,
"loss": 4.5413,
"step": 138500
},
{
"epoch": 0.26,
"learning_rate": 0.0002739088649793148,
"loss": 4.5441,
"step": 139000
},
{
"epoch": 0.26,
"learning_rate": 0.00027381501197564325,
"loss": 4.5325,
"step": 139500
},
{
"epoch": 0.26,
"learning_rate": 0.0002737211589719717,
"loss": 4.5529,
"step": 140000
},
{
"epoch": 0.26,
"learning_rate": 0.0002736273059683002,
"loss": 4.5275,
"step": 140500
},
{
"epoch": 0.26,
"learning_rate": 0.00027353345296462865,
"loss": 4.543,
"step": 141000
},
{
"epoch": 0.27,
"learning_rate": 0.0002734395999609571,
"loss": 4.5283,
"step": 141500
},
{
"epoch": 0.27,
"learning_rate": 0.0002733457469572856,
"loss": 4.5547,
"step": 142000
},
{
"epoch": 0.27,
"learning_rate": 0.00027325189395361406,
"loss": 4.523,
"step": 142500
},
{
"epoch": 0.27,
"learning_rate": 0.0002731580409499425,
"loss": 4.5202,
"step": 143000
},
{
"epoch": 0.27,
"learning_rate": 0.000273064187946271,
"loss": 4.5235,
"step": 143500
},
{
"epoch": 0.27,
"learning_rate": 0.00027297033494259946,
"loss": 4.5292,
"step": 144000
},
{
"epoch": 0.27,
"learning_rate": 0.0002728764819389279,
"loss": 4.5304,
"step": 144500
},
{
"epoch": 0.27,
"learning_rate": 0.0002727826289352564,
"loss": 4.5203,
"step": 145000
},
{
"epoch": 0.27,
"learning_rate": 0.00027268877593158487,
"loss": 4.5426,
"step": 145500
},
{
"epoch": 0.27,
"learning_rate": 0.0002725949229279134,
"loss": 4.5062,
"step": 146000
},
{
"epoch": 0.27,
"learning_rate": 0.0002725010699242418,
"loss": 4.5218,
"step": 146500
},
{
"epoch": 0.28,
"learning_rate": 0.00027240721692057033,
"loss": 4.5159,
"step": 147000
},
{
"epoch": 0.28,
"learning_rate": 0.0002723133639168988,
"loss": 4.5134,
"step": 147500
},
{
"epoch": 0.28,
"learning_rate": 0.0002722195109132273,
"loss": 4.5127,
"step": 148000
},
{
"epoch": 0.28,
"learning_rate": 0.00027212565790955573,
"loss": 4.5214,
"step": 148500
},
{
"epoch": 0.28,
"learning_rate": 0.0002720318049058842,
"loss": 4.5216,
"step": 149000
},
{
"epoch": 0.28,
"learning_rate": 0.0002719379519022127,
"loss": 4.5274,
"step": 149500
},
{
"epoch": 0.28,
"learning_rate": 0.00027184409889854114,
"loss": 4.5214,
"step": 150000
},
{
"epoch": 0.28,
"learning_rate": 0.0002717502458948696,
"loss": 4.5199,
"step": 150500
},
{
"epoch": 0.28,
"learning_rate": 0.0002716563928911981,
"loss": 4.498,
"step": 151000
},
{
"epoch": 0.28,
"learning_rate": 0.00027156253988752654,
"loss": 4.5243,
"step": 151500
},
{
"epoch": 0.29,
"learning_rate": 0.000271468686883855,
"loss": 4.5082,
"step": 152000
},
{
"epoch": 0.29,
"learning_rate": 0.00027137483388018344,
"loss": 4.4959,
"step": 152500
},
{
"epoch": 0.29,
"learning_rate": 0.00027128098087651195,
"loss": 4.4927,
"step": 153000
},
{
"epoch": 0.29,
"learning_rate": 0.0002711871278728404,
"loss": 4.4983,
"step": 153500
},
{
"epoch": 0.29,
"learning_rate": 0.00027109327486916885,
"loss": 4.5191,
"step": 154000
},
{
"epoch": 0.29,
"learning_rate": 0.00027099942186549735,
"loss": 4.4982,
"step": 154500
},
{
"epoch": 0.29,
"learning_rate": 0.0002709055688618258,
"loss": 4.5146,
"step": 155000
},
{
"epoch": 0.29,
"learning_rate": 0.0002708117158581543,
"loss": 4.5323,
"step": 155500
},
{
"epoch": 0.29,
"learning_rate": 0.00027071786285448276,
"loss": 4.4845,
"step": 156000
},
{
"epoch": 0.29,
"learning_rate": 0.00027062400985081126,
"loss": 4.5019,
"step": 156500
},
{
"epoch": 0.29,
"learning_rate": 0.0002705301568471397,
"loss": 4.5051,
"step": 157000
},
{
"epoch": 0.3,
"learning_rate": 0.0002704363038434682,
"loss": 4.4981,
"step": 157500
},
{
"epoch": 0.3,
"learning_rate": 0.00027034245083979667,
"loss": 4.5029,
"step": 158000
},
{
"epoch": 0.3,
"learning_rate": 0.0002702485978361251,
"loss": 4.4943,
"step": 158500
},
{
"epoch": 0.3,
"learning_rate": 0.0002701547448324536,
"loss": 4.5068,
"step": 159000
},
{
"epoch": 0.3,
"learning_rate": 0.0002700608918287821,
"loss": 4.5022,
"step": 159500
},
{
"epoch": 0.3,
"learning_rate": 0.0002699670388251105,
"loss": 4.5088,
"step": 160000
},
{
"epoch": 0.3,
"learning_rate": 0.00026987318582143903,
"loss": 4.4995,
"step": 160500
},
{
"epoch": 0.3,
"learning_rate": 0.0002697793328177675,
"loss": 4.482,
"step": 161000
},
{
"epoch": 0.3,
"learning_rate": 0.00026968547981409593,
"loss": 4.498,
"step": 161500
},
{
"epoch": 0.3,
"learning_rate": 0.00026959162681042443,
"loss": 4.4966,
"step": 162000
},
{
"epoch": 0.31,
"learning_rate": 0.0002694977738067529,
"loss": 4.4759,
"step": 162500
},
{
"epoch": 0.31,
"learning_rate": 0.00026940392080308133,
"loss": 4.4983,
"step": 163000
},
{
"epoch": 0.31,
"learning_rate": 0.0002693100677994098,
"loss": 4.501,
"step": 163500
},
{
"epoch": 0.31,
"learning_rate": 0.0002692162147957383,
"loss": 4.493,
"step": 164000
},
{
"epoch": 0.31,
"learning_rate": 0.00026912236179206674,
"loss": 4.5082,
"step": 164500
},
{
"epoch": 0.31,
"learning_rate": 0.00026902850878839524,
"loss": 4.4999,
"step": 165000
},
{
"epoch": 0.31,
"learning_rate": 0.0002689346557847237,
"loss": 4.499,
"step": 165500
},
{
"epoch": 0.31,
"learning_rate": 0.0002688408027810522,
"loss": 4.4951,
"step": 166000
},
{
"epoch": 0.31,
"learning_rate": 0.00026874694977738065,
"loss": 4.4889,
"step": 166500
},
{
"epoch": 0.31,
"learning_rate": 0.00026865309677370915,
"loss": 4.4975,
"step": 167000
},
{
"epoch": 0.31,
"learning_rate": 0.0002685592437700376,
"loss": 4.4951,
"step": 167500
},
{
"epoch": 0.32,
"learning_rate": 0.00026846539076636605,
"loss": 4.4833,
"step": 168000
},
{
"epoch": 0.32,
"learning_rate": 0.00026837153776269456,
"loss": 4.4934,
"step": 168500
},
{
"epoch": 0.32,
"learning_rate": 0.000268277684759023,
"loss": 4.4715,
"step": 169000
},
{
"epoch": 0.32,
"learning_rate": 0.00026818383175535146,
"loss": 4.4913,
"step": 169500
},
{
"epoch": 0.32,
"learning_rate": 0.00026808997875167996,
"loss": 4.4812,
"step": 170000
},
{
"epoch": 0.32,
"learning_rate": 0.0002679961257480084,
"loss": 4.4829,
"step": 170500
},
{
"epoch": 0.32,
"learning_rate": 0.00026790227274433687,
"loss": 4.4816,
"step": 171000
},
{
"epoch": 0.32,
"learning_rate": 0.00026780841974066537,
"loss": 4.4943,
"step": 171500
},
{
"epoch": 0.32,
"learning_rate": 0.0002677145667369938,
"loss": 4.4927,
"step": 172000
},
{
"epoch": 0.32,
"learning_rate": 0.00026762071373332227,
"loss": 4.4701,
"step": 172500
},
{
"epoch": 0.32,
"learning_rate": 0.0002675268607296508,
"loss": 4.4765,
"step": 173000
},
{
"epoch": 0.33,
"learning_rate": 0.0002674330077259792,
"loss": 4.4852,
"step": 173500
},
{
"epoch": 0.33,
"learning_rate": 0.0002673391547223077,
"loss": 4.4734,
"step": 174000
},
{
"epoch": 0.33,
"learning_rate": 0.0002672453017186362,
"loss": 4.487,
"step": 174500
},
{
"epoch": 0.33,
"learning_rate": 0.00026715144871496463,
"loss": 4.4866,
"step": 175000
},
{
"epoch": 0.33,
"learning_rate": 0.00026705759571129313,
"loss": 4.499,
"step": 175500
},
{
"epoch": 0.33,
"learning_rate": 0.0002669637427076216,
"loss": 4.4848,
"step": 176000
},
{
"epoch": 0.33,
"learning_rate": 0.0002668698897039501,
"loss": 4.4732,
"step": 176500
},
{
"epoch": 0.33,
"learning_rate": 0.00026677603670027854,
"loss": 4.4708,
"step": 177000
},
{
"epoch": 0.33,
"learning_rate": 0.000266682183696607,
"loss": 4.4751,
"step": 177500
},
{
"epoch": 0.33,
"learning_rate": 0.0002665883306929355,
"loss": 4.4744,
"step": 178000
},
{
"epoch": 0.34,
"learning_rate": 0.00026649447768926394,
"loss": 4.4781,
"step": 178500
},
{
"epoch": 0.34,
"learning_rate": 0.0002664006246855924,
"loss": 4.4573,
"step": 179000
},
{
"epoch": 0.34,
"learning_rate": 0.0002663067716819209,
"loss": 4.4974,
"step": 179500
},
{
"epoch": 0.34,
"learning_rate": 0.00026621291867824935,
"loss": 4.482,
"step": 180000
},
{
"epoch": 0.34,
"learning_rate": 0.0002661190656745778,
"loss": 4.4881,
"step": 180500
},
{
"epoch": 0.34,
"learning_rate": 0.0002660252126709063,
"loss": 4.4581,
"step": 181000
},
{
"epoch": 0.34,
"learning_rate": 0.00026593135966723476,
"loss": 4.4804,
"step": 181500
},
{
"epoch": 0.34,
"learning_rate": 0.0002658375066635632,
"loss": 4.4607,
"step": 182000
},
{
"epoch": 0.34,
"learning_rate": 0.0002657436536598917,
"loss": 4.4511,
"step": 182500
},
{
"epoch": 0.34,
"learning_rate": 0.00026564980065622016,
"loss": 4.4807,
"step": 183000
},
{
"epoch": 0.34,
"learning_rate": 0.00026555594765254866,
"loss": 4.455,
"step": 183500
},
{
"epoch": 0.35,
"learning_rate": 0.0002654620946488771,
"loss": 4.4794,
"step": 184000
},
{
"epoch": 0.35,
"learning_rate": 0.00026536824164520557,
"loss": 4.4458,
"step": 184500
},
{
"epoch": 0.35,
"learning_rate": 0.00026527438864153407,
"loss": 4.4586,
"step": 185000
},
{
"epoch": 0.35,
"learning_rate": 0.0002651805356378625,
"loss": 4.455,
"step": 185500
},
{
"epoch": 0.35,
"learning_rate": 0.000265086682634191,
"loss": 4.4608,
"step": 186000
},
{
"epoch": 0.35,
"learning_rate": 0.0002649928296305195,
"loss": 4.4719,
"step": 186500
},
{
"epoch": 0.35,
"learning_rate": 0.000264898976626848,
"loss": 4.4616,
"step": 187000
},
{
"epoch": 0.35,
"learning_rate": 0.00026480512362317643,
"loss": 4.4581,
"step": 187500
},
{
"epoch": 0.35,
"learning_rate": 0.0002647112706195049,
"loss": 4.4627,
"step": 188000
},
{
"epoch": 0.35,
"learning_rate": 0.00026461741761583333,
"loss": 4.4613,
"step": 188500
},
{
"epoch": 0.35,
"learning_rate": 0.00026452356461216184,
"loss": 4.4615,
"step": 189000
},
{
"epoch": 0.36,
"learning_rate": 0.0002644297116084903,
"loss": 4.4574,
"step": 189500
},
{
"epoch": 0.36,
"learning_rate": 0.00026433585860481874,
"loss": 4.4601,
"step": 190000
},
{
"epoch": 0.36,
"learning_rate": 0.00026424200560114724,
"loss": 4.4783,
"step": 190500
},
{
"epoch": 0.36,
"learning_rate": 0.0002641481525974757,
"loss": 4.4601,
"step": 191000
},
{
"epoch": 0.36,
"learning_rate": 0.00026405429959380414,
"loss": 4.4601,
"step": 191500
},
{
"epoch": 0.36,
"learning_rate": 0.00026396044659013265,
"loss": 4.4616,
"step": 192000
},
{
"epoch": 0.36,
"learning_rate": 0.0002638665935864611,
"loss": 4.4425,
"step": 192500
},
{
"epoch": 0.36,
"learning_rate": 0.0002637727405827896,
"loss": 4.454,
"step": 193000
},
{
"epoch": 0.36,
"learning_rate": 0.00026367888757911805,
"loss": 4.442,
"step": 193500
},
{
"epoch": 0.36,
"learning_rate": 0.00026358503457544656,
"loss": 4.4526,
"step": 194000
},
{
"epoch": 0.37,
"learning_rate": 0.000263491181571775,
"loss": 4.4622,
"step": 194500
},
{
"epoch": 0.37,
"learning_rate": 0.00026339732856810346,
"loss": 4.4352,
"step": 195000
},
{
"epoch": 0.37,
"learning_rate": 0.00026330347556443196,
"loss": 4.4377,
"step": 195500
},
{
"epoch": 0.37,
"learning_rate": 0.0002632096225607604,
"loss": 4.457,
"step": 196000
},
{
"epoch": 0.37,
"learning_rate": 0.0002631157695570889,
"loss": 4.4503,
"step": 196500
},
{
"epoch": 0.37,
"learning_rate": 0.00026302191655341737,
"loss": 4.4549,
"step": 197000
},
{
"epoch": 0.37,
"learning_rate": 0.0002629280635497458,
"loss": 4.4328,
"step": 197500
},
{
"epoch": 0.37,
"learning_rate": 0.0002628342105460743,
"loss": 4.4321,
"step": 198000
},
{
"epoch": 0.37,
"learning_rate": 0.00026274035754240277,
"loss": 4.4529,
"step": 198500
},
{
"epoch": 0.37,
"learning_rate": 0.0002626465045387312,
"loss": 4.4408,
"step": 199000
},
{
"epoch": 0.37,
"learning_rate": 0.0002625526515350597,
"loss": 4.4626,
"step": 199500
},
{
"epoch": 0.38,
"learning_rate": 0.0002624587985313882,
"loss": 4.4443,
"step": 200000
},
{
"epoch": 0.38,
"learning_rate": 0.0002623649455277166,
"loss": 4.4406,
"step": 200500
},
{
"epoch": 0.38,
"learning_rate": 0.0002622710925240451,
"loss": 4.4512,
"step": 201000
},
{
"epoch": 0.38,
"learning_rate": 0.0002621772395203736,
"loss": 4.452,
"step": 201500
},
{
"epoch": 0.38,
"learning_rate": 0.00026208338651670203,
"loss": 4.4217,
"step": 202000
},
{
"epoch": 0.38,
"learning_rate": 0.00026198953351303054,
"loss": 4.4309,
"step": 202500
},
{
"epoch": 0.38,
"learning_rate": 0.000261895680509359,
"loss": 4.4316,
"step": 203000
},
{
"epoch": 0.38,
"learning_rate": 0.0002618018275056875,
"loss": 4.4368,
"step": 203500
},
{
"epoch": 0.38,
"learning_rate": 0.00026170797450201594,
"loss": 4.444,
"step": 204000
},
{
"epoch": 0.38,
"learning_rate": 0.00026161412149834445,
"loss": 4.4473,
"step": 204500
},
{
"epoch": 0.38,
"learning_rate": 0.0002615202684946729,
"loss": 4.4358,
"step": 205000
},
{
"epoch": 0.39,
"learning_rate": 0.00026142641549100135,
"loss": 4.4415,
"step": 205500
},
{
"epoch": 0.39,
"learning_rate": 0.00026133256248732985,
"loss": 4.449,
"step": 206000
},
{
"epoch": 0.39,
"learning_rate": 0.0002612387094836583,
"loss": 4.4278,
"step": 206500
},
{
"epoch": 0.39,
"learning_rate": 0.00026114485647998675,
"loss": 4.4475,
"step": 207000
},
{
"epoch": 0.39,
"learning_rate": 0.00026105100347631526,
"loss": 4.4435,
"step": 207500
},
{
"epoch": 0.39,
"learning_rate": 0.0002609571504726437,
"loss": 4.4337,
"step": 208000
},
{
"epoch": 0.39,
"learning_rate": 0.00026086329746897216,
"loss": 4.4399,
"step": 208500
},
{
"epoch": 0.39,
"learning_rate": 0.00026076944446530066,
"loss": 4.4535,
"step": 209000
},
{
"epoch": 0.39,
"learning_rate": 0.0002606755914616291,
"loss": 4.4317,
"step": 209500
},
{
"epoch": 0.39,
"learning_rate": 0.00026058173845795756,
"loss": 4.4487,
"step": 210000
},
{
"epoch": 0.4,
"learning_rate": 0.00026048788545428607,
"loss": 4.4395,
"step": 210500
},
{
"epoch": 0.4,
"learning_rate": 0.0002603940324506145,
"loss": 4.4324,
"step": 211000
},
{
"epoch": 0.4,
"learning_rate": 0.00026030017944694297,
"loss": 4.4298,
"step": 211500
},
{
"epoch": 0.4,
"learning_rate": 0.00026020632644327147,
"loss": 4.4465,
"step": 212000
},
{
"epoch": 0.4,
"learning_rate": 0.0002601124734395999,
"loss": 4.4077,
"step": 212500
},
{
"epoch": 0.4,
"learning_rate": 0.0002600186204359284,
"loss": 4.4179,
"step": 213000
},
{
"epoch": 0.4,
"learning_rate": 0.0002599247674322569,
"loss": 4.4179,
"step": 213500
},
{
"epoch": 0.4,
"learning_rate": 0.0002598309144285854,
"loss": 4.4445,
"step": 214000
},
{
"epoch": 0.4,
"learning_rate": 0.00025973706142491383,
"loss": 4.4384,
"step": 214500
},
{
"epoch": 0.4,
"learning_rate": 0.0002596432084212423,
"loss": 4.414,
"step": 215000
},
{
"epoch": 0.4,
"learning_rate": 0.0002595493554175708,
"loss": 4.4242,
"step": 215500
},
{
"epoch": 0.41,
"learning_rate": 0.00025945550241389924,
"loss": 4.4212,
"step": 216000
},
{
"epoch": 0.41,
"learning_rate": 0.0002593616494102277,
"loss": 4.4517,
"step": 216500
},
{
"epoch": 0.41,
"learning_rate": 0.0002592677964065562,
"loss": 4.4202,
"step": 217000
},
{
"epoch": 0.41,
"learning_rate": 0.00025917394340288464,
"loss": 4.419,
"step": 217500
},
{
"epoch": 0.41,
"learning_rate": 0.0002590800903992131,
"loss": 4.4212,
"step": 218000
},
{
"epoch": 0.41,
"learning_rate": 0.0002589862373955416,
"loss": 4.4205,
"step": 218500
},
{
"epoch": 0.41,
"learning_rate": 0.00025889238439187005,
"loss": 4.4269,
"step": 219000
},
{
"epoch": 0.41,
"learning_rate": 0.0002587985313881985,
"loss": 4.4234,
"step": 219500
},
{
"epoch": 0.41,
"learning_rate": 0.000258704678384527,
"loss": 4.4394,
"step": 220000
},
{
"epoch": 0.41,
"learning_rate": 0.00025861082538085545,
"loss": 4.448,
"step": 220500
},
{
"epoch": 0.41,
"learning_rate": 0.0002585169723771839,
"loss": 4.4311,
"step": 221000
},
{
"epoch": 0.42,
"learning_rate": 0.0002584231193735124,
"loss": 4.4127,
"step": 221500
},
{
"epoch": 0.42,
"learning_rate": 0.00025832926636984086,
"loss": 4.4226,
"step": 222000
},
{
"epoch": 0.42,
"learning_rate": 0.00025823541336616936,
"loss": 4.4315,
"step": 222500
},
{
"epoch": 0.42,
"learning_rate": 0.0002581415603624978,
"loss": 4.4174,
"step": 223000
},
{
"epoch": 0.42,
"learning_rate": 0.0002580477073588263,
"loss": 4.408,
"step": 223500
},
{
"epoch": 0.42,
"learning_rate": 0.00025795385435515477,
"loss": 4.4174,
"step": 224000
},
{
"epoch": 0.42,
"learning_rate": 0.0002578600013514832,
"loss": 4.4194,
"step": 224500
},
{
"epoch": 0.42,
"learning_rate": 0.0002577661483478117,
"loss": 4.4232,
"step": 225000
},
{
"epoch": 0.42,
"learning_rate": 0.00025767229534414017,
"loss": 4.4008,
"step": 225500
},
{
"epoch": 0.42,
"learning_rate": 0.0002575784423404686,
"loss": 4.4126,
"step": 226000
},
{
"epoch": 0.43,
"learning_rate": 0.0002574845893367971,
"loss": 4.419,
"step": 226500
},
{
"epoch": 0.43,
"learning_rate": 0.0002573907363331256,
"loss": 4.4076,
"step": 227000
},
{
"epoch": 0.43,
"learning_rate": 0.000257296883329454,
"loss": 4.4135,
"step": 227500
},
{
"epoch": 0.43,
"learning_rate": 0.00025720303032578253,
"loss": 4.4165,
"step": 228000
},
{
"epoch": 0.43,
"learning_rate": 0.000257109177322111,
"loss": 4.3921,
"step": 228500
},
{
"epoch": 0.43,
"learning_rate": 0.00025701532431843943,
"loss": 4.4044,
"step": 229000
},
{
"epoch": 0.43,
"learning_rate": 0.00025692147131476794,
"loss": 4.4253,
"step": 229500
},
{
"epoch": 0.43,
"learning_rate": 0.0002568276183110964,
"loss": 4.3979,
"step": 230000
},
{
"epoch": 0.43,
"learning_rate": 0.0002567337653074249,
"loss": 4.4103,
"step": 230500
},
{
"epoch": 0.43,
"learning_rate": 0.00025663991230375334,
"loss": 4.4104,
"step": 231000
},
{
"epoch": 0.43,
"learning_rate": 0.0002565460593000818,
"loss": 4.4139,
"step": 231500
},
{
"epoch": 0.44,
"learning_rate": 0.0002564522062964103,
"loss": 4.4317,
"step": 232000
},
{
"epoch": 0.44,
"learning_rate": 0.00025635835329273875,
"loss": 4.4092,
"step": 232500
},
{
"epoch": 0.44,
"learning_rate": 0.00025626450028906725,
"loss": 4.4138,
"step": 233000
},
{
"epoch": 0.44,
"learning_rate": 0.0002561706472853957,
"loss": 4.4008,
"step": 233500
},
{
"epoch": 0.44,
"learning_rate": 0.0002560767942817242,
"loss": 4.4074,
"step": 234000
},
{
"epoch": 0.44,
"learning_rate": 0.00025598294127805266,
"loss": 4.4017,
"step": 234500
},
{
"epoch": 0.44,
"learning_rate": 0.0002558890882743811,
"loss": 4.3993,
"step": 235000
},
{
"epoch": 0.44,
"learning_rate": 0.0002557952352707096,
"loss": 4.3899,
"step": 235500
},
{
"epoch": 0.44,
"learning_rate": 0.00025570138226703806,
"loss": 4.411,
"step": 236000
},
{
"epoch": 0.44,
"learning_rate": 0.0002556075292633665,
"loss": 4.4277,
"step": 236500
},
{
"epoch": 0.44,
"learning_rate": 0.00025551367625969496,
"loss": 4.3965,
"step": 237000
},
{
"epoch": 0.45,
"learning_rate": 0.00025541982325602347,
"loss": 4.4047,
"step": 237500
},
{
"epoch": 0.45,
"learning_rate": 0.0002553259702523519,
"loss": 4.4081,
"step": 238000
},
{
"epoch": 0.45,
"learning_rate": 0.00025523211724868037,
"loss": 4.4082,
"step": 238500
},
{
"epoch": 0.45,
"learning_rate": 0.00025513826424500887,
"loss": 4.397,
"step": 239000
},
{
"epoch": 0.45,
"learning_rate": 0.0002550444112413373,
"loss": 4.3929,
"step": 239500
},
{
"epoch": 0.45,
"learning_rate": 0.0002549505582376658,
"loss": 4.4188,
"step": 240000
},
{
"epoch": 0.45,
"learning_rate": 0.0002548567052339943,
"loss": 4.3988,
"step": 240500
},
{
"epoch": 0.45,
"learning_rate": 0.0002547628522303228,
"loss": 4.4008,
"step": 241000
},
{
"epoch": 0.45,
"learning_rate": 0.00025466899922665123,
"loss": 4.4084,
"step": 241500
},
{
"epoch": 0.45,
"learning_rate": 0.0002545751462229797,
"loss": 4.4074,
"step": 242000
},
{
"epoch": 0.46,
"learning_rate": 0.0002544812932193082,
"loss": 4.4194,
"step": 242500
},
{
"epoch": 0.46,
"learning_rate": 0.00025438744021563664,
"loss": 4.4067,
"step": 243000
},
{
"epoch": 0.46,
"learning_rate": 0.00025429358721196514,
"loss": 4.4004,
"step": 243500
},
{
"epoch": 0.46,
"learning_rate": 0.0002541997342082936,
"loss": 4.4099,
"step": 244000
},
{
"epoch": 0.46,
"learning_rate": 0.00025410588120462204,
"loss": 4.4007,
"step": 244500
},
{
"epoch": 0.46,
"learning_rate": 0.00025401202820095055,
"loss": 4.3855,
"step": 245000
},
{
"epoch": 0.46,
"learning_rate": 0.000253918175197279,
"loss": 4.3895,
"step": 245500
},
{
"epoch": 0.46,
"learning_rate": 0.00025382432219360745,
"loss": 4.4021,
"step": 246000
},
{
"epoch": 0.46,
"learning_rate": 0.00025373046918993595,
"loss": 4.4017,
"step": 246500
},
{
"epoch": 0.46,
"learning_rate": 0.0002536366161862644,
"loss": 4.3945,
"step": 247000
},
{
"epoch": 0.46,
"learning_rate": 0.00025354276318259285,
"loss": 4.4167,
"step": 247500
},
{
"epoch": 0.47,
"learning_rate": 0.00025344891017892136,
"loss": 4.3858,
"step": 248000
},
{
"epoch": 0.47,
"learning_rate": 0.0002533550571752498,
"loss": 4.385,
"step": 248500
},
{
"epoch": 0.47,
"learning_rate": 0.00025326120417157826,
"loss": 4.3828,
"step": 249000
},
{
"epoch": 0.47,
"learning_rate": 0.00025316735116790676,
"loss": 4.3942,
"step": 249500
},
{
"epoch": 0.47,
"learning_rate": 0.0002530734981642352,
"loss": 4.4001,
"step": 250000
},
{
"epoch": 0.47,
"learning_rate": 0.0002529796451605637,
"loss": 4.3929,
"step": 250500
},
{
"epoch": 0.47,
"learning_rate": 0.00025288579215689217,
"loss": 4.4087,
"step": 251000
},
{
"epoch": 0.47,
"learning_rate": 0.00025279193915322067,
"loss": 4.389,
"step": 251500
},
{
"epoch": 0.47,
"learning_rate": 0.0002526980861495491,
"loss": 4.3932,
"step": 252000
},
{
"epoch": 0.47,
"learning_rate": 0.00025260423314587757,
"loss": 4.403,
"step": 252500
},
{
"epoch": 0.47,
"learning_rate": 0.0002525103801422061,
"loss": 4.3735,
"step": 253000
},
{
"epoch": 0.48,
"learning_rate": 0.00025241652713853453,
"loss": 4.3942,
"step": 253500
},
{
"epoch": 0.48,
"learning_rate": 0.000252322674134863,
"loss": 4.3777,
"step": 254000
},
{
"epoch": 0.48,
"learning_rate": 0.0002522288211311915,
"loss": 4.3981,
"step": 254500
},
{
"epoch": 0.48,
"learning_rate": 0.00025213496812751993,
"loss": 4.3964,
"step": 255000
},
{
"epoch": 0.48,
"learning_rate": 0.0002520411151238484,
"loss": 4.3924,
"step": 255500
},
{
"epoch": 0.48,
"learning_rate": 0.0002519472621201769,
"loss": 4.4021,
"step": 256000
},
{
"epoch": 0.48,
"learning_rate": 0.00025185340911650534,
"loss": 4.3905,
"step": 256500
},
{
"epoch": 0.48,
"learning_rate": 0.0002517595561128338,
"loss": 4.3912,
"step": 257000
},
{
"epoch": 0.48,
"learning_rate": 0.0002516657031091623,
"loss": 4.3911,
"step": 257500
},
{
"epoch": 0.48,
"learning_rate": 0.00025157185010549074,
"loss": 4.384,
"step": 258000
},
{
"epoch": 0.49,
"learning_rate": 0.0002514779971018192,
"loss": 4.3818,
"step": 258500
},
{
"epoch": 0.49,
"learning_rate": 0.0002513841440981477,
"loss": 4.3821,
"step": 259000
},
{
"epoch": 0.49,
"learning_rate": 0.00025129029109447615,
"loss": 4.3911,
"step": 259500
},
{
"epoch": 0.49,
"learning_rate": 0.00025119643809080465,
"loss": 4.3689,
"step": 260000
},
{
"epoch": 0.49,
"learning_rate": 0.0002511025850871331,
"loss": 4.3823,
"step": 260500
},
{
"epoch": 0.49,
"learning_rate": 0.0002510087320834616,
"loss": 4.383,
"step": 261000
},
{
"epoch": 0.49,
"learning_rate": 0.00025091487907979006,
"loss": 4.3888,
"step": 261500
},
{
"epoch": 0.49,
"learning_rate": 0.0002508210260761185,
"loss": 4.3815,
"step": 262000
},
{
"epoch": 0.49,
"learning_rate": 0.000250727173072447,
"loss": 4.3687,
"step": 262500
},
{
"epoch": 0.49,
"learning_rate": 0.00025063332006877546,
"loss": 4.3823,
"step": 263000
},
{
"epoch": 0.49,
"learning_rate": 0.0002505394670651039,
"loss": 4.3825,
"step": 263500
},
{
"epoch": 0.5,
"learning_rate": 0.0002504456140614324,
"loss": 4.374,
"step": 264000
},
{
"epoch": 0.5,
"learning_rate": 0.00025035176105776087,
"loss": 4.3859,
"step": 264500
},
{
"epoch": 0.5,
"learning_rate": 0.0002502579080540893,
"loss": 4.4123,
"step": 265000
},
{
"epoch": 0.5,
"learning_rate": 0.0002501640550504178,
"loss": 4.3858,
"step": 265500
},
{
"epoch": 0.5,
"learning_rate": 0.0002500702020467463,
"loss": 4.3665,
"step": 266000
},
{
"epoch": 0.5,
"learning_rate": 0.0002499763490430747,
"loss": 4.3723,
"step": 266500
},
{
"epoch": 0.5,
"learning_rate": 0.00024988249603940323,
"loss": 4.3791,
"step": 267000
},
{
"epoch": 0.5,
"learning_rate": 0.0002497886430357317,
"loss": 4.3884,
"step": 267500
},
{
"epoch": 0.5,
"learning_rate": 0.00024969479003206013,
"loss": 4.3901,
"step": 268000
},
{
"epoch": 0.5,
"learning_rate": 0.00024960093702838863,
"loss": 4.381,
"step": 268500
},
{
"epoch": 0.5,
"learning_rate": 0.0002495070840247171,
"loss": 4.3554,
"step": 269000
},
{
"epoch": 0.51,
"learning_rate": 0.0002494132310210456,
"loss": 4.3889,
"step": 269500
},
{
"epoch": 0.51,
"learning_rate": 0.00024931937801737404,
"loss": 4.3601,
"step": 270000
},
{
"epoch": 0.51,
"learning_rate": 0.00024922552501370254,
"loss": 4.38,
"step": 270500
},
{
"epoch": 0.51,
"learning_rate": 0.000249131672010031,
"loss": 4.3733,
"step": 271000
},
{
"epoch": 0.51,
"learning_rate": 0.0002490378190063595,
"loss": 4.3805,
"step": 271500
},
{
"epoch": 0.51,
"learning_rate": 0.00024894396600268795,
"loss": 4.3747,
"step": 272000
},
{
"epoch": 0.51,
"learning_rate": 0.0002488501129990164,
"loss": 4.3724,
"step": 272500
},
{
"epoch": 0.51,
"learning_rate": 0.00024875625999534485,
"loss": 4.3476,
"step": 273000
},
{
"epoch": 0.51,
"learning_rate": 0.00024866240699167335,
"loss": 4.364,
"step": 273500
},
{
"epoch": 0.51,
"learning_rate": 0.0002485685539880018,
"loss": 4.385,
"step": 274000
},
{
"epoch": 0.52,
"learning_rate": 0.00024847470098433025,
"loss": 4.3838,
"step": 274500
},
{
"epoch": 0.52,
"learning_rate": 0.00024838084798065876,
"loss": 4.3826,
"step": 275000
},
{
"epoch": 0.52,
"learning_rate": 0.0002482869949769872,
"loss": 4.377,
"step": 275500
},
{
"epoch": 0.52,
"learning_rate": 0.00024819314197331566,
"loss": 4.3527,
"step": 276000
},
{
"epoch": 0.52,
"learning_rate": 0.00024809928896964416,
"loss": 4.3549,
"step": 276500
},
{
"epoch": 0.52,
"learning_rate": 0.0002480054359659726,
"loss": 4.3647,
"step": 277000
},
{
"epoch": 0.52,
"learning_rate": 0.0002479115829623011,
"loss": 4.363,
"step": 277500
},
{
"epoch": 0.52,
"learning_rate": 0.00024781772995862957,
"loss": 4.3738,
"step": 278000
},
{
"epoch": 0.52,
"learning_rate": 0.000247723876954958,
"loss": 4.3596,
"step": 278500
},
{
"epoch": 0.52,
"learning_rate": 0.0002476300239512865,
"loss": 4.3564,
"step": 279000
},
{
"epoch": 0.52,
"learning_rate": 0.000247536170947615,
"loss": 4.3638,
"step": 279500
},
{
"epoch": 0.53,
"learning_rate": 0.0002474423179439435,
"loss": 4.3797,
"step": 280000
},
{
"epoch": 0.53,
"learning_rate": 0.00024734846494027193,
"loss": 4.3552,
"step": 280500
},
{
"epoch": 0.53,
"learning_rate": 0.00024725461193660043,
"loss": 4.3644,
"step": 281000
},
{
"epoch": 0.53,
"learning_rate": 0.0002471607589329289,
"loss": 4.3884,
"step": 281500
},
{
"epoch": 0.53,
"learning_rate": 0.00024706690592925733,
"loss": 4.3544,
"step": 282000
},
{
"epoch": 0.53,
"learning_rate": 0.00024697305292558584,
"loss": 4.3692,
"step": 282500
},
{
"epoch": 0.53,
"learning_rate": 0.0002468791999219143,
"loss": 4.3538,
"step": 283000
},
{
"epoch": 0.53,
"learning_rate": 0.00024678534691824274,
"loss": 4.3398,
"step": 283500
},
{
"epoch": 0.53,
"learning_rate": 0.00024669149391457124,
"loss": 4.3694,
"step": 284000
},
{
"epoch": 0.53,
"learning_rate": 0.0002465976409108997,
"loss": 4.3593,
"step": 284500
},
{
"epoch": 0.53,
"learning_rate": 0.00024650378790722814,
"loss": 4.366,
"step": 285000
},
{
"epoch": 0.54,
"learning_rate": 0.0002464099349035566,
"loss": 4.3603,
"step": 285500
},
{
"epoch": 0.54,
"learning_rate": 0.0002463160818998851,
"loss": 4.3453,
"step": 286000
},
{
"epoch": 0.54,
"learning_rate": 0.00024622222889621355,
"loss": 4.3559,
"step": 286500
},
{
"epoch": 0.54,
"learning_rate": 0.00024612837589254205,
"loss": 4.3786,
"step": 287000
},
{
"epoch": 0.54,
"learning_rate": 0.0002460345228888705,
"loss": 4.3518,
"step": 287500
},
{
"epoch": 0.54,
"learning_rate": 0.000245940669885199,
"loss": 4.359,
"step": 288000
},
{
"epoch": 0.54,
"learning_rate": 0.00024584681688152746,
"loss": 4.3653,
"step": 288500
},
{
"epoch": 0.54,
"learning_rate": 0.0002457529638778559,
"loss": 4.3666,
"step": 289000
},
{
"epoch": 0.54,
"learning_rate": 0.0002456591108741844,
"loss": 4.3611,
"step": 289500
},
{
"epoch": 0.54,
"learning_rate": 0.00024556525787051286,
"loss": 4.3547,
"step": 290000
},
{
"epoch": 0.55,
"learning_rate": 0.00024547140486684137,
"loss": 4.3447,
"step": 290500
},
{
"epoch": 0.55,
"learning_rate": 0.0002453775518631698,
"loss": 4.3746,
"step": 291000
},
{
"epoch": 0.55,
"learning_rate": 0.00024528369885949827,
"loss": 4.3582,
"step": 291500
},
{
"epoch": 0.55,
"learning_rate": 0.0002451898458558268,
"loss": 4.3502,
"step": 292000
},
{
"epoch": 0.55,
"learning_rate": 0.0002450959928521552,
"loss": 4.3524,
"step": 292500
},
{
"epoch": 0.55,
"learning_rate": 0.0002450021398484837,
"loss": 4.3419,
"step": 293000
},
{
"epoch": 0.55,
"learning_rate": 0.0002449082868448122,
"loss": 4.364,
"step": 293500
},
{
"epoch": 0.55,
"learning_rate": 0.00024481443384114063,
"loss": 4.3592,
"step": 294000
},
{
"epoch": 0.55,
"learning_rate": 0.0002447205808374691,
"loss": 4.3825,
"step": 294500
},
{
"epoch": 0.55,
"learning_rate": 0.0002446267278337976,
"loss": 4.3526,
"step": 295000
},
{
"epoch": 0.55,
"learning_rate": 0.00024453287483012603,
"loss": 4.3585,
"step": 295500
},
{
"epoch": 0.56,
"learning_rate": 0.0002444390218264545,
"loss": 4.3622,
"step": 296000
},
{
"epoch": 0.56,
"learning_rate": 0.000244345168822783,
"loss": 4.3534,
"step": 296500
},
{
"epoch": 0.56,
"learning_rate": 0.00024425131581911144,
"loss": 4.353,
"step": 297000
},
{
"epoch": 0.56,
"learning_rate": 0.00024415746281543994,
"loss": 4.3559,
"step": 297500
},
{
"epoch": 0.56,
"learning_rate": 0.00024406360981176837,
"loss": 4.3668,
"step": 298000
},
{
"epoch": 0.56,
"learning_rate": 0.00024396975680809687,
"loss": 4.346,
"step": 298500
},
{
"epoch": 0.56,
"learning_rate": 0.00024387590380442532,
"loss": 4.367,
"step": 299000
},
{
"epoch": 0.56,
"learning_rate": 0.0002437820508007538,
"loss": 4.3727,
"step": 299500
},
{
"epoch": 0.56,
"learning_rate": 0.00024368819779708228,
"loss": 4.3396,
"step": 300000
},
{
"epoch": 0.56,
"learning_rate": 0.00024359434479341075,
"loss": 4.3482,
"step": 300500
},
{
"epoch": 0.56,
"learning_rate": 0.0002435004917897392,
"loss": 4.3492,
"step": 301000
},
{
"epoch": 0.57,
"learning_rate": 0.0002434066387860677,
"loss": 4.3645,
"step": 301500
},
{
"epoch": 0.57,
"learning_rate": 0.00024331278578239616,
"loss": 4.3599,
"step": 302000
},
{
"epoch": 0.57,
"learning_rate": 0.0002432189327787246,
"loss": 4.3476,
"step": 302500
},
{
"epoch": 0.57,
"learning_rate": 0.00024312507977505311,
"loss": 4.3586,
"step": 303000
},
{
"epoch": 0.57,
"learning_rate": 0.00024303122677138156,
"loss": 4.3416,
"step": 303500
},
{
"epoch": 0.57,
"learning_rate": 0.00024293737376771004,
"loss": 4.3826,
"step": 304000
},
{
"epoch": 0.57,
"learning_rate": 0.00024284352076403852,
"loss": 4.3317,
"step": 304500
},
{
"epoch": 0.57,
"learning_rate": 0.000242749667760367,
"loss": 4.3539,
"step": 305000
},
{
"epoch": 0.57,
"learning_rate": 0.00024265581475669545,
"loss": 4.344,
"step": 305500
},
{
"epoch": 0.57,
"learning_rate": 0.00024256196175302392,
"loss": 4.3666,
"step": 306000
},
{
"epoch": 0.58,
"learning_rate": 0.0002424681087493524,
"loss": 4.3372,
"step": 306500
},
{
"epoch": 0.58,
"learning_rate": 0.00024237425574568085,
"loss": 4.3413,
"step": 307000
},
{
"epoch": 0.58,
"learning_rate": 0.00024228040274200936,
"loss": 4.346,
"step": 307500
},
{
"epoch": 0.58,
"learning_rate": 0.0002421865497383378,
"loss": 4.3614,
"step": 308000
},
{
"epoch": 0.58,
"learning_rate": 0.00024209269673466626,
"loss": 4.3508,
"step": 308500
},
{
"epoch": 0.58,
"learning_rate": 0.00024199884373099473,
"loss": 4.3532,
"step": 309000
},
{
"epoch": 0.58,
"learning_rate": 0.0002419049907273232,
"loss": 4.3417,
"step": 309500
},
{
"epoch": 0.58,
"learning_rate": 0.0002418111377236517,
"loss": 4.3452,
"step": 310000
},
{
"epoch": 0.58,
"learning_rate": 0.00024171728471998014,
"loss": 4.3506,
"step": 310500
},
{
"epoch": 0.58,
"learning_rate": 0.00024162343171630864,
"loss": 4.3387,
"step": 311000
},
{
"epoch": 0.58,
"learning_rate": 0.0002415295787126371,
"loss": 4.35,
"step": 311500
},
{
"epoch": 0.59,
"learning_rate": 0.00024143572570896555,
"loss": 4.3511,
"step": 312000
},
{
"epoch": 0.59,
"learning_rate": 0.00024134187270529405,
"loss": 4.3422,
"step": 312500
},
{
"epoch": 0.59,
"learning_rate": 0.0002412480197016225,
"loss": 4.3391,
"step": 313000
},
{
"epoch": 0.59,
"learning_rate": 0.00024115416669795098,
"loss": 4.3499,
"step": 313500
},
{
"epoch": 0.59,
"learning_rate": 0.00024106031369427945,
"loss": 4.3613,
"step": 314000
},
{
"epoch": 0.59,
"learning_rate": 0.00024096646069060793,
"loss": 4.3589,
"step": 314500
},
{
"epoch": 0.59,
"learning_rate": 0.00024087260768693638,
"loss": 4.3397,
"step": 315000
},
{
"epoch": 0.59,
"learning_rate": 0.0002407787546832649,
"loss": 4.3506,
"step": 315500
},
{
"epoch": 0.59,
"learning_rate": 0.00024068490167959334,
"loss": 4.3429,
"step": 316000
},
{
"epoch": 0.59,
"learning_rate": 0.0002405910486759218,
"loss": 4.3441,
"step": 316500
},
{
"epoch": 0.6,
"learning_rate": 0.0002404971956722503,
"loss": 4.3449,
"step": 317000
},
{
"epoch": 0.6,
"learning_rate": 0.00024040334266857874,
"loss": 4.3546,
"step": 317500
},
{
"epoch": 0.6,
"learning_rate": 0.00024030948966490722,
"loss": 4.3526,
"step": 318000
},
{
"epoch": 0.6,
"learning_rate": 0.0002402156366612357,
"loss": 4.3328,
"step": 318500
},
{
"epoch": 0.6,
"learning_rate": 0.00024012178365756415,
"loss": 4.3409,
"step": 319000
},
{
"epoch": 0.6,
"learning_rate": 0.00024002793065389262,
"loss": 4.3399,
"step": 319500
},
{
"epoch": 0.6,
"learning_rate": 0.0002399340776502211,
"loss": 4.3447,
"step": 320000
},
{
"epoch": 0.6,
"learning_rate": 0.00023984022464654958,
"loss": 4.3372,
"step": 320500
},
{
"epoch": 0.6,
"learning_rate": 0.00023974637164287803,
"loss": 4.3523,
"step": 321000
},
{
"epoch": 0.6,
"learning_rate": 0.00023965251863920648,
"loss": 4.3183,
"step": 321500
},
{
"epoch": 0.6,
"learning_rate": 0.00023955866563553498,
"loss": 4.3426,
"step": 322000
},
{
"epoch": 0.61,
"learning_rate": 0.00023946481263186344,
"loss": 4.3426,
"step": 322500
},
{
"epoch": 0.61,
"learning_rate": 0.0002393709596281919,
"loss": 4.3506,
"step": 323000
},
{
"epoch": 0.61,
"learning_rate": 0.0002392771066245204,
"loss": 4.3224,
"step": 323500
},
{
"epoch": 0.61,
"learning_rate": 0.00023918325362084887,
"loss": 4.3441,
"step": 324000
},
{
"epoch": 0.61,
"learning_rate": 0.00023908940061717732,
"loss": 4.3336,
"step": 324500
},
{
"epoch": 0.61,
"learning_rate": 0.00023899554761350582,
"loss": 4.3337,
"step": 325000
},
{
"epoch": 0.61,
"learning_rate": 0.00023890169460983427,
"loss": 4.3369,
"step": 325500
},
{
"epoch": 0.61,
"learning_rate": 0.00023880784160616272,
"loss": 4.3413,
"step": 326000
},
{
"epoch": 0.61,
"learning_rate": 0.00023871398860249123,
"loss": 4.328,
"step": 326500
},
{
"epoch": 0.61,
"learning_rate": 0.00023862013559881968,
"loss": 4.3425,
"step": 327000
},
{
"epoch": 0.61,
"learning_rate": 0.00023852628259514816,
"loss": 4.3429,
"step": 327500
},
{
"epoch": 0.62,
"learning_rate": 0.00023843242959147663,
"loss": 4.3412,
"step": 328000
},
{
"epoch": 0.62,
"learning_rate": 0.0002383385765878051,
"loss": 4.3334,
"step": 328500
},
{
"epoch": 0.62,
"learning_rate": 0.00023824472358413356,
"loss": 4.3338,
"step": 329000
},
{
"epoch": 0.62,
"learning_rate": 0.00023815087058046204,
"loss": 4.3422,
"step": 329500
},
{
"epoch": 0.62,
"learning_rate": 0.00023805701757679052,
"loss": 4.3341,
"step": 330000
},
{
"epoch": 0.62,
"learning_rate": 0.00023796316457311897,
"loss": 4.339,
"step": 330500
},
{
"epoch": 0.62,
"learning_rate": 0.00023786931156944747,
"loss": 4.3386,
"step": 331000
},
{
"epoch": 0.62,
"learning_rate": 0.00023777545856577592,
"loss": 4.333,
"step": 331500
},
{
"epoch": 0.62,
"learning_rate": 0.00023768160556210437,
"loss": 4.3387,
"step": 332000
},
{
"epoch": 0.62,
"learning_rate": 0.00023758775255843288,
"loss": 4.3313,
"step": 332500
},
{
"epoch": 0.63,
"learning_rate": 0.00023749389955476133,
"loss": 4.3317,
"step": 333000
},
{
"epoch": 0.63,
"learning_rate": 0.0002374000465510898,
"loss": 4.3209,
"step": 333500
},
{
"epoch": 0.63,
"learning_rate": 0.00023730619354741825,
"loss": 4.331,
"step": 334000
},
{
"epoch": 0.63,
"learning_rate": 0.00023721234054374676,
"loss": 4.3382,
"step": 334500
},
{
"epoch": 0.63,
"learning_rate": 0.0002371184875400752,
"loss": 4.3364,
"step": 335000
},
{
"epoch": 0.63,
"learning_rate": 0.00023702463453640366,
"loss": 4.3246,
"step": 335500
},
{
"epoch": 0.63,
"learning_rate": 0.00023693078153273216,
"loss": 4.3123,
"step": 336000
},
{
"epoch": 0.63,
"learning_rate": 0.0002368369285290606,
"loss": 4.3352,
"step": 336500
},
{
"epoch": 0.63,
"learning_rate": 0.0002367430755253891,
"loss": 4.3253,
"step": 337000
},
{
"epoch": 0.63,
"learning_rate": 0.00023664922252171757,
"loss": 4.3184,
"step": 337500
},
{
"epoch": 0.63,
"learning_rate": 0.00023655536951804605,
"loss": 4.3325,
"step": 338000
},
{
"epoch": 0.64,
"learning_rate": 0.0002364615165143745,
"loss": 4.3392,
"step": 338500
},
{
"epoch": 0.64,
"learning_rate": 0.000236367663510703,
"loss": 4.3291,
"step": 339000
},
{
"epoch": 0.64,
"learning_rate": 0.00023627381050703145,
"loss": 4.3066,
"step": 339500
},
{
"epoch": 0.64,
"learning_rate": 0.0002361799575033599,
"loss": 4.3231,
"step": 340000
},
{
"epoch": 0.64,
"learning_rate": 0.0002360861044996884,
"loss": 4.3297,
"step": 340500
},
{
"epoch": 0.64,
"learning_rate": 0.00023599225149601686,
"loss": 4.3105,
"step": 341000
},
{
"epoch": 0.64,
"learning_rate": 0.00023589839849234533,
"loss": 4.3169,
"step": 341500
},
{
"epoch": 0.64,
"learning_rate": 0.0002358045454886738,
"loss": 4.3291,
"step": 342000
},
{
"epoch": 0.64,
"learning_rate": 0.00023571069248500226,
"loss": 4.3137,
"step": 342500
},
{
"epoch": 0.64,
"learning_rate": 0.00023561683948133074,
"loss": 4.3295,
"step": 343000
},
{
"epoch": 0.64,
"learning_rate": 0.00023552298647765922,
"loss": 4.3266,
"step": 343500
},
{
"epoch": 0.65,
"learning_rate": 0.0002354291334739877,
"loss": 4.3203,
"step": 344000
},
{
"epoch": 0.65,
"learning_rate": 0.00023533528047031614,
"loss": 4.3302,
"step": 344500
},
{
"epoch": 0.65,
"learning_rate": 0.0002352414274666446,
"loss": 4.3419,
"step": 345000
},
{
"epoch": 0.65,
"learning_rate": 0.0002351475744629731,
"loss": 4.3287,
"step": 345500
},
{
"epoch": 0.65,
"learning_rate": 0.00023505372145930155,
"loss": 4.3181,
"step": 346000
},
{
"epoch": 0.65,
"learning_rate": 0.00023495986845563003,
"loss": 4.3305,
"step": 346500
},
{
"epoch": 0.65,
"learning_rate": 0.0002348660154519585,
"loss": 4.3143,
"step": 347000
},
{
"epoch": 0.65,
"learning_rate": 0.00023477216244828698,
"loss": 4.3286,
"step": 347500
},
{
"epoch": 0.65,
"learning_rate": 0.00023467830944461543,
"loss": 4.3343,
"step": 348000
},
{
"epoch": 0.65,
"learning_rate": 0.00023458445644094394,
"loss": 4.3125,
"step": 348500
},
{
"epoch": 0.66,
"learning_rate": 0.00023449060343727239,
"loss": 4.3239,
"step": 349000
},
{
"epoch": 0.66,
"learning_rate": 0.00023439675043360084,
"loss": 4.3225,
"step": 349500
},
{
"epoch": 0.66,
"learning_rate": 0.00023430289742992934,
"loss": 4.3198,
"step": 350000
},
{
"epoch": 0.66,
"learning_rate": 0.0002342090444262578,
"loss": 4.3288,
"step": 350500
},
{
"epoch": 0.66,
"learning_rate": 0.00023411519142258627,
"loss": 4.327,
"step": 351000
},
{
"epoch": 0.66,
"learning_rate": 0.00023402133841891475,
"loss": 4.322,
"step": 351500
},
{
"epoch": 0.66,
"learning_rate": 0.00023392748541524322,
"loss": 4.3258,
"step": 352000
},
{
"epoch": 0.66,
"learning_rate": 0.00023383363241157167,
"loss": 4.33,
"step": 352500
},
{
"epoch": 0.66,
"learning_rate": 0.00023373977940790015,
"loss": 4.3214,
"step": 353000
},
{
"epoch": 0.66,
"learning_rate": 0.00023364592640422863,
"loss": 4.3336,
"step": 353500
},
{
"epoch": 0.66,
"learning_rate": 0.00023355207340055708,
"loss": 4.3066,
"step": 354000
},
{
"epoch": 0.67,
"learning_rate": 0.00023345822039688558,
"loss": 4.3172,
"step": 354500
},
{
"epoch": 0.67,
"learning_rate": 0.00023336436739321403,
"loss": 4.3198,
"step": 355000
},
{
"epoch": 0.67,
"learning_rate": 0.00023327051438954248,
"loss": 4.3232,
"step": 355500
},
{
"epoch": 0.67,
"learning_rate": 0.000233176661385871,
"loss": 4.3227,
"step": 356000
},
{
"epoch": 0.67,
"learning_rate": 0.00023308280838219944,
"loss": 4.3282,
"step": 356500
},
{
"epoch": 0.67,
"learning_rate": 0.00023298895537852792,
"loss": 4.3354,
"step": 357000
},
{
"epoch": 0.67,
"learning_rate": 0.00023289510237485637,
"loss": 4.3167,
"step": 357500
},
{
"epoch": 0.67,
"learning_rate": 0.00023280124937118487,
"loss": 4.3128,
"step": 358000
},
{
"epoch": 0.67,
"learning_rate": 0.00023270739636751332,
"loss": 4.3079,
"step": 358500
},
{
"epoch": 0.67,
"learning_rate": 0.00023261354336384177,
"loss": 4.3187,
"step": 359000
},
{
"epoch": 0.67,
"learning_rate": 0.00023251969036017028,
"loss": 4.3335,
"step": 359500
},
{
"epoch": 0.68,
"learning_rate": 0.00023242583735649873,
"loss": 4.3001,
"step": 360000
},
{
"epoch": 0.68,
"learning_rate": 0.0002323319843528272,
"loss": 4.311,
"step": 360500
},
{
"epoch": 0.68,
"learning_rate": 0.00023223813134915568,
"loss": 4.3033,
"step": 361000
},
{
"epoch": 0.68,
"learning_rate": 0.00023214427834548416,
"loss": 4.3062,
"step": 361500
},
{
"epoch": 0.68,
"learning_rate": 0.0002320504253418126,
"loss": 4.337,
"step": 362000
},
{
"epoch": 0.68,
"learning_rate": 0.00023195657233814111,
"loss": 4.3261,
"step": 362500
},
{
"epoch": 0.68,
"learning_rate": 0.00023186271933446956,
"loss": 4.3073,
"step": 363000
},
{
"epoch": 0.68,
"learning_rate": 0.00023176886633079801,
"loss": 4.3152,
"step": 363500
},
{
"epoch": 0.68,
"learning_rate": 0.00023167501332712652,
"loss": 4.3326,
"step": 364000
},
{
"epoch": 0.68,
"learning_rate": 0.00023158116032345497,
"loss": 4.3189,
"step": 364500
},
{
"epoch": 0.69,
"learning_rate": 0.00023148730731978342,
"loss": 4.3118,
"step": 365000
},
{
"epoch": 0.69,
"learning_rate": 0.00023139345431611192,
"loss": 4.3262,
"step": 365500
},
{
"epoch": 0.69,
"learning_rate": 0.00023129960131244037,
"loss": 4.3084,
"step": 366000
},
{
"epoch": 0.69,
"learning_rate": 0.00023120574830876885,
"loss": 4.3094,
"step": 366500
},
{
"epoch": 0.69,
"learning_rate": 0.00023111189530509733,
"loss": 4.3039,
"step": 367000
},
{
"epoch": 0.69,
"learning_rate": 0.0002310180423014258,
"loss": 4.3015,
"step": 367500
},
{
"epoch": 0.69,
"learning_rate": 0.00023092418929775426,
"loss": 4.2926,
"step": 368000
},
{
"epoch": 0.69,
"learning_rate": 0.00023083033629408276,
"loss": 4.3182,
"step": 368500
},
{
"epoch": 0.69,
"learning_rate": 0.0002307364832904112,
"loss": 4.315,
"step": 369000
},
{
"epoch": 0.69,
"learning_rate": 0.00023064263028673966,
"loss": 4.3044,
"step": 369500
},
{
"epoch": 0.69,
"learning_rate": 0.00023054877728306814,
"loss": 4.289,
"step": 370000
},
{
"epoch": 0.7,
"learning_rate": 0.00023045492427939662,
"loss": 4.323,
"step": 370500
},
{
"epoch": 0.7,
"learning_rate": 0.0002303610712757251,
"loss": 4.3132,
"step": 371000
},
{
"epoch": 0.7,
"learning_rate": 0.00023026721827205354,
"loss": 4.3132,
"step": 371500
},
{
"epoch": 0.7,
"learning_rate": 0.00023017336526838205,
"loss": 4.306,
"step": 372000
},
{
"epoch": 0.7,
"learning_rate": 0.0002300795122647105,
"loss": 4.2933,
"step": 372500
},
{
"epoch": 0.7,
"learning_rate": 0.00022998565926103895,
"loss": 4.2984,
"step": 373000
},
{
"epoch": 0.7,
"learning_rate": 0.00022989180625736745,
"loss": 4.2908,
"step": 373500
},
{
"epoch": 0.7,
"learning_rate": 0.0002297979532536959,
"loss": 4.3171,
"step": 374000
},
{
"epoch": 0.7,
"learning_rate": 0.00022970410025002438,
"loss": 4.3105,
"step": 374500
},
{
"epoch": 0.7,
"learning_rate": 0.00022961024724635286,
"loss": 4.3084,
"step": 375000
},
{
"epoch": 0.7,
"learning_rate": 0.0002295163942426813,
"loss": 4.3002,
"step": 375500
},
{
"epoch": 0.71,
"learning_rate": 0.0002294225412390098,
"loss": 4.2929,
"step": 376000
},
{
"epoch": 0.71,
"learning_rate": 0.00022932868823533826,
"loss": 4.3018,
"step": 376500
},
{
"epoch": 0.71,
"learning_rate": 0.00022923483523166674,
"loss": 4.3052,
"step": 377000
},
{
"epoch": 0.71,
"learning_rate": 0.0002291409822279952,
"loss": 4.314,
"step": 377500
},
{
"epoch": 0.71,
"learning_rate": 0.0002290471292243237,
"loss": 4.3259,
"step": 378000
},
{
"epoch": 0.71,
"learning_rate": 0.00022895327622065215,
"loss": 4.3064,
"step": 378500
},
{
"epoch": 0.71,
"learning_rate": 0.0002288594232169806,
"loss": 4.3098,
"step": 379000
},
{
"epoch": 0.71,
"learning_rate": 0.0002287655702133091,
"loss": 4.3057,
"step": 379500
},
{
"epoch": 0.71,
"learning_rate": 0.00022867171720963755,
"loss": 4.3241,
"step": 380000
},
{
"epoch": 0.71,
"learning_rate": 0.00022857786420596603,
"loss": 4.3116,
"step": 380500
},
{
"epoch": 0.72,
"learning_rate": 0.0002284840112022945,
"loss": 4.3033,
"step": 381000
},
{
"epoch": 0.72,
"learning_rate": 0.00022839015819862298,
"loss": 4.3052,
"step": 381500
},
{
"epoch": 0.72,
"learning_rate": 0.00022829630519495143,
"loss": 4.2986,
"step": 382000
},
{
"epoch": 0.72,
"learning_rate": 0.00022820245219127989,
"loss": 4.3329,
"step": 382500
},
{
"epoch": 0.72,
"learning_rate": 0.0002281085991876084,
"loss": 4.3184,
"step": 383000
},
{
"epoch": 0.72,
"learning_rate": 0.00022801474618393684,
"loss": 4.3221,
"step": 383500
},
{
"epoch": 0.72,
"learning_rate": 0.00022792089318026532,
"loss": 4.2981,
"step": 384000
},
{
"epoch": 0.72,
"learning_rate": 0.0002278270401765938,
"loss": 4.3068,
"step": 384500
},
{
"epoch": 0.72,
"learning_rate": 0.00022773318717292227,
"loss": 4.2975,
"step": 385000
},
{
"epoch": 0.72,
"learning_rate": 0.00022763933416925072,
"loss": 4.3087,
"step": 385500
},
{
"epoch": 0.72,
"learning_rate": 0.0002275454811655792,
"loss": 4.3093,
"step": 386000
},
{
"epoch": 0.73,
"learning_rate": 0.00022745162816190768,
"loss": 4.3002,
"step": 386500
},
{
"epoch": 0.73,
"learning_rate": 0.00022735777515823613,
"loss": 4.2916,
"step": 387000
},
{
"epoch": 0.73,
"learning_rate": 0.00022726392215456463,
"loss": 4.3075,
"step": 387500
},
{
"epoch": 0.73,
"learning_rate": 0.00022717006915089308,
"loss": 4.3241,
"step": 388000
},
{
"epoch": 0.73,
"learning_rate": 0.00022707621614722153,
"loss": 4.3041,
"step": 388500
},
{
"epoch": 0.73,
"learning_rate": 0.00022698236314355004,
"loss": 4.2962,
"step": 389000
},
{
"epoch": 0.73,
"learning_rate": 0.0002268885101398785,
"loss": 4.2888,
"step": 389500
},
{
"epoch": 0.73,
"learning_rate": 0.00022679465713620696,
"loss": 4.2749,
"step": 390000
},
{
"epoch": 0.73,
"learning_rate": 0.00022670080413253544,
"loss": 4.2951,
"step": 390500
},
{
"epoch": 0.73,
"learning_rate": 0.00022660695112886392,
"loss": 4.3075,
"step": 391000
},
{
"epoch": 0.73,
"learning_rate": 0.00022651309812519237,
"loss": 4.3103,
"step": 391500
},
{
"epoch": 0.74,
"learning_rate": 0.00022641924512152087,
"loss": 4.3059,
"step": 392000
},
{
"epoch": 0.74,
"learning_rate": 0.00022632539211784932,
"loss": 4.2875,
"step": 392500
},
{
"epoch": 0.74,
"learning_rate": 0.00022623153911417778,
"loss": 4.3118,
"step": 393000
},
{
"epoch": 0.74,
"learning_rate": 0.00022613768611050625,
"loss": 4.3154,
"step": 393500
},
{
"epoch": 0.74,
"learning_rate": 0.00022604383310683473,
"loss": 4.3082,
"step": 394000
},
{
"epoch": 0.74,
"learning_rate": 0.0002259499801031632,
"loss": 4.318,
"step": 394500
},
{
"epoch": 0.74,
"learning_rate": 0.00022585612709949166,
"loss": 4.3045,
"step": 395000
},
{
"epoch": 0.74,
"learning_rate": 0.00022576227409582016,
"loss": 4.3018,
"step": 395500
},
{
"epoch": 0.74,
"learning_rate": 0.0002256684210921486,
"loss": 4.3095,
"step": 396000
},
{
"epoch": 0.74,
"learning_rate": 0.00022557456808847706,
"loss": 4.3,
"step": 396500
},
{
"epoch": 0.75,
"learning_rate": 0.00022548071508480557,
"loss": 4.3044,
"step": 397000
},
{
"epoch": 0.75,
"learning_rate": 0.00022538686208113402,
"loss": 4.2872,
"step": 397500
},
{
"epoch": 0.75,
"learning_rate": 0.0002252930090774625,
"loss": 4.3034,
"step": 398000
},
{
"epoch": 0.75,
"learning_rate": 0.00022519915607379097,
"loss": 4.2817,
"step": 398500
},
{
"epoch": 0.75,
"learning_rate": 0.00022510530307011942,
"loss": 4.2997,
"step": 399000
},
{
"epoch": 0.75,
"learning_rate": 0.0002250114500664479,
"loss": 4.3049,
"step": 399500
},
{
"epoch": 0.75,
"learning_rate": 0.00022491759706277638,
"loss": 4.2894,
"step": 400000
},
{
"epoch": 0.75,
"learning_rate": 0.00022482374405910486,
"loss": 4.2934,
"step": 400500
},
{
"epoch": 0.75,
"learning_rate": 0.0002247298910554333,
"loss": 4.2779,
"step": 401000
},
{
"epoch": 0.75,
"learning_rate": 0.0002246360380517618,
"loss": 4.2995,
"step": 401500
},
{
"epoch": 0.75,
"learning_rate": 0.00022454218504809026,
"loss": 4.2822,
"step": 402000
},
{
"epoch": 0.76,
"learning_rate": 0.0002244483320444187,
"loss": 4.3028,
"step": 402500
},
{
"epoch": 0.76,
"learning_rate": 0.00022435447904074722,
"loss": 4.2893,
"step": 403000
},
{
"epoch": 0.76,
"learning_rate": 0.00022426062603707567,
"loss": 4.2889,
"step": 403500
},
{
"epoch": 0.76,
"learning_rate": 0.00022416677303340414,
"loss": 4.2974,
"step": 404000
},
{
"epoch": 0.76,
"learning_rate": 0.00022407292002973262,
"loss": 4.2914,
"step": 404500
},
{
"epoch": 0.76,
"learning_rate": 0.0002239790670260611,
"loss": 4.283,
"step": 405000
},
{
"epoch": 0.76,
"learning_rate": 0.00022388521402238955,
"loss": 4.3096,
"step": 405500
},
{
"epoch": 0.76,
"learning_rate": 0.000223791361018718,
"loss": 4.3023,
"step": 406000
},
{
"epoch": 0.76,
"learning_rate": 0.0002236975080150465,
"loss": 4.3092,
"step": 406500
},
{
"epoch": 0.76,
"learning_rate": 0.00022360365501137495,
"loss": 4.2833,
"step": 407000
},
{
"epoch": 0.76,
"learning_rate": 0.00022350980200770343,
"loss": 4.2847,
"step": 407500
},
{
"epoch": 0.77,
"learning_rate": 0.0002234159490040319,
"loss": 4.3017,
"step": 408000
},
{
"epoch": 0.77,
"learning_rate": 0.00022332209600036039,
"loss": 4.2833,
"step": 408500
},
{
"epoch": 0.77,
"learning_rate": 0.00022322824299668884,
"loss": 4.2866,
"step": 409000
},
{
"epoch": 0.77,
"learning_rate": 0.0002231343899930173,
"loss": 4.2772,
"step": 409500
},
{
"epoch": 0.77,
"learning_rate": 0.0002230405369893458,
"loss": 4.2986,
"step": 410000
},
{
"epoch": 0.77,
"learning_rate": 0.00022294668398567424,
"loss": 4.2921,
"step": 410500
},
{
"epoch": 0.77,
"learning_rate": 0.00022285283098200275,
"loss": 4.2811,
"step": 411000
},
{
"epoch": 0.77,
"learning_rate": 0.0002227589779783312,
"loss": 4.2975,
"step": 411500
},
{
"epoch": 0.77,
"learning_rate": 0.00022266512497465965,
"loss": 4.2839,
"step": 412000
},
{
"epoch": 0.77,
"learning_rate": 0.00022257127197098815,
"loss": 4.2911,
"step": 412500
},
{
"epoch": 0.78,
"learning_rate": 0.0002224774189673166,
"loss": 4.2886,
"step": 413000
},
{
"epoch": 0.78,
"learning_rate": 0.00022238356596364508,
"loss": 4.2952,
"step": 413500
},
{
"epoch": 0.78,
"learning_rate": 0.00022228971295997356,
"loss": 4.2975,
"step": 414000
},
{
"epoch": 0.78,
"learning_rate": 0.00022219585995630203,
"loss": 4.2894,
"step": 414500
},
{
"epoch": 0.78,
"learning_rate": 0.00022210200695263048,
"loss": 4.292,
"step": 415000
},
{
"epoch": 0.78,
"learning_rate": 0.000222008153948959,
"loss": 4.2854,
"step": 415500
},
{
"epoch": 0.78,
"learning_rate": 0.00022191430094528744,
"loss": 4.2782,
"step": 416000
},
{
"epoch": 0.78,
"learning_rate": 0.0002218204479416159,
"loss": 4.2933,
"step": 416500
},
{
"epoch": 0.78,
"learning_rate": 0.0002217265949379444,
"loss": 4.2914,
"step": 417000
},
{
"epoch": 0.78,
"learning_rate": 0.00022163274193427284,
"loss": 4.2903,
"step": 417500
},
{
"epoch": 0.78,
"learning_rate": 0.00022153888893060132,
"loss": 4.2945,
"step": 418000
},
{
"epoch": 0.79,
"learning_rate": 0.00022144503592692977,
"loss": 4.2849,
"step": 418500
},
{
"epoch": 0.79,
"learning_rate": 0.00022135118292325828,
"loss": 4.2944,
"step": 419000
},
{
"epoch": 0.79,
"learning_rate": 0.00022125732991958673,
"loss": 4.2872,
"step": 419500
},
{
"epoch": 0.79,
"learning_rate": 0.00022116347691591518,
"loss": 4.2767,
"step": 420000
},
{
"epoch": 0.79,
"learning_rate": 0.00022106962391224368,
"loss": 4.2938,
"step": 420500
},
{
"epoch": 0.79,
"learning_rate": 0.00022097577090857213,
"loss": 4.2677,
"step": 421000
},
{
"epoch": 0.79,
"learning_rate": 0.0002208819179049006,
"loss": 4.2734,
"step": 421500
},
{
"epoch": 0.79,
"learning_rate": 0.00022078806490122909,
"loss": 4.2786,
"step": 422000
},
{
"epoch": 0.79,
"learning_rate": 0.00022069421189755754,
"loss": 4.2816,
"step": 422500
},
{
"epoch": 0.79,
"learning_rate": 0.00022060035889388601,
"loss": 4.2743,
"step": 423000
},
{
"epoch": 0.79,
"learning_rate": 0.0002205065058902145,
"loss": 4.2944,
"step": 423500
},
{
"epoch": 0.8,
"learning_rate": 0.00022041265288654297,
"loss": 4.2991,
"step": 424000
},
{
"epoch": 0.8,
"learning_rate": 0.00022031879988287142,
"loss": 4.2912,
"step": 424500
},
{
"epoch": 0.8,
"learning_rate": 0.00022022494687919992,
"loss": 4.2999,
"step": 425000
},
{
"epoch": 0.8,
"learning_rate": 0.00022013109387552837,
"loss": 4.2885,
"step": 425500
},
{
"epoch": 0.8,
"learning_rate": 0.00022003724087185682,
"loss": 4.2875,
"step": 426000
},
{
"epoch": 0.8,
"learning_rate": 0.00021994338786818533,
"loss": 4.3016,
"step": 426500
},
{
"epoch": 0.8,
"learning_rate": 0.00021984953486451378,
"loss": 4.2757,
"step": 427000
},
{
"epoch": 0.8,
"learning_rate": 0.00021975568186084226,
"loss": 4.2645,
"step": 427500
},
{
"epoch": 0.8,
"learning_rate": 0.00021966182885717073,
"loss": 4.2858,
"step": 428000
},
{
"epoch": 0.8,
"learning_rate": 0.0002195679758534992,
"loss": 4.2878,
"step": 428500
},
{
"epoch": 0.81,
"learning_rate": 0.00021947412284982766,
"loss": 4.271,
"step": 429000
},
{
"epoch": 0.81,
"learning_rate": 0.00021938026984615617,
"loss": 4.3062,
"step": 429500
},
{
"epoch": 0.81,
"learning_rate": 0.00021928641684248462,
"loss": 4.2894,
"step": 430000
},
{
"epoch": 0.81,
"learning_rate": 0.00021919256383881307,
"loss": 4.2885,
"step": 430500
},
{
"epoch": 0.81,
"learning_rate": 0.00021909871083514154,
"loss": 4.2785,
"step": 431000
},
{
"epoch": 0.81,
"learning_rate": 0.00021900485783147002,
"loss": 4.2897,
"step": 431500
},
{
"epoch": 0.81,
"learning_rate": 0.0002189110048277985,
"loss": 4.2903,
"step": 432000
},
{
"epoch": 0.81,
"learning_rate": 0.00021881715182412695,
"loss": 4.2806,
"step": 432500
},
{
"epoch": 0.81,
"learning_rate": 0.00021872329882045543,
"loss": 4.2764,
"step": 433000
},
{
"epoch": 0.81,
"learning_rate": 0.0002186294458167839,
"loss": 4.2873,
"step": 433500
},
{
"epoch": 0.81,
"learning_rate": 0.00021853559281311235,
"loss": 4.2777,
"step": 434000
},
{
"epoch": 0.82,
"learning_rate": 0.00021844173980944086,
"loss": 4.2766,
"step": 434500
},
{
"epoch": 0.82,
"learning_rate": 0.0002183478868057693,
"loss": 4.2807,
"step": 435000
},
{
"epoch": 0.82,
"learning_rate": 0.00021825403380209776,
"loss": 4.2786,
"step": 435500
},
{
"epoch": 0.82,
"learning_rate": 0.00021816018079842626,
"loss": 4.2924,
"step": 436000
},
{
"epoch": 0.82,
"learning_rate": 0.00021806632779475471,
"loss": 4.2792,
"step": 436500
},
{
"epoch": 0.82,
"learning_rate": 0.0002179724747910832,
"loss": 4.284,
"step": 437000
},
{
"epoch": 0.82,
"learning_rate": 0.00021787862178741167,
"loss": 4.2709,
"step": 437500
},
{
"epoch": 0.82,
"learning_rate": 0.00021778476878374015,
"loss": 4.2788,
"step": 438000
},
{
"epoch": 0.82,
"learning_rate": 0.0002176909157800686,
"loss": 4.2804,
"step": 438500
},
{
"epoch": 0.82,
"learning_rate": 0.0002175970627763971,
"loss": 4.2787,
"step": 439000
},
{
"epoch": 0.82,
"learning_rate": 0.00021750320977272555,
"loss": 4.2891,
"step": 439500
},
{
"epoch": 0.83,
"learning_rate": 0.000217409356769054,
"loss": 4.2787,
"step": 440000
},
{
"epoch": 0.83,
"learning_rate": 0.0002173155037653825,
"loss": 4.2757,
"step": 440500
},
{
"epoch": 0.83,
"learning_rate": 0.00021722165076171096,
"loss": 4.2793,
"step": 441000
},
{
"epoch": 0.83,
"learning_rate": 0.00021712779775803943,
"loss": 4.2771,
"step": 441500
},
{
"epoch": 0.83,
"learning_rate": 0.00021703394475436788,
"loss": 4.2874,
"step": 442000
},
{
"epoch": 0.83,
"learning_rate": 0.0002169400917506964,
"loss": 4.2867,
"step": 442500
},
{
"epoch": 0.83,
"learning_rate": 0.00021684623874702484,
"loss": 4.293,
"step": 443000
},
{
"epoch": 0.83,
"learning_rate": 0.0002167523857433533,
"loss": 4.2633,
"step": 443500
},
{
"epoch": 0.83,
"learning_rate": 0.0002166585327396818,
"loss": 4.2578,
"step": 444000
},
{
"epoch": 0.83,
"learning_rate": 0.00021656467973601024,
"loss": 4.2501,
"step": 444500
},
{
"epoch": 0.84,
"learning_rate": 0.00021647082673233872,
"loss": 4.2755,
"step": 445000
},
{
"epoch": 0.84,
"learning_rate": 0.0002163769737286672,
"loss": 4.2611,
"step": 445500
},
{
"epoch": 0.84,
"learning_rate": 0.00021628312072499565,
"loss": 4.2594,
"step": 446000
},
{
"epoch": 0.84,
"learning_rate": 0.00021618926772132413,
"loss": 4.2783,
"step": 446500
},
{
"epoch": 0.84,
"learning_rate": 0.0002160954147176526,
"loss": 4.2789,
"step": 447000
},
{
"epoch": 0.84,
"learning_rate": 0.00021600156171398108,
"loss": 4.2734,
"step": 447500
},
{
"epoch": 0.84,
"learning_rate": 0.00021590770871030953,
"loss": 4.2652,
"step": 448000
},
{
"epoch": 0.84,
"learning_rate": 0.00021581385570663804,
"loss": 4.2554,
"step": 448500
},
{
"epoch": 0.84,
"learning_rate": 0.0002157200027029665,
"loss": 4.2737,
"step": 449000
},
{
"epoch": 0.84,
"learning_rate": 0.00021562614969929494,
"loss": 4.2768,
"step": 449500
},
{
"epoch": 0.84,
"learning_rate": 0.00021553229669562344,
"loss": 4.2797,
"step": 450000
},
{
"epoch": 0.85,
"learning_rate": 0.0002154384436919519,
"loss": 4.2645,
"step": 450500
},
{
"epoch": 0.85,
"learning_rate": 0.00021534459068828037,
"loss": 4.2993,
"step": 451000
},
{
"epoch": 0.85,
"learning_rate": 0.00021525073768460885,
"loss": 4.2764,
"step": 451500
},
{
"epoch": 0.85,
"learning_rate": 0.00021515688468093732,
"loss": 4.2728,
"step": 452000
},
{
"epoch": 0.85,
"learning_rate": 0.00021506303167726577,
"loss": 4.2797,
"step": 452500
},
{
"epoch": 0.85,
"learning_rate": 0.00021496917867359428,
"loss": 4.2753,
"step": 453000
},
{
"epoch": 0.85,
"learning_rate": 0.00021487532566992273,
"loss": 4.2689,
"step": 453500
},
{
"epoch": 0.85,
"learning_rate": 0.00021478147266625118,
"loss": 4.2779,
"step": 454000
},
{
"epoch": 0.85,
"learning_rate": 0.00021468761966257966,
"loss": 4.2649,
"step": 454500
},
{
"epoch": 0.85,
"learning_rate": 0.00021459376665890813,
"loss": 4.2652,
"step": 455000
},
{
"epoch": 0.86,
"learning_rate": 0.0002144999136552366,
"loss": 4.2896,
"step": 455500
},
{
"epoch": 0.86,
"learning_rate": 0.00021440606065156506,
"loss": 4.2937,
"step": 456000
},
{
"epoch": 0.86,
"learning_rate": 0.00021431220764789354,
"loss": 4.2737,
"step": 456500
},
{
"epoch": 0.86,
"learning_rate": 0.00021421835464422202,
"loss": 4.2747,
"step": 457000
},
{
"epoch": 0.86,
"learning_rate": 0.00021412450164055047,
"loss": 4.2596,
"step": 457500
},
{
"epoch": 0.86,
"learning_rate": 0.00021403064863687897,
"loss": 4.2499,
"step": 458000
},
{
"epoch": 0.86,
"learning_rate": 0.00021393679563320742,
"loss": 4.2678,
"step": 458500
},
{
"epoch": 0.86,
"learning_rate": 0.00021384294262953587,
"loss": 4.2618,
"step": 459000
},
{
"epoch": 0.86,
"learning_rate": 0.00021374908962586438,
"loss": 4.2678,
"step": 459500
},
{
"epoch": 0.86,
"learning_rate": 0.00021365523662219283,
"loss": 4.2614,
"step": 460000
},
{
"epoch": 0.86,
"learning_rate": 0.0002135613836185213,
"loss": 4.2823,
"step": 460500
},
{
"epoch": 0.87,
"learning_rate": 0.00021346753061484978,
"loss": 4.2742,
"step": 461000
},
{
"epoch": 0.87,
"learning_rate": 0.00021337367761117826,
"loss": 4.2699,
"step": 461500
},
{
"epoch": 0.87,
"learning_rate": 0.0002132798246075067,
"loss": 4.2681,
"step": 462000
},
{
"epoch": 0.87,
"learning_rate": 0.00021318597160383521,
"loss": 4.2648,
"step": 462500
},
{
"epoch": 0.87,
"learning_rate": 0.00021309211860016366,
"loss": 4.277,
"step": 463000
},
{
"epoch": 0.87,
"learning_rate": 0.00021299826559649212,
"loss": 4.2745,
"step": 463500
},
{
"epoch": 0.87,
"learning_rate": 0.00021290441259282062,
"loss": 4.2811,
"step": 464000
},
{
"epoch": 0.87,
"learning_rate": 0.00021281055958914907,
"loss": 4.2692,
"step": 464500
},
{
"epoch": 0.87,
"learning_rate": 0.00021271670658547755,
"loss": 4.2891,
"step": 465000
},
{
"epoch": 0.87,
"learning_rate": 0.00021262285358180602,
"loss": 4.2878,
"step": 465500
},
{
"epoch": 0.87,
"learning_rate": 0.0002125290005781345,
"loss": 4.265,
"step": 466000
},
{
"epoch": 0.88,
"learning_rate": 0.00021243514757446295,
"loss": 4.2714,
"step": 466500
},
{
"epoch": 0.88,
"learning_rate": 0.0002123412945707914,
"loss": 4.2806,
"step": 467000
},
{
"epoch": 0.88,
"learning_rate": 0.0002122474415671199,
"loss": 4.2564,
"step": 467500
},
{
"epoch": 0.88,
"learning_rate": 0.00021215358856344836,
"loss": 4.2518,
"step": 468000
},
{
"epoch": 0.88,
"learning_rate": 0.00021205973555977684,
"loss": 4.282,
"step": 468500
},
{
"epoch": 0.88,
"learning_rate": 0.0002119658825561053,
"loss": 4.2601,
"step": 469000
},
{
"epoch": 0.88,
"learning_rate": 0.00021187202955243376,
"loss": 4.2715,
"step": 469500
},
{
"epoch": 0.88,
"learning_rate": 0.00021177817654876224,
"loss": 4.2638,
"step": 470000
},
{
"epoch": 0.88,
"learning_rate": 0.00021168432354509072,
"loss": 4.2539,
"step": 470500
},
{
"epoch": 0.88,
"learning_rate": 0.0002115904705414192,
"loss": 4.27,
"step": 471000
},
{
"epoch": 0.89,
"learning_rate": 0.00021149661753774765,
"loss": 4.2664,
"step": 471500
},
{
"epoch": 0.89,
"learning_rate": 0.00021140276453407615,
"loss": 4.2567,
"step": 472000
},
{
"epoch": 0.89,
"learning_rate": 0.0002113089115304046,
"loss": 4.271,
"step": 472500
},
{
"epoch": 0.89,
"learning_rate": 0.00021121505852673305,
"loss": 4.2487,
"step": 473000
},
{
"epoch": 0.89,
"learning_rate": 0.00021112120552306156,
"loss": 4.2477,
"step": 473500
},
{
"epoch": 0.89,
"learning_rate": 0.00021102735251939,
"loss": 4.2482,
"step": 474000
},
{
"epoch": 0.89,
"learning_rate": 0.00021093349951571848,
"loss": 4.2735,
"step": 474500
},
{
"epoch": 0.89,
"learning_rate": 0.00021083964651204696,
"loss": 4.2625,
"step": 475000
},
{
"epoch": 0.89,
"learning_rate": 0.00021074579350837544,
"loss": 4.2746,
"step": 475500
},
{
"epoch": 0.89,
"learning_rate": 0.0002106519405047039,
"loss": 4.2719,
"step": 476000
},
{
"epoch": 0.89,
"learning_rate": 0.0002105580875010324,
"loss": 4.2586,
"step": 476500
},
{
"epoch": 0.9,
"learning_rate": 0.00021046423449736084,
"loss": 4.2766,
"step": 477000
},
{
"epoch": 0.9,
"learning_rate": 0.0002103703814936893,
"loss": 4.2502,
"step": 477500
},
{
"epoch": 0.9,
"learning_rate": 0.0002102765284900178,
"loss": 4.2608,
"step": 478000
},
{
"epoch": 0.9,
"learning_rate": 0.00021018267548634625,
"loss": 4.2689,
"step": 478500
},
{
"epoch": 0.9,
"learning_rate": 0.00021008882248267473,
"loss": 4.2725,
"step": 479000
},
{
"epoch": 0.9,
"learning_rate": 0.00020999496947900318,
"loss": 4.2519,
"step": 479500
},
{
"epoch": 0.9,
"learning_rate": 0.00020990111647533165,
"loss": 4.2506,
"step": 480000
},
{
"epoch": 0.9,
"learning_rate": 0.00020980726347166013,
"loss": 4.2739,
"step": 480500
},
{
"epoch": 0.9,
"learning_rate": 0.00020971341046798858,
"loss": 4.2557,
"step": 481000
},
{
"epoch": 0.9,
"learning_rate": 0.00020961955746431709,
"loss": 4.2609,
"step": 481500
},
{
"epoch": 0.9,
"learning_rate": 0.00020952570446064554,
"loss": 4.2623,
"step": 482000
},
{
"epoch": 0.91,
"learning_rate": 0.00020943185145697399,
"loss": 4.276,
"step": 482500
},
{
"epoch": 0.91,
"learning_rate": 0.0002093379984533025,
"loss": 4.2485,
"step": 483000
},
{
"epoch": 0.91,
"learning_rate": 0.00020924414544963094,
"loss": 4.2676,
"step": 483500
},
{
"epoch": 0.91,
"learning_rate": 0.00020915029244595942,
"loss": 4.2527,
"step": 484000
},
{
"epoch": 0.91,
"learning_rate": 0.0002090564394422879,
"loss": 4.2803,
"step": 484500
},
{
"epoch": 0.91,
"learning_rate": 0.00020896258643861637,
"loss": 4.2584,
"step": 485000
},
{
"epoch": 0.91,
"learning_rate": 0.00020886873343494482,
"loss": 4.2635,
"step": 485500
},
{
"epoch": 0.91,
"learning_rate": 0.00020877488043127333,
"loss": 4.2583,
"step": 486000
},
{
"epoch": 0.91,
"learning_rate": 0.00020868102742760178,
"loss": 4.2783,
"step": 486500
},
{
"epoch": 0.91,
"learning_rate": 0.00020858717442393023,
"loss": 4.2579,
"step": 487000
},
{
"epoch": 0.92,
"learning_rate": 0.00020849332142025873,
"loss": 4.2727,
"step": 487500
},
{
"epoch": 0.92,
"learning_rate": 0.00020839946841658718,
"loss": 4.2704,
"step": 488000
},
{
"epoch": 0.92,
"learning_rate": 0.00020830561541291566,
"loss": 4.2684,
"step": 488500
},
{
"epoch": 0.92,
"learning_rate": 0.00020821176240924414,
"loss": 4.2596,
"step": 489000
},
{
"epoch": 0.92,
"learning_rate": 0.00020811790940557262,
"loss": 4.2519,
"step": 489500
},
{
"epoch": 0.92,
"learning_rate": 0.00020802405640190107,
"loss": 4.2665,
"step": 490000
},
{
"epoch": 0.92,
"learning_rate": 0.00020793020339822952,
"loss": 4.2481,
"step": 490500
},
{
"epoch": 0.92,
"learning_rate": 0.00020783635039455802,
"loss": 4.2664,
"step": 491000
},
{
"epoch": 0.92,
"learning_rate": 0.00020774249739088647,
"loss": 4.2443,
"step": 491500
},
{
"epoch": 0.92,
"learning_rate": 0.00020764864438721495,
"loss": 4.2506,
"step": 492000
},
{
"epoch": 0.92,
"learning_rate": 0.00020755479138354343,
"loss": 4.2729,
"step": 492500
},
{
"epoch": 0.93,
"learning_rate": 0.00020746093837987188,
"loss": 4.2578,
"step": 493000
},
{
"epoch": 0.93,
"learning_rate": 0.00020736708537620035,
"loss": 4.2583,
"step": 493500
},
{
"epoch": 0.93,
"learning_rate": 0.00020727323237252883,
"loss": 4.2613,
"step": 494000
},
{
"epoch": 0.93,
"learning_rate": 0.0002071793793688573,
"loss": 4.2652,
"step": 494500
},
{
"epoch": 0.93,
"learning_rate": 0.00020708552636518576,
"loss": 4.2533,
"step": 495000
},
{
"epoch": 0.93,
"learning_rate": 0.00020699167336151426,
"loss": 4.2594,
"step": 495500
},
{
"epoch": 0.93,
"learning_rate": 0.00020689782035784271,
"loss": 4.2733,
"step": 496000
},
{
"epoch": 0.93,
"learning_rate": 0.00020680396735417116,
"loss": 4.2422,
"step": 496500
},
{
"epoch": 0.93,
"learning_rate": 0.00020671011435049967,
"loss": 4.2414,
"step": 497000
},
{
"epoch": 0.93,
"learning_rate": 0.00020661626134682812,
"loss": 4.2533,
"step": 497500
},
{
"epoch": 0.93,
"learning_rate": 0.0002065224083431566,
"loss": 4.2763,
"step": 498000
},
{
"epoch": 0.94,
"learning_rate": 0.00020642855533948507,
"loss": 4.2528,
"step": 498500
},
{
"epoch": 0.94,
"learning_rate": 0.00020633470233581355,
"loss": 4.2631,
"step": 499000
},
{
"epoch": 0.94,
"learning_rate": 0.000206240849332142,
"loss": 4.245,
"step": 499500
},
{
"epoch": 0.94,
"learning_rate": 0.0002061469963284705,
"loss": 4.2573,
"step": 500000
},
{
"epoch": 0.94,
"learning_rate": 0.00020605314332479896,
"loss": 4.2552,
"step": 500500
},
{
"epoch": 0.94,
"learning_rate": 0.0002059592903211274,
"loss": 4.265,
"step": 501000
},
{
"epoch": 0.94,
"learning_rate": 0.0002058654373174559,
"loss": 4.2606,
"step": 501500
},
{
"epoch": 0.94,
"learning_rate": 0.00020577158431378436,
"loss": 4.2641,
"step": 502000
},
{
"epoch": 0.94,
"learning_rate": 0.00020567773131011284,
"loss": 4.2382,
"step": 502500
},
{
"epoch": 0.94,
"learning_rate": 0.0002055838783064413,
"loss": 4.2601,
"step": 503000
},
{
"epoch": 0.95,
"learning_rate": 0.00020549002530276977,
"loss": 4.2627,
"step": 503500
},
{
"epoch": 0.95,
"learning_rate": 0.00020539617229909824,
"loss": 4.2453,
"step": 504000
},
{
"epoch": 0.95,
"learning_rate": 0.0002053023192954267,
"loss": 4.2559,
"step": 504500
},
{
"epoch": 0.95,
"learning_rate": 0.0002052084662917552,
"loss": 4.2667,
"step": 505000
},
{
"epoch": 0.95,
"learning_rate": 0.00020511461328808365,
"loss": 4.2508,
"step": 505500
},
{
"epoch": 0.95,
"learning_rate": 0.0002050207602844121,
"loss": 4.2531,
"step": 506000
},
{
"epoch": 0.95,
"learning_rate": 0.0002049269072807406,
"loss": 4.2831,
"step": 506500
},
{
"epoch": 0.95,
"learning_rate": 0.00020483305427706905,
"loss": 4.2682,
"step": 507000
},
{
"epoch": 0.95,
"learning_rate": 0.00020473920127339753,
"loss": 4.2628,
"step": 507500
},
{
"epoch": 0.95,
"learning_rate": 0.000204645348269726,
"loss": 4.2403,
"step": 508000
},
{
"epoch": 0.95,
"learning_rate": 0.0002045514952660545,
"loss": 4.2521,
"step": 508500
},
{
"epoch": 0.96,
"learning_rate": 0.00020445764226238294,
"loss": 4.2387,
"step": 509000
},
{
"epoch": 0.96,
"learning_rate": 0.00020436378925871144,
"loss": 4.2724,
"step": 509500
},
{
"epoch": 0.96,
"learning_rate": 0.0002042699362550399,
"loss": 4.2577,
"step": 510000
},
{
"epoch": 0.96,
"learning_rate": 0.00020417608325136834,
"loss": 4.2621,
"step": 510500
},
{
"epoch": 0.96,
"learning_rate": 0.00020408223024769685,
"loss": 4.248,
"step": 511000
},
{
"epoch": 0.96,
"learning_rate": 0.0002039883772440253,
"loss": 4.2503,
"step": 511500
},
{
"epoch": 0.96,
"learning_rate": 0.00020389452424035377,
"loss": 4.2477,
"step": 512000
},
{
"epoch": 0.96,
"learning_rate": 0.00020380067123668225,
"loss": 4.2293,
"step": 512500
},
{
"epoch": 0.96,
"learning_rate": 0.00020370681823301073,
"loss": 4.2378,
"step": 513000
},
{
"epoch": 0.96,
"learning_rate": 0.00020361296522933918,
"loss": 4.2499,
"step": 513500
},
{
"epoch": 0.96,
"learning_rate": 0.00020351911222566766,
"loss": 4.2397,
"step": 514000
},
{
"epoch": 0.97,
"learning_rate": 0.00020342525922199613,
"loss": 4.2635,
"step": 514500
},
{
"epoch": 0.97,
"learning_rate": 0.00020333140621832458,
"loss": 4.24,
"step": 515000
},
{
"epoch": 0.97,
"learning_rate": 0.00020323755321465303,
"loss": 4.2453,
"step": 515500
},
{
"epoch": 0.97,
"learning_rate": 0.00020314370021098154,
"loss": 4.2573,
"step": 516000
},
{
"epoch": 0.97,
"learning_rate": 0.00020304984720731,
"loss": 4.249,
"step": 516500
},
{
"epoch": 0.97,
"learning_rate": 0.00020295599420363847,
"loss": 4.2683,
"step": 517000
},
{
"epoch": 0.97,
"learning_rate": 0.00020286214119996694,
"loss": 4.2641,
"step": 517500
},
{
"epoch": 0.97,
"learning_rate": 0.00020276828819629542,
"loss": 4.255,
"step": 518000
},
{
"epoch": 0.97,
"learning_rate": 0.00020267443519262387,
"loss": 4.2394,
"step": 518500
},
{
"epoch": 0.97,
"learning_rate": 0.00020258058218895238,
"loss": 4.2414,
"step": 519000
},
{
"epoch": 0.98,
"learning_rate": 0.00020248672918528083,
"loss": 4.2488,
"step": 519500
},
{
"epoch": 0.98,
"learning_rate": 0.00020239287618160928,
"loss": 4.2479,
"step": 520000
},
{
"epoch": 0.98,
"learning_rate": 0.00020229902317793778,
"loss": 4.2705,
"step": 520500
},
{
"epoch": 0.98,
"learning_rate": 0.00020220517017426623,
"loss": 4.2416,
"step": 521000
},
{
"epoch": 0.98,
"learning_rate": 0.0002021113171705947,
"loss": 4.2642,
"step": 521500
},
{
"epoch": 0.98,
"learning_rate": 0.0002020174641669232,
"loss": 4.2613,
"step": 522000
},
{
"epoch": 0.98,
"learning_rate": 0.00020192361116325166,
"loss": 4.2429,
"step": 522500
},
{
"epoch": 0.98,
"learning_rate": 0.00020182975815958011,
"loss": 4.2542,
"step": 523000
},
{
"epoch": 0.98,
"learning_rate": 0.00020173590515590862,
"loss": 4.244,
"step": 523500
},
{
"epoch": 0.98,
"learning_rate": 0.00020164205215223707,
"loss": 4.2483,
"step": 524000
},
{
"epoch": 0.98,
"learning_rate": 0.00020154819914856552,
"loss": 4.2476,
"step": 524500
},
{
"epoch": 0.99,
"learning_rate": 0.00020145434614489402,
"loss": 4.2525,
"step": 525000
},
{
"epoch": 0.99,
"learning_rate": 0.00020136049314122247,
"loss": 4.2519,
"step": 525500
},
{
"epoch": 0.99,
"learning_rate": 0.00020126664013755092,
"loss": 4.2441,
"step": 526000
},
{
"epoch": 0.99,
"learning_rate": 0.00020117278713387943,
"loss": 4.2348,
"step": 526500
},
{
"epoch": 0.99,
"learning_rate": 0.00020107893413020788,
"loss": 4.2528,
"step": 527000
},
{
"epoch": 0.99,
"learning_rate": 0.00020098508112653636,
"loss": 4.2333,
"step": 527500
},
{
"epoch": 0.99,
"learning_rate": 0.0002008912281228648,
"loss": 4.2449,
"step": 528000
},
{
"epoch": 0.99,
"learning_rate": 0.0002007973751191933,
"loss": 4.2469,
"step": 528500
},
{
"epoch": 0.99,
"learning_rate": 0.00020070352211552176,
"loss": 4.2464,
"step": 529000
},
{
"epoch": 0.99,
"learning_rate": 0.0002006096691118502,
"loss": 4.2355,
"step": 529500
},
{
"epoch": 0.99,
"learning_rate": 0.00020051581610817872,
"loss": 4.2236,
"step": 530000
},
{
"epoch": 1.0,
"learning_rate": 0.00020042196310450717,
"loss": 4.2422,
"step": 530500
},
{
"epoch": 1.0,
"learning_rate": 0.00020032811010083564,
"loss": 4.2556,
"step": 531000
},
{
"epoch": 1.0,
"learning_rate": 0.00020023425709716412,
"loss": 4.2515,
"step": 531500
},
{
"epoch": 1.0,
"learning_rate": 0.0002001404040934926,
"loss": 4.2271,
"step": 532000
},
{
"epoch": 1.0,
"learning_rate": 0.00020004655108982105,
"loss": 4.2359,
"step": 532500
},
{
"epoch": 1.0,
"learning_rate": 0.00019995269808614955,
"loss": 4.2297,
"step": 533000
},
{
"epoch": 1.0,
"learning_rate": 0.000199858845082478,
"loss": 4.2488,
"step": 533500
},
{
"epoch": 1.0,
"learning_rate": 0.00019976499207880646,
"loss": 4.2258,
"step": 534000
},
{
"epoch": 1.0,
"learning_rate": 0.00019967113907513496,
"loss": 4.2224,
"step": 534500
},
{
"epoch": 1.0,
"learning_rate": 0.0001995772860714634,
"loss": 4.2312,
"step": 535000
},
{
"epoch": 1.01,
"learning_rate": 0.0001994834330677919,
"loss": 4.2321,
"step": 535500
},
{
"epoch": 1.01,
"learning_rate": 0.00019938958006412036,
"loss": 4.2091,
"step": 536000
},
{
"epoch": 1.01,
"learning_rate": 0.00019929572706044882,
"loss": 4.224,
"step": 536500
},
{
"epoch": 1.01,
"learning_rate": 0.0001992018740567773,
"loss": 4.2276,
"step": 537000
},
{
"epoch": 1.01,
"learning_rate": 0.00019910802105310577,
"loss": 4.259,
"step": 537500
},
{
"epoch": 1.01,
"learning_rate": 0.00019901416804943425,
"loss": 4.2215,
"step": 538000
},
{
"epoch": 1.01,
"learning_rate": 0.0001989203150457627,
"loss": 4.2088,
"step": 538500
},
{
"epoch": 1.01,
"learning_rate": 0.00019882646204209115,
"loss": 4.2335,
"step": 539000
},
{
"epoch": 1.01,
"learning_rate": 0.00019873260903841965,
"loss": 4.2255,
"step": 539500
},
{
"epoch": 1.01,
"learning_rate": 0.0001986387560347481,
"loss": 4.2327,
"step": 540000
},
{
"epoch": 1.01,
"learning_rate": 0.00019854490303107658,
"loss": 4.2239,
"step": 540500
},
{
"epoch": 1.02,
"learning_rate": 0.00019845105002740506,
"loss": 4.2246,
"step": 541000
},
{
"epoch": 1.02,
"learning_rate": 0.00019835719702373354,
"loss": 4.2427,
"step": 541500
},
{
"epoch": 1.02,
"learning_rate": 0.00019826334402006199,
"loss": 4.2361,
"step": 542000
},
{
"epoch": 1.02,
"learning_rate": 0.0001981694910163905,
"loss": 4.2247,
"step": 542500
},
{
"epoch": 1.02,
"learning_rate": 0.00019807563801271894,
"loss": 4.2346,
"step": 543000
},
{
"epoch": 1.02,
"learning_rate": 0.0001979817850090474,
"loss": 4.2269,
"step": 543500
},
{
"epoch": 1.02,
"learning_rate": 0.0001978879320053759,
"loss": 4.2321,
"step": 544000
},
{
"epoch": 1.02,
"learning_rate": 0.00019779407900170435,
"loss": 4.2198,
"step": 544500
},
{
"epoch": 1.02,
"learning_rate": 0.00019770022599803282,
"loss": 4.2388,
"step": 545000
},
{
"epoch": 1.02,
"learning_rate": 0.0001976063729943613,
"loss": 4.23,
"step": 545500
},
{
"epoch": 1.02,
"learning_rate": 0.00019751251999068978,
"loss": 4.2329,
"step": 546000
},
{
"epoch": 1.03,
"learning_rate": 0.00019741866698701823,
"loss": 4.2312,
"step": 546500
},
{
"epoch": 1.03,
"learning_rate": 0.0001973248139833467,
"loss": 4.2313,
"step": 547000
},
{
"epoch": 1.03,
"learning_rate": 0.00019723096097967518,
"loss": 4.2346,
"step": 547500
},
{
"epoch": 1.03,
"learning_rate": 0.00019713710797600363,
"loss": 4.243,
"step": 548000
},
{
"epoch": 1.03,
"learning_rate": 0.00019704325497233214,
"loss": 4.2174,
"step": 548500
},
{
"epoch": 1.03,
"learning_rate": 0.0001969494019686606,
"loss": 4.2352,
"step": 549000
},
{
"epoch": 1.03,
"learning_rate": 0.00019685554896498904,
"loss": 4.2285,
"step": 549500
},
{
"epoch": 1.03,
"learning_rate": 0.00019676169596131754,
"loss": 4.2227,
"step": 550000
},
{
"epoch": 1.03,
"learning_rate": 0.000196667842957646,
"loss": 4.215,
"step": 550500
},
{
"epoch": 1.03,
"learning_rate": 0.00019657398995397447,
"loss": 4.2237,
"step": 551000
},
{
"epoch": 1.04,
"learning_rate": 0.00019648013695030292,
"loss": 4.2057,
"step": 551500
},
{
"epoch": 1.04,
"learning_rate": 0.00019638628394663143,
"loss": 4.2184,
"step": 552000
},
{
"epoch": 1.04,
"learning_rate": 0.00019629243094295988,
"loss": 4.2099,
"step": 552500
},
{
"epoch": 1.04,
"learning_rate": 0.00019619857793928833,
"loss": 4.2225,
"step": 553000
},
{
"epoch": 1.04,
"learning_rate": 0.00019610472493561683,
"loss": 4.2369,
"step": 553500
},
{
"epoch": 1.04,
"learning_rate": 0.00019601087193194528,
"loss": 4.2086,
"step": 554000
},
{
"epoch": 1.04,
"learning_rate": 0.00019591701892827376,
"loss": 4.2253,
"step": 554500
},
{
"epoch": 1.04,
"learning_rate": 0.00019582316592460224,
"loss": 4.2251,
"step": 555000
},
{
"epoch": 1.04,
"learning_rate": 0.0001957293129209307,
"loss": 4.2226,
"step": 555500
},
{
"epoch": 1.04,
"learning_rate": 0.00019563545991725916,
"loss": 4.2374,
"step": 556000
},
{
"epoch": 1.04,
"learning_rate": 0.00019554160691358767,
"loss": 4.2233,
"step": 556500
},
{
"epoch": 1.05,
"learning_rate": 0.00019544775390991612,
"loss": 4.228,
"step": 557000
},
{
"epoch": 1.05,
"learning_rate": 0.00019535390090624457,
"loss": 4.2256,
"step": 557500
},
{
"epoch": 1.05,
"learning_rate": 0.00019526004790257307,
"loss": 4.2303,
"step": 558000
},
{
"epoch": 1.05,
"learning_rate": 0.00019516619489890152,
"loss": 4.2313,
"step": 558500
},
{
"epoch": 1.05,
"learning_rate": 0.00019507234189523,
"loss": 4.2416,
"step": 559000
},
{
"epoch": 1.05,
"learning_rate": 0.00019497848889155848,
"loss": 4.2075,
"step": 559500
},
{
"epoch": 1.05,
"learning_rate": 0.00019488463588788693,
"loss": 4.2282,
"step": 560000
},
{
"epoch": 1.05,
"learning_rate": 0.0001947907828842154,
"loss": 4.2386,
"step": 560500
},
{
"epoch": 1.05,
"learning_rate": 0.00019469692988054388,
"loss": 4.1995,
"step": 561000
},
{
"epoch": 1.05,
"learning_rate": 0.00019460307687687236,
"loss": 4.2351,
"step": 561500
},
{
"epoch": 1.05,
"learning_rate": 0.0001945092238732008,
"loss": 4.2391,
"step": 562000
},
{
"epoch": 1.06,
"learning_rate": 0.00019441537086952932,
"loss": 4.2291,
"step": 562500
},
{
"epoch": 1.06,
"learning_rate": 0.00019432151786585777,
"loss": 4.2197,
"step": 563000
},
{
"epoch": 1.06,
"learning_rate": 0.00019422766486218622,
"loss": 4.2284,
"step": 563500
},
{
"epoch": 1.06,
"learning_rate": 0.0001941338118585147,
"loss": 4.24,
"step": 564000
},
{
"epoch": 1.06,
"learning_rate": 0.00019403995885484317,
"loss": 4.2367,
"step": 564500
},
{
"epoch": 1.06,
"learning_rate": 0.00019394610585117165,
"loss": 4.2241,
"step": 565000
},
{
"epoch": 1.06,
"learning_rate": 0.0001938522528475001,
"loss": 4.2227,
"step": 565500
},
{
"epoch": 1.06,
"learning_rate": 0.0001937583998438286,
"loss": 4.2124,
"step": 566000
},
{
"epoch": 1.06,
"learning_rate": 0.00019366454684015705,
"loss": 4.2236,
"step": 566500
},
{
"epoch": 1.06,
"learning_rate": 0.0001935706938364855,
"loss": 4.2193,
"step": 567000
},
{
"epoch": 1.07,
"learning_rate": 0.000193476840832814,
"loss": 4.2251,
"step": 567500
},
{
"epoch": 1.07,
"learning_rate": 0.00019338298782914246,
"loss": 4.2165,
"step": 568000
},
{
"epoch": 1.07,
"learning_rate": 0.00019328913482547094,
"loss": 4.2178,
"step": 568500
},
{
"epoch": 1.07,
"learning_rate": 0.00019319528182179941,
"loss": 4.2291,
"step": 569000
},
{
"epoch": 1.07,
"learning_rate": 0.0001931014288181279,
"loss": 4.2278,
"step": 569500
},
{
"epoch": 1.07,
"learning_rate": 0.00019300757581445634,
"loss": 4.2115,
"step": 570000
},
{
"epoch": 1.07,
"learning_rate": 0.00019291372281078482,
"loss": 4.2327,
"step": 570500
},
{
"epoch": 1.07,
"learning_rate": 0.0001928198698071133,
"loss": 4.2181,
"step": 571000
},
{
"epoch": 1.07,
"learning_rate": 0.00019272601680344175,
"loss": 4.2258,
"step": 571500
},
{
"epoch": 1.07,
"learning_rate": 0.00019263216379977025,
"loss": 4.2262,
"step": 572000
},
{
"epoch": 1.07,
"learning_rate": 0.0001925383107960987,
"loss": 4.2313,
"step": 572500
},
{
"epoch": 1.08,
"learning_rate": 0.00019244445779242715,
"loss": 4.2121,
"step": 573000
},
{
"epoch": 1.08,
"learning_rate": 0.00019235060478875566,
"loss": 4.2348,
"step": 573500
},
{
"epoch": 1.08,
"learning_rate": 0.0001922567517850841,
"loss": 4.2235,
"step": 574000
},
{
"epoch": 1.08,
"learning_rate": 0.00019216289878141258,
"loss": 4.2236,
"step": 574500
},
{
"epoch": 1.08,
"learning_rate": 0.00019206904577774103,
"loss": 4.2327,
"step": 575000
},
{
"epoch": 1.08,
"learning_rate": 0.00019197519277406954,
"loss": 4.2335,
"step": 575500
},
{
"epoch": 1.08,
"learning_rate": 0.000191881339770398,
"loss": 4.2289,
"step": 576000
},
{
"epoch": 1.08,
"learning_rate": 0.00019178748676672644,
"loss": 4.2433,
"step": 576500
},
{
"epoch": 1.08,
"learning_rate": 0.00019169363376305494,
"loss": 4.2198,
"step": 577000
},
{
"epoch": 1.08,
"learning_rate": 0.0001915997807593834,
"loss": 4.2281,
"step": 577500
},
{
"epoch": 1.08,
"learning_rate": 0.00019150592775571187,
"loss": 4.2201,
"step": 578000
},
{
"epoch": 1.09,
"learning_rate": 0.00019141207475204035,
"loss": 4.2255,
"step": 578500
},
{
"epoch": 1.09,
"learning_rate": 0.00019131822174836883,
"loss": 4.2203,
"step": 579000
},
{
"epoch": 1.09,
"learning_rate": 0.00019122436874469728,
"loss": 4.2217,
"step": 579500
},
{
"epoch": 1.09,
"learning_rate": 0.00019113051574102578,
"loss": 4.223,
"step": 580000
},
{
"epoch": 1.09,
"learning_rate": 0.00019103666273735423,
"loss": 4.2309,
"step": 580500
},
{
"epoch": 1.09,
"learning_rate": 0.00019094280973368268,
"loss": 4.2395,
"step": 581000
},
{
"epoch": 1.09,
"learning_rate": 0.00019084895673001119,
"loss": 4.2214,
"step": 581500
},
{
"epoch": 1.09,
"learning_rate": 0.00019075510372633964,
"loss": 4.2269,
"step": 582000
},
{
"epoch": 1.09,
"learning_rate": 0.00019066125072266811,
"loss": 4.1937,
"step": 582500
},
{
"epoch": 1.09,
"learning_rate": 0.0001905673977189966,
"loss": 4.2311,
"step": 583000
},
{
"epoch": 1.1,
"learning_rate": 0.00019047354471532504,
"loss": 4.2116,
"step": 583500
},
{
"epoch": 1.1,
"learning_rate": 0.00019037969171165352,
"loss": 4.2534,
"step": 584000
},
{
"epoch": 1.1,
"learning_rate": 0.000190285838707982,
"loss": 4.2268,
"step": 584500
},
{
"epoch": 1.1,
"learning_rate": 0.00019019198570431047,
"loss": 4.2081,
"step": 585000
},
{
"epoch": 1.1,
"learning_rate": 0.00019009813270063892,
"loss": 4.2207,
"step": 585500
},
{
"epoch": 1.1,
"learning_rate": 0.00019000427969696743,
"loss": 4.2161,
"step": 586000
},
{
"epoch": 1.1,
"learning_rate": 0.00018991042669329588,
"loss": 4.2327,
"step": 586500
},
{
"epoch": 1.1,
"learning_rate": 0.00018981657368962433,
"loss": 4.2175,
"step": 587000
},
{
"epoch": 1.1,
"learning_rate": 0.0001897227206859528,
"loss": 4.2299,
"step": 587500
},
{
"epoch": 1.1,
"learning_rate": 0.00018962886768228128,
"loss": 4.2199,
"step": 588000
},
{
"epoch": 1.1,
"learning_rate": 0.00018953501467860976,
"loss": 4.2339,
"step": 588500
},
{
"epoch": 1.11,
"learning_rate": 0.0001894411616749382,
"loss": 4.2331,
"step": 589000
},
{
"epoch": 1.11,
"learning_rate": 0.00018934730867126672,
"loss": 4.2203,
"step": 589500
},
{
"epoch": 1.11,
"learning_rate": 0.00018925345566759517,
"loss": 4.2066,
"step": 590000
},
{
"epoch": 1.11,
"learning_rate": 0.00018915960266392362,
"loss": 4.2226,
"step": 590500
},
{
"epoch": 1.11,
"learning_rate": 0.00018906574966025212,
"loss": 4.2288,
"step": 591000
},
{
"epoch": 1.11,
"learning_rate": 0.00018897189665658057,
"loss": 4.2285,
"step": 591500
},
{
"epoch": 1.11,
"learning_rate": 0.00018887804365290905,
"loss": 4.1987,
"step": 592000
},
{
"epoch": 1.11,
"learning_rate": 0.00018878419064923753,
"loss": 4.2278,
"step": 592500
},
{
"epoch": 1.11,
"learning_rate": 0.000188690337645566,
"loss": 4.209,
"step": 593000
},
{
"epoch": 1.11,
"learning_rate": 0.00018859648464189445,
"loss": 4.233,
"step": 593500
},
{
"epoch": 1.11,
"learning_rate": 0.00018850263163822293,
"loss": 4.2264,
"step": 594000
},
{
"epoch": 1.12,
"learning_rate": 0.0001884087786345514,
"loss": 4.2198,
"step": 594500
},
{
"epoch": 1.12,
"learning_rate": 0.00018831492563087986,
"loss": 4.2152,
"step": 595000
},
{
"epoch": 1.12,
"learning_rate": 0.00018822107262720836,
"loss": 4.2281,
"step": 595500
},
{
"epoch": 1.12,
"learning_rate": 0.00018812721962353681,
"loss": 4.215,
"step": 596000
},
{
"epoch": 1.12,
"learning_rate": 0.00018803336661986526,
"loss": 4.238,
"step": 596500
},
{
"epoch": 1.12,
"learning_rate": 0.00018793951361619377,
"loss": 4.2216,
"step": 597000
},
{
"epoch": 1.12,
"learning_rate": 0.00018784566061252222,
"loss": 4.2376,
"step": 597500
},
{
"epoch": 1.12,
"learning_rate": 0.0001877518076088507,
"loss": 4.2052,
"step": 598000
},
{
"epoch": 1.12,
"learning_rate": 0.00018765795460517917,
"loss": 4.2181,
"step": 598500
},
{
"epoch": 1.12,
"learning_rate": 0.00018756410160150765,
"loss": 4.2172,
"step": 599000
},
{
"epoch": 1.13,
"learning_rate": 0.0001874702485978361,
"loss": 4.2144,
"step": 599500
},
{
"epoch": 1.13,
"learning_rate": 0.00018737639559416455,
"loss": 4.2155,
"step": 600000
},
{
"epoch": 1.13,
"learning_rate": 0.00018728254259049306,
"loss": 4.2061,
"step": 600500
},
{
"epoch": 1.13,
"learning_rate": 0.0001871886895868215,
"loss": 4.2325,
"step": 601000
},
{
"epoch": 1.13,
"learning_rate": 0.00018709483658314998,
"loss": 4.203,
"step": 601500
},
{
"epoch": 1.13,
"learning_rate": 0.00018700098357947846,
"loss": 4.2066,
"step": 602000
},
{
"epoch": 1.13,
"learning_rate": 0.00018690713057580694,
"loss": 4.2303,
"step": 602500
},
{
"epoch": 1.13,
"learning_rate": 0.0001868132775721354,
"loss": 4.2242,
"step": 603000
},
{
"epoch": 1.13,
"learning_rate": 0.0001867194245684639,
"loss": 4.2095,
"step": 603500
},
{
"epoch": 1.13,
"learning_rate": 0.00018662557156479234,
"loss": 4.2156,
"step": 604000
},
{
"epoch": 1.13,
"learning_rate": 0.0001865317185611208,
"loss": 4.2042,
"step": 604500
},
{
"epoch": 1.14,
"learning_rate": 0.0001864378655574493,
"loss": 4.2106,
"step": 605000
},
{
"epoch": 1.14,
"learning_rate": 0.00018634401255377775,
"loss": 4.208,
"step": 605500
},
{
"epoch": 1.14,
"learning_rate": 0.00018625015955010623,
"loss": 4.2211,
"step": 606000
},
{
"epoch": 1.14,
"learning_rate": 0.0001861563065464347,
"loss": 4.2134,
"step": 606500
},
{
"epoch": 1.14,
"learning_rate": 0.00018606245354276316,
"loss": 4.225,
"step": 607000
},
{
"epoch": 1.14,
"learning_rate": 0.00018596860053909163,
"loss": 4.2293,
"step": 607500
},
{
"epoch": 1.14,
"learning_rate": 0.0001858747475354201,
"loss": 4.2237,
"step": 608000
},
{
"epoch": 1.14,
"learning_rate": 0.0001857808945317486,
"loss": 4.2054,
"step": 608500
},
{
"epoch": 1.14,
"learning_rate": 0.00018568704152807704,
"loss": 4.2071,
"step": 609000
},
{
"epoch": 1.14,
"learning_rate": 0.00018559318852440554,
"loss": 4.2144,
"step": 609500
},
{
"epoch": 1.15,
"learning_rate": 0.000185499335520734,
"loss": 4.2135,
"step": 610000
},
{
"epoch": 1.15,
"learning_rate": 0.00018540548251706244,
"loss": 4.2218,
"step": 610500
},
{
"epoch": 1.15,
"learning_rate": 0.00018531162951339095,
"loss": 4.2164,
"step": 611000
},
{
"epoch": 1.15,
"learning_rate": 0.0001852177765097194,
"loss": 4.224,
"step": 611500
},
{
"epoch": 1.15,
"learning_rate": 0.00018512392350604788,
"loss": 4.2092,
"step": 612000
},
{
"epoch": 1.15,
"learning_rate": 0.00018503007050237633,
"loss": 4.2103,
"step": 612500
},
{
"epoch": 1.15,
"learning_rate": 0.00018493621749870483,
"loss": 4.2111,
"step": 613000
},
{
"epoch": 1.15,
"learning_rate": 0.00018484236449503328,
"loss": 4.2203,
"step": 613500
},
{
"epoch": 1.15,
"learning_rate": 0.00018474851149136173,
"loss": 4.2308,
"step": 614000
},
{
"epoch": 1.15,
"learning_rate": 0.00018465465848769024,
"loss": 4.2132,
"step": 614500
},
{
"epoch": 1.15,
"learning_rate": 0.00018456080548401869,
"loss": 4.2204,
"step": 615000
},
{
"epoch": 1.16,
"learning_rate": 0.00018446695248034716,
"loss": 4.2039,
"step": 615500
},
{
"epoch": 1.16,
"learning_rate": 0.00018437309947667564,
"loss": 4.2223,
"step": 616000
},
{
"epoch": 1.16,
"learning_rate": 0.00018427924647300412,
"loss": 4.2024,
"step": 616500
},
{
"epoch": 1.16,
"learning_rate": 0.00018418539346933257,
"loss": 4.1935,
"step": 617000
},
{
"epoch": 1.16,
"learning_rate": 0.00018409154046566105,
"loss": 4.214,
"step": 617500
},
{
"epoch": 1.16,
"learning_rate": 0.00018399768746198952,
"loss": 4.226,
"step": 618000
},
{
"epoch": 1.16,
"learning_rate": 0.00018390383445831797,
"loss": 4.2093,
"step": 618500
},
{
"epoch": 1.16,
"learning_rate": 0.00018380998145464648,
"loss": 4.2317,
"step": 619000
},
{
"epoch": 1.16,
"learning_rate": 0.00018371612845097493,
"loss": 4.2188,
"step": 619500
},
{
"epoch": 1.16,
"learning_rate": 0.00018362227544730338,
"loss": 4.2172,
"step": 620000
},
{
"epoch": 1.16,
"learning_rate": 0.00018352842244363188,
"loss": 4.2123,
"step": 620500
},
{
"epoch": 1.17,
"learning_rate": 0.00018343456943996033,
"loss": 4.2063,
"step": 621000
},
{
"epoch": 1.17,
"learning_rate": 0.0001833407164362888,
"loss": 4.2137,
"step": 621500
},
{
"epoch": 1.17,
"learning_rate": 0.0001832468634326173,
"loss": 4.2104,
"step": 622000
},
{
"epoch": 1.17,
"learning_rate": 0.00018315301042894577,
"loss": 4.2196,
"step": 622500
},
{
"epoch": 1.17,
"learning_rate": 0.00018305915742527422,
"loss": 4.2209,
"step": 623000
},
{
"epoch": 1.17,
"learning_rate": 0.00018296530442160267,
"loss": 4.2221,
"step": 623500
},
{
"epoch": 1.17,
"learning_rate": 0.00018287145141793117,
"loss": 4.1967,
"step": 624000
},
{
"epoch": 1.17,
"learning_rate": 0.00018277759841425962,
"loss": 4.2119,
"step": 624500
},
{
"epoch": 1.17,
"learning_rate": 0.0001826837454105881,
"loss": 4.2446,
"step": 625000
},
{
"epoch": 1.17,
"learning_rate": 0.00018258989240691658,
"loss": 4.2099,
"step": 625500
},
{
"epoch": 1.18,
"learning_rate": 0.00018249603940324505,
"loss": 4.2067,
"step": 626000
},
{
"epoch": 1.18,
"learning_rate": 0.0001824021863995735,
"loss": 4.2227,
"step": 626500
},
{
"epoch": 1.18,
"learning_rate": 0.000182308333395902,
"loss": 4.2201,
"step": 627000
},
{
"epoch": 1.18,
"learning_rate": 0.00018221448039223046,
"loss": 4.209,
"step": 627500
},
{
"epoch": 1.18,
"learning_rate": 0.0001821206273885589,
"loss": 4.2072,
"step": 628000
},
{
"epoch": 1.18,
"learning_rate": 0.0001820267743848874,
"loss": 4.1922,
"step": 628500
},
{
"epoch": 1.18,
"learning_rate": 0.00018193292138121586,
"loss": 4.2127,
"step": 629000
},
{
"epoch": 1.18,
"learning_rate": 0.00018183906837754434,
"loss": 4.2231,
"step": 629500
},
{
"epoch": 1.18,
"learning_rate": 0.00018174521537387282,
"loss": 4.2032,
"step": 630000
},
{
"epoch": 1.18,
"learning_rate": 0.00018165136237020127,
"loss": 4.2178,
"step": 630500
},
{
"epoch": 1.18,
"learning_rate": 0.00018155750936652975,
"loss": 4.2204,
"step": 631000
},
{
"epoch": 1.19,
"learning_rate": 0.00018146365636285822,
"loss": 4.2148,
"step": 631500
},
{
"epoch": 1.19,
"learning_rate": 0.0001813698033591867,
"loss": 4.2049,
"step": 632000
},
{
"epoch": 1.19,
"learning_rate": 0.00018127595035551515,
"loss": 4.2093,
"step": 632500
},
{
"epoch": 1.19,
"learning_rate": 0.00018118209735184366,
"loss": 4.2281,
"step": 633000
},
{
"epoch": 1.19,
"learning_rate": 0.0001810882443481721,
"loss": 4.1951,
"step": 633500
},
{
"epoch": 1.19,
"learning_rate": 0.00018099439134450056,
"loss": 4.2293,
"step": 634000
},
{
"epoch": 1.19,
"learning_rate": 0.00018090053834082906,
"loss": 4.1989,
"step": 634500
},
{
"epoch": 1.19,
"learning_rate": 0.0001808066853371575,
"loss": 4.2071,
"step": 635000
},
{
"epoch": 1.19,
"learning_rate": 0.000180712832333486,
"loss": 4.2136,
"step": 635500
},
{
"epoch": 1.19,
"learning_rate": 0.00018061897932981444,
"loss": 4.2002,
"step": 636000
},
{
"epoch": 1.19,
"learning_rate": 0.00018052512632614294,
"loss": 4.213,
"step": 636500
},
{
"epoch": 1.2,
"learning_rate": 0.0001804312733224714,
"loss": 4.1991,
"step": 637000
},
{
"epoch": 1.2,
"learning_rate": 0.00018033742031879984,
"loss": 4.2209,
"step": 637500
},
{
"epoch": 1.2,
"learning_rate": 0.00018024356731512835,
"loss": 4.2063,
"step": 638000
},
{
"epoch": 1.2,
"learning_rate": 0.0001801497143114568,
"loss": 4.182,
"step": 638500
},
{
"epoch": 1.2,
"learning_rate": 0.00018005586130778528,
"loss": 4.2142,
"step": 639000
},
{
"epoch": 1.2,
"learning_rate": 0.00017996200830411375,
"loss": 4.2075,
"step": 639500
},
{
"epoch": 1.2,
"learning_rate": 0.00017986815530044223,
"loss": 4.2225,
"step": 640000
},
{
"epoch": 1.2,
"learning_rate": 0.00017977430229677068,
"loss": 4.1956,
"step": 640500
},
{
"epoch": 1.2,
"learning_rate": 0.00017968044929309916,
"loss": 4.2198,
"step": 641000
},
{
"epoch": 1.2,
"learning_rate": 0.00017958659628942764,
"loss": 4.2175,
"step": 641500
},
{
"epoch": 1.21,
"learning_rate": 0.0001794927432857561,
"loss": 4.2163,
"step": 642000
},
{
"epoch": 1.21,
"learning_rate": 0.0001793988902820846,
"loss": 4.2066,
"step": 642500
},
{
"epoch": 1.21,
"learning_rate": 0.00017930503727841304,
"loss": 4.1946,
"step": 643000
},
{
"epoch": 1.21,
"learning_rate": 0.0001792111842747415,
"loss": 4.203,
"step": 643500
},
{
"epoch": 1.21,
"learning_rate": 0.00017911733127107,
"loss": 4.2222,
"step": 644000
},
{
"epoch": 1.21,
"learning_rate": 0.00017902347826739845,
"loss": 4.2074,
"step": 644500
},
{
"epoch": 1.21,
"learning_rate": 0.00017892962526372692,
"loss": 4.2008,
"step": 645000
},
{
"epoch": 1.21,
"learning_rate": 0.0001788357722600554,
"loss": 4.1977,
"step": 645500
},
{
"epoch": 1.21,
"learning_rate": 0.00017874191925638388,
"loss": 4.2094,
"step": 646000
},
{
"epoch": 1.21,
"learning_rate": 0.00017864806625271233,
"loss": 4.1833,
"step": 646500
},
{
"epoch": 1.21,
"learning_rate": 0.00017855421324904083,
"loss": 4.2017,
"step": 647000
},
{
"epoch": 1.22,
"learning_rate": 0.00017846036024536928,
"loss": 4.1936,
"step": 647500
},
{
"epoch": 1.22,
"learning_rate": 0.00017836650724169773,
"loss": 4.1945,
"step": 648000
},
{
"epoch": 1.22,
"learning_rate": 0.0001782726542380262,
"loss": 4.2016,
"step": 648500
},
{
"epoch": 1.22,
"learning_rate": 0.0001781788012343547,
"loss": 4.2021,
"step": 649000
},
{
"epoch": 1.22,
"learning_rate": 0.00017808494823068317,
"loss": 4.2121,
"step": 649500
},
{
"epoch": 1.22,
"learning_rate": 0.00017799109522701162,
"loss": 4.2016,
"step": 650000
},
{
"epoch": 1.22,
"learning_rate": 0.00017789724222334012,
"loss": 4.1895,
"step": 650500
},
{
"epoch": 1.22,
"learning_rate": 0.00017780338921966857,
"loss": 4.2204,
"step": 651000
},
{
"epoch": 1.22,
"learning_rate": 0.00017770953621599702,
"loss": 4.1967,
"step": 651500
},
{
"epoch": 1.22,
"learning_rate": 0.00017761568321232553,
"loss": 4.1896,
"step": 652000
},
{
"epoch": 1.22,
"learning_rate": 0.00017752183020865398,
"loss": 4.2053,
"step": 652500
},
{
"epoch": 1.23,
"learning_rate": 0.00017742797720498245,
"loss": 4.217,
"step": 653000
},
{
"epoch": 1.23,
"learning_rate": 0.00017733412420131093,
"loss": 4.2119,
"step": 653500
},
{
"epoch": 1.23,
"learning_rate": 0.00017724027119763938,
"loss": 4.1935,
"step": 654000
},
{
"epoch": 1.23,
"learning_rate": 0.00017714641819396786,
"loss": 4.2168,
"step": 654500
},
{
"epoch": 1.23,
"learning_rate": 0.00017705256519029634,
"loss": 4.2104,
"step": 655000
},
{
"epoch": 1.23,
"learning_rate": 0.00017695871218662481,
"loss": 4.2074,
"step": 655500
},
{
"epoch": 1.23,
"learning_rate": 0.00017686485918295326,
"loss": 4.1953,
"step": 656000
},
{
"epoch": 1.23,
"learning_rate": 0.00017677100617928177,
"loss": 4.2014,
"step": 656500
},
{
"epoch": 1.23,
"learning_rate": 0.00017667715317561022,
"loss": 4.2013,
"step": 657000
},
{
"epoch": 1.23,
"learning_rate": 0.00017658330017193867,
"loss": 4.1982,
"step": 657500
},
{
"epoch": 1.24,
"learning_rate": 0.00017648944716826717,
"loss": 4.1911,
"step": 658000
},
{
"epoch": 1.24,
"learning_rate": 0.00017639559416459562,
"loss": 4.2104,
"step": 658500
},
{
"epoch": 1.24,
"learning_rate": 0.0001763017411609241,
"loss": 4.2123,
"step": 659000
},
{
"epoch": 1.24,
"learning_rate": 0.00017620788815725258,
"loss": 4.2062,
"step": 659500
},
{
"epoch": 1.24,
"learning_rate": 0.00017611403515358106,
"loss": 4.2148,
"step": 660000
},
{
"epoch": 1.24,
"learning_rate": 0.0001760201821499095,
"loss": 4.1897,
"step": 660500
},
{
"epoch": 1.24,
"learning_rate": 0.00017592632914623796,
"loss": 4.1842,
"step": 661000
},
{
"epoch": 1.24,
"learning_rate": 0.00017583247614256646,
"loss": 4.1976,
"step": 661500
},
{
"epoch": 1.24,
"learning_rate": 0.0001757386231388949,
"loss": 4.1973,
"step": 662000
},
{
"epoch": 1.24,
"learning_rate": 0.0001756447701352234,
"loss": 4.1975,
"step": 662500
},
{
"epoch": 1.24,
"learning_rate": 0.00017555091713155187,
"loss": 4.1829,
"step": 663000
},
{
"epoch": 1.25,
"learning_rate": 0.00017545706412788034,
"loss": 4.2162,
"step": 663500
},
{
"epoch": 1.25,
"learning_rate": 0.0001753632111242088,
"loss": 4.1734,
"step": 664000
},
{
"epoch": 1.25,
"learning_rate": 0.00017526935812053727,
"loss": 4.2041,
"step": 664500
},
{
"epoch": 1.25,
"learning_rate": 0.00017517550511686575,
"loss": 4.1929,
"step": 665000
},
{
"epoch": 1.25,
"learning_rate": 0.0001750816521131942,
"loss": 4.2058,
"step": 665500
},
{
"epoch": 1.25,
"learning_rate": 0.0001749877991095227,
"loss": 4.1985,
"step": 666000
},
{
"epoch": 1.25,
"learning_rate": 0.00017489394610585115,
"loss": 4.2067,
"step": 666500
},
{
"epoch": 1.25,
"learning_rate": 0.0001748000931021796,
"loss": 4.1944,
"step": 667000
},
{
"epoch": 1.25,
"learning_rate": 0.0001747062400985081,
"loss": 4.2133,
"step": 667500
},
{
"epoch": 1.25,
"learning_rate": 0.00017461238709483656,
"loss": 4.1892,
"step": 668000
},
{
"epoch": 1.25,
"learning_rate": 0.00017451853409116504,
"loss": 4.2097,
"step": 668500
},
{
"epoch": 1.26,
"learning_rate": 0.00017442468108749351,
"loss": 4.2034,
"step": 669000
},
{
"epoch": 1.26,
"learning_rate": 0.000174330828083822,
"loss": 4.1851,
"step": 669500
},
{
"epoch": 1.26,
"learning_rate": 0.00017423697508015044,
"loss": 4.212,
"step": 670000
},
{
"epoch": 1.26,
"learning_rate": 0.00017414312207647895,
"loss": 4.2136,
"step": 670500
},
{
"epoch": 1.26,
"learning_rate": 0.0001740492690728074,
"loss": 4.1989,
"step": 671000
},
{
"epoch": 1.26,
"learning_rate": 0.00017395541606913585,
"loss": 4.2107,
"step": 671500
},
{
"epoch": 1.26,
"learning_rate": 0.00017386156306546432,
"loss": 4.1927,
"step": 672000
},
{
"epoch": 1.26,
"learning_rate": 0.0001737677100617928,
"loss": 4.1823,
"step": 672500
},
{
"epoch": 1.26,
"learning_rate": 0.00017367385705812128,
"loss": 4.1967,
"step": 673000
},
{
"epoch": 1.26,
"learning_rate": 0.00017358000405444973,
"loss": 4.1905,
"step": 673500
},
{
"epoch": 1.27,
"learning_rate": 0.00017348615105077823,
"loss": 4.2168,
"step": 674000
},
{
"epoch": 1.27,
"learning_rate": 0.00017339229804710668,
"loss": 4.1814,
"step": 674500
},
{
"epoch": 1.27,
"learning_rate": 0.00017329844504343514,
"loss": 4.2118,
"step": 675000
},
{
"epoch": 1.27,
"learning_rate": 0.00017320459203976364,
"loss": 4.2121,
"step": 675500
},
{
"epoch": 1.27,
"learning_rate": 0.0001731107390360921,
"loss": 4.192,
"step": 676000
},
{
"epoch": 1.27,
"learning_rate": 0.00017301688603242057,
"loss": 4.1885,
"step": 676500
},
{
"epoch": 1.27,
"learning_rate": 0.00017292303302874904,
"loss": 4.2103,
"step": 677000
},
{
"epoch": 1.27,
"learning_rate": 0.0001728291800250775,
"loss": 4.1964,
"step": 677500
},
{
"epoch": 1.27,
"learning_rate": 0.00017273532702140597,
"loss": 4.1836,
"step": 678000
},
{
"epoch": 1.27,
"learning_rate": 0.00017264147401773445,
"loss": 4.2167,
"step": 678500
},
{
"epoch": 1.27,
"learning_rate": 0.00017254762101406293,
"loss": 4.1908,
"step": 679000
},
{
"epoch": 1.28,
"learning_rate": 0.00017245376801039138,
"loss": 4.2083,
"step": 679500
},
{
"epoch": 1.28,
"learning_rate": 0.00017235991500671988,
"loss": 4.2022,
"step": 680000
},
{
"epoch": 1.28,
"learning_rate": 0.00017226606200304833,
"loss": 4.1904,
"step": 680500
},
{
"epoch": 1.28,
"learning_rate": 0.00017217220899937678,
"loss": 4.2128,
"step": 681000
},
{
"epoch": 1.28,
"learning_rate": 0.0001720783559957053,
"loss": 4.2046,
"step": 681500
},
{
"epoch": 1.28,
"learning_rate": 0.00017198450299203374,
"loss": 4.2045,
"step": 682000
},
{
"epoch": 1.28,
"learning_rate": 0.00017189064998836222,
"loss": 4.2121,
"step": 682500
},
{
"epoch": 1.28,
"learning_rate": 0.0001717967969846907,
"loss": 4.1857,
"step": 683000
},
{
"epoch": 1.28,
"learning_rate": 0.00017170294398101917,
"loss": 4.2036,
"step": 683500
},
{
"epoch": 1.28,
"learning_rate": 0.00017160909097734762,
"loss": 4.2028,
"step": 684000
},
{
"epoch": 1.28,
"learning_rate": 0.00017151523797367607,
"loss": 4.1933,
"step": 684500
},
{
"epoch": 1.29,
"learning_rate": 0.00017142138497000458,
"loss": 4.1952,
"step": 685000
},
{
"epoch": 1.29,
"learning_rate": 0.00017132753196633303,
"loss": 4.1993,
"step": 685500
},
{
"epoch": 1.29,
"learning_rate": 0.0001712336789626615,
"loss": 4.187,
"step": 686000
},
{
"epoch": 1.29,
"learning_rate": 0.00017113982595898998,
"loss": 4.2031,
"step": 686500
},
{
"epoch": 1.29,
"learning_rate": 0.00017104597295531846,
"loss": 4.198,
"step": 687000
},
{
"epoch": 1.29,
"learning_rate": 0.0001709521199516469,
"loss": 4.2016,
"step": 687500
},
{
"epoch": 1.29,
"learning_rate": 0.00017085826694797539,
"loss": 4.2017,
"step": 688000
},
{
"epoch": 1.29,
"learning_rate": 0.00017076441394430386,
"loss": 4.1881,
"step": 688500
},
{
"epoch": 1.29,
"learning_rate": 0.0001706705609406323,
"loss": 4.1924,
"step": 689000
},
{
"epoch": 1.29,
"learning_rate": 0.00017057670793696082,
"loss": 4.1897,
"step": 689500
},
{
"epoch": 1.3,
"learning_rate": 0.00017048285493328927,
"loss": 4.1903,
"step": 690000
},
{
"epoch": 1.3,
"learning_rate": 0.00017038900192961772,
"loss": 4.1983,
"step": 690500
},
{
"epoch": 1.3,
"learning_rate": 0.00017029514892594622,
"loss": 4.199,
"step": 691000
},
{
"epoch": 1.3,
"learning_rate": 0.00017020129592227467,
"loss": 4.216,
"step": 691500
},
{
"epoch": 1.3,
"learning_rate": 0.00017010744291860315,
"loss": 4.1864,
"step": 692000
},
{
"epoch": 1.3,
"learning_rate": 0.00017001358991493163,
"loss": 4.1839,
"step": 692500
},
{
"epoch": 1.3,
"learning_rate": 0.0001699197369112601,
"loss": 4.2082,
"step": 693000
},
{
"epoch": 1.3,
"learning_rate": 0.00016982588390758856,
"loss": 4.1988,
"step": 693500
},
{
"epoch": 1.3,
"learning_rate": 0.00016973203090391706,
"loss": 4.2104,
"step": 694000
},
{
"epoch": 1.3,
"learning_rate": 0.0001696381779002455,
"loss": 4.1891,
"step": 694500
},
{
"epoch": 1.3,
"learning_rate": 0.00016954432489657396,
"loss": 4.1901,
"step": 695000
},
{
"epoch": 1.31,
"learning_rate": 0.00016945047189290247,
"loss": 4.1977,
"step": 695500
},
{
"epoch": 1.31,
"learning_rate": 0.00016935661888923092,
"loss": 4.1946,
"step": 696000
},
{
"epoch": 1.31,
"learning_rate": 0.0001692627658855594,
"loss": 4.1983,
"step": 696500
},
{
"epoch": 1.31,
"learning_rate": 0.00016916891288188784,
"loss": 4.1952,
"step": 697000
},
{
"epoch": 1.31,
"learning_rate": 0.00016907505987821635,
"loss": 4.2005,
"step": 697500
},
{
"epoch": 1.31,
"learning_rate": 0.0001689812068745448,
"loss": 4.1933,
"step": 698000
},
{
"epoch": 1.31,
"learning_rate": 0.00016888735387087325,
"loss": 4.2106,
"step": 698500
},
{
"epoch": 1.31,
"learning_rate": 0.00016879350086720175,
"loss": 4.1811,
"step": 699000
},
{
"epoch": 1.31,
"learning_rate": 0.0001686996478635302,
"loss": 4.1948,
"step": 699500
},
{
"epoch": 1.31,
"learning_rate": 0.00016860579485985865,
"loss": 4.2029,
"step": 700000
},
{
"epoch": 1.31,
"learning_rate": 0.00016851194185618716,
"loss": 4.1943,
"step": 700500
},
{
"epoch": 1.32,
"learning_rate": 0.0001684180888525156,
"loss": 4.185,
"step": 701000
},
{
"epoch": 1.32,
"learning_rate": 0.00016832423584884409,
"loss": 4.1702,
"step": 701500
},
{
"epoch": 1.32,
"learning_rate": 0.00016823038284517256,
"loss": 4.1864,
"step": 702000
},
{
"epoch": 1.32,
"learning_rate": 0.00016813652984150104,
"loss": 4.1979,
"step": 702500
},
{
"epoch": 1.32,
"learning_rate": 0.0001680426768378295,
"loss": 4.1825,
"step": 703000
},
{
"epoch": 1.32,
"learning_rate": 0.000167948823834158,
"loss": 4.2068,
"step": 703500
},
{
"epoch": 1.32,
"learning_rate": 0.00016785497083048645,
"loss": 4.1958,
"step": 704000
},
{
"epoch": 1.32,
"learning_rate": 0.0001677611178268149,
"loss": 4.1965,
"step": 704500
},
{
"epoch": 1.32,
"learning_rate": 0.0001676672648231434,
"loss": 4.1962,
"step": 705000
},
{
"epoch": 1.32,
"learning_rate": 0.00016757341181947185,
"loss": 4.1855,
"step": 705500
},
{
"epoch": 1.33,
"learning_rate": 0.00016747955881580033,
"loss": 4.1735,
"step": 706000
},
{
"epoch": 1.33,
"learning_rate": 0.0001673857058121288,
"loss": 4.1977,
"step": 706500
},
{
"epoch": 1.33,
"learning_rate": 0.00016729185280845728,
"loss": 4.1997,
"step": 707000
},
{
"epoch": 1.33,
"learning_rate": 0.00016719799980478573,
"loss": 4.1971,
"step": 707500
},
{
"epoch": 1.33,
"learning_rate": 0.00016710414680111424,
"loss": 4.1821,
"step": 708000
},
{
"epoch": 1.33,
"learning_rate": 0.0001670102937974427,
"loss": 4.1996,
"step": 708500
},
{
"epoch": 1.33,
"learning_rate": 0.00016691644079377114,
"loss": 4.1816,
"step": 709000
},
{
"epoch": 1.33,
"learning_rate": 0.00016682258779009962,
"loss": 4.1945,
"step": 709500
},
{
"epoch": 1.33,
"learning_rate": 0.0001667287347864281,
"loss": 4.2,
"step": 710000
},
{
"epoch": 1.33,
"learning_rate": 0.00016663488178275654,
"loss": 4.1884,
"step": 710500
},
{
"epoch": 1.33,
"learning_rate": 0.00016654102877908502,
"loss": 4.2082,
"step": 711000
},
{
"epoch": 1.34,
"learning_rate": 0.0001664471757754135,
"loss": 4.1925,
"step": 711500
},
{
"epoch": 1.34,
"learning_rate": 0.00016635332277174198,
"loss": 4.1997,
"step": 712000
},
{
"epoch": 1.34,
"learning_rate": 0.00016625946976807043,
"loss": 4.178,
"step": 712500
},
{
"epoch": 1.34,
"learning_rate": 0.00016616561676439893,
"loss": 4.1777,
"step": 713000
},
{
"epoch": 1.34,
"learning_rate": 0.00016607176376072738,
"loss": 4.1983,
"step": 713500
},
{
"epoch": 1.34,
"learning_rate": 0.00016597791075705583,
"loss": 4.1841,
"step": 714000
},
{
"epoch": 1.34,
"learning_rate": 0.00016588405775338434,
"loss": 4.192,
"step": 714500
},
{
"epoch": 1.34,
"learning_rate": 0.0001657902047497128,
"loss": 4.1998,
"step": 715000
},
{
"epoch": 1.34,
"learning_rate": 0.00016569635174604126,
"loss": 4.1924,
"step": 715500
},
{
"epoch": 1.34,
"learning_rate": 0.00016560249874236974,
"loss": 4.1828,
"step": 716000
},
{
"epoch": 1.34,
"learning_rate": 0.00016550864573869822,
"loss": 4.1877,
"step": 716500
},
{
"epoch": 1.35,
"learning_rate": 0.00016541479273502667,
"loss": 4.182,
"step": 717000
},
{
"epoch": 1.35,
"learning_rate": 0.00016532093973135517,
"loss": 4.1934,
"step": 717500
},
{
"epoch": 1.35,
"learning_rate": 0.00016522708672768362,
"loss": 4.21,
"step": 718000
},
{
"epoch": 1.35,
"learning_rate": 0.00016513323372401207,
"loss": 4.1916,
"step": 718500
},
{
"epoch": 1.35,
"learning_rate": 0.00016503938072034058,
"loss": 4.1703,
"step": 719000
},
{
"epoch": 1.35,
"learning_rate": 0.00016494552771666903,
"loss": 4.1835,
"step": 719500
},
{
"epoch": 1.35,
"learning_rate": 0.0001648516747129975,
"loss": 4.1846,
"step": 720000
},
{
"epoch": 1.35,
"learning_rate": 0.00016475782170932596,
"loss": 4.1917,
"step": 720500
},
{
"epoch": 1.35,
"learning_rate": 0.00016466396870565443,
"loss": 4.1941,
"step": 721000
},
{
"epoch": 1.35,
"learning_rate": 0.0001645701157019829,
"loss": 4.1991,
"step": 721500
},
{
"epoch": 1.36,
"learning_rate": 0.00016447626269831136,
"loss": 4.1985,
"step": 722000
},
{
"epoch": 1.36,
"learning_rate": 0.00016438240969463987,
"loss": 4.193,
"step": 722500
},
{
"epoch": 1.36,
"learning_rate": 0.00016428855669096832,
"loss": 4.1767,
"step": 723000
},
{
"epoch": 1.36,
"learning_rate": 0.00016419470368729677,
"loss": 4.1779,
"step": 723500
},
{
"epoch": 1.36,
"learning_rate": 0.00016410085068362527,
"loss": 4.2041,
"step": 724000
},
{
"epoch": 1.36,
"learning_rate": 0.00016400699767995372,
"loss": 4.1821,
"step": 724500
},
{
"epoch": 1.36,
"learning_rate": 0.0001639131446762822,
"loss": 4.2033,
"step": 725000
},
{
"epoch": 1.36,
"learning_rate": 0.00016381929167261068,
"loss": 4.1786,
"step": 725500
},
{
"epoch": 1.36,
"learning_rate": 0.00016372543866893915,
"loss": 4.1741,
"step": 726000
},
{
"epoch": 1.36,
"learning_rate": 0.0001636315856652676,
"loss": 4.2011,
"step": 726500
},
{
"epoch": 1.36,
"learning_rate": 0.0001635377326615961,
"loss": 4.1889,
"step": 727000
},
{
"epoch": 1.37,
"learning_rate": 0.00016344387965792456,
"loss": 4.1788,
"step": 727500
},
{
"epoch": 1.37,
"learning_rate": 0.000163350026654253,
"loss": 4.1782,
"step": 728000
},
{
"epoch": 1.37,
"learning_rate": 0.00016325617365058151,
"loss": 4.1972,
"step": 728500
},
{
"epoch": 1.37,
"learning_rate": 0.00016316232064690996,
"loss": 4.1815,
"step": 729000
},
{
"epoch": 1.37,
"learning_rate": 0.00016306846764323844,
"loss": 4.1941,
"step": 729500
},
{
"epoch": 1.37,
"learning_rate": 0.00016297461463956692,
"loss": 4.2002,
"step": 730000
},
{
"epoch": 1.37,
"learning_rate": 0.0001628807616358954,
"loss": 4.1908,
"step": 730500
},
{
"epoch": 1.37,
"learning_rate": 0.00016278690863222385,
"loss": 4.1763,
"step": 731000
},
{
"epoch": 1.37,
"learning_rate": 0.00016269305562855232,
"loss": 4.1936,
"step": 731500
},
{
"epoch": 1.37,
"learning_rate": 0.0001625992026248808,
"loss": 4.1784,
"step": 732000
},
{
"epoch": 1.37,
"learning_rate": 0.00016250534962120925,
"loss": 4.1885,
"step": 732500
},
{
"epoch": 1.38,
"learning_rate": 0.00016241149661753773,
"loss": 4.1771,
"step": 733000
},
{
"epoch": 1.38,
"learning_rate": 0.0001623176436138662,
"loss": 4.191,
"step": 733500
},
{
"epoch": 1.38,
"learning_rate": 0.00016222379061019466,
"loss": 4.1733,
"step": 734000
},
{
"epoch": 1.38,
"learning_rate": 0.00016212993760652313,
"loss": 4.1905,
"step": 734500
},
{
"epoch": 1.38,
"learning_rate": 0.0001620360846028516,
"loss": 4.1791,
"step": 735000
},
{
"epoch": 1.38,
"learning_rate": 0.0001619422315991801,
"loss": 4.1753,
"step": 735500
},
{
"epoch": 1.38,
"learning_rate": 0.00016184837859550854,
"loss": 4.1822,
"step": 736000
},
{
"epoch": 1.38,
"learning_rate": 0.00016175452559183704,
"loss": 4.1787,
"step": 736500
},
{
"epoch": 1.38,
"learning_rate": 0.0001616606725881655,
"loss": 4.1932,
"step": 737000
},
{
"epoch": 1.38,
"learning_rate": 0.00016156681958449394,
"loss": 4.1827,
"step": 737500
},
{
"epoch": 1.39,
"learning_rate": 0.00016147296658082245,
"loss": 4.1915,
"step": 738000
},
{
"epoch": 1.39,
"learning_rate": 0.0001613791135771509,
"loss": 4.1811,
"step": 738500
},
{
"epoch": 1.39,
"learning_rate": 0.00016128526057347938,
"loss": 4.1798,
"step": 739000
},
{
"epoch": 1.39,
"learning_rate": 0.00016119140756980785,
"loss": 4.1899,
"step": 739500
},
{
"epoch": 1.39,
"learning_rate": 0.00016109755456613633,
"loss": 4.2125,
"step": 740000
},
{
"epoch": 1.39,
"learning_rate": 0.00016100370156246478,
"loss": 4.1874,
"step": 740500
},
{
"epoch": 1.39,
"learning_rate": 0.0001609098485587933,
"loss": 4.1687,
"step": 741000
},
{
"epoch": 1.39,
"learning_rate": 0.00016081599555512174,
"loss": 4.1913,
"step": 741500
},
{
"epoch": 1.39,
"learning_rate": 0.0001607221425514502,
"loss": 4.1759,
"step": 742000
},
{
"epoch": 1.39,
"learning_rate": 0.0001606282895477787,
"loss": 4.1903,
"step": 742500
},
{
"epoch": 1.39,
"learning_rate": 0.00016053443654410714,
"loss": 4.1879,
"step": 743000
},
{
"epoch": 1.4,
"learning_rate": 0.00016044058354043562,
"loss": 4.1911,
"step": 743500
},
{
"epoch": 1.4,
"learning_rate": 0.0001603467305367641,
"loss": 4.1772,
"step": 744000
},
{
"epoch": 1.4,
"learning_rate": 0.00016025287753309255,
"loss": 4.2081,
"step": 744500
},
{
"epoch": 1.4,
"learning_rate": 0.00016015902452942102,
"loss": 4.1775,
"step": 745000
},
{
"epoch": 1.4,
"learning_rate": 0.00016006517152574948,
"loss": 4.2105,
"step": 745500
},
{
"epoch": 1.4,
"learning_rate": 0.00015997131852207798,
"loss": 4.193,
"step": 746000
},
{
"epoch": 1.4,
"learning_rate": 0.00015987746551840643,
"loss": 4.1761,
"step": 746500
},
{
"epoch": 1.4,
"learning_rate": 0.00015978361251473488,
"loss": 4.1868,
"step": 747000
},
{
"epoch": 1.4,
"learning_rate": 0.00015968975951106338,
"loss": 4.171,
"step": 747500
},
{
"epoch": 1.4,
"learning_rate": 0.00015959590650739184,
"loss": 4.1779,
"step": 748000
},
{
"epoch": 1.4,
"learning_rate": 0.0001595020535037203,
"loss": 4.1982,
"step": 748500
},
{
"epoch": 1.41,
"learning_rate": 0.0001594082005000488,
"loss": 4.186,
"step": 749000
},
{
"epoch": 1.41,
"learning_rate": 0.00015931434749637727,
"loss": 4.1693,
"step": 749500
},
{
"epoch": 1.41,
"learning_rate": 0.00015922049449270572,
"loss": 4.1854,
"step": 750000
},
{
"epoch": 1.41,
"learning_rate": 0.00015912664148903422,
"loss": 4.1853,
"step": 750500
},
{
"epoch": 1.41,
"learning_rate": 0.00015903278848536267,
"loss": 4.1884,
"step": 751000
},
{
"epoch": 1.41,
"learning_rate": 0.00015893893548169112,
"loss": 4.1863,
"step": 751500
},
{
"epoch": 1.41,
"learning_rate": 0.00015884508247801963,
"loss": 4.192,
"step": 752000
},
{
"epoch": 1.41,
"learning_rate": 0.00015875122947434808,
"loss": 4.1714,
"step": 752500
},
{
"epoch": 1.41,
"learning_rate": 0.00015865737647067656,
"loss": 4.1813,
"step": 753000
},
{
"epoch": 1.41,
"learning_rate": 0.00015856352346700503,
"loss": 4.1751,
"step": 753500
},
{
"epoch": 1.42,
"learning_rate": 0.0001584696704633335,
"loss": 4.1841,
"step": 754000
},
{
"epoch": 1.42,
"learning_rate": 0.00015837581745966196,
"loss": 4.1959,
"step": 754500
},
{
"epoch": 1.42,
"learning_rate": 0.00015828196445599044,
"loss": 4.1777,
"step": 755000
},
{
"epoch": 1.42,
"learning_rate": 0.00015818811145231892,
"loss": 4.1776,
"step": 755500
},
{
"epoch": 1.42,
"learning_rate": 0.00015809425844864737,
"loss": 4.1805,
"step": 756000
},
{
"epoch": 1.42,
"learning_rate": 0.00015800040544497584,
"loss": 4.1792,
"step": 756500
},
{
"epoch": 1.42,
"learning_rate": 0.00015790655244130432,
"loss": 4.1975,
"step": 757000
},
{
"epoch": 1.42,
"learning_rate": 0.00015781269943763277,
"loss": 4.1828,
"step": 757500
},
{
"epoch": 1.42,
"learning_rate": 0.00015771884643396125,
"loss": 4.182,
"step": 758000
},
{
"epoch": 1.42,
"learning_rate": 0.00015762499343028973,
"loss": 4.1866,
"step": 758500
},
{
"epoch": 1.42,
"learning_rate": 0.0001575311404266182,
"loss": 4.1729,
"step": 759000
},
{
"epoch": 1.43,
"learning_rate": 0.00015743728742294665,
"loss": 4.1691,
"step": 759500
},
{
"epoch": 1.43,
"learning_rate": 0.00015734343441927516,
"loss": 4.1738,
"step": 760000
},
{
"epoch": 1.43,
"learning_rate": 0.0001572495814156036,
"loss": 4.186,
"step": 760500
},
{
"epoch": 1.43,
"learning_rate": 0.00015715572841193206,
"loss": 4.1774,
"step": 761000
},
{
"epoch": 1.43,
"learning_rate": 0.00015706187540826056,
"loss": 4.1862,
"step": 761500
},
{
"epoch": 1.43,
"learning_rate": 0.000156968022404589,
"loss": 4.1813,
"step": 762000
},
{
"epoch": 1.43,
"learning_rate": 0.0001568741694009175,
"loss": 4.1995,
"step": 762500
},
{
"epoch": 1.43,
"learning_rate": 0.00015678031639724597,
"loss": 4.1895,
"step": 763000
},
{
"epoch": 1.43,
"learning_rate": 0.00015668646339357445,
"loss": 4.1856,
"step": 763500
},
{
"epoch": 1.43,
"learning_rate": 0.0001565926103899029,
"loss": 4.1886,
"step": 764000
},
{
"epoch": 1.44,
"learning_rate": 0.0001564987573862314,
"loss": 4.185,
"step": 764500
},
{
"epoch": 1.44,
"learning_rate": 0.00015640490438255985,
"loss": 4.1698,
"step": 765000
},
{
"epoch": 1.44,
"learning_rate": 0.0001563110513788883,
"loss": 4.1734,
"step": 765500
},
{
"epoch": 1.44,
"learning_rate": 0.0001562171983752168,
"loss": 4.1807,
"step": 766000
},
{
"epoch": 1.44,
"learning_rate": 0.00015612334537154526,
"loss": 4.1649,
"step": 766500
},
{
"epoch": 1.44,
"learning_rate": 0.00015602949236787373,
"loss": 4.1836,
"step": 767000
},
{
"epoch": 1.44,
"learning_rate": 0.0001559356393642022,
"loss": 4.1665,
"step": 767500
},
{
"epoch": 1.44,
"learning_rate": 0.00015584178636053066,
"loss": 4.1743,
"step": 768000
},
{
"epoch": 1.44,
"learning_rate": 0.00015574793335685914,
"loss": 4.1779,
"step": 768500
},
{
"epoch": 1.44,
"learning_rate": 0.0001556540803531876,
"loss": 4.1824,
"step": 769000
},
{
"epoch": 1.44,
"learning_rate": 0.0001555602273495161,
"loss": 4.19,
"step": 769500
},
{
"epoch": 1.45,
"learning_rate": 0.00015546637434584454,
"loss": 4.1761,
"step": 770000
},
{
"epoch": 1.45,
"learning_rate": 0.000155372521342173,
"loss": 4.1614,
"step": 770500
},
{
"epoch": 1.45,
"learning_rate": 0.0001552786683385015,
"loss": 4.1856,
"step": 771000
},
{
"epoch": 1.45,
"learning_rate": 0.00015518481533482995,
"loss": 4.1783,
"step": 771500
},
{
"epoch": 1.45,
"learning_rate": 0.00015509096233115843,
"loss": 4.1799,
"step": 772000
},
{
"epoch": 1.45,
"learning_rate": 0.0001549971093274869,
"loss": 4.1716,
"step": 772500
},
{
"epoch": 1.45,
"learning_rate": 0.00015490325632381538,
"loss": 4.1699,
"step": 773000
},
{
"epoch": 1.45,
"learning_rate": 0.00015480940332014383,
"loss": 4.181,
"step": 773500
},
{
"epoch": 1.45,
"learning_rate": 0.00015471555031647234,
"loss": 4.1692,
"step": 774000
},
{
"epoch": 1.45,
"learning_rate": 0.00015462169731280079,
"loss": 4.1682,
"step": 774500
},
{
"epoch": 1.45,
"learning_rate": 0.00015452784430912924,
"loss": 4.1863,
"step": 775000
},
{
"epoch": 1.46,
"learning_rate": 0.00015443399130545774,
"loss": 4.1728,
"step": 775500
},
{
"epoch": 1.46,
"learning_rate": 0.0001543401383017862,
"loss": 4.1685,
"step": 776000
},
{
"epoch": 1.46,
"learning_rate": 0.00015424628529811467,
"loss": 4.1935,
"step": 776500
},
{
"epoch": 1.46,
"learning_rate": 0.00015415243229444315,
"loss": 4.1843,
"step": 777000
},
{
"epoch": 1.46,
"learning_rate": 0.00015405857929077162,
"loss": 4.1879,
"step": 777500
},
{
"epoch": 1.46,
"learning_rate": 0.00015396472628710007,
"loss": 4.1884,
"step": 778000
},
{
"epoch": 1.46,
"learning_rate": 0.00015387087328342855,
"loss": 4.1629,
"step": 778500
},
{
"epoch": 1.46,
"learning_rate": 0.00015377702027975703,
"loss": 4.1926,
"step": 779000
},
{
"epoch": 1.46,
"learning_rate": 0.00015368316727608548,
"loss": 4.174,
"step": 779500
},
{
"epoch": 1.46,
"learning_rate": 0.00015358931427241398,
"loss": 4.1655,
"step": 780000
},
{
"epoch": 1.47,
"learning_rate": 0.00015349546126874243,
"loss": 4.1828,
"step": 780500
},
{
"epoch": 1.47,
"learning_rate": 0.00015340160826507088,
"loss": 4.1699,
"step": 781000
},
{
"epoch": 1.47,
"learning_rate": 0.00015330775526139936,
"loss": 4.1737,
"step": 781500
},
{
"epoch": 1.47,
"learning_rate": 0.00015321390225772784,
"loss": 4.1799,
"step": 782000
},
{
"epoch": 1.47,
"learning_rate": 0.00015312004925405632,
"loss": 4.1621,
"step": 782500
},
{
"epoch": 1.47,
"learning_rate": 0.00015302619625038477,
"loss": 4.1555,
"step": 783000
},
{
"epoch": 1.47,
"learning_rate": 0.00015293234324671327,
"loss": 4.1955,
"step": 783500
},
{
"epoch": 1.47,
"learning_rate": 0.00015283849024304172,
"loss": 4.1794,
"step": 784000
},
{
"epoch": 1.47,
"learning_rate": 0.00015274463723937017,
"loss": 4.1778,
"step": 784500
},
{
"epoch": 1.47,
"learning_rate": 0.00015265078423569868,
"loss": 4.1704,
"step": 785000
},
{
"epoch": 1.47,
"learning_rate": 0.00015255693123202713,
"loss": 4.1734,
"step": 785500
},
{
"epoch": 1.48,
"learning_rate": 0.0001524630782283556,
"loss": 4.1817,
"step": 786000
},
{
"epoch": 1.48,
"learning_rate": 0.00015236922522468408,
"loss": 4.172,
"step": 786500
},
{
"epoch": 1.48,
"learning_rate": 0.00015227537222101256,
"loss": 4.168,
"step": 787000
},
{
"epoch": 1.48,
"learning_rate": 0.000152181519217341,
"loss": 4.1887,
"step": 787500
},
{
"epoch": 1.48,
"learning_rate": 0.0001520876662136695,
"loss": 4.1689,
"step": 788000
},
{
"epoch": 1.48,
"learning_rate": 0.00015199381320999796,
"loss": 4.1787,
"step": 788500
},
{
"epoch": 1.48,
"learning_rate": 0.00015189996020632641,
"loss": 4.1706,
"step": 789000
},
{
"epoch": 1.48,
"learning_rate": 0.00015180610720265492,
"loss": 4.1888,
"step": 789500
},
{
"epoch": 1.48,
"learning_rate": 0.00015171225419898337,
"loss": 4.1842,
"step": 790000
},
{
"epoch": 1.48,
"learning_rate": 0.00015161840119531185,
"loss": 4.1832,
"step": 790500
},
{
"epoch": 1.48,
"learning_rate": 0.00015152454819164032,
"loss": 4.165,
"step": 791000
},
{
"epoch": 1.49,
"learning_rate": 0.00015143069518796877,
"loss": 4.1736,
"step": 791500
},
{
"epoch": 1.49,
"learning_rate": 0.00015133684218429725,
"loss": 4.1654,
"step": 792000
},
{
"epoch": 1.49,
"learning_rate": 0.00015124298918062573,
"loss": 4.1936,
"step": 792500
},
{
"epoch": 1.49,
"learning_rate": 0.0001511491361769542,
"loss": 4.1598,
"step": 793000
},
{
"epoch": 1.49,
"learning_rate": 0.00015105528317328266,
"loss": 4.1692,
"step": 793500
},
{
"epoch": 1.49,
"learning_rate": 0.0001509614301696111,
"loss": 4.1788,
"step": 794000
},
{
"epoch": 1.49,
"learning_rate": 0.0001508675771659396,
"loss": 4.1794,
"step": 794500
},
{
"epoch": 1.49,
"learning_rate": 0.00015077372416226806,
"loss": 4.1873,
"step": 795000
},
{
"epoch": 1.49,
"learning_rate": 0.00015067987115859654,
"loss": 4.1755,
"step": 795500
},
{
"epoch": 1.49,
"learning_rate": 0.00015058601815492502,
"loss": 4.1757,
"step": 796000
},
{
"epoch": 1.5,
"learning_rate": 0.0001504921651512535,
"loss": 4.1948,
"step": 796500
},
{
"epoch": 1.5,
"learning_rate": 0.00015039831214758194,
"loss": 4.1691,
"step": 797000
},
{
"epoch": 1.5,
"learning_rate": 0.00015030445914391045,
"loss": 4.1865,
"step": 797500
},
{
"epoch": 1.5,
"learning_rate": 0.0001502106061402389,
"loss": 4.1511,
"step": 798000
},
{
"epoch": 1.5,
"learning_rate": 0.00015011675313656735,
"loss": 4.1769,
"step": 798500
},
{
"epoch": 1.5,
"learning_rate": 0.00015002290013289585,
"loss": 4.1835,
"step": 799000
},
{
"epoch": 1.5,
"learning_rate": 0.0001499290471292243,
"loss": 4.1644,
"step": 799500
},
{
"epoch": 1.5,
"learning_rate": 0.00014983519412555278,
"loss": 4.1547,
"step": 800000
},
{
"epoch": 1.5,
"learning_rate": 0.00014974134112188126,
"loss": 4.1744,
"step": 800500
},
{
"epoch": 1.5,
"learning_rate": 0.00014964748811820974,
"loss": 4.1791,
"step": 801000
},
{
"epoch": 1.5,
"learning_rate": 0.00014955363511453821,
"loss": 4.1763,
"step": 801500
},
{
"epoch": 1.51,
"learning_rate": 0.00014945978211086666,
"loss": 4.1831,
"step": 802000
},
{
"epoch": 1.51,
"learning_rate": 0.00014936592910719514,
"loss": 4.1795,
"step": 802500
},
{
"epoch": 1.51,
"learning_rate": 0.0001492720761035236,
"loss": 4.1889,
"step": 803000
},
{
"epoch": 1.51,
"learning_rate": 0.00014917822309985207,
"loss": 4.1814,
"step": 803500
},
{
"epoch": 1.51,
"learning_rate": 0.00014908437009618055,
"loss": 4.1669,
"step": 804000
},
{
"epoch": 1.51,
"learning_rate": 0.000148990517092509,
"loss": 4.1695,
"step": 804500
},
{
"epoch": 1.51,
"learning_rate": 0.00014889666408883747,
"loss": 4.1616,
"step": 805000
},
{
"epoch": 1.51,
"learning_rate": 0.00014880281108516595,
"loss": 4.1777,
"step": 805500
},
{
"epoch": 1.51,
"learning_rate": 0.00014870895808149443,
"loss": 4.1715,
"step": 806000
},
{
"epoch": 1.51,
"learning_rate": 0.0001486151050778229,
"loss": 4.181,
"step": 806500
},
{
"epoch": 1.51,
"learning_rate": 0.00014852125207415138,
"loss": 4.1551,
"step": 807000
},
{
"epoch": 1.52,
"learning_rate": 0.00014842739907047983,
"loss": 4.1772,
"step": 807500
},
{
"epoch": 1.52,
"learning_rate": 0.0001483335460668083,
"loss": 4.1738,
"step": 808000
},
{
"epoch": 1.52,
"learning_rate": 0.00014823969306313676,
"loss": 4.1522,
"step": 808500
},
{
"epoch": 1.52,
"learning_rate": 0.00014814584005946524,
"loss": 4.1626,
"step": 809000
},
{
"epoch": 1.52,
"learning_rate": 0.00014805198705579372,
"loss": 4.164,
"step": 809500
},
{
"epoch": 1.52,
"learning_rate": 0.0001479581340521222,
"loss": 4.1861,
"step": 810000
},
{
"epoch": 1.52,
"learning_rate": 0.00014786428104845067,
"loss": 4.161,
"step": 810500
},
{
"epoch": 1.52,
"learning_rate": 0.00014777042804477915,
"loss": 4.1602,
"step": 811000
},
{
"epoch": 1.52,
"learning_rate": 0.0001476765750411076,
"loss": 4.1881,
"step": 811500
},
{
"epoch": 1.52,
"learning_rate": 0.00014758272203743608,
"loss": 4.1601,
"step": 812000
},
{
"epoch": 1.53,
"learning_rate": 0.00014748886903376455,
"loss": 4.1681,
"step": 812500
},
{
"epoch": 1.53,
"learning_rate": 0.000147395016030093,
"loss": 4.1703,
"step": 813000
},
{
"epoch": 1.53,
"learning_rate": 0.00014730116302642148,
"loss": 4.1818,
"step": 813500
},
{
"epoch": 1.53,
"learning_rate": 0.00014720731002274996,
"loss": 4.154,
"step": 814000
},
{
"epoch": 1.53,
"learning_rate": 0.0001471134570190784,
"loss": 4.1879,
"step": 814500
},
{
"epoch": 1.53,
"learning_rate": 0.0001470196040154069,
"loss": 4.1641,
"step": 815000
},
{
"epoch": 1.53,
"learning_rate": 0.00014692575101173536,
"loss": 4.1616,
"step": 815500
},
{
"epoch": 1.53,
"learning_rate": 0.00014683189800806384,
"loss": 4.18,
"step": 816000
},
{
"epoch": 1.53,
"learning_rate": 0.00014673804500439232,
"loss": 4.1826,
"step": 816500
},
{
"epoch": 1.53,
"learning_rate": 0.00014664419200072077,
"loss": 4.164,
"step": 817000
},
{
"epoch": 1.53,
"learning_rate": 0.00014655033899704925,
"loss": 4.1907,
"step": 817500
},
{
"epoch": 1.54,
"learning_rate": 0.00014645648599337772,
"loss": 4.1632,
"step": 818000
},
{
"epoch": 1.54,
"learning_rate": 0.00014636263298970618,
"loss": 4.1794,
"step": 818500
},
{
"epoch": 1.54,
"learning_rate": 0.00014626877998603465,
"loss": 4.1945,
"step": 819000
},
{
"epoch": 1.54,
"learning_rate": 0.00014617492698236313,
"loss": 4.1681,
"step": 819500
},
{
"epoch": 1.54,
"learning_rate": 0.0001460810739786916,
"loss": 4.1524,
"step": 820000
},
{
"epoch": 1.54,
"learning_rate": 0.00014598722097502008,
"loss": 4.1832,
"step": 820500
},
{
"epoch": 1.54,
"learning_rate": 0.00014589336797134854,
"loss": 4.1577,
"step": 821000
},
{
"epoch": 1.54,
"learning_rate": 0.000145799514967677,
"loss": 4.1752,
"step": 821500
},
{
"epoch": 1.54,
"learning_rate": 0.0001457056619640055,
"loss": 4.1663,
"step": 822000
},
{
"epoch": 1.54,
"learning_rate": 0.00014561180896033394,
"loss": 4.1744,
"step": 822500
},
{
"epoch": 1.54,
"learning_rate": 0.00014551795595666242,
"loss": 4.1754,
"step": 823000
},
{
"epoch": 1.55,
"learning_rate": 0.0001454241029529909,
"loss": 4.1776,
"step": 823500
},
{
"epoch": 1.55,
"learning_rate": 0.00014533024994931937,
"loss": 4.195,
"step": 824000
},
{
"epoch": 1.55,
"learning_rate": 0.00014523639694564785,
"loss": 4.1709,
"step": 824500
},
{
"epoch": 1.55,
"learning_rate": 0.0001451425439419763,
"loss": 4.176,
"step": 825000
},
{
"epoch": 1.55,
"learning_rate": 0.00014504869093830478,
"loss": 4.1478,
"step": 825500
},
{
"epoch": 1.55,
"learning_rate": 0.00014495483793463326,
"loss": 4.1835,
"step": 826000
},
{
"epoch": 1.55,
"learning_rate": 0.0001448609849309617,
"loss": 4.1651,
"step": 826500
},
{
"epoch": 1.55,
"learning_rate": 0.00014476713192729018,
"loss": 4.1713,
"step": 827000
},
{
"epoch": 1.55,
"learning_rate": 0.00014467327892361866,
"loss": 4.1648,
"step": 827500
},
{
"epoch": 1.55,
"learning_rate": 0.0001445794259199471,
"loss": 4.151,
"step": 828000
},
{
"epoch": 1.56,
"learning_rate": 0.0001444855729162756,
"loss": 4.1574,
"step": 828500
},
{
"epoch": 1.56,
"learning_rate": 0.00014439171991260407,
"loss": 4.1701,
"step": 829000
},
{
"epoch": 1.56,
"learning_rate": 0.00014429786690893254,
"loss": 4.1571,
"step": 829500
},
{
"epoch": 1.56,
"learning_rate": 0.00014420401390526102,
"loss": 4.1607,
"step": 830000
},
{
"epoch": 1.56,
"learning_rate": 0.0001441101609015895,
"loss": 4.164,
"step": 830500
},
{
"epoch": 1.56,
"learning_rate": 0.00014401630789791795,
"loss": 4.1665,
"step": 831000
},
{
"epoch": 1.56,
"learning_rate": 0.00014392245489424643,
"loss": 4.1664,
"step": 831500
},
{
"epoch": 1.56,
"learning_rate": 0.0001438286018905749,
"loss": 4.167,
"step": 832000
},
{
"epoch": 1.56,
"learning_rate": 0.00014373474888690335,
"loss": 4.1545,
"step": 832500
},
{
"epoch": 1.56,
"learning_rate": 0.00014364089588323183,
"loss": 4.1721,
"step": 833000
},
{
"epoch": 1.56,
"learning_rate": 0.0001435470428795603,
"loss": 4.166,
"step": 833500
},
{
"epoch": 1.57,
"learning_rate": 0.00014345318987588879,
"loss": 4.1592,
"step": 834000
},
{
"epoch": 1.57,
"learning_rate": 0.00014335933687221726,
"loss": 4.1648,
"step": 834500
},
{
"epoch": 1.57,
"learning_rate": 0.0001432654838685457,
"loss": 4.1613,
"step": 835000
},
{
"epoch": 1.57,
"learning_rate": 0.0001431716308648742,
"loss": 4.1674,
"step": 835500
},
{
"epoch": 1.57,
"learning_rate": 0.00014307777786120267,
"loss": 4.1785,
"step": 836000
},
{
"epoch": 1.57,
"learning_rate": 0.00014298392485753112,
"loss": 4.1788,
"step": 836500
},
{
"epoch": 1.57,
"learning_rate": 0.0001428900718538596,
"loss": 4.1522,
"step": 837000
},
{
"epoch": 1.57,
"learning_rate": 0.00014279621885018807,
"loss": 4.1723,
"step": 837500
},
{
"epoch": 1.57,
"learning_rate": 0.00014270236584651652,
"loss": 4.1717,
"step": 838000
},
{
"epoch": 1.57,
"learning_rate": 0.000142608512842845,
"loss": 4.1589,
"step": 838500
},
{
"epoch": 1.57,
"learning_rate": 0.00014251465983917348,
"loss": 4.1685,
"step": 839000
},
{
"epoch": 1.58,
"learning_rate": 0.00014242080683550196,
"loss": 4.1608,
"step": 839500
},
{
"epoch": 1.58,
"learning_rate": 0.00014232695383183043,
"loss": 4.157,
"step": 840000
},
{
"epoch": 1.58,
"learning_rate": 0.00014223310082815888,
"loss": 4.1499,
"step": 840500
},
{
"epoch": 1.58,
"learning_rate": 0.00014213924782448736,
"loss": 4.1432,
"step": 841000
},
{
"epoch": 1.58,
"learning_rate": 0.00014204539482081584,
"loss": 4.1689,
"step": 841500
},
{
"epoch": 1.58,
"learning_rate": 0.0001419515418171443,
"loss": 4.1645,
"step": 842000
},
{
"epoch": 1.58,
"learning_rate": 0.00014185768881347277,
"loss": 4.1648,
"step": 842500
},
{
"epoch": 1.58,
"learning_rate": 0.00014176383580980124,
"loss": 4.1825,
"step": 843000
},
{
"epoch": 1.58,
"learning_rate": 0.00014166998280612972,
"loss": 4.1707,
"step": 843500
},
{
"epoch": 1.58,
"learning_rate": 0.0001415761298024582,
"loss": 4.1767,
"step": 844000
},
{
"epoch": 1.59,
"learning_rate": 0.00014148227679878665,
"loss": 4.1638,
"step": 844500
},
{
"epoch": 1.59,
"learning_rate": 0.00014138842379511513,
"loss": 4.1606,
"step": 845000
},
{
"epoch": 1.59,
"learning_rate": 0.0001412945707914436,
"loss": 4.1743,
"step": 845500
},
{
"epoch": 1.59,
"learning_rate": 0.00014120071778777205,
"loss": 4.1715,
"step": 846000
},
{
"epoch": 1.59,
"learning_rate": 0.00014110686478410053,
"loss": 4.16,
"step": 846500
},
{
"epoch": 1.59,
"learning_rate": 0.000141013011780429,
"loss": 4.1645,
"step": 847000
},
{
"epoch": 1.59,
"learning_rate": 0.00014091915877675749,
"loss": 4.1855,
"step": 847500
},
{
"epoch": 1.59,
"learning_rate": 0.00014082530577308596,
"loss": 4.1694,
"step": 848000
},
{
"epoch": 1.59,
"learning_rate": 0.00014073145276941441,
"loss": 4.1787,
"step": 848500
},
{
"epoch": 1.59,
"learning_rate": 0.0001406375997657429,
"loss": 4.153,
"step": 849000
},
{
"epoch": 1.59,
"learning_rate": 0.00014054374676207137,
"loss": 4.1593,
"step": 849500
},
{
"epoch": 1.6,
"learning_rate": 0.00014044989375839985,
"loss": 4.1566,
"step": 850000
},
{
"epoch": 1.6,
"learning_rate": 0.0001403560407547283,
"loss": 4.1693,
"step": 850500
},
{
"epoch": 1.6,
"learning_rate": 0.00014026218775105677,
"loss": 4.1543,
"step": 851000
},
{
"epoch": 1.6,
"learning_rate": 0.00014016833474738522,
"loss": 4.1569,
"step": 851500
},
{
"epoch": 1.6,
"learning_rate": 0.0001400744817437137,
"loss": 4.1569,
"step": 852000
},
{
"epoch": 1.6,
"learning_rate": 0.00013998062874004218,
"loss": 4.1807,
"step": 852500
},
{
"epoch": 1.6,
"learning_rate": 0.00013988677573637066,
"loss": 4.1469,
"step": 853000
},
{
"epoch": 1.6,
"learning_rate": 0.00013979292273269913,
"loss": 4.1802,
"step": 853500
},
{
"epoch": 1.6,
"learning_rate": 0.0001396990697290276,
"loss": 4.1752,
"step": 854000
},
{
"epoch": 1.6,
"learning_rate": 0.00013960521672535606,
"loss": 4.1669,
"step": 854500
},
{
"epoch": 1.6,
"learning_rate": 0.00013951136372168454,
"loss": 4.1645,
"step": 855000
},
{
"epoch": 1.61,
"learning_rate": 0.00013941751071801302,
"loss": 4.1575,
"step": 855500
},
{
"epoch": 1.61,
"learning_rate": 0.00013932365771434147,
"loss": 4.1702,
"step": 856000
},
{
"epoch": 1.61,
"learning_rate": 0.00013922980471066994,
"loss": 4.142,
"step": 856500
},
{
"epoch": 1.61,
"learning_rate": 0.00013913595170699842,
"loss": 4.149,
"step": 857000
},
{
"epoch": 1.61,
"learning_rate": 0.0001390420987033269,
"loss": 4.144,
"step": 857500
},
{
"epoch": 1.61,
"learning_rate": 0.00013894824569965538,
"loss": 4.1731,
"step": 858000
},
{
"epoch": 1.61,
"learning_rate": 0.00013885439269598383,
"loss": 4.1451,
"step": 858500
},
{
"epoch": 1.61,
"learning_rate": 0.0001387605396923123,
"loss": 4.1602,
"step": 859000
},
{
"epoch": 1.61,
"learning_rate": 0.00013866668668864078,
"loss": 4.1705,
"step": 859500
},
{
"epoch": 1.61,
"learning_rate": 0.00013857283368496923,
"loss": 4.1884,
"step": 860000
},
{
"epoch": 1.62,
"learning_rate": 0.0001384789806812977,
"loss": 4.1839,
"step": 860500
},
{
"epoch": 1.62,
"learning_rate": 0.00013838512767762619,
"loss": 4.1571,
"step": 861000
},
{
"epoch": 1.62,
"learning_rate": 0.00013829127467395464,
"loss": 4.1432,
"step": 861500
},
{
"epoch": 1.62,
"learning_rate": 0.00013819742167028311,
"loss": 4.1425,
"step": 862000
},
{
"epoch": 1.62,
"learning_rate": 0.0001381035686666116,
"loss": 4.1456,
"step": 862500
},
{
"epoch": 1.62,
"learning_rate": 0.00013800971566294007,
"loss": 4.1528,
"step": 863000
},
{
"epoch": 1.62,
"learning_rate": 0.00013791586265926855,
"loss": 4.1685,
"step": 863500
},
{
"epoch": 1.62,
"learning_rate": 0.000137822009655597,
"loss": 4.1539,
"step": 864000
},
{
"epoch": 1.62,
"learning_rate": 0.00013772815665192547,
"loss": 4.171,
"step": 864500
},
{
"epoch": 1.62,
"learning_rate": 0.00013763430364825395,
"loss": 4.1472,
"step": 865000
},
{
"epoch": 1.62,
"learning_rate": 0.0001375404506445824,
"loss": 4.1517,
"step": 865500
},
{
"epoch": 1.63,
"learning_rate": 0.00013744659764091088,
"loss": 4.1509,
"step": 866000
},
{
"epoch": 1.63,
"learning_rate": 0.00013735274463723936,
"loss": 4.1623,
"step": 866500
},
{
"epoch": 1.63,
"learning_rate": 0.00013725889163356783,
"loss": 4.1633,
"step": 867000
},
{
"epoch": 1.63,
"learning_rate": 0.0001371650386298963,
"loss": 4.1658,
"step": 867500
},
{
"epoch": 1.63,
"learning_rate": 0.0001370711856262248,
"loss": 4.1715,
"step": 868000
},
{
"epoch": 1.63,
"learning_rate": 0.00013697733262255324,
"loss": 4.1552,
"step": 868500
},
{
"epoch": 1.63,
"learning_rate": 0.00013688347961888172,
"loss": 4.1719,
"step": 869000
},
{
"epoch": 1.63,
"learning_rate": 0.00013678962661521017,
"loss": 4.1733,
"step": 869500
},
{
"epoch": 1.63,
"learning_rate": 0.00013669577361153864,
"loss": 4.1662,
"step": 870000
},
{
"epoch": 1.63,
"learning_rate": 0.00013660192060786712,
"loss": 4.1711,
"step": 870500
},
{
"epoch": 1.63,
"learning_rate": 0.0001365080676041956,
"loss": 4.1749,
"step": 871000
},
{
"epoch": 1.64,
"learning_rate": 0.00013641421460052408,
"loss": 4.1627,
"step": 871500
},
{
"epoch": 1.64,
"learning_rate": 0.00013632036159685253,
"loss": 4.1471,
"step": 872000
},
{
"epoch": 1.64,
"learning_rate": 0.000136226508593181,
"loss": 4.1572,
"step": 872500
},
{
"epoch": 1.64,
"learning_rate": 0.00013613265558950948,
"loss": 4.1741,
"step": 873000
},
{
"epoch": 1.64,
"learning_rate": 0.00013603880258583796,
"loss": 4.1635,
"step": 873500
},
{
"epoch": 1.64,
"learning_rate": 0.0001359449495821664,
"loss": 4.1609,
"step": 874000
},
{
"epoch": 1.64,
"learning_rate": 0.0001358510965784949,
"loss": 4.1593,
"step": 874500
},
{
"epoch": 1.64,
"learning_rate": 0.00013575724357482334,
"loss": 4.1554,
"step": 875000
},
{
"epoch": 1.64,
"learning_rate": 0.00013566339057115181,
"loss": 4.1531,
"step": 875500
},
{
"epoch": 1.64,
"learning_rate": 0.0001355695375674803,
"loss": 4.1634,
"step": 876000
},
{
"epoch": 1.65,
"learning_rate": 0.00013547568456380877,
"loss": 4.1664,
"step": 876500
},
{
"epoch": 1.65,
"learning_rate": 0.00013538183156013725,
"loss": 4.143,
"step": 877000
},
{
"epoch": 1.65,
"learning_rate": 0.00013528797855646572,
"loss": 4.1369,
"step": 877500
},
{
"epoch": 1.65,
"learning_rate": 0.00013519412555279417,
"loss": 4.149,
"step": 878000
},
{
"epoch": 1.65,
"learning_rate": 0.00013510027254912265,
"loss": 4.1502,
"step": 878500
},
{
"epoch": 1.65,
"learning_rate": 0.00013500641954545113,
"loss": 4.1547,
"step": 879000
},
{
"epoch": 1.65,
"learning_rate": 0.00013491256654177958,
"loss": 4.1675,
"step": 879500
},
{
"epoch": 1.65,
"learning_rate": 0.00013481871353810806,
"loss": 4.1688,
"step": 880000
},
{
"epoch": 1.65,
"learning_rate": 0.00013472486053443653,
"loss": 4.1762,
"step": 880500
},
{
"epoch": 1.65,
"learning_rate": 0.000134631007530765,
"loss": 4.1329,
"step": 881000
},
{
"epoch": 1.65,
"learning_rate": 0.0001345371545270935,
"loss": 4.1668,
"step": 881500
},
{
"epoch": 1.66,
"learning_rate": 0.00013444330152342194,
"loss": 4.1542,
"step": 882000
},
{
"epoch": 1.66,
"learning_rate": 0.00013434944851975042,
"loss": 4.1452,
"step": 882500
},
{
"epoch": 1.66,
"learning_rate": 0.0001342555955160789,
"loss": 4.1658,
"step": 883000
},
{
"epoch": 1.66,
"learning_rate": 0.00013416174251240734,
"loss": 4.1588,
"step": 883500
},
{
"epoch": 1.66,
"learning_rate": 0.00013406788950873582,
"loss": 4.1566,
"step": 884000
},
{
"epoch": 1.66,
"learning_rate": 0.0001339740365050643,
"loss": 4.1473,
"step": 884500
},
{
"epoch": 1.66,
"learning_rate": 0.00013388018350139275,
"loss": 4.1476,
"step": 885000
},
{
"epoch": 1.66,
"learning_rate": 0.00013378633049772123,
"loss": 4.1725,
"step": 885500
},
{
"epoch": 1.66,
"learning_rate": 0.0001336924774940497,
"loss": 4.1553,
"step": 886000
},
{
"epoch": 1.66,
"learning_rate": 0.00013359862449037818,
"loss": 4.161,
"step": 886500
},
{
"epoch": 1.66,
"learning_rate": 0.00013350477148670666,
"loss": 4.1336,
"step": 887000
},
{
"epoch": 1.67,
"learning_rate": 0.0001334109184830351,
"loss": 4.1471,
"step": 887500
},
{
"epoch": 1.67,
"learning_rate": 0.0001333170654793636,
"loss": 4.1533,
"step": 888000
},
{
"epoch": 1.67,
"learning_rate": 0.00013322321247569206,
"loss": 4.1537,
"step": 888500
},
{
"epoch": 1.67,
"learning_rate": 0.00013312935947202052,
"loss": 4.1666,
"step": 889000
},
{
"epoch": 1.67,
"learning_rate": 0.000133035506468349,
"loss": 4.1584,
"step": 889500
},
{
"epoch": 1.67,
"learning_rate": 0.00013294165346467747,
"loss": 4.1578,
"step": 890000
},
{
"epoch": 1.67,
"learning_rate": 0.00013284780046100595,
"loss": 4.1589,
"step": 890500
},
{
"epoch": 1.67,
"learning_rate": 0.00013275394745733442,
"loss": 4.1773,
"step": 891000
},
{
"epoch": 1.67,
"learning_rate": 0.0001326600944536629,
"loss": 4.1608,
"step": 891500
},
{
"epoch": 1.67,
"learning_rate": 0.00013256624144999135,
"loss": 4.1466,
"step": 892000
},
{
"epoch": 1.68,
"learning_rate": 0.00013247238844631983,
"loss": 4.1565,
"step": 892500
},
{
"epoch": 1.68,
"learning_rate": 0.00013237853544264828,
"loss": 4.1605,
"step": 893000
},
{
"epoch": 1.68,
"learning_rate": 0.00013228468243897676,
"loss": 4.1482,
"step": 893500
},
{
"epoch": 1.68,
"learning_rate": 0.00013219082943530524,
"loss": 4.1617,
"step": 894000
},
{
"epoch": 1.68,
"learning_rate": 0.0001320969764316337,
"loss": 4.1526,
"step": 894500
},
{
"epoch": 1.68,
"learning_rate": 0.00013200312342796216,
"loss": 4.1671,
"step": 895000
},
{
"epoch": 1.68,
"learning_rate": 0.00013190927042429064,
"loss": 4.1766,
"step": 895500
},
{
"epoch": 1.68,
"learning_rate": 0.00013181541742061912,
"loss": 4.1628,
"step": 896000
},
{
"epoch": 1.68,
"learning_rate": 0.0001317215644169476,
"loss": 4.1754,
"step": 896500
},
{
"epoch": 1.68,
"learning_rate": 0.00013162771141327607,
"loss": 4.1571,
"step": 897000
},
{
"epoch": 1.68,
"learning_rate": 0.00013153385840960452,
"loss": 4.1459,
"step": 897500
},
{
"epoch": 1.69,
"learning_rate": 0.000131440005405933,
"loss": 4.1665,
"step": 898000
},
{
"epoch": 1.69,
"learning_rate": 0.00013134615240226148,
"loss": 4.1266,
"step": 898500
},
{
"epoch": 1.69,
"learning_rate": 0.00013125229939858993,
"loss": 4.1453,
"step": 899000
},
{
"epoch": 1.69,
"learning_rate": 0.0001311584463949184,
"loss": 4.1517,
"step": 899500
},
{
"epoch": 1.69,
"learning_rate": 0.00013106459339124688,
"loss": 4.1438,
"step": 900000
},
{
"epoch": 1.69,
"learning_rate": 0.00013097074038757536,
"loss": 4.1525,
"step": 900500
},
{
"epoch": 1.69,
"learning_rate": 0.00013087688738390384,
"loss": 4.1584,
"step": 901000
},
{
"epoch": 1.69,
"learning_rate": 0.0001307830343802323,
"loss": 4.1575,
"step": 901500
},
{
"epoch": 1.69,
"learning_rate": 0.00013068918137656077,
"loss": 4.146,
"step": 902000
},
{
"epoch": 1.69,
"learning_rate": 0.00013059532837288924,
"loss": 4.1607,
"step": 902500
},
{
"epoch": 1.69,
"learning_rate": 0.0001305014753692177,
"loss": 4.1482,
"step": 903000
},
{
"epoch": 1.7,
"learning_rate": 0.00013040762236554617,
"loss": 4.1457,
"step": 903500
},
{
"epoch": 1.7,
"learning_rate": 0.00013031376936187465,
"loss": 4.1477,
"step": 904000
},
{
"epoch": 1.7,
"learning_rate": 0.00013021991635820313,
"loss": 4.1523,
"step": 904500
},
{
"epoch": 1.7,
"learning_rate": 0.0001301260633545316,
"loss": 4.1349,
"step": 905000
},
{
"epoch": 1.7,
"learning_rate": 0.00013003221035086005,
"loss": 4.147,
"step": 905500
},
{
"epoch": 1.7,
"learning_rate": 0.00012993835734718853,
"loss": 4.1542,
"step": 906000
},
{
"epoch": 1.7,
"learning_rate": 0.000129844504343517,
"loss": 4.1514,
"step": 906500
},
{
"epoch": 1.7,
"learning_rate": 0.00012975065133984546,
"loss": 4.1542,
"step": 907000
},
{
"epoch": 1.7,
"learning_rate": 0.00012965679833617394,
"loss": 4.1556,
"step": 907500
},
{
"epoch": 1.7,
"learning_rate": 0.0001295629453325024,
"loss": 4.1406,
"step": 908000
},
{
"epoch": 1.71,
"learning_rate": 0.00012946909232883086,
"loss": 4.1554,
"step": 908500
},
{
"epoch": 1.71,
"learning_rate": 0.00012937523932515934,
"loss": 4.1343,
"step": 909000
},
{
"epoch": 1.71,
"learning_rate": 0.00012928138632148782,
"loss": 4.1464,
"step": 909500
},
{
"epoch": 1.71,
"learning_rate": 0.0001291875333178163,
"loss": 4.1572,
"step": 910000
},
{
"epoch": 1.71,
"learning_rate": 0.00012909368031414477,
"loss": 4.1512,
"step": 910500
},
{
"epoch": 1.71,
"learning_rate": 0.00012899982731047322,
"loss": 4.1333,
"step": 911000
},
{
"epoch": 1.71,
"learning_rate": 0.0001289059743068017,
"loss": 4.1432,
"step": 911500
},
{
"epoch": 1.71,
"learning_rate": 0.00012881212130313018,
"loss": 4.152,
"step": 912000
},
{
"epoch": 1.71,
"learning_rate": 0.00012871826829945863,
"loss": 4.1375,
"step": 912500
},
{
"epoch": 1.71,
"learning_rate": 0.0001286244152957871,
"loss": 4.1629,
"step": 913000
},
{
"epoch": 1.71,
"learning_rate": 0.00012853056229211558,
"loss": 4.1557,
"step": 913500
},
{
"epoch": 1.72,
"learning_rate": 0.00012843670928844406,
"loss": 4.149,
"step": 914000
},
{
"epoch": 1.72,
"learning_rate": 0.00012834285628477254,
"loss": 4.1455,
"step": 914500
},
{
"epoch": 1.72,
"learning_rate": 0.00012824900328110102,
"loss": 4.1546,
"step": 915000
},
{
"epoch": 1.72,
"learning_rate": 0.00012815515027742947,
"loss": 4.1494,
"step": 915500
},
{
"epoch": 1.72,
"learning_rate": 0.00012806129727375794,
"loss": 4.1478,
"step": 916000
},
{
"epoch": 1.72,
"learning_rate": 0.00012796744427008642,
"loss": 4.1756,
"step": 916500
},
{
"epoch": 1.72,
"learning_rate": 0.00012787359126641487,
"loss": 4.1373,
"step": 917000
},
{
"epoch": 1.72,
"learning_rate": 0.00012777973826274335,
"loss": 4.1653,
"step": 917500
},
{
"epoch": 1.72,
"learning_rate": 0.00012768588525907183,
"loss": 4.1449,
"step": 918000
},
{
"epoch": 1.72,
"learning_rate": 0.00012759203225540028,
"loss": 4.1569,
"step": 918500
},
{
"epoch": 1.73,
"learning_rate": 0.00012749817925172875,
"loss": 4.1697,
"step": 919000
},
{
"epoch": 1.73,
"learning_rate": 0.00012740432624805723,
"loss": 4.1371,
"step": 919500
},
{
"epoch": 1.73,
"learning_rate": 0.0001273104732443857,
"loss": 4.1391,
"step": 920000
},
{
"epoch": 1.73,
"learning_rate": 0.00012721662024071419,
"loss": 4.1325,
"step": 920500
},
{
"epoch": 1.73,
"learning_rate": 0.00012712276723704264,
"loss": 4.133,
"step": 921000
},
{
"epoch": 1.73,
"learning_rate": 0.00012702891423337111,
"loss": 4.1564,
"step": 921500
},
{
"epoch": 1.73,
"learning_rate": 0.0001269350612296996,
"loss": 4.1415,
"step": 922000
},
{
"epoch": 1.73,
"learning_rate": 0.00012684120822602804,
"loss": 4.1426,
"step": 922500
},
{
"epoch": 1.73,
"learning_rate": 0.00012674735522235652,
"loss": 4.1483,
"step": 923000
},
{
"epoch": 1.73,
"learning_rate": 0.000126653502218685,
"loss": 4.1296,
"step": 923500
},
{
"epoch": 1.73,
"learning_rate": 0.00012655964921501347,
"loss": 4.1324,
"step": 924000
},
{
"epoch": 1.74,
"learning_rate": 0.00012646579621134195,
"loss": 4.164,
"step": 924500
},
{
"epoch": 1.74,
"learning_rate": 0.0001263719432076704,
"loss": 4.1306,
"step": 925000
},
{
"epoch": 1.74,
"learning_rate": 0.00012627809020399888,
"loss": 4.1526,
"step": 925500
},
{
"epoch": 1.74,
"learning_rate": 0.00012618423720032736,
"loss": 4.1502,
"step": 926000
},
{
"epoch": 1.74,
"learning_rate": 0.0001260903841966558,
"loss": 4.1489,
"step": 926500
},
{
"epoch": 1.74,
"learning_rate": 0.00012599653119298428,
"loss": 4.1272,
"step": 927000
},
{
"epoch": 1.74,
"learning_rate": 0.00012590267818931276,
"loss": 4.1326,
"step": 927500
},
{
"epoch": 1.74,
"learning_rate": 0.00012580882518564124,
"loss": 4.1592,
"step": 928000
},
{
"epoch": 1.74,
"learning_rate": 0.00012571497218196972,
"loss": 4.1425,
"step": 928500
},
{
"epoch": 1.74,
"learning_rate": 0.00012562111917829817,
"loss": 4.1498,
"step": 929000
},
{
"epoch": 1.74,
"learning_rate": 0.00012552726617462664,
"loss": 4.145,
"step": 929500
},
{
"epoch": 1.75,
"learning_rate": 0.00012543341317095512,
"loss": 4.1473,
"step": 930000
},
{
"epoch": 1.75,
"learning_rate": 0.00012533956016728357,
"loss": 4.1499,
"step": 930500
},
{
"epoch": 1.75,
"learning_rate": 0.00012524570716361205,
"loss": 4.1531,
"step": 931000
},
{
"epoch": 1.75,
"learning_rate": 0.00012515185415994053,
"loss": 4.1434,
"step": 931500
},
{
"epoch": 1.75,
"learning_rate": 0.00012505800115626898,
"loss": 4.1528,
"step": 932000
},
{
"epoch": 1.75,
"learning_rate": 0.00012496414815259745,
"loss": 4.1428,
"step": 932500
},
{
"epoch": 1.75,
"learning_rate": 0.00012487029514892593,
"loss": 4.1576,
"step": 933000
},
{
"epoch": 1.75,
"learning_rate": 0.0001247764421452544,
"loss": 4.1558,
"step": 933500
},
{
"epoch": 1.75,
"learning_rate": 0.00012468258914158289,
"loss": 4.1521,
"step": 934000
},
{
"epoch": 1.75,
"learning_rate": 0.00012458873613791136,
"loss": 4.1473,
"step": 934500
},
{
"epoch": 1.76,
"learning_rate": 0.00012449488313423981,
"loss": 4.1403,
"step": 935000
},
{
"epoch": 1.76,
"learning_rate": 0.0001244010301305683,
"loss": 4.1466,
"step": 935500
},
{
"epoch": 1.76,
"learning_rate": 0.00012430717712689674,
"loss": 4.1479,
"step": 936000
},
{
"epoch": 1.76,
"learning_rate": 0.00012421332412322522,
"loss": 4.154,
"step": 936500
},
{
"epoch": 1.76,
"learning_rate": 0.0001241194711195537,
"loss": 4.1392,
"step": 937000
},
{
"epoch": 1.76,
"learning_rate": 0.00012402561811588217,
"loss": 4.1386,
"step": 937500
},
{
"epoch": 1.76,
"learning_rate": 0.00012393176511221065,
"loss": 4.1508,
"step": 938000
},
{
"epoch": 1.76,
"learning_rate": 0.00012383791210853913,
"loss": 4.1375,
"step": 938500
},
{
"epoch": 1.76,
"learning_rate": 0.00012374405910486758,
"loss": 4.157,
"step": 939000
},
{
"epoch": 1.76,
"learning_rate": 0.00012365020610119606,
"loss": 4.1461,
"step": 939500
},
{
"epoch": 1.76,
"learning_rate": 0.00012355635309752453,
"loss": 4.156,
"step": 940000
},
{
"epoch": 1.77,
"learning_rate": 0.00012346250009385298,
"loss": 4.1311,
"step": 940500
},
{
"epoch": 1.77,
"learning_rate": 0.00012336864709018146,
"loss": 4.1446,
"step": 941000
},
{
"epoch": 1.77,
"learning_rate": 0.00012327479408650994,
"loss": 4.1588,
"step": 941500
},
{
"epoch": 1.77,
"learning_rate": 0.0001231809410828384,
"loss": 4.1286,
"step": 942000
},
{
"epoch": 1.77,
"learning_rate": 0.00012308708807916687,
"loss": 4.1618,
"step": 942500
},
{
"epoch": 1.77,
"learning_rate": 0.00012299323507549534,
"loss": 4.1153,
"step": 943000
},
{
"epoch": 1.77,
"learning_rate": 0.00012289938207182382,
"loss": 4.1735,
"step": 943500
},
{
"epoch": 1.77,
"learning_rate": 0.0001228055290681523,
"loss": 4.1568,
"step": 944000
},
{
"epoch": 1.77,
"learning_rate": 0.00012271167606448075,
"loss": 4.1195,
"step": 944500
},
{
"epoch": 1.77,
"learning_rate": 0.00012261782306080923,
"loss": 4.1557,
"step": 945000
},
{
"epoch": 1.77,
"learning_rate": 0.0001225239700571377,
"loss": 4.1538,
"step": 945500
},
{
"epoch": 1.78,
"learning_rate": 0.00012243011705346615,
"loss": 4.1532,
"step": 946000
},
{
"epoch": 1.78,
"learning_rate": 0.00012233626404979463,
"loss": 4.1455,
"step": 946500
},
{
"epoch": 1.78,
"learning_rate": 0.0001222424110461231,
"loss": 4.1525,
"step": 947000
},
{
"epoch": 1.78,
"learning_rate": 0.0001221485580424516,
"loss": 4.1732,
"step": 947500
},
{
"epoch": 1.78,
"learning_rate": 0.00012205470503878005,
"loss": 4.1564,
"step": 948000
},
{
"epoch": 1.78,
"learning_rate": 0.00012196085203510851,
"loss": 4.1306,
"step": 948500
},
{
"epoch": 1.78,
"learning_rate": 0.00012186699903143699,
"loss": 4.1487,
"step": 949000
},
{
"epoch": 1.78,
"learning_rate": 0.00012177314602776547,
"loss": 4.1409,
"step": 949500
},
{
"epoch": 1.78,
"learning_rate": 0.00012167929302409393,
"loss": 4.1265,
"step": 950000
},
{
"epoch": 1.78,
"learning_rate": 0.0001215854400204224,
"loss": 4.1365,
"step": 950500
},
{
"epoch": 1.79,
"learning_rate": 0.00012149158701675087,
"loss": 4.1518,
"step": 951000
},
{
"epoch": 1.79,
"learning_rate": 0.00012139773401307934,
"loss": 4.1391,
"step": 951500
},
{
"epoch": 1.79,
"learning_rate": 0.00012130388100940782,
"loss": 4.1373,
"step": 952000
},
{
"epoch": 1.79,
"learning_rate": 0.0001212100280057363,
"loss": 4.1258,
"step": 952500
},
{
"epoch": 1.79,
"learning_rate": 0.00012111617500206476,
"loss": 4.1463,
"step": 953000
},
{
"epoch": 1.79,
"learning_rate": 0.00012102232199839323,
"loss": 4.1345,
"step": 953500
},
{
"epoch": 1.79,
"learning_rate": 0.00012092846899472168,
"loss": 4.1452,
"step": 954000
},
{
"epoch": 1.79,
"learning_rate": 0.00012083461599105016,
"loss": 4.1288,
"step": 954500
},
{
"epoch": 1.79,
"learning_rate": 0.00012074076298737864,
"loss": 4.1593,
"step": 955000
},
{
"epoch": 1.79,
"learning_rate": 0.0001206469099837071,
"loss": 4.1404,
"step": 955500
},
{
"epoch": 1.79,
"learning_rate": 0.00012055305698003558,
"loss": 4.1375,
"step": 956000
},
{
"epoch": 1.8,
"learning_rate": 0.00012045920397636406,
"loss": 4.1524,
"step": 956500
},
{
"epoch": 1.8,
"learning_rate": 0.00012036535097269251,
"loss": 4.1575,
"step": 957000
},
{
"epoch": 1.8,
"learning_rate": 0.00012027149796902099,
"loss": 4.1392,
"step": 957500
},
{
"epoch": 1.8,
"learning_rate": 0.00012017764496534946,
"loss": 4.1581,
"step": 958000
},
{
"epoch": 1.8,
"learning_rate": 0.00012008379196167793,
"loss": 4.136,
"step": 958500
},
{
"epoch": 1.8,
"learning_rate": 0.0001199899389580064,
"loss": 4.137,
"step": 959000
},
{
"epoch": 1.8,
"learning_rate": 0.00011989608595433487,
"loss": 4.1558,
"step": 959500
},
{
"epoch": 1.8,
"learning_rate": 0.00011980223295066335,
"loss": 4.154,
"step": 960000
},
{
"epoch": 1.8,
"learning_rate": 0.00011970837994699182,
"loss": 4.1409,
"step": 960500
},
{
"epoch": 1.8,
"learning_rate": 0.00011961452694332027,
"loss": 4.1322,
"step": 961000
},
{
"epoch": 1.8,
"learning_rate": 0.00011952067393964875,
"loss": 4.1709,
"step": 961500
},
{
"epoch": 1.81,
"learning_rate": 0.00011942682093597723,
"loss": 4.1564,
"step": 962000
},
{
"epoch": 1.81,
"learning_rate": 0.00011933296793230569,
"loss": 4.1596,
"step": 962500
},
{
"epoch": 1.81,
"learning_rate": 0.00011923911492863417,
"loss": 4.1356,
"step": 963000
},
{
"epoch": 1.81,
"learning_rate": 0.00011914526192496265,
"loss": 4.1275,
"step": 963500
},
{
"epoch": 1.81,
"learning_rate": 0.0001190514089212911,
"loss": 4.1554,
"step": 964000
},
{
"epoch": 1.81,
"learning_rate": 0.00011895755591761958,
"loss": 4.1327,
"step": 964500
},
{
"epoch": 1.81,
"learning_rate": 0.00011886370291394805,
"loss": 4.1317,
"step": 965000
},
{
"epoch": 1.81,
"learning_rate": 0.00011876984991027652,
"loss": 4.1325,
"step": 965500
},
{
"epoch": 1.81,
"learning_rate": 0.000118675996906605,
"loss": 4.1607,
"step": 966000
},
{
"epoch": 1.81,
"learning_rate": 0.00011858214390293346,
"loss": 4.1461,
"step": 966500
},
{
"epoch": 1.82,
"learning_rate": 0.00011848829089926194,
"loss": 4.1407,
"step": 967000
},
{
"epoch": 1.82,
"learning_rate": 0.0001183944378955904,
"loss": 4.1523,
"step": 967500
},
{
"epoch": 1.82,
"learning_rate": 0.00011830058489191886,
"loss": 4.1324,
"step": 968000
},
{
"epoch": 1.82,
"learning_rate": 0.00011820673188824734,
"loss": 4.1391,
"step": 968500
},
{
"epoch": 1.82,
"learning_rate": 0.00011811287888457582,
"loss": 4.1511,
"step": 969000
},
{
"epoch": 1.82,
"learning_rate": 0.00011801902588090428,
"loss": 4.1539,
"step": 969500
},
{
"epoch": 1.82,
"learning_rate": 0.00011792517287723276,
"loss": 4.1431,
"step": 970000
},
{
"epoch": 1.82,
"learning_rate": 0.00011783131987356124,
"loss": 4.1339,
"step": 970500
},
{
"epoch": 1.82,
"learning_rate": 0.00011773746686988969,
"loss": 4.145,
"step": 971000
},
{
"epoch": 1.82,
"learning_rate": 0.00011764361386621816,
"loss": 4.1357,
"step": 971500
},
{
"epoch": 1.82,
"learning_rate": 0.00011754976086254663,
"loss": 4.1461,
"step": 972000
},
{
"epoch": 1.83,
"learning_rate": 0.0001174559078588751,
"loss": 4.1355,
"step": 972500
},
{
"epoch": 1.83,
"learning_rate": 0.00011736205485520358,
"loss": 4.1309,
"step": 973000
},
{
"epoch": 1.83,
"learning_rate": 0.00011726820185153203,
"loss": 4.1298,
"step": 973500
},
{
"epoch": 1.83,
"learning_rate": 0.00011717434884786051,
"loss": 4.1269,
"step": 974000
},
{
"epoch": 1.83,
"learning_rate": 0.00011708049584418899,
"loss": 4.1468,
"step": 974500
},
{
"epoch": 1.83,
"learning_rate": 0.00011698664284051745,
"loss": 4.1476,
"step": 975000
},
{
"epoch": 1.83,
"learning_rate": 0.00011689278983684593,
"loss": 4.1415,
"step": 975500
},
{
"epoch": 1.83,
"learning_rate": 0.0001167989368331744,
"loss": 4.1372,
"step": 976000
},
{
"epoch": 1.83,
"learning_rate": 0.00011670508382950287,
"loss": 4.1227,
"step": 976500
},
{
"epoch": 1.83,
"learning_rate": 0.00011661123082583135,
"loss": 4.1526,
"step": 977000
},
{
"epoch": 1.83,
"learning_rate": 0.0001165173778221598,
"loss": 4.1404,
"step": 977500
},
{
"epoch": 1.84,
"learning_rate": 0.00011642352481848828,
"loss": 4.1542,
"step": 978000
},
{
"epoch": 1.84,
"learning_rate": 0.00011632967181481675,
"loss": 4.1278,
"step": 978500
},
{
"epoch": 1.84,
"learning_rate": 0.00011623581881114522,
"loss": 4.121,
"step": 979000
},
{
"epoch": 1.84,
"learning_rate": 0.0001161419658074737,
"loss": 4.1483,
"step": 979500
},
{
"epoch": 1.84,
"learning_rate": 0.00011604811280380217,
"loss": 4.135,
"step": 980000
},
{
"epoch": 1.84,
"learning_rate": 0.00011595425980013062,
"loss": 4.1195,
"step": 980500
},
{
"epoch": 1.84,
"learning_rate": 0.0001158604067964591,
"loss": 4.1515,
"step": 981000
},
{
"epoch": 1.84,
"learning_rate": 0.00011576655379278758,
"loss": 4.1385,
"step": 981500
},
{
"epoch": 1.84,
"learning_rate": 0.00011567270078911604,
"loss": 4.1471,
"step": 982000
},
{
"epoch": 1.84,
"learning_rate": 0.00011557884778544452,
"loss": 4.1415,
"step": 982500
},
{
"epoch": 1.85,
"learning_rate": 0.000115484994781773,
"loss": 4.1453,
"step": 983000
},
{
"epoch": 1.85,
"learning_rate": 0.00011539114177810146,
"loss": 4.1373,
"step": 983500
},
{
"epoch": 1.85,
"learning_rate": 0.00011529728877442992,
"loss": 4.1174,
"step": 984000
},
{
"epoch": 1.85,
"learning_rate": 0.00011520343577075839,
"loss": 4.1484,
"step": 984500
},
{
"epoch": 1.85,
"learning_rate": 0.00011510958276708686,
"loss": 4.1352,
"step": 985000
},
{
"epoch": 1.85,
"learning_rate": 0.00011501572976341534,
"loss": 4.1491,
"step": 985500
},
{
"epoch": 1.85,
"learning_rate": 0.0001149218767597438,
"loss": 4.1569,
"step": 986000
},
{
"epoch": 1.85,
"learning_rate": 0.00011482802375607228,
"loss": 4.1185,
"step": 986500
},
{
"epoch": 1.85,
"learning_rate": 0.00011473417075240076,
"loss": 4.1379,
"step": 987000
},
{
"epoch": 1.85,
"learning_rate": 0.00011464031774872921,
"loss": 4.1233,
"step": 987500
},
{
"epoch": 1.85,
"learning_rate": 0.00011454646474505769,
"loss": 4.124,
"step": 988000
},
{
"epoch": 1.86,
"learning_rate": 0.00011445261174138617,
"loss": 4.1467,
"step": 988500
},
{
"epoch": 1.86,
"learning_rate": 0.00011435875873771463,
"loss": 4.159,
"step": 989000
},
{
"epoch": 1.86,
"learning_rate": 0.00011426490573404311,
"loss": 4.138,
"step": 989500
},
{
"epoch": 1.86,
"learning_rate": 0.00011417105273037157,
"loss": 4.1624,
"step": 990000
},
{
"epoch": 1.86,
"learning_rate": 0.00011407719972670003,
"loss": 4.1434,
"step": 990500
},
{
"epoch": 1.86,
"learning_rate": 0.00011398334672302851,
"loss": 4.1239,
"step": 991000
},
{
"epoch": 1.86,
"learning_rate": 0.00011388949371935698,
"loss": 4.1452,
"step": 991500
},
{
"epoch": 1.86,
"learning_rate": 0.00011379564071568545,
"loss": 4.1212,
"step": 992000
},
{
"epoch": 1.86,
"learning_rate": 0.00011370178771201393,
"loss": 4.1346,
"step": 992500
},
{
"epoch": 1.86,
"learning_rate": 0.0001136079347083424,
"loss": 4.1433,
"step": 993000
},
{
"epoch": 1.86,
"learning_rate": 0.00011351408170467087,
"loss": 4.1496,
"step": 993500
},
{
"epoch": 1.87,
"learning_rate": 0.00011342022870099935,
"loss": 4.1362,
"step": 994000
},
{
"epoch": 1.87,
"learning_rate": 0.0001133263756973278,
"loss": 4.1491,
"step": 994500
},
{
"epoch": 1.87,
"learning_rate": 0.00011323252269365628,
"loss": 4.1326,
"step": 995000
},
{
"epoch": 1.87,
"learning_rate": 0.00011313866968998475,
"loss": 4.1242,
"step": 995500
},
{
"epoch": 1.87,
"learning_rate": 0.00011304481668631322,
"loss": 4.1487,
"step": 996000
},
{
"epoch": 1.87,
"learning_rate": 0.0001129509636826417,
"loss": 4.1224,
"step": 996500
},
{
"epoch": 1.87,
"learning_rate": 0.00011285711067897015,
"loss": 4.1172,
"step": 997000
},
{
"epoch": 1.87,
"learning_rate": 0.00011276325767529862,
"loss": 4.132,
"step": 997500
},
{
"epoch": 1.87,
"learning_rate": 0.0001126694046716271,
"loss": 4.1388,
"step": 998000
},
{
"epoch": 1.87,
"learning_rate": 0.00011257555166795557,
"loss": 4.1333,
"step": 998500
},
{
"epoch": 1.88,
"learning_rate": 0.00011248169866428404,
"loss": 4.142,
"step": 999000
},
{
"epoch": 1.88,
"learning_rate": 0.00011238784566061252,
"loss": 4.1385,
"step": 999500
},
{
"epoch": 1.88,
"learning_rate": 0.00011229399265694098,
"loss": 4.1118,
"step": 1000000
},
{
"epoch": 1.88,
"learning_rate": 0.00011220013965326946,
"loss": 4.1553,
"step": 1000500
},
{
"epoch": 1.88,
"learning_rate": 0.00011210628664959793,
"loss": 4.1319,
"step": 1001000
},
{
"epoch": 1.88,
"learning_rate": 0.00011201243364592639,
"loss": 4.1237,
"step": 1001500
},
{
"epoch": 1.88,
"learning_rate": 0.00011191858064225487,
"loss": 4.1383,
"step": 1002000
},
{
"epoch": 1.88,
"learning_rate": 0.00011182472763858333,
"loss": 4.1455,
"step": 1002500
},
{
"epoch": 1.88,
"learning_rate": 0.00011173087463491181,
"loss": 4.1282,
"step": 1003000
},
{
"epoch": 1.88,
"learning_rate": 0.00011163702163124029,
"loss": 4.1575,
"step": 1003500
},
{
"epoch": 1.88,
"learning_rate": 0.00011154316862756874,
"loss": 4.1647,
"step": 1004000
},
{
"epoch": 1.89,
"learning_rate": 0.00011144931562389721,
"loss": 4.1119,
"step": 1004500
},
{
"epoch": 1.89,
"learning_rate": 0.00011135546262022569,
"loss": 4.1236,
"step": 1005000
},
{
"epoch": 1.89,
"learning_rate": 0.00011126160961655415,
"loss": 4.1447,
"step": 1005500
},
{
"epoch": 1.89,
"learning_rate": 0.00011116775661288263,
"loss": 4.1236,
"step": 1006000
},
{
"epoch": 1.89,
"learning_rate": 0.00011107390360921111,
"loss": 4.1291,
"step": 1006500
},
{
"epoch": 1.89,
"learning_rate": 0.00011098005060553957,
"loss": 4.1438,
"step": 1007000
},
{
"epoch": 1.89,
"learning_rate": 0.00011088619760186804,
"loss": 4.1411,
"step": 1007500
},
{
"epoch": 1.89,
"learning_rate": 0.0001107923445981965,
"loss": 4.1428,
"step": 1008000
},
{
"epoch": 1.89,
"learning_rate": 0.00011069849159452498,
"loss": 4.1509,
"step": 1008500
},
{
"epoch": 1.89,
"learning_rate": 0.00011060463859085346,
"loss": 4.1412,
"step": 1009000
},
{
"epoch": 1.89,
"learning_rate": 0.00011051078558718192,
"loss": 4.1482,
"step": 1009500
},
{
"epoch": 1.9,
"learning_rate": 0.0001104169325835104,
"loss": 4.1383,
"step": 1010000
},
{
"epoch": 1.9,
"learning_rate": 0.00011032307957983887,
"loss": 4.1465,
"step": 1010500
},
{
"epoch": 1.9,
"learning_rate": 0.00011022922657616732,
"loss": 4.1444,
"step": 1011000
},
{
"epoch": 1.9,
"learning_rate": 0.0001101353735724958,
"loss": 4.1404,
"step": 1011500
},
{
"epoch": 1.9,
"learning_rate": 0.00011004152056882428,
"loss": 4.1512,
"step": 1012000
},
{
"epoch": 1.9,
"learning_rate": 0.00010994766756515274,
"loss": 4.1348,
"step": 1012500
},
{
"epoch": 1.9,
"learning_rate": 0.00010985381456148122,
"loss": 4.1288,
"step": 1013000
},
{
"epoch": 1.9,
"learning_rate": 0.0001097599615578097,
"loss": 4.1211,
"step": 1013500
},
{
"epoch": 1.9,
"learning_rate": 0.00010966610855413815,
"loss": 4.1403,
"step": 1014000
},
{
"epoch": 1.9,
"learning_rate": 0.00010957225555046663,
"loss": 4.1321,
"step": 1014500
},
{
"epoch": 1.91,
"learning_rate": 0.00010947840254679509,
"loss": 4.1273,
"step": 1015000
},
{
"epoch": 1.91,
"learning_rate": 0.00010938454954312357,
"loss": 4.1445,
"step": 1015500
},
{
"epoch": 1.91,
"learning_rate": 0.00010929069653945204,
"loss": 4.15,
"step": 1016000
},
{
"epoch": 1.91,
"learning_rate": 0.00010919684353578051,
"loss": 4.128,
"step": 1016500
},
{
"epoch": 1.91,
"learning_rate": 0.00010910299053210899,
"loss": 4.1393,
"step": 1017000
},
{
"epoch": 1.91,
"learning_rate": 0.00010900913752843746,
"loss": 4.1423,
"step": 1017500
},
{
"epoch": 1.91,
"learning_rate": 0.00010891528452476591,
"loss": 4.1301,
"step": 1018000
},
{
"epoch": 1.91,
"learning_rate": 0.00010882143152109439,
"loss": 4.1201,
"step": 1018500
},
{
"epoch": 1.91,
"learning_rate": 0.00010872757851742287,
"loss": 4.1287,
"step": 1019000
},
{
"epoch": 1.91,
"learning_rate": 0.00010863372551375133,
"loss": 4.1387,
"step": 1019500
},
{
"epoch": 1.91,
"learning_rate": 0.00010853987251007981,
"loss": 4.1147,
"step": 1020000
},
{
"epoch": 1.92,
"learning_rate": 0.00010844601950640826,
"loss": 4.1294,
"step": 1020500
},
{
"epoch": 1.92,
"learning_rate": 0.00010835216650273674,
"loss": 4.13,
"step": 1021000
},
{
"epoch": 1.92,
"learning_rate": 0.00010825831349906521,
"loss": 4.1342,
"step": 1021500
},
{
"epoch": 1.92,
"learning_rate": 0.00010816446049539368,
"loss": 4.1351,
"step": 1022000
},
{
"epoch": 1.92,
"learning_rate": 0.00010807060749172216,
"loss": 4.1385,
"step": 1022500
},
{
"epoch": 1.92,
"learning_rate": 0.00010797675448805063,
"loss": 4.1422,
"step": 1023000
},
{
"epoch": 1.92,
"learning_rate": 0.0001078829014843791,
"loss": 4.1245,
"step": 1023500
},
{
"epoch": 1.92,
"learning_rate": 0.00010778904848070757,
"loss": 4.1172,
"step": 1024000
},
{
"epoch": 1.92,
"learning_rate": 0.00010769519547703604,
"loss": 4.1479,
"step": 1024500
},
{
"epoch": 1.92,
"learning_rate": 0.0001076013424733645,
"loss": 4.1247,
"step": 1025000
},
{
"epoch": 1.92,
"learning_rate": 0.00010750748946969298,
"loss": 4.1183,
"step": 1025500
},
{
"epoch": 1.93,
"learning_rate": 0.00010741363646602144,
"loss": 4.1272,
"step": 1026000
},
{
"epoch": 1.93,
"learning_rate": 0.00010731978346234992,
"loss": 4.1143,
"step": 1026500
},
{
"epoch": 1.93,
"learning_rate": 0.0001072259304586784,
"loss": 4.1418,
"step": 1027000
},
{
"epoch": 1.93,
"learning_rate": 0.00010713207745500685,
"loss": 4.1353,
"step": 1027500
},
{
"epoch": 1.93,
"learning_rate": 0.00010703822445133533,
"loss": 4.1316,
"step": 1028000
},
{
"epoch": 1.93,
"learning_rate": 0.0001069443714476638,
"loss": 4.1255,
"step": 1028500
},
{
"epoch": 1.93,
"learning_rate": 0.00010685051844399227,
"loss": 4.119,
"step": 1029000
},
{
"epoch": 1.93,
"learning_rate": 0.00010675666544032074,
"loss": 4.1291,
"step": 1029500
},
{
"epoch": 1.93,
"learning_rate": 0.00010666281243664922,
"loss": 4.1499,
"step": 1030000
},
{
"epoch": 1.93,
"learning_rate": 0.00010656895943297769,
"loss": 4.1192,
"step": 1030500
},
{
"epoch": 1.94,
"learning_rate": 0.00010647510642930615,
"loss": 4.1316,
"step": 1031000
},
{
"epoch": 1.94,
"learning_rate": 0.00010638125342563463,
"loss": 4.1289,
"step": 1031500
},
{
"epoch": 1.94,
"learning_rate": 0.00010628740042196309,
"loss": 4.1488,
"step": 1032000
},
{
"epoch": 1.94,
"learning_rate": 0.00010619354741829157,
"loss": 4.1376,
"step": 1032500
},
{
"epoch": 1.94,
"learning_rate": 0.00010609969441462003,
"loss": 4.1437,
"step": 1033000
},
{
"epoch": 1.94,
"learning_rate": 0.00010600584141094851,
"loss": 4.134,
"step": 1033500
},
{
"epoch": 1.94,
"learning_rate": 0.00010591198840727699,
"loss": 4.1261,
"step": 1034000
},
{
"epoch": 1.94,
"learning_rate": 0.00010581813540360544,
"loss": 4.1293,
"step": 1034500
},
{
"epoch": 1.94,
"learning_rate": 0.00010572428239993392,
"loss": 4.1303,
"step": 1035000
},
{
"epoch": 1.94,
"learning_rate": 0.00010563042939626239,
"loss": 4.1335,
"step": 1035500
},
{
"epoch": 1.94,
"learning_rate": 0.00010553657639259086,
"loss": 4.1341,
"step": 1036000
},
{
"epoch": 1.95,
"learning_rate": 0.00010544272338891933,
"loss": 4.1472,
"step": 1036500
},
{
"epoch": 1.95,
"learning_rate": 0.00010534887038524781,
"loss": 4.1489,
"step": 1037000
},
{
"epoch": 1.95,
"learning_rate": 0.00010525501738157626,
"loss": 4.1246,
"step": 1037500
},
{
"epoch": 1.95,
"learning_rate": 0.00010516116437790474,
"loss": 4.1247,
"step": 1038000
},
{
"epoch": 1.95,
"learning_rate": 0.0001050673113742332,
"loss": 4.133,
"step": 1038500
},
{
"epoch": 1.95,
"learning_rate": 0.00010497345837056168,
"loss": 4.1452,
"step": 1039000
},
{
"epoch": 1.95,
"learning_rate": 0.00010487960536689016,
"loss": 4.1345,
"step": 1039500
},
{
"epoch": 1.95,
"learning_rate": 0.00010478575236321862,
"loss": 4.1238,
"step": 1040000
},
{
"epoch": 1.95,
"learning_rate": 0.0001046918993595471,
"loss": 4.1177,
"step": 1040500
},
{
"epoch": 1.95,
"learning_rate": 0.00010459804635587558,
"loss": 4.1273,
"step": 1041000
},
{
"epoch": 1.95,
"learning_rate": 0.00010450419335220403,
"loss": 4.1347,
"step": 1041500
},
{
"epoch": 1.96,
"learning_rate": 0.0001044103403485325,
"loss": 4.1266,
"step": 1042000
},
{
"epoch": 1.96,
"learning_rate": 0.00010431648734486098,
"loss": 4.1178,
"step": 1042500
},
{
"epoch": 1.96,
"learning_rate": 0.00010422263434118945,
"loss": 4.1322,
"step": 1043000
},
{
"epoch": 1.96,
"learning_rate": 0.00010412878133751792,
"loss": 4.147,
"step": 1043500
},
{
"epoch": 1.96,
"learning_rate": 0.00010403492833384637,
"loss": 4.1121,
"step": 1044000
},
{
"epoch": 1.96,
"learning_rate": 0.00010394107533017485,
"loss": 4.1315,
"step": 1044500
},
{
"epoch": 1.96,
"learning_rate": 0.00010384722232650333,
"loss": 4.1236,
"step": 1045000
},
{
"epoch": 1.96,
"learning_rate": 0.00010375336932283179,
"loss": 4.1159,
"step": 1045500
},
{
"epoch": 1.96,
"learning_rate": 0.00010365951631916027,
"loss": 4.1236,
"step": 1046000
},
{
"epoch": 1.96,
"learning_rate": 0.00010356566331548875,
"loss": 4.1284,
"step": 1046500
},
{
"epoch": 1.97,
"learning_rate": 0.00010347181031181721,
"loss": 4.135,
"step": 1047000
},
{
"epoch": 1.97,
"learning_rate": 0.00010337795730814569,
"loss": 4.1498,
"step": 1047500
},
{
"epoch": 1.97,
"learning_rate": 0.00010328410430447415,
"loss": 4.1342,
"step": 1048000
},
{
"epoch": 1.97,
"learning_rate": 0.00010319025130080262,
"loss": 4.1236,
"step": 1048500
},
{
"epoch": 1.97,
"learning_rate": 0.00010309639829713109,
"loss": 4.1221,
"step": 1049000
},
{
"epoch": 1.97,
"learning_rate": 0.00010300254529345957,
"loss": 4.1553,
"step": 1049500
},
{
"epoch": 1.97,
"learning_rate": 0.00010290869228978803,
"loss": 4.1391,
"step": 1050000
},
{
"epoch": 1.97,
"learning_rate": 0.00010281483928611651,
"loss": 4.1206,
"step": 1050500
},
{
"epoch": 1.97,
"learning_rate": 0.00010272098628244496,
"loss": 4.1113,
"step": 1051000
},
{
"epoch": 1.97,
"learning_rate": 0.00010262713327877344,
"loss": 4.1332,
"step": 1051500
},
{
"epoch": 1.97,
"learning_rate": 0.00010253328027510192,
"loss": 4.1186,
"step": 1052000
},
{
"epoch": 1.98,
"learning_rate": 0.00010243942727143038,
"loss": 4.1325,
"step": 1052500
},
{
"epoch": 1.98,
"learning_rate": 0.00010234557426775886,
"loss": 4.1181,
"step": 1053000
},
{
"epoch": 1.98,
"learning_rate": 0.00010225172126408734,
"loss": 4.1301,
"step": 1053500
},
{
"epoch": 1.98,
"learning_rate": 0.0001021578682604158,
"loss": 4.1245,
"step": 1054000
},
{
"epoch": 1.98,
"learning_rate": 0.00010206401525674426,
"loss": 4.128,
"step": 1054500
},
{
"epoch": 1.98,
"learning_rate": 0.00010197016225307274,
"loss": 4.1392,
"step": 1055000
},
{
"epoch": 1.98,
"learning_rate": 0.0001018763092494012,
"loss": 4.1226,
"step": 1055500
},
{
"epoch": 1.98,
"learning_rate": 0.00010178245624572968,
"loss": 4.1193,
"step": 1056000
},
{
"epoch": 1.98,
"learning_rate": 0.00010168860324205815,
"loss": 4.126,
"step": 1056500
},
{
"epoch": 1.98,
"learning_rate": 0.00010159475023838662,
"loss": 4.1291,
"step": 1057000
},
{
"epoch": 1.98,
"learning_rate": 0.0001015008972347151,
"loss": 4.1259,
"step": 1057500
},
{
"epoch": 1.99,
"learning_rate": 0.00010140704423104355,
"loss": 4.1266,
"step": 1058000
},
{
"epoch": 1.99,
"learning_rate": 0.00010131319122737203,
"loss": 4.1414,
"step": 1058500
},
{
"epoch": 1.99,
"learning_rate": 0.0001012193382237005,
"loss": 4.1284,
"step": 1059000
},
{
"epoch": 1.99,
"learning_rate": 0.00010112548522002897,
"loss": 4.1388,
"step": 1059500
},
{
"epoch": 1.99,
"learning_rate": 0.00010103163221635745,
"loss": 4.1371,
"step": 1060000
},
{
"epoch": 1.99,
"learning_rate": 0.00010093777921268592,
"loss": 4.1161,
"step": 1060500
},
{
"epoch": 1.99,
"learning_rate": 0.00010084392620901437,
"loss": 4.128,
"step": 1061000
},
{
"epoch": 1.99,
"learning_rate": 0.00010075007320534285,
"loss": 4.1318,
"step": 1061500
},
{
"epoch": 1.99,
"learning_rate": 0.00010065622020167133,
"loss": 4.113,
"step": 1062000
},
{
"epoch": 1.99,
"learning_rate": 0.0001005623671979998,
"loss": 4.1257,
"step": 1062500
},
{
"epoch": 2.0,
"learning_rate": 0.00010046851419432827,
"loss": 4.1213,
"step": 1063000
},
{
"epoch": 2.0,
"learning_rate": 0.00010037466119065673,
"loss": 4.1073,
"step": 1063500
},
{
"epoch": 2.0,
"learning_rate": 0.00010028080818698521,
"loss": 4.1265,
"step": 1064000
},
{
"epoch": 2.0,
"learning_rate": 0.00010018695518331369,
"loss": 4.115,
"step": 1064500
},
{
"epoch": 2.0,
"learning_rate": 0.00010009310217964214,
"loss": 4.1257,
"step": 1065000
},
{
"epoch": 2.0,
"learning_rate": 9.999924917597062e-05,
"loss": 4.1176,
"step": 1065500
},
{
"epoch": 2.0,
"learning_rate": 9.99053961722991e-05,
"loss": 4.1204,
"step": 1066000
},
{
"epoch": 2.0,
"learning_rate": 9.981154316862756e-05,
"loss": 4.1265,
"step": 1066500
},
{
"epoch": 2.0,
"learning_rate": 9.971769016495604e-05,
"loss": 4.1058,
"step": 1067000
},
{
"epoch": 2.0,
"learning_rate": 9.962383716128451e-05,
"loss": 4.105,
"step": 1067500
},
{
"epoch": 2.0,
"learning_rate": 9.952998415761296e-05,
"loss": 4.1223,
"step": 1068000
},
{
"epoch": 2.01,
"learning_rate": 9.943613115394144e-05,
"loss": 4.086,
"step": 1068500
},
{
"epoch": 2.01,
"learning_rate": 9.93422781502699e-05,
"loss": 4.1134,
"step": 1069000
},
{
"epoch": 2.01,
"learning_rate": 9.924842514659838e-05,
"loss": 4.1087,
"step": 1069500
},
{
"epoch": 2.01,
"learning_rate": 9.915457214292686e-05,
"loss": 4.1157,
"step": 1070000
},
{
"epoch": 2.01,
"learning_rate": 9.906071913925532e-05,
"loss": 4.1426,
"step": 1070500
},
{
"epoch": 2.01,
"learning_rate": 9.896686613558379e-05,
"loss": 4.1268,
"step": 1071000
},
{
"epoch": 2.01,
"learning_rate": 9.887301313191227e-05,
"loss": 4.1173,
"step": 1071500
},
{
"epoch": 2.01,
"learning_rate": 9.877916012824073e-05,
"loss": 4.1073,
"step": 1072000
},
{
"epoch": 2.01,
"learning_rate": 9.86853071245692e-05,
"loss": 4.1115,
"step": 1072500
},
{
"epoch": 2.01,
"learning_rate": 9.859145412089768e-05,
"loss": 4.1194,
"step": 1073000
},
{
"epoch": 2.02,
"learning_rate": 9.849760111722615e-05,
"loss": 4.1053,
"step": 1073500
},
{
"epoch": 2.02,
"learning_rate": 9.840374811355463e-05,
"loss": 4.0859,
"step": 1074000
},
{
"epoch": 2.02,
"learning_rate": 9.830989510988308e-05,
"loss": 4.1224,
"step": 1074500
},
{
"epoch": 2.02,
"learning_rate": 9.821604210621155e-05,
"loss": 4.123,
"step": 1075000
},
{
"epoch": 2.02,
"learning_rate": 9.812218910254003e-05,
"loss": 4.1193,
"step": 1075500
},
{
"epoch": 2.02,
"learning_rate": 9.80283360988685e-05,
"loss": 4.1113,
"step": 1076000
},
{
"epoch": 2.02,
"learning_rate": 9.793448309519697e-05,
"loss": 4.1098,
"step": 1076500
},
{
"epoch": 2.02,
"learning_rate": 9.784063009152545e-05,
"loss": 4.101,
"step": 1077000
},
{
"epoch": 2.02,
"learning_rate": 9.77467770878539e-05,
"loss": 4.1108,
"step": 1077500
},
{
"epoch": 2.02,
"learning_rate": 9.765292408418238e-05,
"loss": 4.1113,
"step": 1078000
},
{
"epoch": 2.02,
"learning_rate": 9.755907108051085e-05,
"loss": 4.1069,
"step": 1078500
},
{
"epoch": 2.03,
"learning_rate": 9.746521807683932e-05,
"loss": 4.107,
"step": 1079000
},
{
"epoch": 2.03,
"learning_rate": 9.73713650731678e-05,
"loss": 4.1067,
"step": 1079500
},
{
"epoch": 2.03,
"learning_rate": 9.727751206949627e-05,
"loss": 4.1144,
"step": 1080000
},
{
"epoch": 2.03,
"learning_rate": 9.718365906582474e-05,
"loss": 4.1251,
"step": 1080500
},
{
"epoch": 2.03,
"learning_rate": 9.708980606215321e-05,
"loss": 4.1126,
"step": 1081000
},
{
"epoch": 2.03,
"learning_rate": 9.699595305848166e-05,
"loss": 4.124,
"step": 1081500
},
{
"epoch": 2.03,
"learning_rate": 9.690210005481014e-05,
"loss": 4.1106,
"step": 1082000
},
{
"epoch": 2.03,
"learning_rate": 9.680824705113862e-05,
"loss": 4.1016,
"step": 1082500
},
{
"epoch": 2.03,
"learning_rate": 9.671439404746708e-05,
"loss": 4.085,
"step": 1083000
},
{
"epoch": 2.03,
"learning_rate": 9.662054104379556e-05,
"loss": 4.0947,
"step": 1083500
},
{
"epoch": 2.03,
"learning_rate": 9.652668804012404e-05,
"loss": 4.1078,
"step": 1084000
},
{
"epoch": 2.04,
"learning_rate": 9.643283503645249e-05,
"loss": 4.1102,
"step": 1084500
},
{
"epoch": 2.04,
"learning_rate": 9.633898203278097e-05,
"loss": 4.1315,
"step": 1085000
},
{
"epoch": 2.04,
"learning_rate": 9.624512902910944e-05,
"loss": 4.1096,
"step": 1085500
},
{
"epoch": 2.04,
"learning_rate": 9.615127602543791e-05,
"loss": 4.1068,
"step": 1086000
},
{
"epoch": 2.04,
"learning_rate": 9.605742302176638e-05,
"loss": 4.1076,
"step": 1086500
},
{
"epoch": 2.04,
"learning_rate": 9.596357001809485e-05,
"loss": 4.0988,
"step": 1087000
},
{
"epoch": 2.04,
"learning_rate": 9.586971701442333e-05,
"loss": 4.1168,
"step": 1087500
},
{
"epoch": 2.04,
"learning_rate": 9.577586401075179e-05,
"loss": 4.1267,
"step": 1088000
},
{
"epoch": 2.04,
"learning_rate": 9.568201100708025e-05,
"loss": 4.105,
"step": 1088500
},
{
"epoch": 2.04,
"learning_rate": 9.558815800340873e-05,
"loss": 4.1078,
"step": 1089000
},
{
"epoch": 2.05,
"learning_rate": 9.549430499973721e-05,
"loss": 4.1318,
"step": 1089500
},
{
"epoch": 2.05,
"learning_rate": 9.540045199606567e-05,
"loss": 4.1029,
"step": 1090000
},
{
"epoch": 2.05,
"learning_rate": 9.530659899239415e-05,
"loss": 4.1197,
"step": 1090500
},
{
"epoch": 2.05,
"learning_rate": 9.521274598872263e-05,
"loss": 4.1039,
"step": 1091000
},
{
"epoch": 2.05,
"learning_rate": 9.511889298505108e-05,
"loss": 4.1121,
"step": 1091500
},
{
"epoch": 2.05,
"learning_rate": 9.502503998137955e-05,
"loss": 4.1012,
"step": 1092000
},
{
"epoch": 2.05,
"learning_rate": 9.493118697770802e-05,
"loss": 4.1103,
"step": 1092500
},
{
"epoch": 2.05,
"learning_rate": 9.48373339740365e-05,
"loss": 4.0982,
"step": 1093000
},
{
"epoch": 2.05,
"learning_rate": 9.474348097036497e-05,
"loss": 4.1139,
"step": 1093500
},
{
"epoch": 2.05,
"learning_rate": 9.464962796669344e-05,
"loss": 4.1062,
"step": 1094000
},
{
"epoch": 2.05,
"learning_rate": 9.45557749630219e-05,
"loss": 4.1152,
"step": 1094500
},
{
"epoch": 2.06,
"learning_rate": 9.446192195935038e-05,
"loss": 4.11,
"step": 1095000
},
{
"epoch": 2.06,
"learning_rate": 9.436806895567884e-05,
"loss": 4.1151,
"step": 1095500
},
{
"epoch": 2.06,
"learning_rate": 9.427421595200732e-05,
"loss": 4.1043,
"step": 1096000
},
{
"epoch": 2.06,
"learning_rate": 9.41803629483358e-05,
"loss": 4.1052,
"step": 1096500
},
{
"epoch": 2.06,
"learning_rate": 9.408650994466426e-05,
"loss": 4.1109,
"step": 1097000
},
{
"epoch": 2.06,
"learning_rate": 9.399265694099274e-05,
"loss": 4.0984,
"step": 1097500
},
{
"epoch": 2.06,
"learning_rate": 9.389880393732122e-05,
"loss": 4.1029,
"step": 1098000
},
{
"epoch": 2.06,
"learning_rate": 9.380495093364967e-05,
"loss": 4.1077,
"step": 1098500
},
{
"epoch": 2.06,
"learning_rate": 9.371109792997814e-05,
"loss": 4.1014,
"step": 1099000
},
{
"epoch": 2.06,
"learning_rate": 9.361724492630661e-05,
"loss": 4.1199,
"step": 1099500
},
{
"epoch": 2.06,
"learning_rate": 9.352339192263508e-05,
"loss": 4.1035,
"step": 1100000
},
{
"epoch": 2.07,
"learning_rate": 9.342953891896356e-05,
"loss": 4.0974,
"step": 1100500
},
{
"epoch": 2.07,
"learning_rate": 9.333568591529201e-05,
"loss": 4.1303,
"step": 1101000
},
{
"epoch": 2.07,
"learning_rate": 9.324183291162049e-05,
"loss": 4.1143,
"step": 1101500
},
{
"epoch": 2.07,
"learning_rate": 9.314797990794897e-05,
"loss": 4.1117,
"step": 1102000
},
{
"epoch": 2.07,
"learning_rate": 9.305412690427743e-05,
"loss": 4.0956,
"step": 1102500
},
{
"epoch": 2.07,
"learning_rate": 9.296027390060591e-05,
"loss": 4.1063,
"step": 1103000
},
{
"epoch": 2.07,
"learning_rate": 9.286642089693439e-05,
"loss": 4.1058,
"step": 1103500
},
{
"epoch": 2.07,
"learning_rate": 9.277256789326285e-05,
"loss": 4.1128,
"step": 1104000
},
{
"epoch": 2.07,
"learning_rate": 9.267871488959133e-05,
"loss": 4.1244,
"step": 1104500
},
{
"epoch": 2.07,
"learning_rate": 9.258486188591978e-05,
"loss": 4.12,
"step": 1105000
},
{
"epoch": 2.08,
"learning_rate": 9.249100888224826e-05,
"loss": 4.1167,
"step": 1105500
},
{
"epoch": 2.08,
"learning_rate": 9.239715587857673e-05,
"loss": 4.1086,
"step": 1106000
},
{
"epoch": 2.08,
"learning_rate": 9.23033028749052e-05,
"loss": 4.1212,
"step": 1106500
},
{
"epoch": 2.08,
"learning_rate": 9.220944987123367e-05,
"loss": 4.1176,
"step": 1107000
},
{
"epoch": 2.08,
"learning_rate": 9.211559686756215e-05,
"loss": 4.1025,
"step": 1107500
},
{
"epoch": 2.08,
"learning_rate": 9.20217438638906e-05,
"loss": 4.104,
"step": 1108000
},
{
"epoch": 2.08,
"learning_rate": 9.192789086021908e-05,
"loss": 4.1013,
"step": 1108500
},
{
"epoch": 2.08,
"learning_rate": 9.183403785654756e-05,
"loss": 4.1294,
"step": 1109000
},
{
"epoch": 2.08,
"learning_rate": 9.174018485287602e-05,
"loss": 4.1124,
"step": 1109500
},
{
"epoch": 2.08,
"learning_rate": 9.16463318492045e-05,
"loss": 4.1205,
"step": 1110000
},
{
"epoch": 2.08,
"learning_rate": 9.155247884553296e-05,
"loss": 4.1046,
"step": 1110500
},
{
"epoch": 2.09,
"learning_rate": 9.145862584186144e-05,
"loss": 4.1032,
"step": 1111000
},
{
"epoch": 2.09,
"learning_rate": 9.13647728381899e-05,
"loss": 4.0988,
"step": 1111500
},
{
"epoch": 2.09,
"learning_rate": 9.127091983451837e-05,
"loss": 4.129,
"step": 1112000
},
{
"epoch": 2.09,
"learning_rate": 9.117706683084684e-05,
"loss": 4.1121,
"step": 1112500
},
{
"epoch": 2.09,
"learning_rate": 9.108321382717532e-05,
"loss": 4.1229,
"step": 1113000
},
{
"epoch": 2.09,
"learning_rate": 9.098936082350379e-05,
"loss": 4.1127,
"step": 1113500
},
{
"epoch": 2.09,
"learning_rate": 9.089550781983226e-05,
"loss": 4.1041,
"step": 1114000
},
{
"epoch": 2.09,
"learning_rate": 9.080165481616074e-05,
"loss": 4.1169,
"step": 1114500
},
{
"epoch": 2.09,
"learning_rate": 9.070780181248919e-05,
"loss": 4.1025,
"step": 1115000
},
{
"epoch": 2.09,
"learning_rate": 9.061394880881767e-05,
"loss": 4.0978,
"step": 1115500
},
{
"epoch": 2.09,
"learning_rate": 9.052009580514615e-05,
"loss": 4.0989,
"step": 1116000
},
{
"epoch": 2.1,
"learning_rate": 9.042624280147461e-05,
"loss": 4.1119,
"step": 1116500
},
{
"epoch": 2.1,
"learning_rate": 9.033238979780309e-05,
"loss": 4.1034,
"step": 1117000
},
{
"epoch": 2.1,
"learning_rate": 9.023853679413155e-05,
"loss": 4.0963,
"step": 1117500
},
{
"epoch": 2.1,
"learning_rate": 9.014468379046001e-05,
"loss": 4.1093,
"step": 1118000
},
{
"epoch": 2.1,
"learning_rate": 9.005083078678849e-05,
"loss": 4.0885,
"step": 1118500
},
{
"epoch": 2.1,
"learning_rate": 8.995697778311696e-05,
"loss": 4.0972,
"step": 1119000
},
{
"epoch": 2.1,
"learning_rate": 8.986312477944543e-05,
"loss": 4.0909,
"step": 1119500
},
{
"epoch": 2.1,
"learning_rate": 8.976927177577391e-05,
"loss": 4.1121,
"step": 1120000
},
{
"epoch": 2.1,
"learning_rate": 8.967541877210237e-05,
"loss": 4.0916,
"step": 1120500
},
{
"epoch": 2.1,
"learning_rate": 8.958156576843085e-05,
"loss": 4.0999,
"step": 1121000
},
{
"epoch": 2.11,
"learning_rate": 8.948771276475933e-05,
"loss": 4.1013,
"step": 1121500
},
{
"epoch": 2.11,
"learning_rate": 8.939385976108778e-05,
"loss": 4.1082,
"step": 1122000
},
{
"epoch": 2.11,
"learning_rate": 8.930000675741626e-05,
"loss": 4.1003,
"step": 1122500
},
{
"epoch": 2.11,
"learning_rate": 8.920615375374472e-05,
"loss": 4.1027,
"step": 1123000
},
{
"epoch": 2.11,
"learning_rate": 8.91123007500732e-05,
"loss": 4.1203,
"step": 1123500
},
{
"epoch": 2.11,
"learning_rate": 8.901844774640168e-05,
"loss": 4.1085,
"step": 1124000
},
{
"epoch": 2.11,
"learning_rate": 8.892459474273013e-05,
"loss": 4.1124,
"step": 1124500
},
{
"epoch": 2.11,
"learning_rate": 8.88307417390586e-05,
"loss": 4.1267,
"step": 1125000
},
{
"epoch": 2.11,
"learning_rate": 8.873688873538708e-05,
"loss": 4.1079,
"step": 1125500
},
{
"epoch": 2.11,
"learning_rate": 8.864303573171554e-05,
"loss": 4.1296,
"step": 1126000
},
{
"epoch": 2.11,
"learning_rate": 8.854918272804402e-05,
"loss": 4.1093,
"step": 1126500
},
{
"epoch": 2.12,
"learning_rate": 8.84553297243725e-05,
"loss": 4.1036,
"step": 1127000
},
{
"epoch": 2.12,
"learning_rate": 8.836147672070096e-05,
"loss": 4.1202,
"step": 1127500
},
{
"epoch": 2.12,
"learning_rate": 8.826762371702944e-05,
"loss": 4.1145,
"step": 1128000
},
{
"epoch": 2.12,
"learning_rate": 8.81737707133579e-05,
"loss": 4.1051,
"step": 1128500
},
{
"epoch": 2.12,
"learning_rate": 8.807991770968637e-05,
"loss": 4.0903,
"step": 1129000
},
{
"epoch": 2.12,
"learning_rate": 8.798606470601485e-05,
"loss": 4.1086,
"step": 1129500
},
{
"epoch": 2.12,
"learning_rate": 8.789221170234331e-05,
"loss": 4.1134,
"step": 1130000
},
{
"epoch": 2.12,
"learning_rate": 8.779835869867179e-05,
"loss": 4.1093,
"step": 1130500
},
{
"epoch": 2.12,
"learning_rate": 8.770450569500026e-05,
"loss": 4.0988,
"step": 1131000
},
{
"epoch": 2.12,
"learning_rate": 8.761065269132871e-05,
"loss": 4.1104,
"step": 1131500
},
{
"epoch": 2.12,
"learning_rate": 8.751679968765719e-05,
"loss": 4.1203,
"step": 1132000
},
{
"epoch": 2.13,
"learning_rate": 8.742294668398567e-05,
"loss": 4.0959,
"step": 1132500
},
{
"epoch": 2.13,
"learning_rate": 8.732909368031413e-05,
"loss": 4.1168,
"step": 1133000
},
{
"epoch": 2.13,
"learning_rate": 8.723524067664261e-05,
"loss": 4.1101,
"step": 1133500
},
{
"epoch": 2.13,
"learning_rate": 8.714138767297109e-05,
"loss": 4.1177,
"step": 1134000
},
{
"epoch": 2.13,
"learning_rate": 8.704753466929955e-05,
"loss": 4.0992,
"step": 1134500
},
{
"epoch": 2.13,
"learning_rate": 8.695368166562802e-05,
"loss": 4.1014,
"step": 1135000
},
{
"epoch": 2.13,
"learning_rate": 8.685982866195648e-05,
"loss": 4.096,
"step": 1135500
},
{
"epoch": 2.13,
"learning_rate": 8.676597565828496e-05,
"loss": 4.0882,
"step": 1136000
},
{
"epoch": 2.13,
"learning_rate": 8.667212265461343e-05,
"loss": 4.1013,
"step": 1136500
},
{
"epoch": 2.13,
"learning_rate": 8.65782696509419e-05,
"loss": 4.1161,
"step": 1137000
},
{
"epoch": 2.14,
"learning_rate": 8.648441664727038e-05,
"loss": 4.0952,
"step": 1137500
},
{
"epoch": 2.14,
"learning_rate": 8.639056364359885e-05,
"loss": 4.1192,
"step": 1138000
},
{
"epoch": 2.14,
"learning_rate": 8.62967106399273e-05,
"loss": 4.1057,
"step": 1138500
},
{
"epoch": 2.14,
"learning_rate": 8.620285763625578e-05,
"loss": 4.0951,
"step": 1139000
},
{
"epoch": 2.14,
"learning_rate": 8.610900463258426e-05,
"loss": 4.1038,
"step": 1139500
},
{
"epoch": 2.14,
"learning_rate": 8.601515162891272e-05,
"loss": 4.0837,
"step": 1140000
},
{
"epoch": 2.14,
"learning_rate": 8.59212986252412e-05,
"loss": 4.1054,
"step": 1140500
},
{
"epoch": 2.14,
"learning_rate": 8.582744562156966e-05,
"loss": 4.1175,
"step": 1141000
},
{
"epoch": 2.14,
"learning_rate": 8.573359261789813e-05,
"loss": 4.0941,
"step": 1141500
},
{
"epoch": 2.14,
"learning_rate": 8.56397396142266e-05,
"loss": 4.0994,
"step": 1142000
},
{
"epoch": 2.14,
"learning_rate": 8.554588661055507e-05,
"loss": 4.1103,
"step": 1142500
},
{
"epoch": 2.15,
"learning_rate": 8.545203360688355e-05,
"loss": 4.1026,
"step": 1143000
},
{
"epoch": 2.15,
"learning_rate": 8.535818060321202e-05,
"loss": 4.1127,
"step": 1143500
},
{
"epoch": 2.15,
"learning_rate": 8.526432759954049e-05,
"loss": 4.1059,
"step": 1144000
},
{
"epoch": 2.15,
"learning_rate": 8.517047459586897e-05,
"loss": 4.1182,
"step": 1144500
},
{
"epoch": 2.15,
"learning_rate": 8.507662159219744e-05,
"loss": 4.1071,
"step": 1145000
},
{
"epoch": 2.15,
"learning_rate": 8.498276858852589e-05,
"loss": 4.1002,
"step": 1145500
},
{
"epoch": 2.15,
"learning_rate": 8.488891558485437e-05,
"loss": 4.1182,
"step": 1146000
},
{
"epoch": 2.15,
"learning_rate": 8.479506258118285e-05,
"loss": 4.1034,
"step": 1146500
},
{
"epoch": 2.15,
"learning_rate": 8.470120957751131e-05,
"loss": 4.1135,
"step": 1147000
},
{
"epoch": 2.15,
"learning_rate": 8.460735657383979e-05,
"loss": 4.1225,
"step": 1147500
},
{
"epoch": 2.15,
"learning_rate": 8.451350357016824e-05,
"loss": 4.1207,
"step": 1148000
},
{
"epoch": 2.16,
"learning_rate": 8.441965056649672e-05,
"loss": 4.1078,
"step": 1148500
},
{
"epoch": 2.16,
"learning_rate": 8.43257975628252e-05,
"loss": 4.0904,
"step": 1149000
},
{
"epoch": 2.16,
"learning_rate": 8.423194455915366e-05,
"loss": 4.1124,
"step": 1149500
},
{
"epoch": 2.16,
"learning_rate": 8.413809155548214e-05,
"loss": 4.1007,
"step": 1150000
},
{
"epoch": 2.16,
"learning_rate": 8.404423855181061e-05,
"loss": 4.0937,
"step": 1150500
},
{
"epoch": 2.16,
"learning_rate": 8.395038554813908e-05,
"loss": 4.1043,
"step": 1151000
},
{
"epoch": 2.16,
"learning_rate": 8.385653254446755e-05,
"loss": 4.1088,
"step": 1151500
},
{
"epoch": 2.16,
"learning_rate": 8.376267954079602e-05,
"loss": 4.1029,
"step": 1152000
},
{
"epoch": 2.16,
"learning_rate": 8.366882653712448e-05,
"loss": 4.1002,
"step": 1152500
},
{
"epoch": 2.16,
"learning_rate": 8.357497353345296e-05,
"loss": 4.111,
"step": 1153000
},
{
"epoch": 2.17,
"learning_rate": 8.348112052978142e-05,
"loss": 4.1253,
"step": 1153500
},
{
"epoch": 2.17,
"learning_rate": 8.33872675261099e-05,
"loss": 4.1056,
"step": 1154000
},
{
"epoch": 2.17,
"learning_rate": 8.329341452243838e-05,
"loss": 4.0936,
"step": 1154500
},
{
"epoch": 2.17,
"learning_rate": 8.319956151876683e-05,
"loss": 4.1043,
"step": 1155000
},
{
"epoch": 2.17,
"learning_rate": 8.31057085150953e-05,
"loss": 4.0881,
"step": 1155500
},
{
"epoch": 2.17,
"learning_rate": 8.301185551142378e-05,
"loss": 4.0945,
"step": 1156000
},
{
"epoch": 2.17,
"learning_rate": 8.291800250775225e-05,
"loss": 4.1038,
"step": 1156500
},
{
"epoch": 2.17,
"learning_rate": 8.282414950408072e-05,
"loss": 4.1109,
"step": 1157000
},
{
"epoch": 2.17,
"learning_rate": 8.27302965004092e-05,
"loss": 4.1039,
"step": 1157500
},
{
"epoch": 2.17,
"learning_rate": 8.263644349673765e-05,
"loss": 4.1109,
"step": 1158000
},
{
"epoch": 2.17,
"learning_rate": 8.254259049306613e-05,
"loss": 4.104,
"step": 1158500
},
{
"epoch": 2.18,
"learning_rate": 8.24487374893946e-05,
"loss": 4.1112,
"step": 1159000
},
{
"epoch": 2.18,
"learning_rate": 8.235488448572307e-05,
"loss": 4.1087,
"step": 1159500
},
{
"epoch": 2.18,
"learning_rate": 8.226103148205155e-05,
"loss": 4.123,
"step": 1160000
},
{
"epoch": 2.18,
"learning_rate": 8.216717847838001e-05,
"loss": 4.1012,
"step": 1160500
},
{
"epoch": 2.18,
"learning_rate": 8.207332547470849e-05,
"loss": 4.119,
"step": 1161000
},
{
"epoch": 2.18,
"learning_rate": 8.197947247103697e-05,
"loss": 4.1136,
"step": 1161500
},
{
"epoch": 2.18,
"learning_rate": 8.188561946736542e-05,
"loss": 4.1048,
"step": 1162000
},
{
"epoch": 2.18,
"learning_rate": 8.17917664636939e-05,
"loss": 4.0936,
"step": 1162500
},
{
"epoch": 2.18,
"learning_rate": 8.169791346002237e-05,
"loss": 4.1135,
"step": 1163000
},
{
"epoch": 2.18,
"learning_rate": 8.160406045635084e-05,
"loss": 4.1141,
"step": 1163500
},
{
"epoch": 2.18,
"learning_rate": 8.151020745267931e-05,
"loss": 4.1368,
"step": 1164000
},
{
"epoch": 2.19,
"learning_rate": 8.141635444900779e-05,
"loss": 4.0932,
"step": 1164500
},
{
"epoch": 2.19,
"learning_rate": 8.132250144533624e-05,
"loss": 4.1007,
"step": 1165000
},
{
"epoch": 2.19,
"learning_rate": 8.122864844166472e-05,
"loss": 4.0984,
"step": 1165500
},
{
"epoch": 2.19,
"learning_rate": 8.113479543799318e-05,
"loss": 4.1073,
"step": 1166000
},
{
"epoch": 2.19,
"learning_rate": 8.104094243432166e-05,
"loss": 4.1027,
"step": 1166500
},
{
"epoch": 2.19,
"learning_rate": 8.094708943065014e-05,
"loss": 4.0946,
"step": 1167000
},
{
"epoch": 2.19,
"learning_rate": 8.08532364269786e-05,
"loss": 4.108,
"step": 1167500
},
{
"epoch": 2.19,
"learning_rate": 8.075938342330708e-05,
"loss": 4.1134,
"step": 1168000
},
{
"epoch": 2.19,
"learning_rate": 8.066553041963554e-05,
"loss": 4.1216,
"step": 1168500
},
{
"epoch": 2.19,
"learning_rate": 8.0571677415964e-05,
"loss": 4.0972,
"step": 1169000
},
{
"epoch": 2.2,
"learning_rate": 8.047782441229248e-05,
"loss": 4.1026,
"step": 1169500
},
{
"epoch": 2.2,
"learning_rate": 8.038397140862096e-05,
"loss": 4.0985,
"step": 1170000
},
{
"epoch": 2.2,
"learning_rate": 8.029011840494942e-05,
"loss": 4.0966,
"step": 1170500
},
{
"epoch": 2.2,
"learning_rate": 8.01962654012779e-05,
"loss": 4.1163,
"step": 1171000
},
{
"epoch": 2.2,
"learning_rate": 8.010241239760635e-05,
"loss": 4.0955,
"step": 1171500
},
{
"epoch": 2.2,
"learning_rate": 8.000855939393483e-05,
"loss": 4.1089,
"step": 1172000
},
{
"epoch": 2.2,
"learning_rate": 7.991470639026331e-05,
"loss": 4.1012,
"step": 1172500
},
{
"epoch": 2.2,
"learning_rate": 7.982085338659177e-05,
"loss": 4.102,
"step": 1173000
},
{
"epoch": 2.2,
"learning_rate": 7.972700038292025e-05,
"loss": 4.1096,
"step": 1173500
},
{
"epoch": 2.2,
"learning_rate": 7.963314737924873e-05,
"loss": 4.0962,
"step": 1174000
},
{
"epoch": 2.2,
"learning_rate": 7.953929437557719e-05,
"loss": 4.107,
"step": 1174500
},
{
"epoch": 2.21,
"learning_rate": 7.944544137190565e-05,
"loss": 4.0989,
"step": 1175000
},
{
"epoch": 2.21,
"learning_rate": 7.935158836823413e-05,
"loss": 4.106,
"step": 1175500
},
{
"epoch": 2.21,
"learning_rate": 7.92577353645626e-05,
"loss": 4.1022,
"step": 1176000
},
{
"epoch": 2.21,
"learning_rate": 7.916388236089107e-05,
"loss": 4.1093,
"step": 1176500
},
{
"epoch": 2.21,
"learning_rate": 7.907002935721955e-05,
"loss": 4.0966,
"step": 1177000
},
{
"epoch": 2.21,
"learning_rate": 7.897617635354801e-05,
"loss": 4.1013,
"step": 1177500
},
{
"epoch": 2.21,
"learning_rate": 7.888232334987649e-05,
"loss": 4.107,
"step": 1178000
},
{
"epoch": 2.21,
"learning_rate": 7.878847034620494e-05,
"loss": 4.1036,
"step": 1178500
},
{
"epoch": 2.21,
"learning_rate": 7.869461734253342e-05,
"loss": 4.1103,
"step": 1179000
},
{
"epoch": 2.21,
"learning_rate": 7.86007643388619e-05,
"loss": 4.1004,
"step": 1179500
},
{
"epoch": 2.21,
"learning_rate": 7.850691133519036e-05,
"loss": 4.1094,
"step": 1180000
},
{
"epoch": 2.22,
"learning_rate": 7.841305833151884e-05,
"loss": 4.0902,
"step": 1180500
},
{
"epoch": 2.22,
"learning_rate": 7.831920532784732e-05,
"loss": 4.1044,
"step": 1181000
},
{
"epoch": 2.22,
"learning_rate": 7.822535232417577e-05,
"loss": 4.102,
"step": 1181500
},
{
"epoch": 2.22,
"learning_rate": 7.813149932050424e-05,
"loss": 4.1137,
"step": 1182000
},
{
"epoch": 2.22,
"learning_rate": 7.803764631683272e-05,
"loss": 4.1135,
"step": 1182500
},
{
"epoch": 2.22,
"learning_rate": 7.794379331316118e-05,
"loss": 4.1144,
"step": 1183000
},
{
"epoch": 2.22,
"learning_rate": 7.784994030948966e-05,
"loss": 4.1082,
"step": 1183500
},
{
"epoch": 2.22,
"learning_rate": 7.775608730581813e-05,
"loss": 4.1202,
"step": 1184000
},
{
"epoch": 2.22,
"learning_rate": 7.76622343021466e-05,
"loss": 4.1067,
"step": 1184500
},
{
"epoch": 2.22,
"learning_rate": 7.756838129847508e-05,
"loss": 4.0964,
"step": 1185000
},
{
"epoch": 2.23,
"learning_rate": 7.747452829480353e-05,
"loss": 4.1089,
"step": 1185500
},
{
"epoch": 2.23,
"learning_rate": 7.738067529113201e-05,
"loss": 4.1162,
"step": 1186000
},
{
"epoch": 2.23,
"learning_rate": 7.728682228746049e-05,
"loss": 4.112,
"step": 1186500
},
{
"epoch": 2.23,
"learning_rate": 7.719296928378895e-05,
"loss": 4.1055,
"step": 1187000
},
{
"epoch": 2.23,
"learning_rate": 7.709911628011743e-05,
"loss": 4.1009,
"step": 1187500
},
{
"epoch": 2.23,
"learning_rate": 7.70052632764459e-05,
"loss": 4.0818,
"step": 1188000
},
{
"epoch": 2.23,
"learning_rate": 7.691141027277435e-05,
"loss": 4.1135,
"step": 1188500
},
{
"epoch": 2.23,
"learning_rate": 7.681755726910283e-05,
"loss": 4.0935,
"step": 1189000
},
{
"epoch": 2.23,
"learning_rate": 7.67237042654313e-05,
"loss": 4.1257,
"step": 1189500
},
{
"epoch": 2.23,
"learning_rate": 7.662985126175977e-05,
"loss": 4.1024,
"step": 1190000
},
{
"epoch": 2.23,
"learning_rate": 7.653599825808825e-05,
"loss": 4.0994,
"step": 1190500
},
{
"epoch": 2.24,
"learning_rate": 7.644214525441671e-05,
"loss": 4.1024,
"step": 1191000
},
{
"epoch": 2.24,
"learning_rate": 7.634829225074519e-05,
"loss": 4.1263,
"step": 1191500
},
{
"epoch": 2.24,
"learning_rate": 7.625443924707366e-05,
"loss": 4.1153,
"step": 1192000
},
{
"epoch": 2.24,
"learning_rate": 7.616058624340212e-05,
"loss": 4.0918,
"step": 1192500
},
{
"epoch": 2.24,
"learning_rate": 7.60667332397306e-05,
"loss": 4.1032,
"step": 1193000
},
{
"epoch": 2.24,
"learning_rate": 7.597288023605907e-05,
"loss": 4.1051,
"step": 1193500
},
{
"epoch": 2.24,
"learning_rate": 7.587902723238754e-05,
"loss": 4.1022,
"step": 1194000
},
{
"epoch": 2.24,
"learning_rate": 7.578517422871602e-05,
"loss": 4.106,
"step": 1194500
},
{
"epoch": 2.24,
"learning_rate": 7.569132122504449e-05,
"loss": 4.1124,
"step": 1195000
},
{
"epoch": 2.24,
"learning_rate": 7.559746822137294e-05,
"loss": 4.0926,
"step": 1195500
},
{
"epoch": 2.24,
"learning_rate": 7.550361521770142e-05,
"loss": 4.1068,
"step": 1196000
},
{
"epoch": 2.25,
"learning_rate": 7.540976221402988e-05,
"loss": 4.1004,
"step": 1196500
},
{
"epoch": 2.25,
"learning_rate": 7.531590921035836e-05,
"loss": 4.0918,
"step": 1197000
},
{
"epoch": 2.25,
"learning_rate": 7.522205620668684e-05,
"loss": 4.0949,
"step": 1197500
},
{
"epoch": 2.25,
"learning_rate": 7.51282032030153e-05,
"loss": 4.0948,
"step": 1198000
},
{
"epoch": 2.25,
"learning_rate": 7.503435019934377e-05,
"loss": 4.0955,
"step": 1198500
},
{
"epoch": 2.25,
"learning_rate": 7.494049719567224e-05,
"loss": 4.0958,
"step": 1199000
},
{
"epoch": 2.25,
"learning_rate": 7.484664419200072e-05,
"loss": 4.1228,
"step": 1199500
},
{
"epoch": 2.25,
"learning_rate": 7.475279118832919e-05,
"loss": 4.094,
"step": 1200000
},
{
"epoch": 2.25,
"learning_rate": 7.465893818465765e-05,
"loss": 4.1212,
"step": 1200500
},
{
"epoch": 2.25,
"learning_rate": 7.456508518098613e-05,
"loss": 4.1101,
"step": 1201000
},
{
"epoch": 2.26,
"learning_rate": 7.44712321773146e-05,
"loss": 4.1151,
"step": 1201500
},
{
"epoch": 2.26,
"learning_rate": 7.437737917364307e-05,
"loss": 4.1059,
"step": 1202000
},
{
"epoch": 2.26,
"learning_rate": 7.428352616997153e-05,
"loss": 4.1077,
"step": 1202500
},
{
"epoch": 2.26,
"learning_rate": 7.418967316630001e-05,
"loss": 4.0735,
"step": 1203000
},
{
"epoch": 2.26,
"learning_rate": 7.409582016262847e-05,
"loss": 4.1093,
"step": 1203500
},
{
"epoch": 2.26,
"learning_rate": 7.400196715895695e-05,
"loss": 4.0987,
"step": 1204000
},
{
"epoch": 2.26,
"learning_rate": 7.390811415528541e-05,
"loss": 4.093,
"step": 1204500
},
{
"epoch": 2.26,
"learning_rate": 7.381426115161389e-05,
"loss": 4.0789,
"step": 1205000
},
{
"epoch": 2.26,
"learning_rate": 7.372040814794236e-05,
"loss": 4.0952,
"step": 1205500
},
{
"epoch": 2.26,
"learning_rate": 7.362655514427083e-05,
"loss": 4.1015,
"step": 1206000
},
{
"epoch": 2.26,
"learning_rate": 7.35327021405993e-05,
"loss": 4.1063,
"step": 1206500
},
{
"epoch": 2.27,
"learning_rate": 7.343884913692777e-05,
"loss": 4.0828,
"step": 1207000
},
{
"epoch": 2.27,
"learning_rate": 7.334499613325624e-05,
"loss": 4.0942,
"step": 1207500
},
{
"epoch": 2.27,
"learning_rate": 7.325114312958472e-05,
"loss": 4.1159,
"step": 1208000
},
{
"epoch": 2.27,
"learning_rate": 7.31572901259132e-05,
"loss": 4.1063,
"step": 1208500
},
{
"epoch": 2.27,
"learning_rate": 7.306343712224166e-05,
"loss": 4.0955,
"step": 1209000
},
{
"epoch": 2.27,
"learning_rate": 7.296958411857012e-05,
"loss": 4.1058,
"step": 1209500
},
{
"epoch": 2.27,
"learning_rate": 7.287573111489859e-05,
"loss": 4.112,
"step": 1210000
},
{
"epoch": 2.27,
"learning_rate": 7.278187811122706e-05,
"loss": 4.0972,
"step": 1210500
},
{
"epoch": 2.27,
"learning_rate": 7.268802510755554e-05,
"loss": 4.1061,
"step": 1211000
},
{
"epoch": 2.27,
"learning_rate": 7.2594172103884e-05,
"loss": 4.094,
"step": 1211500
},
{
"epoch": 2.27,
"learning_rate": 7.250031910021247e-05,
"loss": 4.1078,
"step": 1212000
},
{
"epoch": 2.28,
"learning_rate": 7.240646609654095e-05,
"loss": 4.1167,
"step": 1212500
},
{
"epoch": 2.28,
"learning_rate": 7.231261309286942e-05,
"loss": 4.0967,
"step": 1213000
},
{
"epoch": 2.28,
"learning_rate": 7.221876008919789e-05,
"loss": 4.1071,
"step": 1213500
},
{
"epoch": 2.28,
"learning_rate": 7.212490708552636e-05,
"loss": 4.1017,
"step": 1214000
},
{
"epoch": 2.28,
"learning_rate": 7.203105408185483e-05,
"loss": 4.1092,
"step": 1214500
},
{
"epoch": 2.28,
"learning_rate": 7.19372010781833e-05,
"loss": 4.0823,
"step": 1215000
},
{
"epoch": 2.28,
"learning_rate": 7.184334807451177e-05,
"loss": 4.1099,
"step": 1215500
},
{
"epoch": 2.28,
"learning_rate": 7.174949507084025e-05,
"loss": 4.093,
"step": 1216000
},
{
"epoch": 2.28,
"learning_rate": 7.165564206716871e-05,
"loss": 4.089,
"step": 1216500
},
{
"epoch": 2.28,
"learning_rate": 7.156178906349717e-05,
"loss": 4.08,
"step": 1217000
},
{
"epoch": 2.29,
"learning_rate": 7.146793605982565e-05,
"loss": 4.1275,
"step": 1217500
},
{
"epoch": 2.29,
"learning_rate": 7.137408305615413e-05,
"loss": 4.1133,
"step": 1218000
},
{
"epoch": 2.29,
"learning_rate": 7.128023005248259e-05,
"loss": 4.0937,
"step": 1218500
},
{
"epoch": 2.29,
"learning_rate": 7.118637704881106e-05,
"loss": 4.111,
"step": 1219000
},
{
"epoch": 2.29,
"learning_rate": 7.109252404513953e-05,
"loss": 4.1103,
"step": 1219500
},
{
"epoch": 2.29,
"learning_rate": 7.099867104146801e-05,
"loss": 4.1054,
"step": 1220000
},
{
"epoch": 2.29,
"learning_rate": 7.090481803779648e-05,
"loss": 4.0776,
"step": 1220500
},
{
"epoch": 2.29,
"learning_rate": 7.081096503412494e-05,
"loss": 4.0992,
"step": 1221000
},
{
"epoch": 2.29,
"learning_rate": 7.071711203045342e-05,
"loss": 4.0908,
"step": 1221500
},
{
"epoch": 2.29,
"learning_rate": 7.062325902678188e-05,
"loss": 4.0832,
"step": 1222000
},
{
"epoch": 2.29,
"learning_rate": 7.052940602311036e-05,
"loss": 4.1159,
"step": 1222500
},
{
"epoch": 2.3,
"learning_rate": 7.043555301943884e-05,
"loss": 4.0935,
"step": 1223000
},
{
"epoch": 2.3,
"learning_rate": 7.03417000157673e-05,
"loss": 4.0953,
"step": 1223500
},
{
"epoch": 2.3,
"learning_rate": 7.024784701209576e-05,
"loss": 4.1198,
"step": 1224000
},
{
"epoch": 2.3,
"learning_rate": 7.015399400842424e-05,
"loss": 4.078,
"step": 1224500
},
{
"epoch": 2.3,
"learning_rate": 7.006014100475272e-05,
"loss": 4.0921,
"step": 1225000
},
{
"epoch": 2.3,
"learning_rate": 6.996628800108118e-05,
"loss": 4.0981,
"step": 1225500
},
{
"epoch": 2.3,
"learning_rate": 6.987243499740965e-05,
"loss": 4.0983,
"step": 1226000
},
{
"epoch": 2.3,
"learning_rate": 6.977858199373812e-05,
"loss": 4.0922,
"step": 1226500
},
{
"epoch": 2.3,
"learning_rate": 6.968472899006659e-05,
"loss": 4.1017,
"step": 1227000
},
{
"epoch": 2.3,
"learning_rate": 6.959087598639506e-05,
"loss": 4.1147,
"step": 1227500
},
{
"epoch": 2.31,
"learning_rate": 6.949702298272353e-05,
"loss": 4.0916,
"step": 1228000
},
{
"epoch": 2.31,
"learning_rate": 6.9403169979052e-05,
"loss": 4.0987,
"step": 1228500
},
{
"epoch": 2.31,
"learning_rate": 6.930931697538047e-05,
"loss": 4.1038,
"step": 1229000
},
{
"epoch": 2.31,
"learning_rate": 6.921546397170895e-05,
"loss": 4.1036,
"step": 1229500
},
{
"epoch": 2.31,
"learning_rate": 6.912161096803742e-05,
"loss": 4.1167,
"step": 1230000
},
{
"epoch": 2.31,
"learning_rate": 6.902775796436589e-05,
"loss": 4.0901,
"step": 1230500
},
{
"epoch": 2.31,
"learning_rate": 6.893390496069435e-05,
"loss": 4.0888,
"step": 1231000
},
{
"epoch": 2.31,
"learning_rate": 6.884005195702283e-05,
"loss": 4.078,
"step": 1231500
},
{
"epoch": 2.31,
"learning_rate": 6.874619895335131e-05,
"loss": 4.0876,
"step": 1232000
},
{
"epoch": 2.31,
"learning_rate": 6.865234594967977e-05,
"loss": 4.0953,
"step": 1232500
},
{
"epoch": 2.31,
"learning_rate": 6.855849294600823e-05,
"loss": 4.1031,
"step": 1233000
},
{
"epoch": 2.32,
"learning_rate": 6.84646399423367e-05,
"loss": 4.0961,
"step": 1233500
},
{
"epoch": 2.32,
"learning_rate": 6.837078693866518e-05,
"loss": 4.0956,
"step": 1234000
},
{
"epoch": 2.32,
"learning_rate": 6.827693393499365e-05,
"loss": 4.1023,
"step": 1234500
},
{
"epoch": 2.32,
"learning_rate": 6.818308093132212e-05,
"loss": 4.0994,
"step": 1235000
},
{
"epoch": 2.32,
"learning_rate": 6.80892279276506e-05,
"loss": 4.1028,
"step": 1235500
},
{
"epoch": 2.32,
"learning_rate": 6.799537492397906e-05,
"loss": 4.0757,
"step": 1236000
},
{
"epoch": 2.32,
"learning_rate": 6.790152192030754e-05,
"loss": 4.1009,
"step": 1236500
},
{
"epoch": 2.32,
"learning_rate": 6.7807668916636e-05,
"loss": 4.0908,
"step": 1237000
},
{
"epoch": 2.32,
"learning_rate": 6.771381591296448e-05,
"loss": 4.1133,
"step": 1237500
},
{
"epoch": 2.32,
"learning_rate": 6.761996290929294e-05,
"loss": 4.1037,
"step": 1238000
},
{
"epoch": 2.32,
"learning_rate": 6.752610990562142e-05,
"loss": 4.0813,
"step": 1238500
},
{
"epoch": 2.33,
"learning_rate": 6.743225690194988e-05,
"loss": 4.1041,
"step": 1239000
},
{
"epoch": 2.33,
"learning_rate": 6.733840389827836e-05,
"loss": 4.1025,
"step": 1239500
},
{
"epoch": 2.33,
"learning_rate": 6.724455089460682e-05,
"loss": 4.0822,
"step": 1240000
},
{
"epoch": 2.33,
"learning_rate": 6.715069789093529e-05,
"loss": 4.1142,
"step": 1240500
},
{
"epoch": 2.33,
"learning_rate": 6.705684488726376e-05,
"loss": 4.0983,
"step": 1241000
},
{
"epoch": 2.33,
"learning_rate": 6.696299188359224e-05,
"loss": 4.0867,
"step": 1241500
},
{
"epoch": 2.33,
"learning_rate": 6.68691388799207e-05,
"loss": 4.0851,
"step": 1242000
},
{
"epoch": 2.33,
"learning_rate": 6.677528587624917e-05,
"loss": 4.1189,
"step": 1242500
},
{
"epoch": 2.33,
"learning_rate": 6.668143287257765e-05,
"loss": 4.0714,
"step": 1243000
},
{
"epoch": 2.33,
"learning_rate": 6.658757986890612e-05,
"loss": 4.0986,
"step": 1243500
},
{
"epoch": 2.34,
"learning_rate": 6.649372686523459e-05,
"loss": 4.1201,
"step": 1244000
},
{
"epoch": 2.34,
"learning_rate": 6.639987386156307e-05,
"loss": 4.1051,
"step": 1244500
},
{
"epoch": 2.34,
"learning_rate": 6.630602085789153e-05,
"loss": 4.0986,
"step": 1245000
},
{
"epoch": 2.34,
"learning_rate": 6.621216785422e-05,
"loss": 4.1061,
"step": 1245500
},
{
"epoch": 2.34,
"learning_rate": 6.611831485054847e-05,
"loss": 4.1017,
"step": 1246000
},
{
"epoch": 2.34,
"learning_rate": 6.602446184687695e-05,
"loss": 4.0975,
"step": 1246500
},
{
"epoch": 2.34,
"learning_rate": 6.593060884320541e-05,
"loss": 4.0997,
"step": 1247000
},
{
"epoch": 2.34,
"learning_rate": 6.583675583953388e-05,
"loss": 4.1023,
"step": 1247500
},
{
"epoch": 2.34,
"learning_rate": 6.574290283586235e-05,
"loss": 4.0847,
"step": 1248000
},
{
"epoch": 2.34,
"learning_rate": 6.564904983219083e-05,
"loss": 4.0966,
"step": 1248500
},
{
"epoch": 2.34,
"learning_rate": 6.55551968285193e-05,
"loss": 4.0887,
"step": 1249000
},
{
"epoch": 2.35,
"learning_rate": 6.546134382484776e-05,
"loss": 4.0982,
"step": 1249500
},
{
"epoch": 2.35,
"learning_rate": 6.536749082117624e-05,
"loss": 4.0799,
"step": 1250000
},
{
"epoch": 2.35,
"learning_rate": 6.52736378175047e-05,
"loss": 4.092,
"step": 1250500
},
{
"epoch": 2.35,
"learning_rate": 6.517978481383318e-05,
"loss": 4.0915,
"step": 1251000
},
{
"epoch": 2.35,
"learning_rate": 6.508593181016164e-05,
"loss": 4.0926,
"step": 1251500
},
{
"epoch": 2.35,
"learning_rate": 6.499207880649012e-05,
"loss": 4.0992,
"step": 1252000
},
{
"epoch": 2.35,
"learning_rate": 6.489822580281858e-05,
"loss": 4.1042,
"step": 1252500
},
{
"epoch": 2.35,
"learning_rate": 6.480437279914706e-05,
"loss": 4.0993,
"step": 1253000
},
{
"epoch": 2.35,
"learning_rate": 6.471051979547554e-05,
"loss": 4.0977,
"step": 1253500
},
{
"epoch": 2.35,
"learning_rate": 6.4616666791804e-05,
"loss": 4.1063,
"step": 1254000
},
{
"epoch": 2.35,
"learning_rate": 6.452281378813247e-05,
"loss": 4.1063,
"step": 1254500
},
{
"epoch": 2.36,
"learning_rate": 6.442896078446094e-05,
"loss": 4.0871,
"step": 1255000
},
{
"epoch": 2.36,
"learning_rate": 6.43351077807894e-05,
"loss": 4.0933,
"step": 1255500
},
{
"epoch": 2.36,
"learning_rate": 6.424125477711788e-05,
"loss": 4.0835,
"step": 1256000
},
{
"epoch": 2.36,
"learning_rate": 6.414740177344635e-05,
"loss": 4.1049,
"step": 1256500
},
{
"epoch": 2.36,
"learning_rate": 6.405354876977483e-05,
"loss": 4.0941,
"step": 1257000
},
{
"epoch": 2.36,
"learning_rate": 6.395969576610329e-05,
"loss": 4.1,
"step": 1257500
},
{
"epoch": 2.36,
"learning_rate": 6.386584276243177e-05,
"loss": 4.102,
"step": 1258000
},
{
"epoch": 2.36,
"learning_rate": 6.377198975876023e-05,
"loss": 4.0943,
"step": 1258500
},
{
"epoch": 2.36,
"learning_rate": 6.367813675508871e-05,
"loss": 4.1028,
"step": 1259000
},
{
"epoch": 2.36,
"learning_rate": 6.358428375141717e-05,
"loss": 4.0894,
"step": 1259500
},
{
"epoch": 2.37,
"learning_rate": 6.349043074774565e-05,
"loss": 4.0958,
"step": 1260000
},
{
"epoch": 2.37,
"learning_rate": 6.339657774407411e-05,
"loss": 4.1097,
"step": 1260500
},
{
"epoch": 2.37,
"learning_rate": 6.330272474040259e-05,
"loss": 4.1162,
"step": 1261000
},
{
"epoch": 2.37,
"learning_rate": 6.320887173673105e-05,
"loss": 4.0978,
"step": 1261500
},
{
"epoch": 2.37,
"learning_rate": 6.311501873305952e-05,
"loss": 4.0811,
"step": 1262000
},
{
"epoch": 2.37,
"learning_rate": 6.3021165729388e-05,
"loss": 4.0884,
"step": 1262500
},
{
"epoch": 2.37,
"learning_rate": 6.292731272571647e-05,
"loss": 4.0902,
"step": 1263000
},
{
"epoch": 2.37,
"learning_rate": 6.283345972204494e-05,
"loss": 4.0878,
"step": 1263500
},
{
"epoch": 2.37,
"learning_rate": 6.27396067183734e-05,
"loss": 4.0935,
"step": 1264000
},
{
"epoch": 2.37,
"learning_rate": 6.264575371470188e-05,
"loss": 4.1081,
"step": 1264500
},
{
"epoch": 2.37,
"learning_rate": 6.255190071103036e-05,
"loss": 4.112,
"step": 1265000
},
{
"epoch": 2.38,
"learning_rate": 6.245804770735882e-05,
"loss": 4.0864,
"step": 1265500
},
{
"epoch": 2.38,
"learning_rate": 6.23641947036873e-05,
"loss": 4.1276,
"step": 1266000
},
{
"epoch": 2.38,
"learning_rate": 6.227034170001576e-05,
"loss": 4.0768,
"step": 1266500
},
{
"epoch": 2.38,
"learning_rate": 6.217648869634424e-05,
"loss": 4.095,
"step": 1267000
},
{
"epoch": 2.38,
"learning_rate": 6.20826356926727e-05,
"loss": 4.1089,
"step": 1267500
},
{
"epoch": 2.38,
"learning_rate": 6.198878268900118e-05,
"loss": 4.092,
"step": 1268000
},
{
"epoch": 2.38,
"learning_rate": 6.189492968532964e-05,
"loss": 4.07,
"step": 1268500
},
{
"epoch": 2.38,
"learning_rate": 6.180107668165811e-05,
"loss": 4.1085,
"step": 1269000
},
{
"epoch": 2.38,
"learning_rate": 6.170722367798658e-05,
"loss": 4.0897,
"step": 1269500
},
{
"epoch": 2.38,
"learning_rate": 6.161337067431506e-05,
"loss": 4.1174,
"step": 1270000
},
{
"epoch": 2.38,
"learning_rate": 6.151951767064353e-05,
"loss": 4.0826,
"step": 1270500
},
{
"epoch": 2.39,
"learning_rate": 6.142566466697199e-05,
"loss": 4.0797,
"step": 1271000
},
{
"epoch": 2.39,
"learning_rate": 6.133181166330047e-05,
"loss": 4.0879,
"step": 1271500
},
{
"epoch": 2.39,
"learning_rate": 6.123795865962894e-05,
"loss": 4.1003,
"step": 1272000
},
{
"epoch": 2.39,
"learning_rate": 6.114410565595741e-05,
"loss": 4.0919,
"step": 1272500
},
{
"epoch": 2.39,
"learning_rate": 6.105025265228587e-05,
"loss": 4.1016,
"step": 1273000
},
{
"epoch": 2.39,
"learning_rate": 6.095639964861435e-05,
"loss": 4.0906,
"step": 1273500
},
{
"epoch": 2.39,
"learning_rate": 6.086254664494282e-05,
"loss": 4.1043,
"step": 1274000
},
{
"epoch": 2.39,
"learning_rate": 6.076869364127129e-05,
"loss": 4.0874,
"step": 1274500
},
{
"epoch": 2.39,
"learning_rate": 6.067484063759976e-05,
"loss": 4.0982,
"step": 1275000
},
{
"epoch": 2.39,
"learning_rate": 6.058098763392823e-05,
"loss": 4.0894,
"step": 1275500
},
{
"epoch": 2.4,
"learning_rate": 6.04871346302567e-05,
"loss": 4.0958,
"step": 1276000
},
{
"epoch": 2.4,
"learning_rate": 6.039328162658517e-05,
"loss": 4.0812,
"step": 1276500
},
{
"epoch": 2.4,
"learning_rate": 6.0299428622913644e-05,
"loss": 4.0904,
"step": 1277000
},
{
"epoch": 2.4,
"learning_rate": 6.0205575619242115e-05,
"loss": 4.0993,
"step": 1277500
},
{
"epoch": 2.4,
"learning_rate": 6.011172261557058e-05,
"loss": 4.0997,
"step": 1278000
},
{
"epoch": 2.4,
"learning_rate": 6.001786961189905e-05,
"loss": 4.1008,
"step": 1278500
},
{
"epoch": 2.4,
"learning_rate": 5.992401660822753e-05,
"loss": 4.1016,
"step": 1279000
},
{
"epoch": 2.4,
"learning_rate": 5.9830163604556e-05,
"loss": 4.0974,
"step": 1279500
},
{
"epoch": 2.4,
"learning_rate": 5.973631060088446e-05,
"loss": 4.0807,
"step": 1280000
},
{
"epoch": 2.4,
"learning_rate": 5.964245759721294e-05,
"loss": 4.0798,
"step": 1280500
},
{
"epoch": 2.4,
"learning_rate": 5.954860459354141e-05,
"loss": 4.0915,
"step": 1281000
},
{
"epoch": 2.41,
"learning_rate": 5.945475158986987e-05,
"loss": 4.077,
"step": 1281500
},
{
"epoch": 2.41,
"learning_rate": 5.9360898586198344e-05,
"loss": 4.1096,
"step": 1282000
},
{
"epoch": 2.41,
"learning_rate": 5.926704558252682e-05,
"loss": 4.0865,
"step": 1282500
},
{
"epoch": 2.41,
"learning_rate": 5.917319257885529e-05,
"loss": 4.083,
"step": 1283000
},
{
"epoch": 2.41,
"learning_rate": 5.9079339575183756e-05,
"loss": 4.0864,
"step": 1283500
},
{
"epoch": 2.41,
"learning_rate": 5.898548657151223e-05,
"loss": 4.0902,
"step": 1284000
},
{
"epoch": 2.41,
"learning_rate": 5.8891633567840704e-05,
"loss": 4.0884,
"step": 1284500
},
{
"epoch": 2.41,
"learning_rate": 5.879778056416917e-05,
"loss": 4.1099,
"step": 1285000
},
{
"epoch": 2.41,
"learning_rate": 5.870392756049764e-05,
"loss": 4.0829,
"step": 1285500
},
{
"epoch": 2.41,
"learning_rate": 5.8610074556826116e-05,
"loss": 4.0975,
"step": 1286000
},
{
"epoch": 2.41,
"learning_rate": 5.851622155315458e-05,
"loss": 4.1111,
"step": 1286500
},
{
"epoch": 2.42,
"learning_rate": 5.842236854948305e-05,
"loss": 4.103,
"step": 1287000
},
{
"epoch": 2.42,
"learning_rate": 5.832851554581153e-05,
"loss": 4.0883,
"step": 1287500
},
{
"epoch": 2.42,
"learning_rate": 5.823466254214e-05,
"loss": 4.0897,
"step": 1288000
},
{
"epoch": 2.42,
"learning_rate": 5.814080953846846e-05,
"loss": 4.0964,
"step": 1288500
},
{
"epoch": 2.42,
"learning_rate": 5.804695653479693e-05,
"loss": 4.0823,
"step": 1289000
},
{
"epoch": 2.42,
"learning_rate": 5.795310353112541e-05,
"loss": 4.0833,
"step": 1289500
},
{
"epoch": 2.42,
"learning_rate": 5.7859250527453874e-05,
"loss": 4.0981,
"step": 1290000
},
{
"epoch": 2.42,
"learning_rate": 5.7765397523782345e-05,
"loss": 4.0781,
"step": 1290500
},
{
"epoch": 2.42,
"learning_rate": 5.7671544520110815e-05,
"loss": 4.0967,
"step": 1291000
},
{
"epoch": 2.42,
"learning_rate": 5.7577691516439286e-05,
"loss": 4.0889,
"step": 1291500
},
{
"epoch": 2.43,
"learning_rate": 5.748383851276776e-05,
"loss": 4.0886,
"step": 1292000
},
{
"epoch": 2.43,
"learning_rate": 5.738998550909623e-05,
"loss": 4.0745,
"step": 1292500
},
{
"epoch": 2.43,
"learning_rate": 5.7296132505424705e-05,
"loss": 4.1027,
"step": 1293000
},
{
"epoch": 2.43,
"learning_rate": 5.720227950175317e-05,
"loss": 4.0981,
"step": 1293500
},
{
"epoch": 2.43,
"learning_rate": 5.710842649808164e-05,
"loss": 4.0851,
"step": 1294000
},
{
"epoch": 2.43,
"learning_rate": 5.701457349441011e-05,
"loss": 4.0905,
"step": 1294500
},
{
"epoch": 2.43,
"learning_rate": 5.692072049073858e-05,
"loss": 4.0839,
"step": 1295000
},
{
"epoch": 2.43,
"learning_rate": 5.682686748706705e-05,
"loss": 4.0818,
"step": 1295500
},
{
"epoch": 2.43,
"learning_rate": 5.673301448339552e-05,
"loss": 4.085,
"step": 1296000
},
{
"epoch": 2.43,
"learning_rate": 5.6639161479724e-05,
"loss": 4.0981,
"step": 1296500
},
{
"epoch": 2.43,
"learning_rate": 5.654530847605246e-05,
"loss": 4.0758,
"step": 1297000
},
{
"epoch": 2.44,
"learning_rate": 5.6451455472380934e-05,
"loss": 4.078,
"step": 1297500
},
{
"epoch": 2.44,
"learning_rate": 5.63576024687094e-05,
"loss": 4.0894,
"step": 1298000
},
{
"epoch": 2.44,
"learning_rate": 5.6263749465037875e-05,
"loss": 4.088,
"step": 1298500
},
{
"epoch": 2.44,
"learning_rate": 5.6169896461366346e-05,
"loss": 4.0978,
"step": 1299000
},
{
"epoch": 2.44,
"learning_rate": 5.6076043457694816e-05,
"loss": 4.0809,
"step": 1299500
},
{
"epoch": 2.44,
"learning_rate": 5.598219045402328e-05,
"loss": 4.0888,
"step": 1300000
},
{
"epoch": 2.44,
"learning_rate": 5.588833745035176e-05,
"loss": 4.1212,
"step": 1300500
},
{
"epoch": 2.44,
"learning_rate": 5.579448444668023e-05,
"loss": 4.0825,
"step": 1301000
},
{
"epoch": 2.44,
"learning_rate": 5.570063144300869e-05,
"loss": 4.1032,
"step": 1301500
},
{
"epoch": 2.44,
"learning_rate": 5.560677843933717e-05,
"loss": 4.0911,
"step": 1302000
},
{
"epoch": 2.44,
"learning_rate": 5.551292543566564e-05,
"loss": 4.0875,
"step": 1302500
},
{
"epoch": 2.45,
"learning_rate": 5.541907243199411e-05,
"loss": 4.0608,
"step": 1303000
},
{
"epoch": 2.45,
"learning_rate": 5.5325219428322575e-05,
"loss": 4.0953,
"step": 1303500
},
{
"epoch": 2.45,
"learning_rate": 5.523136642465105e-05,
"loss": 4.0945,
"step": 1304000
},
{
"epoch": 2.45,
"learning_rate": 5.513751342097952e-05,
"loss": 4.1076,
"step": 1304500
},
{
"epoch": 2.45,
"learning_rate": 5.5043660417307987e-05,
"loss": 4.094,
"step": 1305000
},
{
"epoch": 2.45,
"learning_rate": 5.4949807413636464e-05,
"loss": 4.0832,
"step": 1305500
},
{
"epoch": 2.45,
"learning_rate": 5.4855954409964935e-05,
"loss": 4.1009,
"step": 1306000
},
{
"epoch": 2.45,
"learning_rate": 5.47621014062934e-05,
"loss": 4.0724,
"step": 1306500
},
{
"epoch": 2.45,
"learning_rate": 5.466824840262187e-05,
"loss": 4.1011,
"step": 1307000
},
{
"epoch": 2.45,
"learning_rate": 5.4574395398950346e-05,
"loss": 4.101,
"step": 1307500
},
{
"epoch": 2.46,
"learning_rate": 5.448054239527882e-05,
"loss": 4.0842,
"step": 1308000
},
{
"epoch": 2.46,
"learning_rate": 5.438668939160728e-05,
"loss": 4.0852,
"step": 1308500
},
{
"epoch": 2.46,
"learning_rate": 5.429283638793575e-05,
"loss": 4.0981,
"step": 1309000
},
{
"epoch": 2.46,
"learning_rate": 5.419898338426423e-05,
"loss": 4.0998,
"step": 1309500
},
{
"epoch": 2.46,
"learning_rate": 5.410513038059269e-05,
"loss": 4.0701,
"step": 1310000
},
{
"epoch": 2.46,
"learning_rate": 5.4011277376921164e-05,
"loss": 4.0971,
"step": 1310500
},
{
"epoch": 2.46,
"learning_rate": 5.391742437324964e-05,
"loss": 4.0919,
"step": 1311000
},
{
"epoch": 2.46,
"learning_rate": 5.382357136957811e-05,
"loss": 4.1148,
"step": 1311500
},
{
"epoch": 2.46,
"learning_rate": 5.3729718365906575e-05,
"loss": 4.0948,
"step": 1312000
},
{
"epoch": 2.46,
"learning_rate": 5.3635865362235046e-05,
"loss": 4.0855,
"step": 1312500
},
{
"epoch": 2.46,
"learning_rate": 5.3542012358563524e-05,
"loss": 4.0918,
"step": 1313000
},
{
"epoch": 2.47,
"learning_rate": 5.344815935489199e-05,
"loss": 4.0908,
"step": 1313500
},
{
"epoch": 2.47,
"learning_rate": 5.335430635122046e-05,
"loss": 4.1036,
"step": 1314000
},
{
"epoch": 2.47,
"learning_rate": 5.3260453347548935e-05,
"loss": 4.0688,
"step": 1314500
},
{
"epoch": 2.47,
"learning_rate": 5.31666003438774e-05,
"loss": 4.0748,
"step": 1315000
},
{
"epoch": 2.47,
"learning_rate": 5.307274734020587e-05,
"loss": 4.0831,
"step": 1315500
},
{
"epoch": 2.47,
"learning_rate": 5.297889433653434e-05,
"loss": 4.0808,
"step": 1316000
},
{
"epoch": 2.47,
"learning_rate": 5.288504133286282e-05,
"loss": 4.0749,
"step": 1316500
},
{
"epoch": 2.47,
"learning_rate": 5.279118832919128e-05,
"loss": 4.0916,
"step": 1317000
},
{
"epoch": 2.47,
"learning_rate": 5.269733532551975e-05,
"loss": 4.0932,
"step": 1317500
},
{
"epoch": 2.47,
"learning_rate": 5.260348232184822e-05,
"loss": 4.089,
"step": 1318000
},
{
"epoch": 2.47,
"learning_rate": 5.2509629318176694e-05,
"loss": 4.0886,
"step": 1318500
},
{
"epoch": 2.48,
"learning_rate": 5.2415776314505164e-05,
"loss": 4.085,
"step": 1319000
},
{
"epoch": 2.48,
"learning_rate": 5.2321923310833635e-05,
"loss": 4.0798,
"step": 1319500
},
{
"epoch": 2.48,
"learning_rate": 5.222807030716211e-05,
"loss": 4.0888,
"step": 1320000
},
{
"epoch": 2.48,
"learning_rate": 5.2134217303490576e-05,
"loss": 4.0912,
"step": 1320500
},
{
"epoch": 2.48,
"learning_rate": 5.204036429981905e-05,
"loss": 4.1052,
"step": 1321000
},
{
"epoch": 2.48,
"learning_rate": 5.194651129614751e-05,
"loss": 4.1082,
"step": 1321500
},
{
"epoch": 2.48,
"learning_rate": 5.185265829247599e-05,
"loss": 4.0893,
"step": 1322000
},
{
"epoch": 2.48,
"learning_rate": 5.175880528880446e-05,
"loss": 4.1092,
"step": 1322500
},
{
"epoch": 2.48,
"learning_rate": 5.166495228513293e-05,
"loss": 4.0737,
"step": 1323000
},
{
"epoch": 2.48,
"learning_rate": 5.15710992814614e-05,
"loss": 4.0798,
"step": 1323500
},
{
"epoch": 2.49,
"learning_rate": 5.147724627778987e-05,
"loss": 4.109,
"step": 1324000
},
{
"epoch": 2.49,
"learning_rate": 5.138339327411834e-05,
"loss": 4.0857,
"step": 1324500
},
{
"epoch": 2.49,
"learning_rate": 5.1289540270446805e-05,
"loss": 4.1154,
"step": 1325000
},
{
"epoch": 2.49,
"learning_rate": 5.119568726677528e-05,
"loss": 4.0871,
"step": 1325500
},
{
"epoch": 2.49,
"learning_rate": 5.1101834263103753e-05,
"loss": 4.0941,
"step": 1326000
},
{
"epoch": 2.49,
"learning_rate": 5.1007981259432224e-05,
"loss": 4.0885,
"step": 1326500
},
{
"epoch": 2.49,
"learning_rate": 5.091412825576069e-05,
"loss": 4.0857,
"step": 1327000
},
{
"epoch": 2.49,
"learning_rate": 5.0820275252089165e-05,
"loss": 4.0746,
"step": 1327500
},
{
"epoch": 2.49,
"learning_rate": 5.0726422248417636e-05,
"loss": 4.064,
"step": 1328000
},
{
"epoch": 2.49,
"learning_rate": 5.06325692447461e-05,
"loss": 4.0834,
"step": 1328500
},
{
"epoch": 2.49,
"learning_rate": 5.053871624107458e-05,
"loss": 4.0884,
"step": 1329000
},
{
"epoch": 2.5,
"learning_rate": 5.044486323740305e-05,
"loss": 4.0873,
"step": 1329500
},
{
"epoch": 2.5,
"learning_rate": 5.035101023373151e-05,
"loss": 4.0933,
"step": 1330000
},
{
"epoch": 2.5,
"learning_rate": 5.025715723005998e-05,
"loss": 4.0932,
"step": 1330500
},
{
"epoch": 2.5,
"learning_rate": 5.016330422638846e-05,
"loss": 4.0799,
"step": 1331000
},
{
"epoch": 2.5,
"learning_rate": 5.006945122271693e-05,
"loss": 4.0742,
"step": 1331500
},
{
"epoch": 2.5,
"learning_rate": 4.9975598219045394e-05,
"loss": 4.0959,
"step": 1332000
},
{
"epoch": 2.5,
"learning_rate": 4.988174521537387e-05,
"loss": 4.087,
"step": 1332500
},
{
"epoch": 2.5,
"learning_rate": 4.978789221170234e-05,
"loss": 4.0873,
"step": 1333000
},
{
"epoch": 2.5,
"learning_rate": 4.9694039208030806e-05,
"loss": 4.0715,
"step": 1333500
},
{
"epoch": 2.5,
"learning_rate": 4.960018620435928e-05,
"loss": 4.0868,
"step": 1334000
},
{
"epoch": 2.5,
"learning_rate": 4.9506333200687754e-05,
"loss": 4.0726,
"step": 1334500
},
{
"epoch": 2.51,
"learning_rate": 4.941248019701622e-05,
"loss": 4.0785,
"step": 1335000
},
{
"epoch": 2.51,
"learning_rate": 4.931862719334469e-05,
"loss": 4.0701,
"step": 1335500
},
{
"epoch": 2.51,
"learning_rate": 4.922477418967316e-05,
"loss": 4.0798,
"step": 1336000
},
{
"epoch": 2.51,
"learning_rate": 4.913092118600164e-05,
"loss": 4.0938,
"step": 1336500
},
{
"epoch": 2.51,
"learning_rate": 4.90370681823301e-05,
"loss": 4.0849,
"step": 1337000
},
{
"epoch": 2.51,
"learning_rate": 4.894321517865857e-05,
"loss": 4.0584,
"step": 1337500
},
{
"epoch": 2.51,
"learning_rate": 4.884936217498705e-05,
"loss": 4.0809,
"step": 1338000
},
{
"epoch": 2.51,
"learning_rate": 4.875550917131551e-05,
"loss": 4.0834,
"step": 1338500
},
{
"epoch": 2.51,
"learning_rate": 4.866165616764398e-05,
"loss": 4.1006,
"step": 1339000
},
{
"epoch": 2.51,
"learning_rate": 4.8567803163972454e-05,
"loss": 4.0869,
"step": 1339500
},
{
"epoch": 2.52,
"learning_rate": 4.847395016030093e-05,
"loss": 4.0998,
"step": 1340000
},
{
"epoch": 2.52,
"learning_rate": 4.8380097156629395e-05,
"loss": 4.0782,
"step": 1340500
},
{
"epoch": 2.52,
"learning_rate": 4.8286244152957866e-05,
"loss": 4.0942,
"step": 1341000
},
{
"epoch": 2.52,
"learning_rate": 4.819239114928634e-05,
"loss": 4.1044,
"step": 1341500
},
{
"epoch": 2.52,
"learning_rate": 4.809853814561481e-05,
"loss": 4.0865,
"step": 1342000
},
{
"epoch": 2.52,
"learning_rate": 4.800468514194328e-05,
"loss": 4.0917,
"step": 1342500
},
{
"epoch": 2.52,
"learning_rate": 4.791083213827175e-05,
"loss": 4.0874,
"step": 1343000
},
{
"epoch": 2.52,
"learning_rate": 4.781697913460022e-05,
"loss": 4.0948,
"step": 1343500
},
{
"epoch": 2.52,
"learning_rate": 4.772312613092869e-05,
"loss": 4.082,
"step": 1344000
},
{
"epoch": 2.52,
"learning_rate": 4.762927312725716e-05,
"loss": 4.0872,
"step": 1344500
},
{
"epoch": 2.52,
"learning_rate": 4.753542012358564e-05,
"loss": 4.0923,
"step": 1345000
},
{
"epoch": 2.53,
"learning_rate": 4.74415671199141e-05,
"loss": 4.094,
"step": 1345500
},
{
"epoch": 2.53,
"learning_rate": 4.734771411624257e-05,
"loss": 4.1156,
"step": 1346000
},
{
"epoch": 2.53,
"learning_rate": 4.725386111257104e-05,
"loss": 4.0886,
"step": 1346500
},
{
"epoch": 2.53,
"learning_rate": 4.7160008108899514e-05,
"loss": 4.1015,
"step": 1347000
},
{
"epoch": 2.53,
"learning_rate": 4.7066155105227984e-05,
"loss": 4.0873,
"step": 1347500
},
{
"epoch": 2.53,
"learning_rate": 4.6972302101556455e-05,
"loss": 4.0725,
"step": 1348000
},
{
"epoch": 2.53,
"learning_rate": 4.687844909788492e-05,
"loss": 4.077,
"step": 1348500
},
{
"epoch": 2.53,
"learning_rate": 4.6784596094213396e-05,
"loss": 4.0915,
"step": 1349000
},
{
"epoch": 2.53,
"learning_rate": 4.669074309054187e-05,
"loss": 4.0733,
"step": 1349500
},
{
"epoch": 2.53,
"learning_rate": 4.659689008687033e-05,
"loss": 4.0878,
"step": 1350000
},
{
"epoch": 2.53,
"learning_rate": 4.650303708319881e-05,
"loss": 4.0922,
"step": 1350500
},
{
"epoch": 2.54,
"learning_rate": 4.640918407952728e-05,
"loss": 4.0829,
"step": 1351000
},
{
"epoch": 2.54,
"learning_rate": 4.631533107585575e-05,
"loss": 4.0832,
"step": 1351500
},
{
"epoch": 2.54,
"learning_rate": 4.622147807218421e-05,
"loss": 4.0796,
"step": 1352000
},
{
"epoch": 2.54,
"learning_rate": 4.612762506851269e-05,
"loss": 4.0716,
"step": 1352500
},
{
"epoch": 2.54,
"learning_rate": 4.603377206484116e-05,
"loss": 4.0947,
"step": 1353000
},
{
"epoch": 2.54,
"learning_rate": 4.5939919061169625e-05,
"loss": 4.0737,
"step": 1353500
},
{
"epoch": 2.54,
"learning_rate": 4.58460660574981e-05,
"loss": 4.0652,
"step": 1354000
},
{
"epoch": 2.54,
"learning_rate": 4.575221305382657e-05,
"loss": 4.0775,
"step": 1354500
},
{
"epoch": 2.54,
"learning_rate": 4.5658360050155044e-05,
"loss": 4.0734,
"step": 1355000
},
{
"epoch": 2.54,
"learning_rate": 4.556450704648351e-05,
"loss": 4.0891,
"step": 1355500
},
{
"epoch": 2.55,
"learning_rate": 4.5470654042811985e-05,
"loss": 4.0902,
"step": 1356000
},
{
"epoch": 2.55,
"learning_rate": 4.5376801039140456e-05,
"loss": 4.0873,
"step": 1356500
},
{
"epoch": 2.55,
"learning_rate": 4.528294803546892e-05,
"loss": 4.1043,
"step": 1357000
},
{
"epoch": 2.55,
"learning_rate": 4.518909503179739e-05,
"loss": 4.0743,
"step": 1357500
},
{
"epoch": 2.55,
"learning_rate": 4.509524202812587e-05,
"loss": 4.0598,
"step": 1358000
},
{
"epoch": 2.55,
"learning_rate": 4.500138902445433e-05,
"loss": 4.0914,
"step": 1358500
},
{
"epoch": 2.55,
"learning_rate": 4.49075360207828e-05,
"loss": 4.0748,
"step": 1359000
},
{
"epoch": 2.55,
"learning_rate": 4.481368301711128e-05,
"loss": 4.091,
"step": 1359500
},
{
"epoch": 2.55,
"learning_rate": 4.471983001343975e-05,
"loss": 4.0587,
"step": 1360000
},
{
"epoch": 2.55,
"learning_rate": 4.4625977009768214e-05,
"loss": 4.0886,
"step": 1360500
},
{
"epoch": 2.55,
"learning_rate": 4.4532124006096685e-05,
"loss": 4.0904,
"step": 1361000
},
{
"epoch": 2.56,
"learning_rate": 4.443827100242516e-05,
"loss": 4.0911,
"step": 1361500
},
{
"epoch": 2.56,
"learning_rate": 4.4344417998753626e-05,
"loss": 4.0815,
"step": 1362000
},
{
"epoch": 2.56,
"learning_rate": 4.42505649950821e-05,
"loss": 4.0724,
"step": 1362500
},
{
"epoch": 2.56,
"learning_rate": 4.4156711991410574e-05,
"loss": 4.0758,
"step": 1363000
},
{
"epoch": 2.56,
"learning_rate": 4.4062858987739045e-05,
"loss": 4.0822,
"step": 1363500
},
{
"epoch": 2.56,
"learning_rate": 4.396900598406751e-05,
"loss": 4.0769,
"step": 1364000
},
{
"epoch": 2.56,
"learning_rate": 4.387515298039598e-05,
"loss": 4.067,
"step": 1364500
},
{
"epoch": 2.56,
"learning_rate": 4.3781299976724457e-05,
"loss": 4.0907,
"step": 1365000
},
{
"epoch": 2.56,
"learning_rate": 4.368744697305292e-05,
"loss": 4.0809,
"step": 1365500
},
{
"epoch": 2.56,
"learning_rate": 4.359359396938139e-05,
"loss": 4.0687,
"step": 1366000
},
{
"epoch": 2.57,
"learning_rate": 4.349974096570986e-05,
"loss": 4.078,
"step": 1366500
},
{
"epoch": 2.57,
"learning_rate": 4.340588796203833e-05,
"loss": 4.0769,
"step": 1367000
},
{
"epoch": 2.57,
"learning_rate": 4.33120349583668e-05,
"loss": 4.0747,
"step": 1367500
},
{
"epoch": 2.57,
"learning_rate": 4.3218181954695274e-05,
"loss": 4.0935,
"step": 1368000
},
{
"epoch": 2.57,
"learning_rate": 4.312432895102375e-05,
"loss": 4.0849,
"step": 1368500
},
{
"epoch": 2.57,
"learning_rate": 4.3030475947352215e-05,
"loss": 4.0822,
"step": 1369000
},
{
"epoch": 2.57,
"learning_rate": 4.2936622943680686e-05,
"loss": 4.0804,
"step": 1369500
},
{
"epoch": 2.57,
"learning_rate": 4.2842769940009156e-05,
"loss": 4.0616,
"step": 1370000
},
{
"epoch": 2.57,
"learning_rate": 4.274891693633763e-05,
"loss": 4.0836,
"step": 1370500
},
{
"epoch": 2.57,
"learning_rate": 4.26550639326661e-05,
"loss": 4.0786,
"step": 1371000
},
{
"epoch": 2.57,
"learning_rate": 4.256121092899457e-05,
"loss": 4.0962,
"step": 1371500
},
{
"epoch": 2.58,
"learning_rate": 4.2467357925323046e-05,
"loss": 4.0542,
"step": 1372000
},
{
"epoch": 2.58,
"learning_rate": 4.237350492165151e-05,
"loss": 4.066,
"step": 1372500
},
{
"epoch": 2.58,
"learning_rate": 4.227965191797998e-05,
"loss": 4.0942,
"step": 1373000
},
{
"epoch": 2.58,
"learning_rate": 4.2185798914308444e-05,
"loss": 4.1004,
"step": 1373500
},
{
"epoch": 2.58,
"learning_rate": 4.209194591063692e-05,
"loss": 4.0927,
"step": 1374000
},
{
"epoch": 2.58,
"learning_rate": 4.199809290696539e-05,
"loss": 4.0715,
"step": 1374500
},
{
"epoch": 2.58,
"learning_rate": 4.190423990329386e-05,
"loss": 4.0849,
"step": 1375000
},
{
"epoch": 2.58,
"learning_rate": 4.1810386899622327e-05,
"loss": 4.0863,
"step": 1375500
},
{
"epoch": 2.58,
"learning_rate": 4.1716533895950804e-05,
"loss": 4.0774,
"step": 1376000
},
{
"epoch": 2.58,
"learning_rate": 4.1622680892279275e-05,
"loss": 4.0682,
"step": 1376500
},
{
"epoch": 2.58,
"learning_rate": 4.152882788860774e-05,
"loss": 4.0664,
"step": 1377000
},
{
"epoch": 2.59,
"learning_rate": 4.1434974884936216e-05,
"loss": 4.0759,
"step": 1377500
},
{
"epoch": 2.59,
"learning_rate": 4.1341121881264686e-05,
"loss": 4.0804,
"step": 1378000
},
{
"epoch": 2.59,
"learning_rate": 4.124726887759315e-05,
"loss": 4.0909,
"step": 1378500
},
{
"epoch": 2.59,
"learning_rate": 4.115341587392162e-05,
"loss": 4.0613,
"step": 1379000
},
{
"epoch": 2.59,
"learning_rate": 4.10595628702501e-05,
"loss": 4.0658,
"step": 1379500
},
{
"epoch": 2.59,
"learning_rate": 4.096570986657857e-05,
"loss": 4.0913,
"step": 1380000
},
{
"epoch": 2.59,
"learning_rate": 4.087185686290703e-05,
"loss": 4.0913,
"step": 1380500
},
{
"epoch": 2.59,
"learning_rate": 4.077800385923551e-05,
"loss": 4.0813,
"step": 1381000
},
{
"epoch": 2.59,
"learning_rate": 4.068415085556398e-05,
"loss": 4.0881,
"step": 1381500
},
{
"epoch": 2.59,
"learning_rate": 4.0590297851892445e-05,
"loss": 4.0741,
"step": 1382000
},
{
"epoch": 2.6,
"learning_rate": 4.0496444848220915e-05,
"loss": 4.0748,
"step": 1382500
},
{
"epoch": 2.6,
"learning_rate": 4.040259184454939e-05,
"loss": 4.0922,
"step": 1383000
},
{
"epoch": 2.6,
"learning_rate": 4.0308738840877864e-05,
"loss": 4.068,
"step": 1383500
},
{
"epoch": 2.6,
"learning_rate": 4.021488583720633e-05,
"loss": 4.1101,
"step": 1384000
},
{
"epoch": 2.6,
"learning_rate": 4.01210328335348e-05,
"loss": 4.0935,
"step": 1384500
},
{
"epoch": 2.6,
"learning_rate": 4.0027179829863275e-05,
"loss": 4.0758,
"step": 1385000
},
{
"epoch": 2.6,
"learning_rate": 3.993332682619174e-05,
"loss": 4.0839,
"step": 1385500
},
{
"epoch": 2.6,
"learning_rate": 3.983947382252021e-05,
"loss": 4.1031,
"step": 1386000
},
{
"epoch": 2.6,
"learning_rate": 3.974562081884869e-05,
"loss": 4.0771,
"step": 1386500
},
{
"epoch": 2.6,
"learning_rate": 3.965176781517715e-05,
"loss": 4.0786,
"step": 1387000
},
{
"epoch": 2.6,
"learning_rate": 3.955791481150562e-05,
"loss": 4.0738,
"step": 1387500
},
{
"epoch": 2.61,
"learning_rate": 3.946406180783409e-05,
"loss": 4.072,
"step": 1388000
},
{
"epoch": 2.61,
"learning_rate": 3.937020880416257e-05,
"loss": 4.091,
"step": 1388500
},
{
"epoch": 2.61,
"learning_rate": 3.9276355800491034e-05,
"loss": 4.088,
"step": 1389000
},
{
"epoch": 2.61,
"learning_rate": 3.9182502796819504e-05,
"loss": 4.0842,
"step": 1389500
},
{
"epoch": 2.61,
"learning_rate": 3.908864979314798e-05,
"loss": 4.0651,
"step": 1390000
},
{
"epoch": 2.61,
"learning_rate": 3.8994796789476446e-05,
"loss": 4.0968,
"step": 1390500
},
{
"epoch": 2.61,
"learning_rate": 3.8900943785804916e-05,
"loss": 4.06,
"step": 1391000
},
{
"epoch": 2.61,
"learning_rate": 3.880709078213339e-05,
"loss": 4.0594,
"step": 1391500
},
{
"epoch": 2.61,
"learning_rate": 3.8713237778461864e-05,
"loss": 4.0864,
"step": 1392000
},
{
"epoch": 2.61,
"learning_rate": 3.861938477479033e-05,
"loss": 4.083,
"step": 1392500
},
{
"epoch": 2.61,
"learning_rate": 3.85255317711188e-05,
"loss": 4.0956,
"step": 1393000
},
{
"epoch": 2.62,
"learning_rate": 3.843167876744726e-05,
"loss": 4.0824,
"step": 1393500
},
{
"epoch": 2.62,
"learning_rate": 3.833782576377574e-05,
"loss": 4.0737,
"step": 1394000
},
{
"epoch": 2.62,
"learning_rate": 3.824397276010421e-05,
"loss": 4.0801,
"step": 1394500
},
{
"epoch": 2.62,
"learning_rate": 3.815011975643268e-05,
"loss": 4.0845,
"step": 1395000
},
{
"epoch": 2.62,
"learning_rate": 3.805626675276115e-05,
"loss": 4.0736,
"step": 1395500
},
{
"epoch": 2.62,
"learning_rate": 3.796241374908962e-05,
"loss": 4.0924,
"step": 1396000
},
{
"epoch": 2.62,
"learning_rate": 3.7868560745418093e-05,
"loss": 4.0812,
"step": 1396500
},
{
"epoch": 2.62,
"learning_rate": 3.777470774174656e-05,
"loss": 4.0882,
"step": 1397000
},
{
"epoch": 2.62,
"learning_rate": 3.7680854738075035e-05,
"loss": 4.0813,
"step": 1397500
},
{
"epoch": 2.62,
"learning_rate": 3.7587001734403505e-05,
"loss": 4.0796,
"step": 1398000
},
{
"epoch": 2.63,
"learning_rate": 3.7493148730731976e-05,
"loss": 4.0755,
"step": 1398500
},
{
"epoch": 2.63,
"learning_rate": 3.739929572706045e-05,
"loss": 4.0857,
"step": 1399000
},
{
"epoch": 2.63,
"learning_rate": 3.730544272338892e-05,
"loss": 4.0851,
"step": 1399500
},
{
"epoch": 2.63,
"learning_rate": 3.721158971971739e-05,
"loss": 4.0766,
"step": 1400000
},
{
"epoch": 2.63,
"learning_rate": 3.711773671604586e-05,
"loss": 4.0766,
"step": 1400500
},
{
"epoch": 2.63,
"learning_rate": 3.702388371237433e-05,
"loss": 4.0816,
"step": 1401000
},
{
"epoch": 2.63,
"learning_rate": 3.69300307087028e-05,
"loss": 4.0744,
"step": 1401500
},
{
"epoch": 2.63,
"learning_rate": 3.6836177705031264e-05,
"loss": 4.0843,
"step": 1402000
},
{
"epoch": 2.63,
"learning_rate": 3.674232470135974e-05,
"loss": 4.0984,
"step": 1402500
},
{
"epoch": 2.63,
"learning_rate": 3.664847169768821e-05,
"loss": 4.0624,
"step": 1403000
},
{
"epoch": 2.63,
"learning_rate": 3.655461869401668e-05,
"loss": 4.0806,
"step": 1403500
},
{
"epoch": 2.64,
"learning_rate": 3.646076569034515e-05,
"loss": 4.0838,
"step": 1404000
},
{
"epoch": 2.64,
"learning_rate": 3.636691268667362e-05,
"loss": 4.0784,
"step": 1404500
},
{
"epoch": 2.64,
"learning_rate": 3.6273059683002094e-05,
"loss": 4.0847,
"step": 1405000
},
{
"epoch": 2.64,
"learning_rate": 3.617920667933056e-05,
"loss": 4.0783,
"step": 1405500
},
{
"epoch": 2.64,
"learning_rate": 3.6085353675659036e-05,
"loss": 4.0807,
"step": 1406000
},
{
"epoch": 2.64,
"learning_rate": 3.59915006719875e-05,
"loss": 4.0524,
"step": 1406500
},
{
"epoch": 2.64,
"learning_rate": 3.589764766831598e-05,
"loss": 4.0808,
"step": 1407000
},
{
"epoch": 2.64,
"learning_rate": 3.580379466464445e-05,
"loss": 4.0714,
"step": 1407500
},
{
"epoch": 2.64,
"learning_rate": 3.570994166097291e-05,
"loss": 4.0916,
"step": 1408000
},
{
"epoch": 2.64,
"learning_rate": 3.561608865730139e-05,
"loss": 4.0711,
"step": 1408500
},
{
"epoch": 2.64,
"learning_rate": 3.552223565362985e-05,
"loss": 4.0768,
"step": 1409000
},
{
"epoch": 2.65,
"learning_rate": 3.542838264995833e-05,
"loss": 4.0762,
"step": 1409500
},
{
"epoch": 2.65,
"learning_rate": 3.5334529646286794e-05,
"loss": 4.1018,
"step": 1410000
},
{
"epoch": 2.65,
"learning_rate": 3.5240676642615265e-05,
"loss": 4.0837,
"step": 1410500
},
{
"epoch": 2.65,
"learning_rate": 3.5146823638943735e-05,
"loss": 4.0754,
"step": 1411000
},
{
"epoch": 2.65,
"learning_rate": 3.5052970635272206e-05,
"loss": 4.0883,
"step": 1411500
},
{
"epoch": 2.65,
"learning_rate": 3.495911763160068e-05,
"loss": 4.073,
"step": 1412000
},
{
"epoch": 2.65,
"learning_rate": 3.486526462792915e-05,
"loss": 4.0878,
"step": 1412500
},
{
"epoch": 2.65,
"learning_rate": 3.477141162425762e-05,
"loss": 4.0713,
"step": 1413000
},
{
"epoch": 2.65,
"learning_rate": 3.467755862058609e-05,
"loss": 4.0709,
"step": 1413500
},
{
"epoch": 2.65,
"learning_rate": 3.458370561691456e-05,
"loss": 4.083,
"step": 1414000
},
{
"epoch": 2.66,
"learning_rate": 3.448985261324303e-05,
"loss": 4.0697,
"step": 1414500
},
{
"epoch": 2.66,
"learning_rate": 3.43959996095715e-05,
"loss": 4.0878,
"step": 1415000
},
{
"epoch": 2.66,
"learning_rate": 3.430214660589997e-05,
"loss": 4.075,
"step": 1415500
},
{
"epoch": 2.66,
"learning_rate": 3.420829360222844e-05,
"loss": 4.085,
"step": 1416000
},
{
"epoch": 2.66,
"learning_rate": 3.411444059855691e-05,
"loss": 4.0705,
"step": 1416500
},
{
"epoch": 2.66,
"learning_rate": 3.402058759488538e-05,
"loss": 4.0619,
"step": 1417000
},
{
"epoch": 2.66,
"learning_rate": 3.3926734591213854e-05,
"loss": 4.0675,
"step": 1417500
},
{
"epoch": 2.66,
"learning_rate": 3.3832881587542324e-05,
"loss": 4.0697,
"step": 1418000
},
{
"epoch": 2.66,
"learning_rate": 3.3739028583870795e-05,
"loss": 4.0825,
"step": 1418500
},
{
"epoch": 2.66,
"learning_rate": 3.3645175580199265e-05,
"loss": 4.0742,
"step": 1419000
},
{
"epoch": 2.66,
"learning_rate": 3.3551322576527736e-05,
"loss": 4.082,
"step": 1419500
},
{
"epoch": 2.67,
"learning_rate": 3.345746957285621e-05,
"loss": 4.0752,
"step": 1420000
},
{
"epoch": 2.67,
"learning_rate": 3.336361656918468e-05,
"loss": 4.0694,
"step": 1420500
},
{
"epoch": 2.67,
"learning_rate": 3.326976356551315e-05,
"loss": 4.0844,
"step": 1421000
},
{
"epoch": 2.67,
"learning_rate": 3.317591056184162e-05,
"loss": 4.0892,
"step": 1421500
},
{
"epoch": 2.67,
"learning_rate": 3.308205755817009e-05,
"loss": 4.1071,
"step": 1422000
},
{
"epoch": 2.67,
"learning_rate": 3.298820455449856e-05,
"loss": 4.0752,
"step": 1422500
},
{
"epoch": 2.67,
"learning_rate": 3.289435155082703e-05,
"loss": 4.0692,
"step": 1423000
},
{
"epoch": 2.67,
"learning_rate": 3.28004985471555e-05,
"loss": 4.0879,
"step": 1423500
},
{
"epoch": 2.67,
"learning_rate": 3.270664554348397e-05,
"loss": 4.0688,
"step": 1424000
},
{
"epoch": 2.67,
"learning_rate": 3.261279253981244e-05,
"loss": 4.0851,
"step": 1424500
},
{
"epoch": 2.67,
"learning_rate": 3.251893953614091e-05,
"loss": 4.0762,
"step": 1425000
},
{
"epoch": 2.68,
"learning_rate": 3.2425086532469384e-05,
"loss": 4.0752,
"step": 1425500
},
{
"epoch": 2.68,
"learning_rate": 3.2331233528797854e-05,
"loss": 4.0928,
"step": 1426000
},
{
"epoch": 2.68,
"learning_rate": 3.2237380525126325e-05,
"loss": 4.0672,
"step": 1426500
},
{
"epoch": 2.68,
"learning_rate": 3.2143527521454796e-05,
"loss": 4.0802,
"step": 1427000
},
{
"epoch": 2.68,
"learning_rate": 3.2049674517783266e-05,
"loss": 4.0856,
"step": 1427500
},
{
"epoch": 2.68,
"learning_rate": 3.195582151411173e-05,
"loss": 4.0751,
"step": 1428000
},
{
"epoch": 2.68,
"learning_rate": 3.186196851044021e-05,
"loss": 4.0848,
"step": 1428500
},
{
"epoch": 2.68,
"learning_rate": 3.176811550676867e-05,
"loss": 4.0826,
"step": 1429000
},
{
"epoch": 2.68,
"learning_rate": 3.167426250309715e-05,
"loss": 4.0575,
"step": 1429500
},
{
"epoch": 2.68,
"learning_rate": 3.158040949942562e-05,
"loss": 4.0846,
"step": 1430000
},
{
"epoch": 2.69,
"learning_rate": 3.1486556495754083e-05,
"loss": 4.0905,
"step": 1430500
},
{
"epoch": 2.69,
"learning_rate": 3.139270349208256e-05,
"loss": 4.0635,
"step": 1431000
},
{
"epoch": 2.69,
"learning_rate": 3.1298850488411025e-05,
"loss": 4.0681,
"step": 1431500
},
{
"epoch": 2.69,
"learning_rate": 3.12049974847395e-05,
"loss": 4.0833,
"step": 1432000
},
{
"epoch": 2.69,
"learning_rate": 3.1111144481067966e-05,
"loss": 4.0834,
"step": 1432500
},
{
"epoch": 2.69,
"learning_rate": 3.1017291477396443e-05,
"loss": 4.0623,
"step": 1433000
},
{
"epoch": 2.69,
"learning_rate": 3.0923438473724914e-05,
"loss": 4.0842,
"step": 1433500
},
{
"epoch": 2.69,
"learning_rate": 3.082958547005338e-05,
"loss": 4.0805,
"step": 1434000
},
{
"epoch": 2.69,
"learning_rate": 3.0735732466381855e-05,
"loss": 4.0886,
"step": 1434500
},
{
"epoch": 2.69,
"learning_rate": 3.064187946271032e-05,
"loss": 4.0627,
"step": 1435000
},
{
"epoch": 2.69,
"learning_rate": 3.0548026459038797e-05,
"loss": 4.0817,
"step": 1435500
},
{
"epoch": 2.7,
"learning_rate": 3.045417345536726e-05,
"loss": 4.1082,
"step": 1436000
},
{
"epoch": 2.7,
"learning_rate": 3.0360320451695734e-05,
"loss": 4.0759,
"step": 1436500
},
{
"epoch": 2.7,
"learning_rate": 3.0266467448024202e-05,
"loss": 4.0907,
"step": 1437000
},
{
"epoch": 2.7,
"learning_rate": 3.0172614444352676e-05,
"loss": 4.0807,
"step": 1437500
},
{
"epoch": 2.7,
"learning_rate": 3.0078761440681146e-05,
"loss": 4.0782,
"step": 1438000
},
{
"epoch": 2.7,
"learning_rate": 2.9984908437009614e-05,
"loss": 4.0759,
"step": 1438500
},
{
"epoch": 2.7,
"learning_rate": 2.9891055433338088e-05,
"loss": 4.0529,
"step": 1439000
},
{
"epoch": 2.7,
"learning_rate": 2.9797202429666555e-05,
"loss": 4.0761,
"step": 1439500
},
{
"epoch": 2.7,
"learning_rate": 2.970334942599503e-05,
"loss": 4.0891,
"step": 1440000
},
{
"epoch": 2.7,
"learning_rate": 2.9609496422323496e-05,
"loss": 4.0623,
"step": 1440500
},
{
"epoch": 2.7,
"learning_rate": 2.951564341865197e-05,
"loss": 4.0779,
"step": 1441000
},
{
"epoch": 2.71,
"learning_rate": 2.9421790414980438e-05,
"loss": 4.0642,
"step": 1441500
},
{
"epoch": 2.71,
"learning_rate": 2.9327937411308908e-05,
"loss": 4.0584,
"step": 1442000
},
{
"epoch": 2.71,
"learning_rate": 2.9234084407637382e-05,
"loss": 4.0769,
"step": 1442500
},
{
"epoch": 2.71,
"learning_rate": 2.914023140396585e-05,
"loss": 4.0739,
"step": 1443000
},
{
"epoch": 2.71,
"learning_rate": 2.9046378400294323e-05,
"loss": 4.0837,
"step": 1443500
},
{
"epoch": 2.71,
"learning_rate": 2.895252539662279e-05,
"loss": 4.0882,
"step": 1444000
},
{
"epoch": 2.71,
"learning_rate": 2.885867239295126e-05,
"loss": 4.0639,
"step": 1444500
},
{
"epoch": 2.71,
"learning_rate": 2.8764819389279732e-05,
"loss": 4.0715,
"step": 1445000
},
{
"epoch": 2.71,
"learning_rate": 2.8670966385608203e-05,
"loss": 4.071,
"step": 1445500
},
{
"epoch": 2.71,
"learning_rate": 2.857711338193667e-05,
"loss": 4.0727,
"step": 1446000
},
{
"epoch": 2.72,
"learning_rate": 2.8483260378265144e-05,
"loss": 4.0621,
"step": 1446500
},
{
"epoch": 2.72,
"learning_rate": 2.8389407374593615e-05,
"loss": 4.0671,
"step": 1447000
},
{
"epoch": 2.72,
"learning_rate": 2.8295554370922085e-05,
"loss": 4.0786,
"step": 1447500
},
{
"epoch": 2.72,
"learning_rate": 2.8201701367250556e-05,
"loss": 4.0629,
"step": 1448000
},
{
"epoch": 2.72,
"learning_rate": 2.8107848363579023e-05,
"loss": 4.0769,
"step": 1448500
},
{
"epoch": 2.72,
"learning_rate": 2.8013995359907497e-05,
"loss": 4.084,
"step": 1449000
},
{
"epoch": 2.72,
"learning_rate": 2.7920142356235964e-05,
"loss": 4.0597,
"step": 1449500
},
{
"epoch": 2.72,
"learning_rate": 2.782628935256444e-05,
"loss": 4.0746,
"step": 1450000
},
{
"epoch": 2.72,
"learning_rate": 2.7732436348892906e-05,
"loss": 4.0876,
"step": 1450500
},
{
"epoch": 2.72,
"learning_rate": 2.763858334522138e-05,
"loss": 4.0802,
"step": 1451000
},
{
"epoch": 2.72,
"learning_rate": 2.754473034154985e-05,
"loss": 4.0643,
"step": 1451500
},
{
"epoch": 2.73,
"learning_rate": 2.7450877337878318e-05,
"loss": 4.0735,
"step": 1452000
},
{
"epoch": 2.73,
"learning_rate": 2.735702433420679e-05,
"loss": 4.0577,
"step": 1452500
},
{
"epoch": 2.73,
"learning_rate": 2.726317133053526e-05,
"loss": 4.0716,
"step": 1453000
},
{
"epoch": 2.73,
"learning_rate": 2.7169318326863733e-05,
"loss": 4.0616,
"step": 1453500
},
{
"epoch": 2.73,
"learning_rate": 2.70754653231922e-05,
"loss": 4.0756,
"step": 1454000
},
{
"epoch": 2.73,
"learning_rate": 2.698161231952067e-05,
"loss": 4.0606,
"step": 1454500
},
{
"epoch": 2.73,
"learning_rate": 2.688775931584914e-05,
"loss": 4.0849,
"step": 1455000
},
{
"epoch": 2.73,
"learning_rate": 2.6793906312177612e-05,
"loss": 4.0531,
"step": 1455500
},
{
"epoch": 2.73,
"learning_rate": 2.6700053308506086e-05,
"loss": 4.0721,
"step": 1456000
},
{
"epoch": 2.73,
"learning_rate": 2.6606200304834553e-05,
"loss": 4.0736,
"step": 1456500
},
{
"epoch": 2.73,
"learning_rate": 2.6512347301163024e-05,
"loss": 4.074,
"step": 1457000
},
{
"epoch": 2.74,
"learning_rate": 2.6418494297491495e-05,
"loss": 4.1067,
"step": 1457500
},
{
"epoch": 2.74,
"learning_rate": 2.6324641293819965e-05,
"loss": 4.0933,
"step": 1458000
},
{
"epoch": 2.74,
"learning_rate": 2.6230788290148436e-05,
"loss": 4.0737,
"step": 1458500
},
{
"epoch": 2.74,
"learning_rate": 2.6136935286476907e-05,
"loss": 4.0638,
"step": 1459000
},
{
"epoch": 2.74,
"learning_rate": 2.6043082282805374e-05,
"loss": 4.0896,
"step": 1459500
},
{
"epoch": 2.74,
"learning_rate": 2.5949229279133848e-05,
"loss": 4.0674,
"step": 1460000
},
{
"epoch": 2.74,
"learning_rate": 2.585537627546232e-05,
"loss": 4.0543,
"step": 1460500
},
{
"epoch": 2.74,
"learning_rate": 2.576152327179079e-05,
"loss": 4.0558,
"step": 1461000
},
{
"epoch": 2.74,
"learning_rate": 2.566767026811926e-05,
"loss": 4.0691,
"step": 1461500
},
{
"epoch": 2.74,
"learning_rate": 2.5573817264447727e-05,
"loss": 4.0653,
"step": 1462000
},
{
"epoch": 2.75,
"learning_rate": 2.54799642607762e-05,
"loss": 4.0803,
"step": 1462500
},
{
"epoch": 2.75,
"learning_rate": 2.5386111257104668e-05,
"loss": 4.0641,
"step": 1463000
},
{
"epoch": 2.75,
"learning_rate": 2.5292258253433142e-05,
"loss": 4.0812,
"step": 1463500
},
{
"epoch": 2.75,
"learning_rate": 2.519840524976161e-05,
"loss": 4.0733,
"step": 1464000
},
{
"epoch": 2.75,
"learning_rate": 2.510455224609008e-05,
"loss": 4.0701,
"step": 1464500
},
{
"epoch": 2.75,
"learning_rate": 2.5010699242418554e-05,
"loss": 4.082,
"step": 1465000
},
{
"epoch": 2.75,
"learning_rate": 2.491684623874702e-05,
"loss": 4.0887,
"step": 1465500
},
{
"epoch": 2.75,
"learning_rate": 2.4822993235075496e-05,
"loss": 4.0729,
"step": 1466000
},
{
"epoch": 2.75,
"learning_rate": 2.4729140231403963e-05,
"loss": 4.0826,
"step": 1466500
},
{
"epoch": 2.75,
"learning_rate": 2.4635287227732433e-05,
"loss": 4.073,
"step": 1467000
},
{
"epoch": 2.75,
"learning_rate": 2.4541434224060904e-05,
"loss": 4.0581,
"step": 1467500
},
{
"epoch": 2.76,
"learning_rate": 2.4447581220389375e-05,
"loss": 4.0781,
"step": 1468000
},
{
"epoch": 2.76,
"learning_rate": 2.4353728216717845e-05,
"loss": 4.0805,
"step": 1468500
},
{
"epoch": 2.76,
"learning_rate": 2.4259875213046316e-05,
"loss": 4.057,
"step": 1469000
},
{
"epoch": 2.76,
"learning_rate": 2.416602220937479e-05,
"loss": 4.0634,
"step": 1469500
},
{
"epoch": 2.76,
"learning_rate": 2.4072169205703257e-05,
"loss": 4.0545,
"step": 1470000
},
{
"epoch": 2.76,
"learning_rate": 2.3978316202031728e-05,
"loss": 4.083,
"step": 1470500
},
{
"epoch": 2.76,
"learning_rate": 2.38844631983602e-05,
"loss": 4.0732,
"step": 1471000
},
{
"epoch": 2.76,
"learning_rate": 2.379061019468867e-05,
"loss": 4.0605,
"step": 1471500
},
{
"epoch": 2.76,
"learning_rate": 2.3696757191017136e-05,
"loss": 4.0614,
"step": 1472000
},
{
"epoch": 2.76,
"learning_rate": 2.360290418734561e-05,
"loss": 4.0822,
"step": 1472500
},
{
"epoch": 2.76,
"learning_rate": 2.3509051183674078e-05,
"loss": 4.0573,
"step": 1473000
},
{
"epoch": 2.77,
"learning_rate": 2.3415198180002552e-05,
"loss": 4.0701,
"step": 1473500
},
{
"epoch": 2.77,
"learning_rate": 2.3321345176331022e-05,
"loss": 4.0729,
"step": 1474000
},
{
"epoch": 2.77,
"learning_rate": 2.322749217265949e-05,
"loss": 4.06,
"step": 1474500
},
{
"epoch": 2.77,
"learning_rate": 2.3133639168987964e-05,
"loss": 4.0639,
"step": 1475000
},
{
"epoch": 2.77,
"learning_rate": 2.303978616531643e-05,
"loss": 4.0811,
"step": 1475500
},
{
"epoch": 2.77,
"learning_rate": 2.2945933161644905e-05,
"loss": 4.0953,
"step": 1476000
},
{
"epoch": 2.77,
"learning_rate": 2.2852080157973372e-05,
"loss": 4.0624,
"step": 1476500
},
{
"epoch": 2.77,
"learning_rate": 2.2758227154301846e-05,
"loss": 4.0748,
"step": 1477000
},
{
"epoch": 2.77,
"learning_rate": 2.2664374150630313e-05,
"loss": 4.0562,
"step": 1477500
},
{
"epoch": 2.77,
"learning_rate": 2.2570521146958784e-05,
"loss": 4.0767,
"step": 1478000
},
{
"epoch": 2.78,
"learning_rate": 2.2476668143287258e-05,
"loss": 4.0814,
"step": 1478500
},
{
"epoch": 2.78,
"learning_rate": 2.2382815139615725e-05,
"loss": 4.0775,
"step": 1479000
},
{
"epoch": 2.78,
"learning_rate": 2.22889621359442e-05,
"loss": 4.0572,
"step": 1479500
},
{
"epoch": 2.78,
"learning_rate": 2.2195109132272667e-05,
"loss": 4.0755,
"step": 1480000
},
{
"epoch": 2.78,
"learning_rate": 2.2101256128601137e-05,
"loss": 4.0798,
"step": 1480500
},
{
"epoch": 2.78,
"learning_rate": 2.2007403124929608e-05,
"loss": 4.0777,
"step": 1481000
},
{
"epoch": 2.78,
"learning_rate": 2.191355012125808e-05,
"loss": 4.0596,
"step": 1481500
},
{
"epoch": 2.78,
"learning_rate": 2.1819697117586546e-05,
"loss": 4.0721,
"step": 1482000
},
{
"epoch": 2.78,
"learning_rate": 2.172584411391502e-05,
"loss": 4.0976,
"step": 1482500
},
{
"epoch": 2.78,
"learning_rate": 2.163199111024349e-05,
"loss": 4.0837,
"step": 1483000
},
{
"epoch": 2.78,
"learning_rate": 2.153813810657196e-05,
"loss": 4.0534,
"step": 1483500
},
{
"epoch": 2.79,
"learning_rate": 2.1444285102900432e-05,
"loss": 4.0872,
"step": 1484000
},
{
"epoch": 2.79,
"learning_rate": 2.1350432099228902e-05,
"loss": 4.0767,
"step": 1484500
},
{
"epoch": 2.79,
"learning_rate": 2.1256579095557373e-05,
"loss": 4.0758,
"step": 1485000
},
{
"epoch": 2.79,
"learning_rate": 2.116272609188584e-05,
"loss": 4.0661,
"step": 1485500
},
{
"epoch": 2.79,
"learning_rate": 2.1068873088214314e-05,
"loss": 4.0621,
"step": 1486000
},
{
"epoch": 2.79,
"learning_rate": 2.097502008454278e-05,
"loss": 4.0947,
"step": 1486500
},
{
"epoch": 2.79,
"learning_rate": 2.0881167080871256e-05,
"loss": 4.0672,
"step": 1487000
},
{
"epoch": 2.79,
"learning_rate": 2.0787314077199726e-05,
"loss": 4.086,
"step": 1487500
},
{
"epoch": 2.79,
"learning_rate": 2.0693461073528194e-05,
"loss": 4.0562,
"step": 1488000
},
{
"epoch": 2.79,
"learning_rate": 2.0599608069856668e-05,
"loss": 4.0736,
"step": 1488500
},
{
"epoch": 2.79,
"learning_rate": 2.0505755066185135e-05,
"loss": 4.0753,
"step": 1489000
},
{
"epoch": 2.8,
"learning_rate": 2.041190206251361e-05,
"loss": 4.0578,
"step": 1489500
},
{
"epoch": 2.8,
"learning_rate": 2.0318049058842076e-05,
"loss": 4.0664,
"step": 1490000
},
{
"epoch": 2.8,
"learning_rate": 2.0224196055170547e-05,
"loss": 4.0841,
"step": 1490500
},
{
"epoch": 2.8,
"learning_rate": 2.013034305149902e-05,
"loss": 4.0693,
"step": 1491000
},
{
"epoch": 2.8,
"learning_rate": 2.0036490047827488e-05,
"loss": 4.0514,
"step": 1491500
},
{
"epoch": 2.8,
"learning_rate": 1.9942637044155962e-05,
"loss": 4.0778,
"step": 1492000
},
{
"epoch": 2.8,
"learning_rate": 1.984878404048443e-05,
"loss": 4.0732,
"step": 1492500
},
{
"epoch": 2.8,
"learning_rate": 1.97549310368129e-05,
"loss": 4.0693,
"step": 1493000
},
{
"epoch": 2.8,
"learning_rate": 1.966107803314137e-05,
"loss": 4.0502,
"step": 1493500
},
{
"epoch": 2.8,
"learning_rate": 1.956722502946984e-05,
"loss": 4.0793,
"step": 1494000
},
{
"epoch": 2.81,
"learning_rate": 1.9473372025798312e-05,
"loss": 4.0909,
"step": 1494500
},
{
"epoch": 2.81,
"learning_rate": 1.9379519022126783e-05,
"loss": 4.0733,
"step": 1495000
},
{
"epoch": 2.81,
"learning_rate": 1.9285666018455257e-05,
"loss": 4.0509,
"step": 1495500
},
{
"epoch": 2.81,
"learning_rate": 1.9191813014783724e-05,
"loss": 4.0561,
"step": 1496000
},
{
"epoch": 2.81,
"learning_rate": 1.9097960011112194e-05,
"loss": 4.0527,
"step": 1496500
},
{
"epoch": 2.81,
"learning_rate": 1.9004107007440665e-05,
"loss": 4.0767,
"step": 1497000
},
{
"epoch": 2.81,
"learning_rate": 1.8910254003769136e-05,
"loss": 4.0684,
"step": 1497500
},
{
"epoch": 2.81,
"learning_rate": 1.8816401000097603e-05,
"loss": 4.0715,
"step": 1498000
},
{
"epoch": 2.81,
"learning_rate": 1.8722547996426077e-05,
"loss": 4.0734,
"step": 1498500
},
{
"epoch": 2.81,
"learning_rate": 1.8628694992754548e-05,
"loss": 4.0651,
"step": 1499000
},
{
"epoch": 2.81,
"learning_rate": 1.8534841989083018e-05,
"loss": 4.0526,
"step": 1499500
},
{
"epoch": 2.82,
"learning_rate": 1.844098898541149e-05,
"loss": 4.0517,
"step": 1500000
},
{
"epoch": 2.82,
"learning_rate": 1.8347135981739956e-05,
"loss": 4.0661,
"step": 1500500
},
{
"epoch": 2.82,
"learning_rate": 1.8253282978068427e-05,
"loss": 4.0654,
"step": 1501000
},
{
"epoch": 2.82,
"learning_rate": 1.8159429974396897e-05,
"loss": 4.0687,
"step": 1501500
},
{
"epoch": 2.82,
"learning_rate": 1.8065576970725368e-05,
"loss": 4.1042,
"step": 1502000
},
{
"epoch": 2.82,
"learning_rate": 1.7971723967053842e-05,
"loss": 4.0794,
"step": 1502500
},
{
"epoch": 2.82,
"learning_rate": 1.7877870963382313e-05,
"loss": 4.0609,
"step": 1503000
},
{
"epoch": 2.82,
"learning_rate": 1.778401795971078e-05,
"loss": 4.06,
"step": 1503500
},
{
"epoch": 2.82,
"learning_rate": 1.769016495603925e-05,
"loss": 4.0815,
"step": 1504000
},
{
"epoch": 2.82,
"learning_rate": 1.759631195236772e-05,
"loss": 4.0839,
"step": 1504500
},
{
"epoch": 2.82,
"learning_rate": 1.7502458948696192e-05,
"loss": 4.0673,
"step": 1505000
},
{
"epoch": 2.83,
"learning_rate": 1.7408605945024663e-05,
"loss": 4.0789,
"step": 1505500
},
{
"epoch": 2.83,
"learning_rate": 1.7314752941353133e-05,
"loss": 4.0882,
"step": 1506000
},
{
"epoch": 2.83,
"learning_rate": 1.7220899937681604e-05,
"loss": 4.0565,
"step": 1506500
},
{
"epoch": 2.83,
"learning_rate": 1.7127046934010074e-05,
"loss": 4.071,
"step": 1507000
},
{
"epoch": 2.83,
"learning_rate": 1.7033193930338545e-05,
"loss": 4.0723,
"step": 1507500
},
{
"epoch": 2.83,
"learning_rate": 1.6939340926667016e-05,
"loss": 4.0561,
"step": 1508000
},
{
"epoch": 2.83,
"learning_rate": 1.6845487922995486e-05,
"loss": 4.0665,
"step": 1508500
},
{
"epoch": 2.83,
"learning_rate": 1.6751634919323957e-05,
"loss": 4.0753,
"step": 1509000
},
{
"epoch": 2.83,
"learning_rate": 1.6657781915652428e-05,
"loss": 4.0621,
"step": 1509500
},
{
"epoch": 2.83,
"learning_rate": 1.65639289119809e-05,
"loss": 4.0514,
"step": 1510000
},
{
"epoch": 2.84,
"learning_rate": 1.6470075908309366e-05,
"loss": 4.0546,
"step": 1510500
},
{
"epoch": 2.84,
"learning_rate": 1.637622290463784e-05,
"loss": 4.0733,
"step": 1511000
},
{
"epoch": 2.84,
"learning_rate": 1.628236990096631e-05,
"loss": 4.057,
"step": 1511500
},
{
"epoch": 2.84,
"learning_rate": 1.618851689729478e-05,
"loss": 4.0731,
"step": 1512000
},
{
"epoch": 2.84,
"learning_rate": 1.609466389362325e-05,
"loss": 4.0748,
"step": 1512500
},
{
"epoch": 2.84,
"learning_rate": 1.6000810889951722e-05,
"loss": 4.0901,
"step": 1513000
},
{
"epoch": 2.84,
"learning_rate": 1.590695788628019e-05,
"loss": 4.0753,
"step": 1513500
},
{
"epoch": 2.84,
"learning_rate": 1.581310488260866e-05,
"loss": 4.0635,
"step": 1514000
},
{
"epoch": 2.84,
"learning_rate": 1.571925187893713e-05,
"loss": 4.0726,
"step": 1514500
},
{
"epoch": 2.84,
"learning_rate": 1.56253988752656e-05,
"loss": 4.0904,
"step": 1515000
},
{
"epoch": 2.84,
"learning_rate": 1.5531545871594075e-05,
"loss": 4.0687,
"step": 1515500
},
{
"epoch": 2.85,
"learning_rate": 1.5437692867922546e-05,
"loss": 4.0737,
"step": 1516000
},
{
"epoch": 2.85,
"learning_rate": 1.5343839864251013e-05,
"loss": 4.0753,
"step": 1516500
},
{
"epoch": 2.85,
"learning_rate": 1.5249986860579486e-05,
"loss": 4.0761,
"step": 1517000
},
{
"epoch": 2.85,
"learning_rate": 1.5156133856907955e-05,
"loss": 4.0725,
"step": 1517500
},
{
"epoch": 2.85,
"learning_rate": 1.5062280853236425e-05,
"loss": 4.0736,
"step": 1518000
},
{
"epoch": 2.85,
"learning_rate": 1.4968427849564896e-05,
"loss": 4.0662,
"step": 1518500
},
{
"epoch": 2.85,
"learning_rate": 1.4874574845893366e-05,
"loss": 4.0811,
"step": 1519000
},
{
"epoch": 2.85,
"learning_rate": 1.4780721842221835e-05,
"loss": 4.0701,
"step": 1519500
},
{
"epoch": 2.85,
"learning_rate": 1.468686883855031e-05,
"loss": 4.0552,
"step": 1520000
},
{
"epoch": 2.85,
"learning_rate": 1.4593015834878778e-05,
"loss": 4.0639,
"step": 1520500
},
{
"epoch": 2.86,
"learning_rate": 1.4499162831207249e-05,
"loss": 4.0573,
"step": 1521000
},
{
"epoch": 2.86,
"learning_rate": 1.440530982753572e-05,
"loss": 4.0606,
"step": 1521500
},
{
"epoch": 2.86,
"learning_rate": 1.431145682386419e-05,
"loss": 4.0827,
"step": 1522000
},
{
"epoch": 2.86,
"learning_rate": 1.421760382019266e-05,
"loss": 4.0573,
"step": 1522500
},
{
"epoch": 2.86,
"learning_rate": 1.412375081652113e-05,
"loss": 4.0568,
"step": 1523000
},
{
"epoch": 2.86,
"learning_rate": 1.40298978128496e-05,
"loss": 4.0633,
"step": 1523500
},
{
"epoch": 2.86,
"learning_rate": 1.3936044809178071e-05,
"loss": 4.0646,
"step": 1524000
},
{
"epoch": 2.86,
"learning_rate": 1.3842191805506544e-05,
"loss": 4.0796,
"step": 1524500
},
{
"epoch": 2.86,
"learning_rate": 1.3748338801835014e-05,
"loss": 4.0707,
"step": 1525000
},
{
"epoch": 2.86,
"learning_rate": 1.3654485798163483e-05,
"loss": 4.0444,
"step": 1525500
},
{
"epoch": 2.86,
"learning_rate": 1.3560632794491954e-05,
"loss": 4.0624,
"step": 1526000
},
{
"epoch": 2.87,
"learning_rate": 1.3466779790820424e-05,
"loss": 4.065,
"step": 1526500
},
{
"epoch": 2.87,
"learning_rate": 1.3372926787148895e-05,
"loss": 4.0685,
"step": 1527000
},
{
"epoch": 2.87,
"learning_rate": 1.3279073783477364e-05,
"loss": 4.0811,
"step": 1527500
},
{
"epoch": 2.87,
"learning_rate": 1.3185220779805835e-05,
"loss": 4.0686,
"step": 1528000
},
{
"epoch": 2.87,
"learning_rate": 1.3091367776134305e-05,
"loss": 4.0632,
"step": 1528500
},
{
"epoch": 2.87,
"learning_rate": 1.2997514772462778e-05,
"loss": 4.0716,
"step": 1529000
},
{
"epoch": 2.87,
"learning_rate": 1.2903661768791248e-05,
"loss": 4.0852,
"step": 1529500
},
{
"epoch": 2.87,
"learning_rate": 1.2809808765119719e-05,
"loss": 4.0707,
"step": 1530000
},
{
"epoch": 2.87,
"learning_rate": 1.2715955761448188e-05,
"loss": 4.0465,
"step": 1530500
},
{
"epoch": 2.87,
"learning_rate": 1.2622102757776658e-05,
"loss": 4.0381,
"step": 1531000
},
{
"epoch": 2.87,
"learning_rate": 1.2528249754105129e-05,
"loss": 4.0863,
"step": 1531500
},
{
"epoch": 2.88,
"learning_rate": 1.24343967504336e-05,
"loss": 4.0686,
"step": 1532000
},
{
"epoch": 2.88,
"learning_rate": 1.2340543746762069e-05,
"loss": 4.0502,
"step": 1532500
},
{
"epoch": 2.88,
"learning_rate": 1.224669074309054e-05,
"loss": 4.044,
"step": 1533000
},
{
"epoch": 2.88,
"learning_rate": 1.2152837739419012e-05,
"loss": 4.0523,
"step": 1533500
},
{
"epoch": 2.88,
"learning_rate": 1.2058984735747482e-05,
"loss": 4.0548,
"step": 1534000
},
{
"epoch": 2.88,
"learning_rate": 1.1965131732075953e-05,
"loss": 4.063,
"step": 1534500
},
{
"epoch": 2.88,
"learning_rate": 1.1871278728404424e-05,
"loss": 4.0483,
"step": 1535000
},
{
"epoch": 2.88,
"learning_rate": 1.1777425724732893e-05,
"loss": 4.0641,
"step": 1535500
},
{
"epoch": 2.88,
"learning_rate": 1.1683572721061363e-05,
"loss": 4.067,
"step": 1536000
},
{
"epoch": 2.88,
"learning_rate": 1.1589719717389834e-05,
"loss": 4.0757,
"step": 1536500
},
{
"epoch": 2.89,
"learning_rate": 1.1495866713718304e-05,
"loss": 4.0793,
"step": 1537000
},
{
"epoch": 2.89,
"learning_rate": 1.1402013710046777e-05,
"loss": 4.0673,
"step": 1537500
},
{
"epoch": 2.89,
"learning_rate": 1.1308160706375247e-05,
"loss": 4.0763,
"step": 1538000
},
{
"epoch": 2.89,
"learning_rate": 1.1214307702703716e-05,
"loss": 4.0714,
"step": 1538500
},
{
"epoch": 2.89,
"learning_rate": 1.1120454699032187e-05,
"loss": 4.0724,
"step": 1539000
},
{
"epoch": 2.89,
"learning_rate": 1.1026601695360658e-05,
"loss": 4.0746,
"step": 1539500
},
{
"epoch": 2.89,
"learning_rate": 1.0932748691689128e-05,
"loss": 4.0671,
"step": 1540000
},
{
"epoch": 2.89,
"learning_rate": 1.0838895688017597e-05,
"loss": 4.0652,
"step": 1540500
},
{
"epoch": 2.89,
"learning_rate": 1.0745042684346068e-05,
"loss": 4.0546,
"step": 1541000
},
{
"epoch": 2.89,
"learning_rate": 1.0651189680674539e-05,
"loss": 4.0641,
"step": 1541500
},
{
"epoch": 2.89,
"learning_rate": 1.0557336677003011e-05,
"loss": 4.0924,
"step": 1542000
},
{
"epoch": 2.9,
"learning_rate": 1.0463483673331481e-05,
"loss": 4.0365,
"step": 1542500
},
{
"epoch": 2.9,
"learning_rate": 1.0369630669659952e-05,
"loss": 4.0726,
"step": 1543000
},
{
"epoch": 2.9,
"learning_rate": 1.0275777665988421e-05,
"loss": 4.0716,
"step": 1543500
},
{
"epoch": 2.9,
"learning_rate": 1.0181924662316892e-05,
"loss": 4.0571,
"step": 1544000
},
{
"epoch": 2.9,
"learning_rate": 1.0088071658645362e-05,
"loss": 4.068,
"step": 1544500
},
{
"epoch": 2.9,
"learning_rate": 9.994218654973833e-06,
"loss": 4.0666,
"step": 1545000
},
{
"epoch": 2.9,
"learning_rate": 9.900365651302302e-06,
"loss": 4.0692,
"step": 1545500
},
{
"epoch": 2.9,
"learning_rate": 9.806512647630773e-06,
"loss": 4.0601,
"step": 1546000
},
{
"epoch": 2.9,
"learning_rate": 9.712659643959245e-06,
"loss": 4.0684,
"step": 1546500
},
{
"epoch": 2.9,
"learning_rate": 9.618806640287716e-06,
"loss": 4.0693,
"step": 1547000
},
{
"epoch": 2.9,
"learning_rate": 9.524953636616186e-06,
"loss": 4.0503,
"step": 1547500
},
{
"epoch": 2.91,
"learning_rate": 9.431100632944657e-06,
"loss": 4.0649,
"step": 1548000
},
{
"epoch": 2.91,
"learning_rate": 9.337247629273126e-06,
"loss": 4.0477,
"step": 1548500
},
{
"epoch": 2.91,
"learning_rate": 9.243394625601596e-06,
"loss": 4.0735,
"step": 1549000
},
{
"epoch": 2.91,
"learning_rate": 9.149541621930067e-06,
"loss": 4.0613,
"step": 1549500
},
{
"epoch": 2.91,
"learning_rate": 9.055688618258538e-06,
"loss": 4.0593,
"step": 1550000
},
{
"epoch": 2.91,
"learning_rate": 8.961835614587008e-06,
"loss": 4.0832,
"step": 1550500
},
{
"epoch": 2.91,
"learning_rate": 8.867982610915479e-06,
"loss": 4.0533,
"step": 1551000
},
{
"epoch": 2.91,
"learning_rate": 8.77412960724395e-06,
"loss": 4.0752,
"step": 1551500
},
{
"epoch": 2.91,
"learning_rate": 8.68027660357242e-06,
"loss": 4.0505,
"step": 1552000
},
{
"epoch": 2.91,
"learning_rate": 8.586423599900891e-06,
"loss": 4.0665,
"step": 1552500
},
{
"epoch": 2.92,
"learning_rate": 8.492570596229362e-06,
"loss": 4.0617,
"step": 1553000
},
{
"epoch": 2.92,
"learning_rate": 8.39871759255783e-06,
"loss": 4.0793,
"step": 1553500
},
{
"epoch": 2.92,
"learning_rate": 8.304864588886301e-06,
"loss": 4.0449,
"step": 1554000
},
{
"epoch": 2.92,
"learning_rate": 8.211011585214773e-06,
"loss": 4.0949,
"step": 1554500
},
{
"epoch": 2.92,
"learning_rate": 8.117158581543242e-06,
"loss": 4.0613,
"step": 1555000
},
{
"epoch": 2.92,
"learning_rate": 8.023305577871713e-06,
"loss": 4.0632,
"step": 1555500
},
{
"epoch": 2.92,
"learning_rate": 7.929452574200184e-06,
"loss": 4.0474,
"step": 1556000
},
{
"epoch": 2.92,
"learning_rate": 7.835599570528654e-06,
"loss": 4.0592,
"step": 1556500
},
{
"epoch": 2.92,
"learning_rate": 7.741746566857125e-06,
"loss": 4.0555,
"step": 1557000
},
{
"epoch": 2.92,
"learning_rate": 7.647893563185596e-06,
"loss": 4.0757,
"step": 1557500
},
{
"epoch": 2.92,
"learning_rate": 7.5540405595140654e-06,
"loss": 4.0636,
"step": 1558000
},
{
"epoch": 2.93,
"learning_rate": 7.460187555842537e-06,
"loss": 4.0772,
"step": 1558500
},
{
"epoch": 2.93,
"learning_rate": 7.3663345521710076e-06,
"loss": 4.0507,
"step": 1559000
},
{
"epoch": 2.93,
"learning_rate": 7.272481548499477e-06,
"loss": 4.062,
"step": 1559500
},
{
"epoch": 2.93,
"learning_rate": 7.178628544827948e-06,
"loss": 4.0709,
"step": 1560000
},
{
"epoch": 2.93,
"learning_rate": 7.084775541156418e-06,
"loss": 4.066,
"step": 1560500
},
{
"epoch": 2.93,
"learning_rate": 6.990922537484889e-06,
"loss": 4.0429,
"step": 1561000
},
{
"epoch": 2.93,
"learning_rate": 6.89706953381336e-06,
"loss": 4.0554,
"step": 1561500
},
{
"epoch": 2.93,
"learning_rate": 6.80321653014183e-06,
"loss": 4.0622,
"step": 1562000
},
{
"epoch": 2.93,
"learning_rate": 6.7093635264703e-06,
"loss": 4.0711,
"step": 1562500
},
{
"epoch": 2.93,
"learning_rate": 6.615510522798772e-06,
"loss": 4.0514,
"step": 1563000
},
{
"epoch": 2.93,
"learning_rate": 6.521657519127242e-06,
"loss": 4.0655,
"step": 1563500
},
{
"epoch": 2.94,
"learning_rate": 6.427804515455712e-06,
"loss": 4.0667,
"step": 1564000
},
{
"epoch": 2.94,
"learning_rate": 6.333951511784182e-06,
"loss": 4.0616,
"step": 1564500
},
{
"epoch": 2.94,
"learning_rate": 6.240098508112653e-06,
"loss": 4.069,
"step": 1565000
},
{
"epoch": 2.94,
"learning_rate": 6.146245504441124e-06,
"loss": 4.0408,
"step": 1565500
},
{
"epoch": 2.94,
"learning_rate": 6.052392500769594e-06,
"loss": 4.0799,
"step": 1566000
},
{
"epoch": 2.94,
"learning_rate": 5.958539497098065e-06,
"loss": 4.0542,
"step": 1566500
},
{
"epoch": 2.94,
"learning_rate": 5.864686493426534e-06,
"loss": 4.0523,
"step": 1567000
},
{
"epoch": 2.94,
"learning_rate": 5.770833489755006e-06,
"loss": 4.0604,
"step": 1567500
},
{
"epoch": 2.94,
"learning_rate": 5.6769804860834765e-06,
"loss": 4.0685,
"step": 1568000
},
{
"epoch": 2.94,
"learning_rate": 5.583127482411946e-06,
"loss": 4.0656,
"step": 1568500
},
{
"epoch": 2.95,
"learning_rate": 5.489274478740417e-06,
"loss": 4.0775,
"step": 1569000
},
{
"epoch": 2.95,
"learning_rate": 5.395421475068887e-06,
"loss": 4.0758,
"step": 1569500
},
{
"epoch": 2.95,
"learning_rate": 5.301568471397358e-06,
"loss": 4.0527,
"step": 1570000
},
{
"epoch": 2.95,
"learning_rate": 5.207715467725829e-06,
"loss": 4.0844,
"step": 1570500
},
{
"epoch": 2.95,
"learning_rate": 5.113862464054299e-06,
"loss": 4.0691,
"step": 1571000
},
{
"epoch": 2.95,
"learning_rate": 5.020009460382769e-06,
"loss": 4.0732,
"step": 1571500
},
{
"epoch": 2.95,
"learning_rate": 4.926156456711241e-06,
"loss": 4.0753,
"step": 1572000
},
{
"epoch": 2.95,
"learning_rate": 4.832303453039711e-06,
"loss": 4.07,
"step": 1572500
},
{
"epoch": 2.95,
"learning_rate": 4.738450449368181e-06,
"loss": 4.0587,
"step": 1573000
},
{
"epoch": 2.95,
"learning_rate": 4.644597445696652e-06,
"loss": 4.0684,
"step": 1573500
},
{
"epoch": 2.95,
"learning_rate": 4.5507444420251225e-06,
"loss": 4.0555,
"step": 1574000
},
{
"epoch": 2.96,
"learning_rate": 4.456891438353592e-06,
"loss": 4.046,
"step": 1574500
},
{
"epoch": 2.96,
"learning_rate": 4.363038434682063e-06,
"loss": 4.0417,
"step": 1575000
},
{
"epoch": 2.96,
"learning_rate": 4.269185431010534e-06,
"loss": 4.0441,
"step": 1575500
},
{
"epoch": 2.96,
"learning_rate": 4.175332427339004e-06,
"loss": 4.0691,
"step": 1576000
},
{
"epoch": 2.96,
"learning_rate": 4.081479423667475e-06,
"loss": 4.0547,
"step": 1576500
},
{
"epoch": 2.96,
"learning_rate": 3.9876264199959455e-06,
"loss": 4.08,
"step": 1577000
},
{
"epoch": 2.96,
"learning_rate": 3.893773416324415e-06,
"loss": 4.0607,
"step": 1577500
},
{
"epoch": 2.96,
"learning_rate": 3.7999204126528864e-06,
"loss": 4.063,
"step": 1578000
},
{
"epoch": 2.96,
"learning_rate": 3.7060674089813566e-06,
"loss": 4.068,
"step": 1578500
},
{
"epoch": 2.96,
"learning_rate": 3.6122144053098273e-06,
"loss": 4.0872,
"step": 1579000
},
{
"epoch": 2.96,
"learning_rate": 3.518361401638298e-06,
"loss": 4.0464,
"step": 1579500
},
{
"epoch": 2.97,
"learning_rate": 3.424508397966768e-06,
"loss": 4.068,
"step": 1580000
},
{
"epoch": 2.97,
"learning_rate": 3.3306553942952387e-06,
"loss": 4.0741,
"step": 1580500
},
{
"epoch": 2.97,
"learning_rate": 3.236802390623709e-06,
"loss": 4.0678,
"step": 1581000
},
{
"epoch": 2.97,
"learning_rate": 3.14294938695218e-06,
"loss": 4.0486,
"step": 1581500
},
{
"epoch": 2.97,
"learning_rate": 3.0490963832806502e-06,
"loss": 4.0615,
"step": 1582000
},
{
"epoch": 2.97,
"learning_rate": 2.955243379609121e-06,
"loss": 4.0524,
"step": 1582500
},
{
"epoch": 2.97,
"learning_rate": 2.861390375937591e-06,
"loss": 4.0626,
"step": 1583000
},
{
"epoch": 2.97,
"learning_rate": 2.7675373722660617e-06,
"loss": 4.0577,
"step": 1583500
},
{
"epoch": 2.97,
"learning_rate": 2.6736843685945324e-06,
"loss": 4.0545,
"step": 1584000
},
{
"epoch": 2.97,
"learning_rate": 2.579831364923003e-06,
"loss": 4.0658,
"step": 1584500
},
{
"epoch": 2.98,
"learning_rate": 2.4859783612514732e-06,
"loss": 4.0748,
"step": 1585000
},
{
"epoch": 2.98,
"learning_rate": 2.3921253575799435e-06,
"loss": 4.0625,
"step": 1585500
},
{
"epoch": 2.98,
"learning_rate": 2.2982723539084145e-06,
"loss": 4.0686,
"step": 1586000
},
{
"epoch": 2.98,
"learning_rate": 2.2044193502368847e-06,
"loss": 4.0765,
"step": 1586500
},
{
"epoch": 2.98,
"learning_rate": 2.1105663465653554e-06,
"loss": 4.0554,
"step": 1587000
},
{
"epoch": 2.98,
"learning_rate": 2.0167133428938256e-06,
"loss": 4.0595,
"step": 1587500
},
{
"epoch": 2.98,
"learning_rate": 1.9228603392222962e-06,
"loss": 4.0785,
"step": 1588000
},
{
"epoch": 2.98,
"learning_rate": 1.8290073355507667e-06,
"loss": 4.0654,
"step": 1588500
},
{
"epoch": 2.98,
"learning_rate": 1.7351543318792373e-06,
"loss": 4.0748,
"step": 1589000
},
{
"epoch": 2.98,
"learning_rate": 1.6413013282077077e-06,
"loss": 4.0668,
"step": 1589500
},
{
"epoch": 2.98,
"learning_rate": 1.5474483245361784e-06,
"loss": 4.0526,
"step": 1590000
},
{
"epoch": 2.99,
"learning_rate": 1.4535953208646488e-06,
"loss": 4.0478,
"step": 1590500
},
{
"epoch": 2.99,
"learning_rate": 1.3597423171931194e-06,
"loss": 4.0519,
"step": 1591000
},
{
"epoch": 2.99,
"learning_rate": 1.2658893135215899e-06,
"loss": 4.0559,
"step": 1591500
},
{
"epoch": 2.99,
"learning_rate": 1.1720363098500605e-06,
"loss": 4.0642,
"step": 1592000
},
{
"epoch": 2.99,
"learning_rate": 1.0781833061785307e-06,
"loss": 4.0688,
"step": 1592500
},
{
"epoch": 2.99,
"learning_rate": 9.843303025070014e-07,
"loss": 4.0684,
"step": 1593000
},
{
"epoch": 2.99,
"learning_rate": 8.904772988354719e-07,
"loss": 4.045,
"step": 1593500
},
{
"epoch": 2.99,
"learning_rate": 7.966242951639423e-07,
"loss": 4.0677,
"step": 1594000
},
{
"epoch": 2.99,
"learning_rate": 7.027712914924129e-07,
"loss": 4.0525,
"step": 1594500
},
{
"epoch": 2.99,
"learning_rate": 6.089182878208833e-07,
"loss": 4.0839,
"step": 1595000
},
{
"epoch": 2.99,
"learning_rate": 5.150652841493538e-07,
"loss": 4.0531,
"step": 1595500
},
{
"epoch": 3.0,
"learning_rate": 4.2121228047782437e-07,
"loss": 4.0731,
"step": 1596000
},
{
"epoch": 3.0,
"learning_rate": 3.2735927680629485e-07,
"loss": 4.0568,
"step": 1596500
},
{
"epoch": 3.0,
"learning_rate": 2.3350627313476536e-07,
"loss": 4.0647,
"step": 1597000
},
{
"epoch": 3.0,
"learning_rate": 1.396532694632359e-07,
"loss": 4.0598,
"step": 1597500
},
{
"epoch": 3.0,
"learning_rate": 4.580026579170639e-08,
"loss": 4.0552,
"step": 1598000
},
{
"epoch": 3.0,
"step": 1598244,
"total_flos": 5.01129636544512e+18,
"train_loss": 0.7859680134460877,
"train_runtime": 210975.8843,
"train_samples_per_second": 45.453,
"train_steps_per_second": 7.575
}
],
"max_steps": 1598244,
"num_train_epochs": 3,
"total_flos": 5.01129636544512e+18,
"trial_name": null,
"trial_params": null
}