MMR1-7B-SFT / trainer_state.json
jxjessieli's picture
Add files using upload-large-folder tool
7afa8b4 verified
raw
history blame
277 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.99960508648606,
"eval_steps": 500,
"global_step": 15825,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0031593081115235764,
"grad_norm": 3.847676639099889,
"learning_rate": 6.317119393556539e-08,
"loss": 0.5818,
"step": 10
},
{
"epoch": 0.006318616223047153,
"grad_norm": 3.8262659782800736,
"learning_rate": 1.2634238787113078e-07,
"loss": 0.5856,
"step": 20
},
{
"epoch": 0.00947792433457073,
"grad_norm": 3.2562040895477886,
"learning_rate": 1.8951358180669618e-07,
"loss": 0.5757,
"step": 30
},
{
"epoch": 0.012637232446094306,
"grad_norm": 2.3293562431253108,
"learning_rate": 2.5268477574226156e-07,
"loss": 0.5599,
"step": 40
},
{
"epoch": 0.01579654055761788,
"grad_norm": 1.7404444983787009,
"learning_rate": 3.158559696778269e-07,
"loss": 0.5394,
"step": 50
},
{
"epoch": 0.01895584866914146,
"grad_norm": 1.3641917061000175,
"learning_rate": 3.7902716361339236e-07,
"loss": 0.5113,
"step": 60
},
{
"epoch": 0.022115156780665033,
"grad_norm": 0.9564559310967178,
"learning_rate": 4.421983575489577e-07,
"loss": 0.4871,
"step": 70
},
{
"epoch": 0.02527446489218861,
"grad_norm": 0.7099841750448822,
"learning_rate": 5.053695514845231e-07,
"loss": 0.4714,
"step": 80
},
{
"epoch": 0.028433773003712186,
"grad_norm": 0.5402837514386732,
"learning_rate": 5.685407454200885e-07,
"loss": 0.451,
"step": 90
},
{
"epoch": 0.03159308111523576,
"grad_norm": 0.4807271971224434,
"learning_rate": 6.317119393556538e-07,
"loss": 0.4323,
"step": 100
},
{
"epoch": 0.03475238922675934,
"grad_norm": 0.34807058040387906,
"learning_rate": 6.948831332912193e-07,
"loss": 0.4219,
"step": 110
},
{
"epoch": 0.03791169733828292,
"grad_norm": 0.3269546213934352,
"learning_rate": 7.580543272267847e-07,
"loss": 0.4152,
"step": 120
},
{
"epoch": 0.04107100544980649,
"grad_norm": 0.29931985054640087,
"learning_rate": 8.212255211623501e-07,
"loss": 0.4046,
"step": 130
},
{
"epoch": 0.044230313561330066,
"grad_norm": 0.3007912763355078,
"learning_rate": 8.843967150979154e-07,
"loss": 0.3975,
"step": 140
},
{
"epoch": 0.04738962167285365,
"grad_norm": 0.3314878634217047,
"learning_rate": 9.475679090334808e-07,
"loss": 0.398,
"step": 150
},
{
"epoch": 0.05054892978437722,
"grad_norm": 0.3099556109376344,
"learning_rate": 1.0107391029690462e-06,
"loss": 0.3931,
"step": 160
},
{
"epoch": 0.0537082378959008,
"grad_norm": 0.29068208472557255,
"learning_rate": 1.0739102969046116e-06,
"loss": 0.3858,
"step": 170
},
{
"epoch": 0.05686754600742437,
"grad_norm": 0.27910793332859557,
"learning_rate": 1.137081490840177e-06,
"loss": 0.3808,
"step": 180
},
{
"epoch": 0.06002685411894795,
"grad_norm": 0.30546375486352645,
"learning_rate": 1.2002526847757423e-06,
"loss": 0.3762,
"step": 190
},
{
"epoch": 0.06318616223047152,
"grad_norm": 0.3554409408339123,
"learning_rate": 1.2634238787113076e-06,
"loss": 0.3735,
"step": 200
},
{
"epoch": 0.0663454703419951,
"grad_norm": 0.32484090299900514,
"learning_rate": 1.3265950726468732e-06,
"loss": 0.3674,
"step": 210
},
{
"epoch": 0.06950477845351868,
"grad_norm": 0.3068743877352151,
"learning_rate": 1.3897662665824385e-06,
"loss": 0.3666,
"step": 220
},
{
"epoch": 0.07266408656504225,
"grad_norm": 0.29618802262032184,
"learning_rate": 1.4529374605180039e-06,
"loss": 0.3618,
"step": 230
},
{
"epoch": 0.07582339467656583,
"grad_norm": 0.3148809652314096,
"learning_rate": 1.5161086544535694e-06,
"loss": 0.3624,
"step": 240
},
{
"epoch": 0.07898270278808942,
"grad_norm": 0.2804644371038647,
"learning_rate": 1.5792798483891348e-06,
"loss": 0.3613,
"step": 250
},
{
"epoch": 0.08214201089961298,
"grad_norm": 0.3309683996287944,
"learning_rate": 1.6424510423247001e-06,
"loss": 0.3581,
"step": 260
},
{
"epoch": 0.08530131901113656,
"grad_norm": 0.29235969381132293,
"learning_rate": 1.7056222362602653e-06,
"loss": 0.3556,
"step": 270
},
{
"epoch": 0.08846062712266013,
"grad_norm": 0.3059950184586691,
"learning_rate": 1.7687934301958308e-06,
"loss": 0.3485,
"step": 280
},
{
"epoch": 0.09161993523418371,
"grad_norm": 0.3065943367603657,
"learning_rate": 1.8319646241313962e-06,
"loss": 0.3441,
"step": 290
},
{
"epoch": 0.0947792433457073,
"grad_norm": 0.29754666934058893,
"learning_rate": 1.8951358180669615e-06,
"loss": 0.3486,
"step": 300
},
{
"epoch": 0.09793855145723086,
"grad_norm": 0.31042175851236,
"learning_rate": 1.9583070120025267e-06,
"loss": 0.3458,
"step": 310
},
{
"epoch": 0.10109785956875444,
"grad_norm": 0.28167478495509757,
"learning_rate": 2.0214782059380925e-06,
"loss": 0.3425,
"step": 320
},
{
"epoch": 0.10425716768027801,
"grad_norm": 0.2937174634980692,
"learning_rate": 2.084649399873658e-06,
"loss": 0.3409,
"step": 330
},
{
"epoch": 0.1074164757918016,
"grad_norm": 0.28351076022747446,
"learning_rate": 2.147820593809223e-06,
"loss": 0.3377,
"step": 340
},
{
"epoch": 0.11057578390332518,
"grad_norm": 0.2873794548808371,
"learning_rate": 2.2109917877447885e-06,
"loss": 0.3365,
"step": 350
},
{
"epoch": 0.11373509201484874,
"grad_norm": 0.28077300443160785,
"learning_rate": 2.274162981680354e-06,
"loss": 0.3395,
"step": 360
},
{
"epoch": 0.11689440012637232,
"grad_norm": 0.2895451240097338,
"learning_rate": 2.337334175615919e-06,
"loss": 0.3356,
"step": 370
},
{
"epoch": 0.1200537082378959,
"grad_norm": 0.29469241884731345,
"learning_rate": 2.4005053695514845e-06,
"loss": 0.3323,
"step": 380
},
{
"epoch": 0.12321301634941947,
"grad_norm": 0.31436693982320113,
"learning_rate": 2.4636765634870503e-06,
"loss": 0.333,
"step": 390
},
{
"epoch": 0.12637232446094304,
"grad_norm": 0.2882094750389832,
"learning_rate": 2.5268477574226152e-06,
"loss": 0.3316,
"step": 400
},
{
"epoch": 0.12953163257246664,
"grad_norm": 0.33020346533639344,
"learning_rate": 2.590018951358181e-06,
"loss": 0.328,
"step": 410
},
{
"epoch": 0.1326909406839902,
"grad_norm": 0.3442838133962943,
"learning_rate": 2.6531901452937464e-06,
"loss": 0.3254,
"step": 420
},
{
"epoch": 0.13585024879551377,
"grad_norm": 0.3032556873766918,
"learning_rate": 2.7163613392293113e-06,
"loss": 0.3249,
"step": 430
},
{
"epoch": 0.13900955690703737,
"grad_norm": 0.3056937672984876,
"learning_rate": 2.779532533164877e-06,
"loss": 0.3229,
"step": 440
},
{
"epoch": 0.14216886501856094,
"grad_norm": 0.28744855606538844,
"learning_rate": 2.8427037271004424e-06,
"loss": 0.3233,
"step": 450
},
{
"epoch": 0.1453281731300845,
"grad_norm": 0.31562420003771263,
"learning_rate": 2.9058749210360078e-06,
"loss": 0.3226,
"step": 460
},
{
"epoch": 0.1484874812416081,
"grad_norm": 0.2778274627103786,
"learning_rate": 2.969046114971573e-06,
"loss": 0.3209,
"step": 470
},
{
"epoch": 0.15164678935313167,
"grad_norm": 0.31527911586537605,
"learning_rate": 3.032217308907139e-06,
"loss": 0.3233,
"step": 480
},
{
"epoch": 0.15480609746465523,
"grad_norm": 0.28579902930861095,
"learning_rate": 3.095388502842704e-06,
"loss": 0.3187,
"step": 490
},
{
"epoch": 0.15796540557617883,
"grad_norm": 0.2854450060082136,
"learning_rate": 3.1585596967782696e-06,
"loss": 0.3164,
"step": 500
},
{
"epoch": 0.1611247136877024,
"grad_norm": 0.2783395921293009,
"learning_rate": 3.2217308907138345e-06,
"loss": 0.3172,
"step": 510
},
{
"epoch": 0.16428402179922597,
"grad_norm": 0.30491989041976003,
"learning_rate": 3.2849020846494003e-06,
"loss": 0.3129,
"step": 520
},
{
"epoch": 0.16744332991074953,
"grad_norm": 0.32468595503092035,
"learning_rate": 3.3480732785849656e-06,
"loss": 0.3147,
"step": 530
},
{
"epoch": 0.17060263802227313,
"grad_norm": 0.31416335531159423,
"learning_rate": 3.4112444725205306e-06,
"loss": 0.3125,
"step": 540
},
{
"epoch": 0.1737619461337967,
"grad_norm": 0.36383194617757575,
"learning_rate": 3.4744156664560963e-06,
"loss": 0.3138,
"step": 550
},
{
"epoch": 0.17692125424532026,
"grad_norm": 0.29533777762093066,
"learning_rate": 3.5375868603916617e-06,
"loss": 0.3116,
"step": 560
},
{
"epoch": 0.18008056235684386,
"grad_norm": 0.3493771802079338,
"learning_rate": 3.600758054327227e-06,
"loss": 0.3135,
"step": 570
},
{
"epoch": 0.18323987046836743,
"grad_norm": 0.2944691289331218,
"learning_rate": 3.6639292482627924e-06,
"loss": 0.3163,
"step": 580
},
{
"epoch": 0.186399178579891,
"grad_norm": 0.31590918798291073,
"learning_rate": 3.727100442198358e-06,
"loss": 0.3087,
"step": 590
},
{
"epoch": 0.1895584866914146,
"grad_norm": 0.36702711115113235,
"learning_rate": 3.790271636133923e-06,
"loss": 0.3048,
"step": 600
},
{
"epoch": 0.19271779480293816,
"grad_norm": 0.34337289204312266,
"learning_rate": 3.853442830069489e-06,
"loss": 0.3097,
"step": 610
},
{
"epoch": 0.19587710291446173,
"grad_norm": 0.3043737272471374,
"learning_rate": 3.916614024005053e-06,
"loss": 0.3071,
"step": 620
},
{
"epoch": 0.19903641102598532,
"grad_norm": 0.31254100353406794,
"learning_rate": 3.9797852179406196e-06,
"loss": 0.3035,
"step": 630
},
{
"epoch": 0.2021957191375089,
"grad_norm": 0.2865164407577743,
"learning_rate": 4.042956411876185e-06,
"loss": 0.3047,
"step": 640
},
{
"epoch": 0.20535502724903246,
"grad_norm": 0.29447310088157563,
"learning_rate": 4.10612760581175e-06,
"loss": 0.3037,
"step": 650
},
{
"epoch": 0.20851433536055602,
"grad_norm": 0.29267707519023506,
"learning_rate": 4.169298799747316e-06,
"loss": 0.3028,
"step": 660
},
{
"epoch": 0.21167364347207962,
"grad_norm": 0.2963460269610618,
"learning_rate": 4.232469993682881e-06,
"loss": 0.305,
"step": 670
},
{
"epoch": 0.2148329515836032,
"grad_norm": 0.2978257495744734,
"learning_rate": 4.295641187618446e-06,
"loss": 0.3004,
"step": 680
},
{
"epoch": 0.21799225969512676,
"grad_norm": 0.2906359775378807,
"learning_rate": 4.358812381554012e-06,
"loss": 0.3019,
"step": 690
},
{
"epoch": 0.22115156780665035,
"grad_norm": 0.3446901435895929,
"learning_rate": 4.421983575489577e-06,
"loss": 0.3024,
"step": 700
},
{
"epoch": 0.22431087591817392,
"grad_norm": 0.30756534246023176,
"learning_rate": 4.485154769425142e-06,
"loss": 0.2959,
"step": 710
},
{
"epoch": 0.2274701840296975,
"grad_norm": 0.3155947525143884,
"learning_rate": 4.548325963360708e-06,
"loss": 0.3005,
"step": 720
},
{
"epoch": 0.23062949214122108,
"grad_norm": 0.2995756375311246,
"learning_rate": 4.611497157296273e-06,
"loss": 0.2975,
"step": 730
},
{
"epoch": 0.23378880025274465,
"grad_norm": 0.30858061818787486,
"learning_rate": 4.674668351231838e-06,
"loss": 0.2978,
"step": 740
},
{
"epoch": 0.23694810836426822,
"grad_norm": 0.3338257461588683,
"learning_rate": 4.737839545167405e-06,
"loss": 0.2933,
"step": 750
},
{
"epoch": 0.2401074164757918,
"grad_norm": 0.29022817054284084,
"learning_rate": 4.801010739102969e-06,
"loss": 0.2972,
"step": 760
},
{
"epoch": 0.24326672458731538,
"grad_norm": 0.2896880604505418,
"learning_rate": 4.8641819330385344e-06,
"loss": 0.2946,
"step": 770
},
{
"epoch": 0.24642603269883895,
"grad_norm": 0.2847438243168221,
"learning_rate": 4.927353126974101e-06,
"loss": 0.2969,
"step": 780
},
{
"epoch": 0.24958534081036254,
"grad_norm": 0.3037714021164623,
"learning_rate": 4.990524320909665e-06,
"loss": 0.2937,
"step": 790
},
{
"epoch": 0.2527446489218861,
"grad_norm": 0.32395568014513065,
"learning_rate": 5.0536955148452305e-06,
"loss": 0.2939,
"step": 800
},
{
"epoch": 0.2559039570334097,
"grad_norm": 0.2957719635763268,
"learning_rate": 5.116866708780797e-06,
"loss": 0.2932,
"step": 810
},
{
"epoch": 0.2590632651449333,
"grad_norm": 0.3165621516450349,
"learning_rate": 5.180037902716362e-06,
"loss": 0.2928,
"step": 820
},
{
"epoch": 0.2622225732564568,
"grad_norm": 0.3232166339204694,
"learning_rate": 5.2432090966519265e-06,
"loss": 0.2901,
"step": 830
},
{
"epoch": 0.2653818813679804,
"grad_norm": 0.3040549753465353,
"learning_rate": 5.306380290587493e-06,
"loss": 0.2881,
"step": 840
},
{
"epoch": 0.268541189479504,
"grad_norm": 0.32781940117167635,
"learning_rate": 5.369551484523058e-06,
"loss": 0.2908,
"step": 850
},
{
"epoch": 0.27170049759102755,
"grad_norm": 0.3327478795766515,
"learning_rate": 5.432722678458623e-06,
"loss": 0.2907,
"step": 860
},
{
"epoch": 0.27485980570255114,
"grad_norm": 0.2856004412508293,
"learning_rate": 5.495893872394189e-06,
"loss": 0.2934,
"step": 870
},
{
"epoch": 0.27801911381407474,
"grad_norm": 0.3604841472498219,
"learning_rate": 5.559065066329754e-06,
"loss": 0.2855,
"step": 880
},
{
"epoch": 0.2811784219255983,
"grad_norm": 0.32653022277153904,
"learning_rate": 5.6222362602653195e-06,
"loss": 0.2876,
"step": 890
},
{
"epoch": 0.28433773003712187,
"grad_norm": 0.3123066830281955,
"learning_rate": 5.685407454200885e-06,
"loss": 0.2887,
"step": 900
},
{
"epoch": 0.28749703814864547,
"grad_norm": 0.31118386648194923,
"learning_rate": 5.74857864813645e-06,
"loss": 0.2889,
"step": 910
},
{
"epoch": 0.290656346260169,
"grad_norm": 0.29015768543262505,
"learning_rate": 5.8117498420720155e-06,
"loss": 0.2876,
"step": 920
},
{
"epoch": 0.2938156543716926,
"grad_norm": 0.2885072388494909,
"learning_rate": 5.874921036007582e-06,
"loss": 0.2868,
"step": 930
},
{
"epoch": 0.2969749624832162,
"grad_norm": 0.3282216748098807,
"learning_rate": 5.938092229943146e-06,
"loss": 0.2877,
"step": 940
},
{
"epoch": 0.30013427059473974,
"grad_norm": 0.29797634077774454,
"learning_rate": 6.001263423878712e-06,
"loss": 0.2873,
"step": 950
},
{
"epoch": 0.30329357870626333,
"grad_norm": 0.33087996389175034,
"learning_rate": 6.064434617814278e-06,
"loss": 0.2838,
"step": 960
},
{
"epoch": 0.30645288681778693,
"grad_norm": 0.29661512432744086,
"learning_rate": 6.127605811749843e-06,
"loss": 0.2877,
"step": 970
},
{
"epoch": 0.30961219492931047,
"grad_norm": 0.31023012973801395,
"learning_rate": 6.190777005685408e-06,
"loss": 0.2826,
"step": 980
},
{
"epoch": 0.31277150304083406,
"grad_norm": 0.30885706884471026,
"learning_rate": 6.253948199620974e-06,
"loss": 0.2845,
"step": 990
},
{
"epoch": 0.31593081115235766,
"grad_norm": 0.3540472583639826,
"learning_rate": 6.317119393556539e-06,
"loss": 0.2809,
"step": 1000
},
{
"epoch": 0.3190901192638812,
"grad_norm": 0.2730722833734264,
"learning_rate": 6.380290587492104e-06,
"loss": 0.2787,
"step": 1010
},
{
"epoch": 0.3222494273754048,
"grad_norm": 0.3320814722084985,
"learning_rate": 6.443461781427669e-06,
"loss": 0.2813,
"step": 1020
},
{
"epoch": 0.32540873548692834,
"grad_norm": 0.290985113799801,
"learning_rate": 6.506632975363235e-06,
"loss": 0.2782,
"step": 1030
},
{
"epoch": 0.32856804359845193,
"grad_norm": 0.321948058443697,
"learning_rate": 6.5698041692988006e-06,
"loss": 0.2799,
"step": 1040
},
{
"epoch": 0.3317273517099755,
"grad_norm": 0.3247951324518689,
"learning_rate": 6.632975363234365e-06,
"loss": 0.2853,
"step": 1050
},
{
"epoch": 0.33488665982149907,
"grad_norm": 0.37466669888496,
"learning_rate": 6.696146557169931e-06,
"loss": 0.2847,
"step": 1060
},
{
"epoch": 0.33804596793302266,
"grad_norm": 0.3158284283569005,
"learning_rate": 6.759317751105497e-06,
"loss": 0.2793,
"step": 1070
},
{
"epoch": 0.34120527604454626,
"grad_norm": 0.2844891421044668,
"learning_rate": 6.822488945041061e-06,
"loss": 0.2812,
"step": 1080
},
{
"epoch": 0.3443645841560698,
"grad_norm": 0.33051991039937395,
"learning_rate": 6.885660138976627e-06,
"loss": 0.2811,
"step": 1090
},
{
"epoch": 0.3475238922675934,
"grad_norm": 0.3186151235138414,
"learning_rate": 6.948831332912193e-06,
"loss": 0.2851,
"step": 1100
},
{
"epoch": 0.350683200379117,
"grad_norm": 0.3123969158631383,
"learning_rate": 7.012002526847758e-06,
"loss": 0.2798,
"step": 1110
},
{
"epoch": 0.35384250849064053,
"grad_norm": 0.3280651223802418,
"learning_rate": 7.075173720783323e-06,
"loss": 0.2835,
"step": 1120
},
{
"epoch": 0.3570018166021641,
"grad_norm": 0.29332806586366567,
"learning_rate": 7.138344914718889e-06,
"loss": 0.2775,
"step": 1130
},
{
"epoch": 0.3601611247136877,
"grad_norm": 0.33171088512344676,
"learning_rate": 7.201516108654454e-06,
"loss": 0.2752,
"step": 1140
},
{
"epoch": 0.36332043282521126,
"grad_norm": 0.3000902915772275,
"learning_rate": 7.26468730259002e-06,
"loss": 0.2807,
"step": 1150
},
{
"epoch": 0.36647974093673485,
"grad_norm": 0.2883327293237225,
"learning_rate": 7.327858496525585e-06,
"loss": 0.2762,
"step": 1160
},
{
"epoch": 0.36963904904825845,
"grad_norm": 0.3144189091034534,
"learning_rate": 7.39102969046115e-06,
"loss": 0.2801,
"step": 1170
},
{
"epoch": 0.372798357159782,
"grad_norm": 0.356268108886898,
"learning_rate": 7.454200884396716e-06,
"loss": 0.2777,
"step": 1180
},
{
"epoch": 0.3759576652713056,
"grad_norm": 0.3128810321586775,
"learning_rate": 7.517372078332281e-06,
"loss": 0.2757,
"step": 1190
},
{
"epoch": 0.3791169733828292,
"grad_norm": 0.30434623170055075,
"learning_rate": 7.580543272267846e-06,
"loss": 0.2784,
"step": 1200
},
{
"epoch": 0.3822762814943527,
"grad_norm": 0.3388464380231131,
"learning_rate": 7.643714466203413e-06,
"loss": 0.2784,
"step": 1210
},
{
"epoch": 0.3854355896058763,
"grad_norm": 0.3110630401877264,
"learning_rate": 7.706885660138978e-06,
"loss": 0.2743,
"step": 1220
},
{
"epoch": 0.3885948977173999,
"grad_norm": 0.2771106506778223,
"learning_rate": 7.770056854074542e-06,
"loss": 0.2757,
"step": 1230
},
{
"epoch": 0.39175420582892345,
"grad_norm": 0.3175411734943708,
"learning_rate": 7.833228048010107e-06,
"loss": 0.2762,
"step": 1240
},
{
"epoch": 0.39491351394044705,
"grad_norm": 0.3072276379244834,
"learning_rate": 7.896399241945673e-06,
"loss": 0.2757,
"step": 1250
},
{
"epoch": 0.39807282205197064,
"grad_norm": 0.31787735230000674,
"learning_rate": 7.959570435881239e-06,
"loss": 0.2716,
"step": 1260
},
{
"epoch": 0.4012321301634942,
"grad_norm": 0.32573848304334413,
"learning_rate": 8.022741629816804e-06,
"loss": 0.2718,
"step": 1270
},
{
"epoch": 0.4043914382750178,
"grad_norm": 0.31968318506791704,
"learning_rate": 8.08591282375237e-06,
"loss": 0.2753,
"step": 1280
},
{
"epoch": 0.4075507463865414,
"grad_norm": 0.3038009939073212,
"learning_rate": 8.149084017687934e-06,
"loss": 0.2709,
"step": 1290
},
{
"epoch": 0.4107100544980649,
"grad_norm": 0.32085469500420516,
"learning_rate": 8.2122552116235e-06,
"loss": 0.2707,
"step": 1300
},
{
"epoch": 0.4138693626095885,
"grad_norm": 0.3065889908229096,
"learning_rate": 8.275426405559067e-06,
"loss": 0.2768,
"step": 1310
},
{
"epoch": 0.41702867072111205,
"grad_norm": 0.31763603457220624,
"learning_rate": 8.338597599494631e-06,
"loss": 0.2709,
"step": 1320
},
{
"epoch": 0.42018797883263564,
"grad_norm": 0.3109152063857626,
"learning_rate": 8.401768793430196e-06,
"loss": 0.2739,
"step": 1330
},
{
"epoch": 0.42334728694415924,
"grad_norm": 0.29181104171061434,
"learning_rate": 8.464939987365762e-06,
"loss": 0.2686,
"step": 1340
},
{
"epoch": 0.4265065950556828,
"grad_norm": 0.2935442340016787,
"learning_rate": 8.528111181301328e-06,
"loss": 0.2707,
"step": 1350
},
{
"epoch": 0.4296659031672064,
"grad_norm": 0.2906035979144556,
"learning_rate": 8.591282375236893e-06,
"loss": 0.2731,
"step": 1360
},
{
"epoch": 0.43282521127872997,
"grad_norm": 0.29758936608537967,
"learning_rate": 8.654453569172459e-06,
"loss": 0.2709,
"step": 1370
},
{
"epoch": 0.4359845193902535,
"grad_norm": 0.33154204605407617,
"learning_rate": 8.717624763108023e-06,
"loss": 0.2707,
"step": 1380
},
{
"epoch": 0.4391438275017771,
"grad_norm": 0.33339797215462064,
"learning_rate": 8.780795957043588e-06,
"loss": 0.2713,
"step": 1390
},
{
"epoch": 0.4423031356133007,
"grad_norm": 0.3155178562410378,
"learning_rate": 8.843967150979154e-06,
"loss": 0.2675,
"step": 1400
},
{
"epoch": 0.44546244372482424,
"grad_norm": 0.3265426814247614,
"learning_rate": 8.90713834491472e-06,
"loss": 0.2683,
"step": 1410
},
{
"epoch": 0.44862175183634784,
"grad_norm": 0.36971141677875463,
"learning_rate": 8.970309538850285e-06,
"loss": 0.2677,
"step": 1420
},
{
"epoch": 0.45178105994787143,
"grad_norm": 0.30518969540463764,
"learning_rate": 9.033480732785851e-06,
"loss": 0.2688,
"step": 1430
},
{
"epoch": 0.454940368059395,
"grad_norm": 0.34536630526318685,
"learning_rate": 9.096651926721415e-06,
"loss": 0.2673,
"step": 1440
},
{
"epoch": 0.45809967617091857,
"grad_norm": 0.35749309238694066,
"learning_rate": 9.159823120656982e-06,
"loss": 0.2691,
"step": 1450
},
{
"epoch": 0.46125898428244216,
"grad_norm": 0.31258023613884145,
"learning_rate": 9.222994314592546e-06,
"loss": 0.2664,
"step": 1460
},
{
"epoch": 0.4644182923939657,
"grad_norm": 0.3450789196077563,
"learning_rate": 9.286165508528112e-06,
"loss": 0.268,
"step": 1470
},
{
"epoch": 0.4675776005054893,
"grad_norm": 0.29121813500205246,
"learning_rate": 9.349336702463677e-06,
"loss": 0.2677,
"step": 1480
},
{
"epoch": 0.4707369086170129,
"grad_norm": 0.3220463754226227,
"learning_rate": 9.412507896399243e-06,
"loss": 0.2667,
"step": 1490
},
{
"epoch": 0.47389621672853643,
"grad_norm": 0.33704239601038527,
"learning_rate": 9.47567909033481e-06,
"loss": 0.2646,
"step": 1500
},
{
"epoch": 0.47705552484006003,
"grad_norm": 0.2863325950552584,
"learning_rate": 9.538850284270374e-06,
"loss": 0.2705,
"step": 1510
},
{
"epoch": 0.4802148329515836,
"grad_norm": 0.31752841534155907,
"learning_rate": 9.602021478205938e-06,
"loss": 0.2664,
"step": 1520
},
{
"epoch": 0.48337414106310717,
"grad_norm": 0.32880306966393713,
"learning_rate": 9.665192672141504e-06,
"loss": 0.2631,
"step": 1530
},
{
"epoch": 0.48653344917463076,
"grad_norm": 0.31843129282620164,
"learning_rate": 9.728363866077069e-06,
"loss": 0.2625,
"step": 1540
},
{
"epoch": 0.48969275728615436,
"grad_norm": 0.30470755245172276,
"learning_rate": 9.791535060012635e-06,
"loss": 0.2666,
"step": 1550
},
{
"epoch": 0.4928520653976779,
"grad_norm": 0.33123646966581777,
"learning_rate": 9.854706253948201e-06,
"loss": 0.2646,
"step": 1560
},
{
"epoch": 0.4960113735092015,
"grad_norm": 0.28677001614790365,
"learning_rate": 9.917877447883766e-06,
"loss": 0.2622,
"step": 1570
},
{
"epoch": 0.4991706816207251,
"grad_norm": 0.324282072097384,
"learning_rate": 9.98104864181933e-06,
"loss": 0.2678,
"step": 1580
},
{
"epoch": 0.5023299897322486,
"grad_norm": 0.3255697445472025,
"learning_rate": 9.999994039347758e-06,
"loss": 0.2637,
"step": 1590
},
{
"epoch": 0.5054892978437722,
"grad_norm": 0.2933750585789845,
"learning_rate": 9.999964844350574e-06,
"loss": 0.2649,
"step": 1600
},
{
"epoch": 0.5086486059552958,
"grad_norm": 0.33377765453076996,
"learning_rate": 9.999911320336655e-06,
"loss": 0.267,
"step": 1610
},
{
"epoch": 0.5118079140668194,
"grad_norm": 0.304237717093902,
"learning_rate": 9.999833467566438e-06,
"loss": 0.2633,
"step": 1620
},
{
"epoch": 0.5149672221783429,
"grad_norm": 0.3058481570433111,
"learning_rate": 9.999731286418741e-06,
"loss": 0.262,
"step": 1630
},
{
"epoch": 0.5181265302898665,
"grad_norm": 0.3392309580496954,
"learning_rate": 9.999604777390763e-06,
"loss": 0.2593,
"step": 1640
},
{
"epoch": 0.5212858384013901,
"grad_norm": 0.2895018374347362,
"learning_rate": 9.999453941098077e-06,
"loss": 0.2625,
"step": 1650
},
{
"epoch": 0.5244451465129136,
"grad_norm": 0.2912777670223621,
"learning_rate": 9.999278778274627e-06,
"loss": 0.266,
"step": 1660
},
{
"epoch": 0.5276044546244373,
"grad_norm": 0.3529258059433674,
"learning_rate": 9.999079289772724e-06,
"loss": 0.2619,
"step": 1670
},
{
"epoch": 0.5307637627359608,
"grad_norm": 0.2738135177871275,
"learning_rate": 9.99885547656305e-06,
"loss": 0.2618,
"step": 1680
},
{
"epoch": 0.5339230708474844,
"grad_norm": 0.2692997188904244,
"learning_rate": 9.998607339734643e-06,
"loss": 0.2606,
"step": 1690
},
{
"epoch": 0.537082378959008,
"grad_norm": 0.2949366840504707,
"learning_rate": 9.998334880494898e-06,
"loss": 0.2612,
"step": 1700
},
{
"epoch": 0.5402416870705316,
"grad_norm": 0.3057325239685516,
"learning_rate": 9.998038100169554e-06,
"loss": 0.261,
"step": 1710
},
{
"epoch": 0.5434009951820551,
"grad_norm": 0.27124805299961813,
"learning_rate": 9.997717000202696e-06,
"loss": 0.2598,
"step": 1720
},
{
"epoch": 0.5465603032935787,
"grad_norm": 0.30051977914101224,
"learning_rate": 9.997371582156747e-06,
"loss": 0.2602,
"step": 1730
},
{
"epoch": 0.5497196114051023,
"grad_norm": 0.3075193191916608,
"learning_rate": 9.997001847712456e-06,
"loss": 0.2601,
"step": 1740
},
{
"epoch": 0.5528789195166258,
"grad_norm": 0.29754589174948665,
"learning_rate": 9.996607798668887e-06,
"loss": 0.2592,
"step": 1750
},
{
"epoch": 0.5560382276281495,
"grad_norm": 0.33700900362331376,
"learning_rate": 9.99618943694342e-06,
"loss": 0.262,
"step": 1760
},
{
"epoch": 0.559197535739673,
"grad_norm": 0.3265608460452239,
"learning_rate": 9.995746764571736e-06,
"loss": 0.259,
"step": 1770
},
{
"epoch": 0.5623568438511966,
"grad_norm": 0.31549240271112217,
"learning_rate": 9.995279783707805e-06,
"loss": 0.2607,
"step": 1780
},
{
"epoch": 0.5655161519627202,
"grad_norm": 0.3586012625330008,
"learning_rate": 9.994788496623884e-06,
"loss": 0.2572,
"step": 1790
},
{
"epoch": 0.5686754600742437,
"grad_norm": 0.3056154836660224,
"learning_rate": 9.994272905710491e-06,
"loss": 0.2526,
"step": 1800
},
{
"epoch": 0.5718347681857673,
"grad_norm": 0.2613411851814494,
"learning_rate": 9.993733013476412e-06,
"loss": 0.2561,
"step": 1810
},
{
"epoch": 0.5749940762972909,
"grad_norm": 0.3254616196951068,
"learning_rate": 9.993168822548672e-06,
"loss": 0.257,
"step": 1820
},
{
"epoch": 0.5781533844088145,
"grad_norm": 0.3020785656622962,
"learning_rate": 9.992580335672535e-06,
"loss": 0.2557,
"step": 1830
},
{
"epoch": 0.581312692520338,
"grad_norm": 0.26993214887498285,
"learning_rate": 9.99196755571148e-06,
"loss": 0.2536,
"step": 1840
},
{
"epoch": 0.5844720006318617,
"grad_norm": 0.2639927502536523,
"learning_rate": 9.991330485647195e-06,
"loss": 0.26,
"step": 1850
},
{
"epoch": 0.5876313087433852,
"grad_norm": 0.30065294121954683,
"learning_rate": 9.990669128579562e-06,
"loss": 0.2605,
"step": 1860
},
{
"epoch": 0.5907906168549087,
"grad_norm": 0.30583938194208843,
"learning_rate": 9.989983487726634e-06,
"loss": 0.2563,
"step": 1870
},
{
"epoch": 0.5939499249664324,
"grad_norm": 0.2864579077847969,
"learning_rate": 9.989273566424629e-06,
"loss": 0.2606,
"step": 1880
},
{
"epoch": 0.5971092330779559,
"grad_norm": 0.3026768306473177,
"learning_rate": 9.98853936812791e-06,
"loss": 0.258,
"step": 1890
},
{
"epoch": 0.6002685411894795,
"grad_norm": 0.29314955217944727,
"learning_rate": 9.987780896408966e-06,
"loss": 0.2589,
"step": 1900
},
{
"epoch": 0.6034278493010031,
"grad_norm": 0.31058695039428386,
"learning_rate": 9.986998154958395e-06,
"loss": 0.253,
"step": 1910
},
{
"epoch": 0.6065871574125267,
"grad_norm": 0.2845944676061047,
"learning_rate": 9.986191147584893e-06,
"loss": 0.2546,
"step": 1920
},
{
"epoch": 0.6097464655240502,
"grad_norm": 0.29933495312410735,
"learning_rate": 9.985359878215224e-06,
"loss": 0.2552,
"step": 1930
},
{
"epoch": 0.6129057736355739,
"grad_norm": 0.2759459833479636,
"learning_rate": 9.984504350894213e-06,
"loss": 0.2574,
"step": 1940
},
{
"epoch": 0.6160650817470974,
"grad_norm": 0.3066829895649084,
"learning_rate": 9.983624569784714e-06,
"loss": 0.2553,
"step": 1950
},
{
"epoch": 0.6192243898586209,
"grad_norm": 0.2690944667845755,
"learning_rate": 9.982720539167601e-06,
"loss": 0.2568,
"step": 1960
},
{
"epoch": 0.6223836979701446,
"grad_norm": 0.2946036416755621,
"learning_rate": 9.981792263441739e-06,
"loss": 0.2543,
"step": 1970
},
{
"epoch": 0.6255430060816681,
"grad_norm": 0.2747932187325911,
"learning_rate": 9.980839747123967e-06,
"loss": 0.2557,
"step": 1980
},
{
"epoch": 0.6287023141931917,
"grad_norm": 0.2851964847837964,
"learning_rate": 9.979862994849074e-06,
"loss": 0.2541,
"step": 1990
},
{
"epoch": 0.6318616223047153,
"grad_norm": 0.31890735265964804,
"learning_rate": 9.978862011369779e-06,
"loss": 0.2558,
"step": 2000
},
{
"epoch": 0.6350209304162389,
"grad_norm": 0.28038726746988707,
"learning_rate": 9.977836801556705e-06,
"loss": 0.2538,
"step": 2010
},
{
"epoch": 0.6381802385277624,
"grad_norm": 0.295646403302159,
"learning_rate": 9.976787370398355e-06,
"loss": 0.2546,
"step": 2020
},
{
"epoch": 0.641339546639286,
"grad_norm": 0.2902155436032145,
"learning_rate": 9.975713723001093e-06,
"loss": 0.251,
"step": 2030
},
{
"epoch": 0.6444988547508096,
"grad_norm": 0.2828393586793592,
"learning_rate": 9.974615864589112e-06,
"loss": 0.2559,
"step": 2040
},
{
"epoch": 0.6476581628623331,
"grad_norm": 0.28577077749952917,
"learning_rate": 9.97349380050441e-06,
"loss": 0.2531,
"step": 2050
},
{
"epoch": 0.6508174709738567,
"grad_norm": 0.29564024961232643,
"learning_rate": 9.972347536206772e-06,
"loss": 0.2506,
"step": 2060
},
{
"epoch": 0.6539767790853803,
"grad_norm": 0.33183704608229764,
"learning_rate": 9.971177077273732e-06,
"loss": 0.2534,
"step": 2070
},
{
"epoch": 0.6571360871969039,
"grad_norm": 0.28977544469285044,
"learning_rate": 9.969982429400556e-06,
"loss": 0.2537,
"step": 2080
},
{
"epoch": 0.6602953953084274,
"grad_norm": 0.29923378551865065,
"learning_rate": 9.968763598400202e-06,
"loss": 0.2569,
"step": 2090
},
{
"epoch": 0.663454703419951,
"grad_norm": 0.3012488805857487,
"learning_rate": 9.967520590203305e-06,
"loss": 0.2509,
"step": 2100
},
{
"epoch": 0.6666140115314746,
"grad_norm": 0.2916467483906387,
"learning_rate": 9.966253410858145e-06,
"loss": 0.2551,
"step": 2110
},
{
"epoch": 0.6697733196429981,
"grad_norm": 0.27652645594826225,
"learning_rate": 9.964962066530604e-06,
"loss": 0.2515,
"step": 2120
},
{
"epoch": 0.6729326277545218,
"grad_norm": 0.27562185290008373,
"learning_rate": 9.963646563504158e-06,
"loss": 0.2544,
"step": 2130
},
{
"epoch": 0.6760919358660453,
"grad_norm": 0.25732340414313354,
"learning_rate": 9.962306908179833e-06,
"loss": 0.2515,
"step": 2140
},
{
"epoch": 0.6792512439775689,
"grad_norm": 0.26515843698964003,
"learning_rate": 9.96094310707617e-06,
"loss": 0.2494,
"step": 2150
},
{
"epoch": 0.6824105520890925,
"grad_norm": 0.28168758705492475,
"learning_rate": 9.959555166829204e-06,
"loss": 0.2494,
"step": 2160
},
{
"epoch": 0.685569860200616,
"grad_norm": 0.24631536243430951,
"learning_rate": 9.95814309419243e-06,
"loss": 0.2519,
"step": 2170
},
{
"epoch": 0.6887291683121396,
"grad_norm": 0.26426587738324664,
"learning_rate": 9.956706896036762e-06,
"loss": 0.2533,
"step": 2180
},
{
"epoch": 0.6918884764236632,
"grad_norm": 0.24898568379039823,
"learning_rate": 9.955246579350505e-06,
"loss": 0.2491,
"step": 2190
},
{
"epoch": 0.6950477845351868,
"grad_norm": 0.30540369742862966,
"learning_rate": 9.953762151239327e-06,
"loss": 0.2478,
"step": 2200
},
{
"epoch": 0.6982070926467103,
"grad_norm": 0.2709499727026948,
"learning_rate": 9.952253618926212e-06,
"loss": 0.2515,
"step": 2210
},
{
"epoch": 0.701366400758234,
"grad_norm": 0.3003234996003297,
"learning_rate": 9.95072098975143e-06,
"loss": 0.2541,
"step": 2220
},
{
"epoch": 0.7045257088697575,
"grad_norm": 0.2736168957433189,
"learning_rate": 9.949164271172512e-06,
"loss": 0.2499,
"step": 2230
},
{
"epoch": 0.7076850169812811,
"grad_norm": 0.2651616205282266,
"learning_rate": 9.947583470764193e-06,
"loss": 0.2506,
"step": 2240
},
{
"epoch": 0.7108443250928047,
"grad_norm": 0.2650203207221141,
"learning_rate": 9.945978596218391e-06,
"loss": 0.2488,
"step": 2250
},
{
"epoch": 0.7140036332043282,
"grad_norm": 0.31633222399887706,
"learning_rate": 9.944349655344168e-06,
"loss": 0.2504,
"step": 2260
},
{
"epoch": 0.7171629413158518,
"grad_norm": 0.3450314977108967,
"learning_rate": 9.942696656067683e-06,
"loss": 0.2487,
"step": 2270
},
{
"epoch": 0.7203222494273754,
"grad_norm": 0.2676203266167299,
"learning_rate": 9.941019606432163e-06,
"loss": 0.2515,
"step": 2280
},
{
"epoch": 0.723481557538899,
"grad_norm": 0.27501120919754213,
"learning_rate": 9.93931851459786e-06,
"loss": 0.2472,
"step": 2290
},
{
"epoch": 0.7266408656504225,
"grad_norm": 0.2721444766021836,
"learning_rate": 9.937593388842008e-06,
"loss": 0.2484,
"step": 2300
},
{
"epoch": 0.7298001737619462,
"grad_norm": 0.3055396538597429,
"learning_rate": 9.935844237558792e-06,
"loss": 0.2491,
"step": 2310
},
{
"epoch": 0.7329594818734697,
"grad_norm": 0.2687664962558202,
"learning_rate": 9.934071069259295e-06,
"loss": 0.2511,
"step": 2320
},
{
"epoch": 0.7361187899849932,
"grad_norm": 0.26485303698836093,
"learning_rate": 9.932273892571467e-06,
"loss": 0.2493,
"step": 2330
},
{
"epoch": 0.7392780980965169,
"grad_norm": 0.27030949688183054,
"learning_rate": 9.930452716240077e-06,
"loss": 0.2465,
"step": 2340
},
{
"epoch": 0.7424374062080404,
"grad_norm": 0.26582715073120317,
"learning_rate": 9.928607549126677e-06,
"loss": 0.2492,
"step": 2350
},
{
"epoch": 0.745596714319564,
"grad_norm": 0.26161071455089063,
"learning_rate": 9.926738400209546e-06,
"loss": 0.2473,
"step": 2360
},
{
"epoch": 0.7487560224310876,
"grad_norm": 0.27281673381922744,
"learning_rate": 9.924845278583661e-06,
"loss": 0.2461,
"step": 2370
},
{
"epoch": 0.7519153305426112,
"grad_norm": 0.25026144589450994,
"learning_rate": 9.922928193460644e-06,
"loss": 0.2447,
"step": 2380
},
{
"epoch": 0.7550746386541347,
"grad_norm": 0.2928912991449991,
"learning_rate": 9.920987154168719e-06,
"loss": 0.2461,
"step": 2390
},
{
"epoch": 0.7582339467656584,
"grad_norm": 0.2850696882452383,
"learning_rate": 9.919022170152668e-06,
"loss": 0.2499,
"step": 2400
},
{
"epoch": 0.7613932548771819,
"grad_norm": 0.30443484331190923,
"learning_rate": 9.917033250973786e-06,
"loss": 0.2493,
"step": 2410
},
{
"epoch": 0.7645525629887054,
"grad_norm": 0.3212209432208665,
"learning_rate": 9.915020406309828e-06,
"loss": 0.2491,
"step": 2420
},
{
"epoch": 0.7677118711002291,
"grad_norm": 0.28504423491604935,
"learning_rate": 9.912983645954973e-06,
"loss": 0.2474,
"step": 2430
},
{
"epoch": 0.7708711792117526,
"grad_norm": 0.2878869891714329,
"learning_rate": 9.910922979819762e-06,
"loss": 0.2492,
"step": 2440
},
{
"epoch": 0.7740304873232762,
"grad_norm": 0.2771202754746543,
"learning_rate": 9.908838417931062e-06,
"loss": 0.2472,
"step": 2450
},
{
"epoch": 0.7771897954347998,
"grad_norm": 0.31577544480370157,
"learning_rate": 9.906729970432014e-06,
"loss": 0.249,
"step": 2460
},
{
"epoch": 0.7803491035463234,
"grad_norm": 0.3082921227301929,
"learning_rate": 9.904597647581982e-06,
"loss": 0.2468,
"step": 2470
},
{
"epoch": 0.7835084116578469,
"grad_norm": 0.2562560339656447,
"learning_rate": 9.9024414597565e-06,
"loss": 0.2495,
"step": 2480
},
{
"epoch": 0.7866677197693706,
"grad_norm": 0.257309029607144,
"learning_rate": 9.90026141744723e-06,
"loss": 0.2474,
"step": 2490
},
{
"epoch": 0.7898270278808941,
"grad_norm": 0.2512470913827528,
"learning_rate": 9.898057531261904e-06,
"loss": 0.2472,
"step": 2500
},
{
"epoch": 0.7929863359924176,
"grad_norm": 0.255003256149547,
"learning_rate": 9.89582981192427e-06,
"loss": 0.2443,
"step": 2510
},
{
"epoch": 0.7961456441039413,
"grad_norm": 0.24903458333429462,
"learning_rate": 9.893578270274054e-06,
"loss": 0.2473,
"step": 2520
},
{
"epoch": 0.7993049522154648,
"grad_norm": 0.25472580330098815,
"learning_rate": 9.891302917266886e-06,
"loss": 0.2501,
"step": 2530
},
{
"epoch": 0.8024642603269884,
"grad_norm": 0.2876016304280073,
"learning_rate": 9.889003763974272e-06,
"loss": 0.248,
"step": 2540
},
{
"epoch": 0.805623568438512,
"grad_norm": 0.26547023285812826,
"learning_rate": 9.886680821583512e-06,
"loss": 0.2462,
"step": 2550
},
{
"epoch": 0.8087828765500356,
"grad_norm": 0.2541356171021246,
"learning_rate": 9.884334101397666e-06,
"loss": 0.2481,
"step": 2560
},
{
"epoch": 0.8119421846615591,
"grad_norm": 0.27501124097499335,
"learning_rate": 9.881963614835499e-06,
"loss": 0.2417,
"step": 2570
},
{
"epoch": 0.8151014927730827,
"grad_norm": 0.24719251381186472,
"learning_rate": 9.879569373431408e-06,
"loss": 0.2466,
"step": 2580
},
{
"epoch": 0.8182608008846063,
"grad_norm": 0.3030702344650738,
"learning_rate": 9.877151388835384e-06,
"loss": 0.2472,
"step": 2590
},
{
"epoch": 0.8214201089961298,
"grad_norm": 0.2516088806018057,
"learning_rate": 9.87470967281295e-06,
"loss": 0.2448,
"step": 2600
},
{
"epoch": 0.8245794171076535,
"grad_norm": 0.27693954241387997,
"learning_rate": 9.872244237245096e-06,
"loss": 0.2453,
"step": 2610
},
{
"epoch": 0.827738725219177,
"grad_norm": 0.25236801181159096,
"learning_rate": 9.869755094128234e-06,
"loss": 0.2444,
"step": 2620
},
{
"epoch": 0.8308980333307006,
"grad_norm": 0.2511412465708672,
"learning_rate": 9.867242255574127e-06,
"loss": 0.2459,
"step": 2630
},
{
"epoch": 0.8340573414422241,
"grad_norm": 0.28252199150173957,
"learning_rate": 9.864705733809842e-06,
"loss": 0.245,
"step": 2640
},
{
"epoch": 0.8372166495537477,
"grad_norm": 0.25900018707343897,
"learning_rate": 9.862145541177681e-06,
"loss": 0.2434,
"step": 2650
},
{
"epoch": 0.8403759576652713,
"grad_norm": 0.24962743931418693,
"learning_rate": 9.859561690135125e-06,
"loss": 0.2461,
"step": 2660
},
{
"epoch": 0.8435352657767948,
"grad_norm": 0.28196557811696227,
"learning_rate": 9.856954193254773e-06,
"loss": 0.2475,
"step": 2670
},
{
"epoch": 0.8466945738883185,
"grad_norm": 0.26537277820682614,
"learning_rate": 9.854323063224282e-06,
"loss": 0.2451,
"step": 2680
},
{
"epoch": 0.849853881999842,
"grad_norm": 0.2925684725032324,
"learning_rate": 9.851668312846303e-06,
"loss": 0.2453,
"step": 2690
},
{
"epoch": 0.8530131901113656,
"grad_norm": 0.31194880176939827,
"learning_rate": 9.848989955038422e-06,
"loss": 0.2446,
"step": 2700
},
{
"epoch": 0.8561724982228892,
"grad_norm": 0.26907966931847127,
"learning_rate": 9.84628800283309e-06,
"loss": 0.2416,
"step": 2710
},
{
"epoch": 0.8593318063344128,
"grad_norm": 0.2855876195388184,
"learning_rate": 9.843562469377568e-06,
"loss": 0.2435,
"step": 2720
},
{
"epoch": 0.8624911144459363,
"grad_norm": 0.26929494670820325,
"learning_rate": 9.84081336793386e-06,
"loss": 0.246,
"step": 2730
},
{
"epoch": 0.8656504225574599,
"grad_norm": 0.2560753102085232,
"learning_rate": 9.838040711878648e-06,
"loss": 0.2423,
"step": 2740
},
{
"epoch": 0.8688097306689835,
"grad_norm": 0.2726015414676623,
"learning_rate": 9.835244514703223e-06,
"loss": 0.2427,
"step": 2750
},
{
"epoch": 0.871969038780507,
"grad_norm": 0.24937962474701536,
"learning_rate": 9.83242479001343e-06,
"loss": 0.2439,
"step": 2760
},
{
"epoch": 0.8751283468920307,
"grad_norm": 0.2758063411552256,
"learning_rate": 9.82958155152959e-06,
"loss": 0.2448,
"step": 2770
},
{
"epoch": 0.8782876550035542,
"grad_norm": 0.2905878430819007,
"learning_rate": 9.826714813086439e-06,
"loss": 0.2412,
"step": 2780
},
{
"epoch": 0.8814469631150778,
"grad_norm": 0.2558643463285868,
"learning_rate": 9.82382458863306e-06,
"loss": 0.2406,
"step": 2790
},
{
"epoch": 0.8846062712266014,
"grad_norm": 0.2642935160526405,
"learning_rate": 9.820910892232816e-06,
"loss": 0.2444,
"step": 2800
},
{
"epoch": 0.8877655793381249,
"grad_norm": 0.2592764595190498,
"learning_rate": 9.817973738063283e-06,
"loss": 0.2386,
"step": 2810
},
{
"epoch": 0.8909248874496485,
"grad_norm": 0.2507512507039411,
"learning_rate": 9.815013140416171e-06,
"loss": 0.246,
"step": 2820
},
{
"epoch": 0.8940841955611721,
"grad_norm": 0.25908992057608105,
"learning_rate": 9.812029113697271e-06,
"loss": 0.2395,
"step": 2830
},
{
"epoch": 0.8972435036726957,
"grad_norm": 0.2612092586416161,
"learning_rate": 9.809021672426371e-06,
"loss": 0.24,
"step": 2840
},
{
"epoch": 0.9004028117842192,
"grad_norm": 0.2673187039088244,
"learning_rate": 9.805990831237194e-06,
"loss": 0.2444,
"step": 2850
},
{
"epoch": 0.9035621198957429,
"grad_norm": 0.2437036853392356,
"learning_rate": 9.802936604877316e-06,
"loss": 0.241,
"step": 2860
},
{
"epoch": 0.9067214280072664,
"grad_norm": 0.2825822044337206,
"learning_rate": 9.799859008208112e-06,
"loss": 0.2419,
"step": 2870
},
{
"epoch": 0.90988073611879,
"grad_norm": 0.28354580397363727,
"learning_rate": 9.796758056204662e-06,
"loss": 0.2427,
"step": 2880
},
{
"epoch": 0.9130400442303136,
"grad_norm": 0.24849879403822714,
"learning_rate": 9.7936337639557e-06,
"loss": 0.2407,
"step": 2890
},
{
"epoch": 0.9161993523418371,
"grad_norm": 0.26017599732454766,
"learning_rate": 9.790486146663522e-06,
"loss": 0.2403,
"step": 2900
},
{
"epoch": 0.9193586604533607,
"grad_norm": 0.2718913199408004,
"learning_rate": 9.78731521964392e-06,
"loss": 0.2428,
"step": 2910
},
{
"epoch": 0.9225179685648843,
"grad_norm": 0.23612120956344837,
"learning_rate": 9.784120998326115e-06,
"loss": 0.2401,
"step": 2920
},
{
"epoch": 0.9256772766764079,
"grad_norm": 0.24629353188155112,
"learning_rate": 9.780903498252665e-06,
"loss": 0.2392,
"step": 2930
},
{
"epoch": 0.9288365847879314,
"grad_norm": 0.2459160847664615,
"learning_rate": 9.777662735079406e-06,
"loss": 0.2404,
"step": 2940
},
{
"epoch": 0.9319958928994551,
"grad_norm": 0.2475965886432302,
"learning_rate": 9.77439872457536e-06,
"loss": 0.2412,
"step": 2950
},
{
"epoch": 0.9351552010109786,
"grad_norm": 0.28786836243759434,
"learning_rate": 9.771111482622677e-06,
"loss": 0.2408,
"step": 2960
},
{
"epoch": 0.9383145091225021,
"grad_norm": 0.2556612314956361,
"learning_rate": 9.76780102521654e-06,
"loss": 0.2395,
"step": 2970
},
{
"epoch": 0.9414738172340258,
"grad_norm": 0.26540547133463965,
"learning_rate": 9.764467368465098e-06,
"loss": 0.2408,
"step": 2980
},
{
"epoch": 0.9446331253455493,
"grad_norm": 0.27099661447259094,
"learning_rate": 9.761110528589382e-06,
"loss": 0.2411,
"step": 2990
},
{
"epoch": 0.9477924334570729,
"grad_norm": 0.2665281174710895,
"learning_rate": 9.75773052192323e-06,
"loss": 0.2411,
"step": 3000
},
{
"epoch": 0.9509517415685965,
"grad_norm": 0.27363802003153875,
"learning_rate": 9.754327364913208e-06,
"loss": 0.2378,
"step": 3010
},
{
"epoch": 0.9541110496801201,
"grad_norm": 0.2319637502396465,
"learning_rate": 9.75090107411852e-06,
"loss": 0.2423,
"step": 3020
},
{
"epoch": 0.9572703577916436,
"grad_norm": 0.24790838457350073,
"learning_rate": 9.747451666210946e-06,
"loss": 0.2418,
"step": 3030
},
{
"epoch": 0.9604296659031673,
"grad_norm": 0.28516778958623945,
"learning_rate": 9.743979157974739e-06,
"loss": 0.2416,
"step": 3040
},
{
"epoch": 0.9635889740146908,
"grad_norm": 0.24493900212151187,
"learning_rate": 9.740483566306565e-06,
"loss": 0.2398,
"step": 3050
},
{
"epoch": 0.9667482821262143,
"grad_norm": 0.2873027578326869,
"learning_rate": 9.736964908215402e-06,
"loss": 0.2396,
"step": 3060
},
{
"epoch": 0.969907590237738,
"grad_norm": 0.24814910114506777,
"learning_rate": 9.733423200822469e-06,
"loss": 0.2391,
"step": 3070
},
{
"epoch": 0.9730668983492615,
"grad_norm": 0.24906741016474904,
"learning_rate": 9.729858461361142e-06,
"loss": 0.242,
"step": 3080
},
{
"epoch": 0.9762262064607851,
"grad_norm": 0.2826980721069527,
"learning_rate": 9.726270707176859e-06,
"loss": 0.2399,
"step": 3090
},
{
"epoch": 0.9793855145723087,
"grad_norm": 0.26392318924970115,
"learning_rate": 9.722659955727055e-06,
"loss": 0.2395,
"step": 3100
},
{
"epoch": 0.9825448226838323,
"grad_norm": 0.2855609937725699,
"learning_rate": 9.719026224581054e-06,
"loss": 0.2379,
"step": 3110
},
{
"epoch": 0.9857041307953558,
"grad_norm": 0.2502787654544348,
"learning_rate": 9.715369531420006e-06,
"loss": 0.2394,
"step": 3120
},
{
"epoch": 0.9888634389068794,
"grad_norm": 0.23500842887120083,
"learning_rate": 9.711689894036785e-06,
"loss": 0.2366,
"step": 3130
},
{
"epoch": 0.992022747018403,
"grad_norm": 0.24586324530930273,
"learning_rate": 9.707987330335906e-06,
"loss": 0.2378,
"step": 3140
},
{
"epoch": 0.9951820551299265,
"grad_norm": 0.23967550366976703,
"learning_rate": 9.704261858333445e-06,
"loss": 0.2388,
"step": 3150
},
{
"epoch": 0.9983413632414502,
"grad_norm": 0.2725545852571603,
"learning_rate": 9.700513496156945e-06,
"loss": 0.2378,
"step": 3160
},
{
"epoch": 1.0012637232446093,
"grad_norm": 0.25488826469533976,
"learning_rate": 9.696742262045324e-06,
"loss": 0.2171,
"step": 3170
},
{
"epoch": 1.004423031356133,
"grad_norm": 0.2434423686192198,
"learning_rate": 9.692948174348798e-06,
"loss": 0.2256,
"step": 3180
},
{
"epoch": 1.0075823394676566,
"grad_norm": 0.29889977976697246,
"learning_rate": 9.689131251528778e-06,
"loss": 0.2249,
"step": 3190
},
{
"epoch": 1.0107416475791802,
"grad_norm": 0.2533307535141922,
"learning_rate": 9.685291512157793e-06,
"loss": 0.2265,
"step": 3200
},
{
"epoch": 1.0139009556907037,
"grad_norm": 0.2663724197725429,
"learning_rate": 9.68142897491939e-06,
"loss": 0.226,
"step": 3210
},
{
"epoch": 1.0170602638022272,
"grad_norm": 0.2791907738011004,
"learning_rate": 9.677543658608047e-06,
"loss": 0.2262,
"step": 3220
},
{
"epoch": 1.0202195719137508,
"grad_norm": 0.23622696465246065,
"learning_rate": 9.673635582129084e-06,
"loss": 0.2222,
"step": 3230
},
{
"epoch": 1.0233788800252746,
"grad_norm": 0.28281839802186026,
"learning_rate": 9.669704764498564e-06,
"loss": 0.2246,
"step": 3240
},
{
"epoch": 1.026538188136798,
"grad_norm": 0.26869842180643333,
"learning_rate": 9.66575122484321e-06,
"loss": 0.2249,
"step": 3250
},
{
"epoch": 1.0296974962483216,
"grad_norm": 0.26529568216979593,
"learning_rate": 9.661774982400301e-06,
"loss": 0.223,
"step": 3260
},
{
"epoch": 1.0328568043598452,
"grad_norm": 0.2453451964854557,
"learning_rate": 9.65777605651759e-06,
"loss": 0.2238,
"step": 3270
},
{
"epoch": 1.0360161124713687,
"grad_norm": 0.24227112445437904,
"learning_rate": 9.653754466653195e-06,
"loss": 0.222,
"step": 3280
},
{
"epoch": 1.0391754205828923,
"grad_norm": 0.26279583121049077,
"learning_rate": 9.649710232375526e-06,
"loss": 0.2236,
"step": 3290
},
{
"epoch": 1.042334728694416,
"grad_norm": 0.31955584307225327,
"learning_rate": 9.645643373363166e-06,
"loss": 0.2229,
"step": 3300
},
{
"epoch": 1.0454940368059396,
"grad_norm": 0.257107141717547,
"learning_rate": 9.64155390940479e-06,
"loss": 0.2256,
"step": 3310
},
{
"epoch": 1.048653344917463,
"grad_norm": 0.28510160571621773,
"learning_rate": 9.637441860399065e-06,
"loss": 0.2243,
"step": 3320
},
{
"epoch": 1.0518126530289866,
"grad_norm": 0.24137541551602262,
"learning_rate": 9.633307246354558e-06,
"loss": 0.2237,
"step": 3330
},
{
"epoch": 1.0549719611405102,
"grad_norm": 0.26654943800407926,
"learning_rate": 9.629150087389625e-06,
"loss": 0.2253,
"step": 3340
},
{
"epoch": 1.0581312692520337,
"grad_norm": 0.24362961602020153,
"learning_rate": 9.624970403732328e-06,
"loss": 0.2291,
"step": 3350
},
{
"epoch": 1.0612905773635575,
"grad_norm": 0.24350680136934802,
"learning_rate": 9.620768215720327e-06,
"loss": 0.2229,
"step": 3360
},
{
"epoch": 1.064449885475081,
"grad_norm": 0.2410847673171125,
"learning_rate": 9.61654354380079e-06,
"loss": 0.2291,
"step": 3370
},
{
"epoch": 1.0676091935866046,
"grad_norm": 0.24859633294553296,
"learning_rate": 9.612296408530279e-06,
"loss": 0.224,
"step": 3380
},
{
"epoch": 1.070768501698128,
"grad_norm": 0.33635244899144884,
"learning_rate": 9.608026830574666e-06,
"loss": 0.2219,
"step": 3390
},
{
"epoch": 1.0739278098096516,
"grad_norm": 0.288768968554062,
"learning_rate": 9.603734830709029e-06,
"loss": 0.2252,
"step": 3400
},
{
"epoch": 1.0770871179211752,
"grad_norm": 0.2697580145809562,
"learning_rate": 9.599420429817534e-06,
"loss": 0.2234,
"step": 3410
},
{
"epoch": 1.080246426032699,
"grad_norm": 0.2520287953499914,
"learning_rate": 9.595083648893361e-06,
"loss": 0.2218,
"step": 3420
},
{
"epoch": 1.0834057341442225,
"grad_norm": 0.2538820882514716,
"learning_rate": 9.59072450903858e-06,
"loss": 0.2245,
"step": 3430
},
{
"epoch": 1.086565042255746,
"grad_norm": 0.24006555139584898,
"learning_rate": 9.586343031464056e-06,
"loss": 0.2245,
"step": 3440
},
{
"epoch": 1.0897243503672696,
"grad_norm": 0.24296965136255919,
"learning_rate": 9.581939237489347e-06,
"loss": 0.2227,
"step": 3450
},
{
"epoch": 1.092883658478793,
"grad_norm": 0.2641190618925988,
"learning_rate": 9.577513148542601e-06,
"loss": 0.2224,
"step": 3460
},
{
"epoch": 1.0960429665903166,
"grad_norm": 0.22534300284603093,
"learning_rate": 9.573064786160447e-06,
"loss": 0.2265,
"step": 3470
},
{
"epoch": 1.0992022747018404,
"grad_norm": 0.24227259587968128,
"learning_rate": 9.568594171987894e-06,
"loss": 0.2269,
"step": 3480
},
{
"epoch": 1.102361582813364,
"grad_norm": 0.24660324114104168,
"learning_rate": 9.564101327778223e-06,
"loss": 0.2252,
"step": 3490
},
{
"epoch": 1.1055208909248875,
"grad_norm": 0.2727222419616193,
"learning_rate": 9.559586275392887e-06,
"loss": 0.2222,
"step": 3500
},
{
"epoch": 1.108680199036411,
"grad_norm": 0.23732734292357127,
"learning_rate": 9.555049036801394e-06,
"loss": 0.2251,
"step": 3510
},
{
"epoch": 1.1118395071479346,
"grad_norm": 0.2499998501537187,
"learning_rate": 9.550489634081213e-06,
"loss": 0.2235,
"step": 3520
},
{
"epoch": 1.114998815259458,
"grad_norm": 0.2505631262518965,
"learning_rate": 9.545908089417655e-06,
"loss": 0.2268,
"step": 3530
},
{
"epoch": 1.1181581233709816,
"grad_norm": 0.24831276616046985,
"learning_rate": 9.541304425103772e-06,
"loss": 0.2258,
"step": 3540
},
{
"epoch": 1.1213174314825054,
"grad_norm": 0.24804609760497534,
"learning_rate": 9.536678663540247e-06,
"loss": 0.2232,
"step": 3550
},
{
"epoch": 1.124476739594029,
"grad_norm": 0.2268843330169224,
"learning_rate": 9.532030827235285e-06,
"loss": 0.2223,
"step": 3560
},
{
"epoch": 1.1276360477055525,
"grad_norm": 0.22228229533403326,
"learning_rate": 9.527360938804503e-06,
"loss": 0.2261,
"step": 3570
},
{
"epoch": 1.130795355817076,
"grad_norm": 0.25616837563557016,
"learning_rate": 9.522669020970821e-06,
"loss": 0.2248,
"step": 3580
},
{
"epoch": 1.1339546639285996,
"grad_norm": 0.24799181444300183,
"learning_rate": 9.517955096564344e-06,
"loss": 0.2249,
"step": 3590
},
{
"epoch": 1.1371139720401233,
"grad_norm": 0.2382662787923193,
"learning_rate": 9.513219188522266e-06,
"loss": 0.2216,
"step": 3600
},
{
"epoch": 1.1402732801516469,
"grad_norm": 0.25171104996591714,
"learning_rate": 9.508461319888744e-06,
"loss": 0.2225,
"step": 3610
},
{
"epoch": 1.1434325882631704,
"grad_norm": 0.23415739363429328,
"learning_rate": 9.503681513814797e-06,
"loss": 0.2236,
"step": 3620
},
{
"epoch": 1.146591896374694,
"grad_norm": 0.2432154138833874,
"learning_rate": 9.498879793558184e-06,
"loss": 0.2234,
"step": 3630
},
{
"epoch": 1.1497512044862175,
"grad_norm": 0.22687839285475797,
"learning_rate": 9.494056182483293e-06,
"loss": 0.2222,
"step": 3640
},
{
"epoch": 1.152910512597741,
"grad_norm": 0.26826441248684857,
"learning_rate": 9.489210704061036e-06,
"loss": 0.2216,
"step": 3650
},
{
"epoch": 1.1560698207092646,
"grad_norm": 0.24724769280109898,
"learning_rate": 9.484343381868722e-06,
"loss": 0.2242,
"step": 3660
},
{
"epoch": 1.1592291288207883,
"grad_norm": 0.2584281422449668,
"learning_rate": 9.479454239589948e-06,
"loss": 0.2248,
"step": 3670
},
{
"epoch": 1.1623884369323119,
"grad_norm": 0.2445110224992667,
"learning_rate": 9.47454330101449e-06,
"loss": 0.2207,
"step": 3680
},
{
"epoch": 1.1655477450438354,
"grad_norm": 0.22768485500843333,
"learning_rate": 9.469610590038175e-06,
"loss": 0.2231,
"step": 3690
},
{
"epoch": 1.168707053155359,
"grad_norm": 0.23303026391453427,
"learning_rate": 9.464656130662775e-06,
"loss": 0.2237,
"step": 3700
},
{
"epoch": 1.1718663612668825,
"grad_norm": 0.2489422466683634,
"learning_rate": 9.45967994699588e-06,
"loss": 0.2249,
"step": 3710
},
{
"epoch": 1.1750256693784062,
"grad_norm": 0.26953611174046954,
"learning_rate": 9.454682063250798e-06,
"loss": 0.2214,
"step": 3720
},
{
"epoch": 1.1781849774899298,
"grad_norm": 0.24590519797285548,
"learning_rate": 9.449662503746416e-06,
"loss": 0.2238,
"step": 3730
},
{
"epoch": 1.1813442856014533,
"grad_norm": 0.23511526883880332,
"learning_rate": 9.444621292907095e-06,
"loss": 0.2224,
"step": 3740
},
{
"epoch": 1.1845035937129769,
"grad_norm": 0.22848149314341606,
"learning_rate": 9.439558455262547e-06,
"loss": 0.2214,
"step": 3750
},
{
"epoch": 1.1876629018245004,
"grad_norm": 0.25030584511199394,
"learning_rate": 9.43447401544772e-06,
"loss": 0.2206,
"step": 3760
},
{
"epoch": 1.190822209936024,
"grad_norm": 0.2395716904991911,
"learning_rate": 9.429367998202671e-06,
"loss": 0.2203,
"step": 3770
},
{
"epoch": 1.1939815180475475,
"grad_norm": 0.24389646038824445,
"learning_rate": 9.424240428372454e-06,
"loss": 0.2231,
"step": 3780
},
{
"epoch": 1.1971408261590712,
"grad_norm": 0.2511376763314995,
"learning_rate": 9.419091330906985e-06,
"loss": 0.2229,
"step": 3790
},
{
"epoch": 1.2003001342705948,
"grad_norm": 0.2309948734333885,
"learning_rate": 9.413920730860936e-06,
"loss": 0.2217,
"step": 3800
},
{
"epoch": 1.2034594423821183,
"grad_norm": 0.22403672252100287,
"learning_rate": 9.408728653393613e-06,
"loss": 0.2209,
"step": 3810
},
{
"epoch": 1.2066187504936419,
"grad_norm": 0.23770720039134421,
"learning_rate": 9.403515123768817e-06,
"loss": 0.2262,
"step": 3820
},
{
"epoch": 1.2097780586051654,
"grad_norm": 0.23637922011574056,
"learning_rate": 9.398280167354737e-06,
"loss": 0.2211,
"step": 3830
},
{
"epoch": 1.2129373667166892,
"grad_norm": 0.23752969597662485,
"learning_rate": 9.39302380962382e-06,
"loss": 0.2232,
"step": 3840
},
{
"epoch": 1.2160966748282127,
"grad_norm": 0.23711964499813667,
"learning_rate": 9.38774607615265e-06,
"loss": 0.2199,
"step": 3850
},
{
"epoch": 1.2192559829397362,
"grad_norm": 0.2594025147223781,
"learning_rate": 9.382446992621822e-06,
"loss": 0.2219,
"step": 3860
},
{
"epoch": 1.2224152910512598,
"grad_norm": 0.24109832395545655,
"learning_rate": 9.377126584815812e-06,
"loss": 0.2212,
"step": 3870
},
{
"epoch": 1.2255745991627833,
"grad_norm": 0.22269010324866226,
"learning_rate": 9.371784878622863e-06,
"loss": 0.221,
"step": 3880
},
{
"epoch": 1.2287339072743069,
"grad_norm": 0.22390895916446435,
"learning_rate": 9.36642190003485e-06,
"loss": 0.2206,
"step": 3890
},
{
"epoch": 1.2318932153858304,
"grad_norm": 0.27627761911305704,
"learning_rate": 9.361037675147152e-06,
"loss": 0.2209,
"step": 3900
},
{
"epoch": 1.2350525234973542,
"grad_norm": 0.2420598861318583,
"learning_rate": 9.355632230158537e-06,
"loss": 0.2179,
"step": 3910
},
{
"epoch": 1.2382118316088777,
"grad_norm": 0.2325111241248699,
"learning_rate": 9.35020559137102e-06,
"loss": 0.2195,
"step": 3920
},
{
"epoch": 1.2413711397204013,
"grad_norm": 0.24210875124091263,
"learning_rate": 9.344757785189743e-06,
"loss": 0.2209,
"step": 3930
},
{
"epoch": 1.2445304478319248,
"grad_norm": 0.23465411344270035,
"learning_rate": 9.339288838122848e-06,
"loss": 0.2218,
"step": 3940
},
{
"epoch": 1.2476897559434483,
"grad_norm": 0.24940394557867132,
"learning_rate": 9.333798776781344e-06,
"loss": 0.2207,
"step": 3950
},
{
"epoch": 1.250849064054972,
"grad_norm": 0.24164502781099362,
"learning_rate": 9.328287627878974e-06,
"loss": 0.2239,
"step": 3960
},
{
"epoch": 1.2540083721664956,
"grad_norm": 0.24030120002479546,
"learning_rate": 9.322755418232094e-06,
"loss": 0.2222,
"step": 3970
},
{
"epoch": 1.2571676802780192,
"grad_norm": 0.24260325995919346,
"learning_rate": 9.317202174759541e-06,
"loss": 0.2205,
"step": 3980
},
{
"epoch": 1.2603269883895427,
"grad_norm": 0.24390555474193987,
"learning_rate": 9.311627924482494e-06,
"loss": 0.2201,
"step": 3990
},
{
"epoch": 1.2634862965010663,
"grad_norm": 0.2373559456916262,
"learning_rate": 9.306032694524346e-06,
"loss": 0.2211,
"step": 4000
},
{
"epoch": 1.2669615354237422,
"grad_norm": 0.23593037133395878,
"learning_rate": 9.300416512110582e-06,
"loss": 0.2212,
"step": 4010
},
{
"epoch": 1.2701208435352658,
"grad_norm": 0.2683209112120544,
"learning_rate": 9.29477940456863e-06,
"loss": 0.2233,
"step": 4020
},
{
"epoch": 1.2732801516467893,
"grad_norm": 0.24625101625854237,
"learning_rate": 9.289121399327742e-06,
"loss": 0.2204,
"step": 4030
},
{
"epoch": 1.2764394597583129,
"grad_norm": 0.24487225016448577,
"learning_rate": 9.283442523918848e-06,
"loss": 0.2216,
"step": 4040
},
{
"epoch": 1.2795987678698366,
"grad_norm": 0.22210615337013012,
"learning_rate": 9.27774280597444e-06,
"loss": 0.2215,
"step": 4050
},
{
"epoch": 1.2827580759813602,
"grad_norm": 0.22108031546901602,
"learning_rate": 9.272022273228414e-06,
"loss": 0.2204,
"step": 4060
},
{
"epoch": 1.2859173840928837,
"grad_norm": 0.24375060995624975,
"learning_rate": 9.266280953515958e-06,
"loss": 0.2206,
"step": 4070
},
{
"epoch": 1.2890766922044072,
"grad_norm": 0.2480993257402374,
"learning_rate": 9.260518874773395e-06,
"loss": 0.2241,
"step": 4080
},
{
"epoch": 1.2922360003159308,
"grad_norm": 0.22812840181798105,
"learning_rate": 9.25473606503807e-06,
"loss": 0.2229,
"step": 4090
},
{
"epoch": 1.2953953084274543,
"grad_norm": 0.23578170889449204,
"learning_rate": 9.248932552448191e-06,
"loss": 0.2202,
"step": 4100
},
{
"epoch": 1.2985546165389779,
"grad_norm": 0.23821605850175842,
"learning_rate": 9.24310836524271e-06,
"loss": 0.223,
"step": 4110
},
{
"epoch": 1.3017139246505016,
"grad_norm": 0.22354575939426694,
"learning_rate": 9.237263531761178e-06,
"loss": 0.2206,
"step": 4120
},
{
"epoch": 1.3048732327620252,
"grad_norm": 0.25832314829209013,
"learning_rate": 9.2313980804436e-06,
"loss": 0.2231,
"step": 4130
},
{
"epoch": 1.3080325408735487,
"grad_norm": 0.25538340887467387,
"learning_rate": 9.225512039830316e-06,
"loss": 0.2175,
"step": 4140
},
{
"epoch": 1.3111918489850722,
"grad_norm": 0.25215426741015406,
"learning_rate": 9.219605438561836e-06,
"loss": 0.2237,
"step": 4150
},
{
"epoch": 1.3143511570965958,
"grad_norm": 0.2313336444417292,
"learning_rate": 9.213678305378728e-06,
"loss": 0.2172,
"step": 4160
},
{
"epoch": 1.3175104652081195,
"grad_norm": 0.2278642515048215,
"learning_rate": 9.207730669121458e-06,
"loss": 0.2198,
"step": 4170
},
{
"epoch": 1.320669773319643,
"grad_norm": 0.2202393432518963,
"learning_rate": 9.201762558730256e-06,
"loss": 0.2217,
"step": 4180
},
{
"epoch": 1.3238290814311666,
"grad_norm": 0.23993000450443083,
"learning_rate": 9.19577400324498e-06,
"loss": 0.218,
"step": 4190
},
{
"epoch": 1.3269883895426902,
"grad_norm": 0.21595002622084153,
"learning_rate": 9.189765031804965e-06,
"loss": 0.2176,
"step": 4200
},
{
"epoch": 1.3301476976542137,
"grad_norm": 0.2526302368911067,
"learning_rate": 9.183735673648893e-06,
"loss": 0.2201,
"step": 4210
},
{
"epoch": 1.3333070057657372,
"grad_norm": 0.2431796699930924,
"learning_rate": 9.177685958114641e-06,
"loss": 0.2197,
"step": 4220
},
{
"epoch": 1.3364663138772608,
"grad_norm": 0.22807380467388666,
"learning_rate": 9.171615914639143e-06,
"loss": 0.2184,
"step": 4230
},
{
"epoch": 1.3396256219887843,
"grad_norm": 0.23018273423897978,
"learning_rate": 9.16552557275824e-06,
"loss": 0.2212,
"step": 4240
},
{
"epoch": 1.342784930100308,
"grad_norm": 0.25116936857534927,
"learning_rate": 9.159414962106551e-06,
"loss": 0.2205,
"step": 4250
},
{
"epoch": 1.3459442382118316,
"grad_norm": 0.2535484242407173,
"learning_rate": 9.153284112417314e-06,
"loss": 0.2173,
"step": 4260
},
{
"epoch": 1.3491035463233552,
"grad_norm": 0.21916754861180746,
"learning_rate": 9.147133053522243e-06,
"loss": 0.2206,
"step": 4270
},
{
"epoch": 1.3522628544348787,
"grad_norm": 0.24428352296386502,
"learning_rate": 9.140961815351399e-06,
"loss": 0.2203,
"step": 4280
},
{
"epoch": 1.3554221625464025,
"grad_norm": 0.22564630207596492,
"learning_rate": 9.13477042793302e-06,
"loss": 0.2185,
"step": 4290
},
{
"epoch": 1.358581470657926,
"grad_norm": 0.2203268383689168,
"learning_rate": 9.128558921393391e-06,
"loss": 0.2196,
"step": 4300
},
{
"epoch": 1.3617407787694495,
"grad_norm": 0.22885301789778367,
"learning_rate": 9.122327325956697e-06,
"loss": 0.2188,
"step": 4310
},
{
"epoch": 1.364900086880973,
"grad_norm": 0.23826459623192525,
"learning_rate": 9.116075671944865e-06,
"loss": 0.2196,
"step": 4320
},
{
"epoch": 1.3680593949924966,
"grad_norm": 0.23832523948959192,
"learning_rate": 9.109803989777432e-06,
"loss": 0.22,
"step": 4330
},
{
"epoch": 1.3712187031040202,
"grad_norm": 0.23458728809270832,
"learning_rate": 9.103512309971381e-06,
"loss": 0.2201,
"step": 4340
},
{
"epoch": 1.3743780112155437,
"grad_norm": 0.2505289166305433,
"learning_rate": 9.097200663141007e-06,
"loss": 0.2204,
"step": 4350
},
{
"epoch": 1.3775373193270672,
"grad_norm": 0.23547611606996885,
"learning_rate": 9.090869079997756e-06,
"loss": 0.2176,
"step": 4360
},
{
"epoch": 1.380696627438591,
"grad_norm": 0.23300517643120455,
"learning_rate": 9.084517591350085e-06,
"loss": 0.2196,
"step": 4370
},
{
"epoch": 1.3838559355501145,
"grad_norm": 0.23821378937778367,
"learning_rate": 9.078146228103302e-06,
"loss": 0.2205,
"step": 4380
},
{
"epoch": 1.387015243661638,
"grad_norm": 0.24777379778962433,
"learning_rate": 9.07175502125943e-06,
"loss": 0.217,
"step": 4390
},
{
"epoch": 1.3901745517731616,
"grad_norm": 0.2475049075002878,
"learning_rate": 9.065344001917042e-06,
"loss": 0.2182,
"step": 4400
},
{
"epoch": 1.3933338598846854,
"grad_norm": 0.22578213255742105,
"learning_rate": 9.058913201271116e-06,
"loss": 0.2193,
"step": 4410
},
{
"epoch": 1.396493167996209,
"grad_norm": 0.2363691948058237,
"learning_rate": 9.052462650612886e-06,
"loss": 0.2203,
"step": 4420
},
{
"epoch": 1.3996524761077325,
"grad_norm": 0.22912256425928543,
"learning_rate": 9.045992381329678e-06,
"loss": 0.219,
"step": 4430
},
{
"epoch": 1.402811784219256,
"grad_norm": 0.24248708988856363,
"learning_rate": 9.039502424904778e-06,
"loss": 0.2197,
"step": 4440
},
{
"epoch": 1.4059710923307795,
"grad_norm": 0.24586583420100006,
"learning_rate": 9.032992812917253e-06,
"loss": 0.217,
"step": 4450
},
{
"epoch": 1.409130400442303,
"grad_norm": 0.2352960559617627,
"learning_rate": 9.026463577041823e-06,
"loss": 0.2187,
"step": 4460
},
{
"epoch": 1.4122897085538266,
"grad_norm": 0.24173709757573497,
"learning_rate": 9.019914749048689e-06,
"loss": 0.221,
"step": 4470
},
{
"epoch": 1.4154490166653502,
"grad_norm": 0.23957045024343024,
"learning_rate": 9.01334636080338e-06,
"loss": 0.2186,
"step": 4480
},
{
"epoch": 1.418608324776874,
"grad_norm": 0.2419319656288058,
"learning_rate": 9.00675844426661e-06,
"loss": 0.2194,
"step": 4490
},
{
"epoch": 1.4217676328883975,
"grad_norm": 0.21881593406458033,
"learning_rate": 9.00015103149411e-06,
"loss": 0.2186,
"step": 4500
},
{
"epoch": 1.424926940999921,
"grad_norm": 0.24136336388843918,
"learning_rate": 8.993524154636475e-06,
"loss": 0.2194,
"step": 4510
},
{
"epoch": 1.4280862491114446,
"grad_norm": 0.24438868998683702,
"learning_rate": 8.986877845939013e-06,
"loss": 0.2182,
"step": 4520
},
{
"epoch": 1.431245557222968,
"grad_norm": 0.2885217400112698,
"learning_rate": 8.980212137741584e-06,
"loss": 0.2177,
"step": 4530
},
{
"epoch": 1.4344048653344919,
"grad_norm": 0.24449644579392688,
"learning_rate": 8.973527062478438e-06,
"loss": 0.221,
"step": 4540
},
{
"epoch": 1.4375641734460154,
"grad_norm": 0.23524427018475397,
"learning_rate": 8.966822652678068e-06,
"loss": 0.2187,
"step": 4550
},
{
"epoch": 1.440723481557539,
"grad_norm": 0.2288850724812931,
"learning_rate": 8.960098940963042e-06,
"loss": 0.2181,
"step": 4560
},
{
"epoch": 1.4438827896690625,
"grad_norm": 0.21740524732083277,
"learning_rate": 8.953355960049848e-06,
"loss": 0.2171,
"step": 4570
},
{
"epoch": 1.447042097780586,
"grad_norm": 0.22515210418111187,
"learning_rate": 8.946593742748737e-06,
"loss": 0.2198,
"step": 4580
},
{
"epoch": 1.4502014058921096,
"grad_norm": 0.2241208354134438,
"learning_rate": 8.93981232196356e-06,
"loss": 0.2176,
"step": 4590
},
{
"epoch": 1.453360714003633,
"grad_norm": 0.23239653090715226,
"learning_rate": 8.933011730691609e-06,
"loss": 0.2193,
"step": 4600
},
{
"epoch": 1.4565200221151569,
"grad_norm": 0.2591474679289352,
"learning_rate": 8.926192002023457e-06,
"loss": 0.2152,
"step": 4610
},
{
"epoch": 1.4596793302266804,
"grad_norm": 0.25412246361370894,
"learning_rate": 8.919353169142794e-06,
"loss": 0.221,
"step": 4620
},
{
"epoch": 1.462838638338204,
"grad_norm": 0.24625096243975705,
"learning_rate": 8.912495265326274e-06,
"loss": 0.2176,
"step": 4630
},
{
"epoch": 1.4659979464497275,
"grad_norm": 0.23707218603543193,
"learning_rate": 8.905618323943337e-06,
"loss": 0.2195,
"step": 4640
},
{
"epoch": 1.469157254561251,
"grad_norm": 0.2428731037963279,
"learning_rate": 8.898722378456066e-06,
"loss": 0.2194,
"step": 4650
},
{
"epoch": 1.4723165626727748,
"grad_norm": 0.23709943181558119,
"learning_rate": 8.89180746241901e-06,
"loss": 0.2189,
"step": 4660
},
{
"epoch": 1.4754758707842983,
"grad_norm": 0.22433268675724657,
"learning_rate": 8.88487360947903e-06,
"loss": 0.2177,
"step": 4670
},
{
"epoch": 1.4786351788958219,
"grad_norm": 0.20952198638497543,
"learning_rate": 8.877920853375127e-06,
"loss": 0.2168,
"step": 4680
},
{
"epoch": 1.4817944870073454,
"grad_norm": 0.21717097039950753,
"learning_rate": 8.87094922793828e-06,
"loss": 0.2159,
"step": 4690
},
{
"epoch": 1.484953795118869,
"grad_norm": 0.23796957102894456,
"learning_rate": 8.86395876709129e-06,
"loss": 0.2151,
"step": 4700
},
{
"epoch": 1.4881131032303925,
"grad_norm": 0.2488834756996504,
"learning_rate": 8.856949504848602e-06,
"loss": 0.2154,
"step": 4710
},
{
"epoch": 1.491272411341916,
"grad_norm": 0.25338534020929043,
"learning_rate": 8.849921475316147e-06,
"loss": 0.2182,
"step": 4720
},
{
"epoch": 1.4944317194534398,
"grad_norm": 0.22919814445941372,
"learning_rate": 8.842874712691176e-06,
"loss": 0.2167,
"step": 4730
},
{
"epoch": 1.4975910275649633,
"grad_norm": 0.2256770534043575,
"learning_rate": 8.83580925126209e-06,
"loss": 0.2173,
"step": 4740
},
{
"epoch": 1.5007503356764869,
"grad_norm": 0.22659615457077606,
"learning_rate": 8.828725125408277e-06,
"loss": 0.2195,
"step": 4750
},
{
"epoch": 1.5039096437880104,
"grad_norm": 0.2525621807127862,
"learning_rate": 8.821622369599945e-06,
"loss": 0.2189,
"step": 4760
},
{
"epoch": 1.5070689518995342,
"grad_norm": 0.23576094914242274,
"learning_rate": 8.814501018397948e-06,
"loss": 0.2169,
"step": 4770
},
{
"epoch": 1.5102282600110577,
"grad_norm": 0.23522191103261228,
"learning_rate": 8.807361106453623e-06,
"loss": 0.2174,
"step": 4780
},
{
"epoch": 1.5133875681225812,
"grad_norm": 0.25147556181231623,
"learning_rate": 8.800202668508624e-06,
"loss": 0.2182,
"step": 4790
},
{
"epoch": 1.5165468762341048,
"grad_norm": 0.229827362444795,
"learning_rate": 8.793025739394747e-06,
"loss": 0.2188,
"step": 4800
},
{
"epoch": 1.5197061843456283,
"grad_norm": 0.23171793705210642,
"learning_rate": 8.78583035403376e-06,
"loss": 0.2192,
"step": 4810
},
{
"epoch": 1.5228654924571519,
"grad_norm": 0.24117108800540396,
"learning_rate": 8.778616547437244e-06,
"loss": 0.2154,
"step": 4820
},
{
"epoch": 1.5260248005686754,
"grad_norm": 0.2239538620184076,
"learning_rate": 8.771384354706407e-06,
"loss": 0.2189,
"step": 4830
},
{
"epoch": 1.529184108680199,
"grad_norm": 0.22643911431321556,
"learning_rate": 8.764133811031926e-06,
"loss": 0.219,
"step": 4840
},
{
"epoch": 1.5323434167917225,
"grad_norm": 0.2309154933998562,
"learning_rate": 8.756864951693767e-06,
"loss": 0.2161,
"step": 4850
},
{
"epoch": 1.5355027249032462,
"grad_norm": 0.22487805218107695,
"learning_rate": 8.749577812061019e-06,
"loss": 0.2155,
"step": 4860
},
{
"epoch": 1.5386620330147698,
"grad_norm": 0.2508153221101582,
"learning_rate": 8.74227242759172e-06,
"loss": 0.2165,
"step": 4870
},
{
"epoch": 1.5418213411262933,
"grad_norm": 0.2294176475790702,
"learning_rate": 8.734948833832684e-06,
"loss": 0.2194,
"step": 4880
},
{
"epoch": 1.544980649237817,
"grad_norm": 0.24196930828876226,
"learning_rate": 8.72760706641933e-06,
"loss": 0.2179,
"step": 4890
},
{
"epoch": 1.5481399573493406,
"grad_norm": 0.2067023139224669,
"learning_rate": 8.720247161075504e-06,
"loss": 0.2141,
"step": 4900
},
{
"epoch": 1.5512992654608642,
"grad_norm": 0.22150939402397368,
"learning_rate": 8.71286915361331e-06,
"loss": 0.2167,
"step": 4910
},
{
"epoch": 1.5544585735723877,
"grad_norm": 0.22765148037671673,
"learning_rate": 8.705473079932935e-06,
"loss": 0.2193,
"step": 4920
},
{
"epoch": 1.5576178816839112,
"grad_norm": 0.2541351407544811,
"learning_rate": 8.698058976022473e-06,
"loss": 0.2178,
"step": 4930
},
{
"epoch": 1.5607771897954348,
"grad_norm": 0.22926147481484557,
"learning_rate": 8.690626877957745e-06,
"loss": 0.217,
"step": 4940
},
{
"epoch": 1.5639364979069583,
"grad_norm": 0.22296251412940335,
"learning_rate": 8.683176821902135e-06,
"loss": 0.2169,
"step": 4950
},
{
"epoch": 1.5670958060184819,
"grad_norm": 0.20884559665213961,
"learning_rate": 8.675708844106407e-06,
"loss": 0.2177,
"step": 4960
},
{
"epoch": 1.5702551141300054,
"grad_norm": 0.24025626100283112,
"learning_rate": 8.668222980908527e-06,
"loss": 0.2197,
"step": 4970
},
{
"epoch": 1.573414422241529,
"grad_norm": 0.23773563954990934,
"learning_rate": 8.66071926873349e-06,
"loss": 0.2152,
"step": 4980
},
{
"epoch": 1.5765737303530527,
"grad_norm": 0.2145137444703373,
"learning_rate": 8.65319774409314e-06,
"loss": 0.2171,
"step": 4990
},
{
"epoch": 1.5797330384645762,
"grad_norm": 0.22033335790324296,
"learning_rate": 8.645658443585992e-06,
"loss": 0.2186,
"step": 5000
},
{
"epoch": 1.5828923465760998,
"grad_norm": 0.22804805519869795,
"learning_rate": 8.638101403897062e-06,
"loss": 0.2174,
"step": 5010
},
{
"epoch": 1.5860516546876235,
"grad_norm": 0.2289269600023062,
"learning_rate": 8.630526661797673e-06,
"loss": 0.2164,
"step": 5020
},
{
"epoch": 1.589210962799147,
"grad_norm": 0.2165226855369354,
"learning_rate": 8.622934254145292e-06,
"loss": 0.2173,
"step": 5030
},
{
"epoch": 1.5923702709106706,
"grad_norm": 0.21269449117197298,
"learning_rate": 8.615324217883341e-06,
"loss": 0.2158,
"step": 5040
},
{
"epoch": 1.5955295790221942,
"grad_norm": 0.21841160515610453,
"learning_rate": 8.607696590041021e-06,
"loss": 0.2181,
"step": 5050
},
{
"epoch": 1.5986888871337177,
"grad_norm": 0.23472942411314004,
"learning_rate": 8.60005140773313e-06,
"loss": 0.2158,
"step": 5060
},
{
"epoch": 1.6018481952452412,
"grad_norm": 0.2599264549024524,
"learning_rate": 8.592388708159881e-06,
"loss": 0.2202,
"step": 5070
},
{
"epoch": 1.6050075033567648,
"grad_norm": 0.22673012824944389,
"learning_rate": 8.584708528606728e-06,
"loss": 0.2165,
"step": 5080
},
{
"epoch": 1.6081668114682883,
"grad_norm": 0.2206529413228289,
"learning_rate": 8.577010906444174e-06,
"loss": 0.2187,
"step": 5090
},
{
"epoch": 1.6113261195798119,
"grad_norm": 0.22895351987221169,
"learning_rate": 8.569295879127602e-06,
"loss": 0.2159,
"step": 5100
},
{
"epoch": 1.6144854276913356,
"grad_norm": 0.23022447681447986,
"learning_rate": 8.56156348419708e-06,
"loss": 0.2157,
"step": 5110
},
{
"epoch": 1.6176447358028592,
"grad_norm": 0.2183798711751106,
"learning_rate": 8.553813759277185e-06,
"loss": 0.2169,
"step": 5120
},
{
"epoch": 1.6208040439143827,
"grad_norm": 0.22109788078597084,
"learning_rate": 8.546046742076819e-06,
"loss": 0.214,
"step": 5130
},
{
"epoch": 1.6239633520259065,
"grad_norm": 0.2690538258988973,
"learning_rate": 8.538262470389027e-06,
"loss": 0.2189,
"step": 5140
},
{
"epoch": 1.62712266013743,
"grad_norm": 0.22791429863696558,
"learning_rate": 8.530460982090812e-06,
"loss": 0.2146,
"step": 5150
},
{
"epoch": 1.6302819682489536,
"grad_norm": 0.23247506925321845,
"learning_rate": 8.522642315142948e-06,
"loss": 0.2174,
"step": 5160
},
{
"epoch": 1.633441276360477,
"grad_norm": 0.22524917238532172,
"learning_rate": 8.514806507589796e-06,
"loss": 0.2135,
"step": 5170
},
{
"epoch": 1.6366005844720006,
"grad_norm": 0.21074048952604266,
"learning_rate": 8.506953597559125e-06,
"loss": 0.2168,
"step": 5180
},
{
"epoch": 1.6397598925835242,
"grad_norm": 0.24438387341045803,
"learning_rate": 8.49908362326192e-06,
"loss": 0.2181,
"step": 5190
},
{
"epoch": 1.6429192006950477,
"grad_norm": 0.2486574671755116,
"learning_rate": 8.491196622992196e-06,
"loss": 0.2131,
"step": 5200
},
{
"epoch": 1.6460785088065713,
"grad_norm": 0.2256817140951327,
"learning_rate": 8.483292635126814e-06,
"loss": 0.2168,
"step": 5210
},
{
"epoch": 1.6492378169180948,
"grad_norm": 0.23268219491083575,
"learning_rate": 8.475371698125298e-06,
"loss": 0.2199,
"step": 5220
},
{
"epoch": 1.6523971250296186,
"grad_norm": 0.22549590195988278,
"learning_rate": 8.46743385052964e-06,
"loss": 0.2148,
"step": 5230
},
{
"epoch": 1.655556433141142,
"grad_norm": 0.22816476144990433,
"learning_rate": 8.459479130964114e-06,
"loss": 0.2161,
"step": 5240
},
{
"epoch": 1.6587157412526656,
"grad_norm": 0.22538580259762195,
"learning_rate": 8.451507578135099e-06,
"loss": 0.2153,
"step": 5250
},
{
"epoch": 1.6618750493641894,
"grad_norm": 0.2378857848685177,
"learning_rate": 8.443519230830871e-06,
"loss": 0.2165,
"step": 5260
},
{
"epoch": 1.665034357475713,
"grad_norm": 0.21964276783718112,
"learning_rate": 8.435514127921432e-06,
"loss": 0.2152,
"step": 5270
},
{
"epoch": 1.6681936655872365,
"grad_norm": 0.2278255608379614,
"learning_rate": 8.427492308358314e-06,
"loss": 0.2151,
"step": 5280
},
{
"epoch": 1.67135297369876,
"grad_norm": 0.22151461379504075,
"learning_rate": 8.419453811174384e-06,
"loss": 0.2159,
"step": 5290
},
{
"epoch": 1.6745122818102836,
"grad_norm": 0.23436228542360216,
"learning_rate": 8.411398675483668e-06,
"loss": 0.2139,
"step": 5300
},
{
"epoch": 1.677671589921807,
"grad_norm": 0.23116461868460544,
"learning_rate": 8.403326940481146e-06,
"loss": 0.2141,
"step": 5310
},
{
"epoch": 1.6808308980333306,
"grad_norm": 0.22621704434121967,
"learning_rate": 8.39523864544257e-06,
"loss": 0.2178,
"step": 5320
},
{
"epoch": 1.6839902061448542,
"grad_norm": 0.24437450666792623,
"learning_rate": 8.387133829724266e-06,
"loss": 0.2148,
"step": 5330
},
{
"epoch": 1.6871495142563777,
"grad_norm": 0.2285633102632245,
"learning_rate": 8.379012532762956e-06,
"loss": 0.211,
"step": 5340
},
{
"epoch": 1.6903088223679015,
"grad_norm": 0.23322244192934505,
"learning_rate": 8.370874794075548e-06,
"loss": 0.2159,
"step": 5350
},
{
"epoch": 1.693468130479425,
"grad_norm": 0.22379930823118785,
"learning_rate": 8.36272065325896e-06,
"loss": 0.2139,
"step": 5360
},
{
"epoch": 1.6966274385909486,
"grad_norm": 0.2120873572584099,
"learning_rate": 8.354550149989912e-06,
"loss": 0.2152,
"step": 5370
},
{
"epoch": 1.6997867467024723,
"grad_norm": 0.2274107944742338,
"learning_rate": 8.346363324024752e-06,
"loss": 0.2154,
"step": 5380
},
{
"epoch": 1.7029460548139959,
"grad_norm": 0.22062326975674185,
"learning_rate": 8.338160215199239e-06,
"loss": 0.213,
"step": 5390
},
{
"epoch": 1.7061053629255194,
"grad_norm": 0.20566039440830033,
"learning_rate": 8.329940863428372e-06,
"loss": 0.2142,
"step": 5400
},
{
"epoch": 1.709264671037043,
"grad_norm": 0.22328688203535454,
"learning_rate": 8.321705308706178e-06,
"loss": 0.2174,
"step": 5410
},
{
"epoch": 1.7124239791485665,
"grad_norm": 0.2284198217298347,
"learning_rate": 8.313453591105534e-06,
"loss": 0.2166,
"step": 5420
},
{
"epoch": 1.71558328726009,
"grad_norm": 0.24234082572473092,
"learning_rate": 8.305185750777951e-06,
"loss": 0.2153,
"step": 5430
},
{
"epoch": 1.7187425953716136,
"grad_norm": 0.21687647127855095,
"learning_rate": 8.296901827953403e-06,
"loss": 0.2164,
"step": 5440
},
{
"epoch": 1.721901903483137,
"grad_norm": 0.2121864508017843,
"learning_rate": 8.288601862940109e-06,
"loss": 0.2139,
"step": 5450
},
{
"epoch": 1.7250612115946606,
"grad_norm": 0.22730841948699315,
"learning_rate": 8.280285896124351e-06,
"loss": 0.2174,
"step": 5460
},
{
"epoch": 1.7282205197061844,
"grad_norm": 0.2364301598385313,
"learning_rate": 8.271953967970273e-06,
"loss": 0.214,
"step": 5470
},
{
"epoch": 1.731379827817708,
"grad_norm": 0.2502748891595353,
"learning_rate": 8.263606119019684e-06,
"loss": 0.2158,
"step": 5480
},
{
"epoch": 1.7345391359292315,
"grad_norm": 0.23803855035434604,
"learning_rate": 8.255242389891863e-06,
"loss": 0.217,
"step": 5490
},
{
"epoch": 1.737698444040755,
"grad_norm": 0.23212647695583102,
"learning_rate": 8.246862821283354e-06,
"loss": 0.2147,
"step": 5500
},
{
"epoch": 1.7408577521522788,
"grad_norm": 0.24431104358398667,
"learning_rate": 8.238467453967778e-06,
"loss": 0.2154,
"step": 5510
},
{
"epoch": 1.7440170602638023,
"grad_norm": 0.21470725579185915,
"learning_rate": 8.23005632879563e-06,
"loss": 0.2141,
"step": 5520
},
{
"epoch": 1.7471763683753259,
"grad_norm": 0.21322893537867463,
"learning_rate": 8.221629486694076e-06,
"loss": 0.2137,
"step": 5530
},
{
"epoch": 1.7503356764868494,
"grad_norm": 0.22024980684855178,
"learning_rate": 8.213186968666761e-06,
"loss": 0.216,
"step": 5540
},
{
"epoch": 1.753494984598373,
"grad_norm": 0.21887176967393748,
"learning_rate": 8.20472881579361e-06,
"loss": 0.2162,
"step": 5550
},
{
"epoch": 1.7566542927098965,
"grad_norm": 0.21439885224310692,
"learning_rate": 8.196255069230618e-06,
"loss": 0.215,
"step": 5560
},
{
"epoch": 1.75981360082142,
"grad_norm": 0.22279903289185554,
"learning_rate": 8.187765770209662e-06,
"loss": 0.2149,
"step": 5570
},
{
"epoch": 1.7629729089329436,
"grad_norm": 0.23141919021697108,
"learning_rate": 8.179260960038286e-06,
"loss": 0.2158,
"step": 5580
},
{
"epoch": 1.766132217044467,
"grad_norm": 0.21177829542109405,
"learning_rate": 8.17074068009952e-06,
"loss": 0.2162,
"step": 5590
},
{
"epoch": 1.7692915251559909,
"grad_norm": 0.23088215353230407,
"learning_rate": 8.162204971851662e-06,
"loss": 0.215,
"step": 5600
},
{
"epoch": 1.7724508332675144,
"grad_norm": 0.22123248711116683,
"learning_rate": 8.153653876828081e-06,
"loss": 0.2128,
"step": 5610
},
{
"epoch": 1.775610141379038,
"grad_norm": 0.21216354452493327,
"learning_rate": 8.145087436637014e-06,
"loss": 0.2161,
"step": 5620
},
{
"epoch": 1.7787694494905617,
"grad_norm": 0.2176779002539683,
"learning_rate": 8.13650569296137e-06,
"loss": 0.2147,
"step": 5630
},
{
"epoch": 1.7819287576020852,
"grad_norm": 0.22379805142011377,
"learning_rate": 8.12790868755852e-06,
"loss": 0.2149,
"step": 5640
},
{
"epoch": 1.7850880657136088,
"grad_norm": 0.23156638443169314,
"learning_rate": 8.119296462260094e-06,
"loss": 0.2116,
"step": 5650
},
{
"epoch": 1.7882473738251323,
"grad_norm": 0.23142885687079817,
"learning_rate": 8.110669058971783e-06,
"loss": 0.2168,
"step": 5660
},
{
"epoch": 1.7914066819366559,
"grad_norm": 0.21278539872224353,
"learning_rate": 8.102026519673127e-06,
"loss": 0.2136,
"step": 5670
},
{
"epoch": 1.7945659900481794,
"grad_norm": 0.22604551176497345,
"learning_rate": 8.093368886417323e-06,
"loss": 0.2139,
"step": 5680
},
{
"epoch": 1.797725298159703,
"grad_norm": 0.21945486381827597,
"learning_rate": 8.084696201331005e-06,
"loss": 0.2148,
"step": 5690
},
{
"epoch": 1.8008846062712265,
"grad_norm": 0.2554690895004349,
"learning_rate": 8.07600850661405e-06,
"loss": 0.215,
"step": 5700
},
{
"epoch": 1.80404391438275,
"grad_norm": 0.24996286626429107,
"learning_rate": 8.067305844539369e-06,
"loss": 0.2183,
"step": 5710
},
{
"epoch": 1.8072032224942738,
"grad_norm": 0.24383324293020725,
"learning_rate": 8.058588257452705e-06,
"loss": 0.2121,
"step": 5720
},
{
"epoch": 1.8103625306057973,
"grad_norm": 0.2169636811525225,
"learning_rate": 8.049855787772416e-06,
"loss": 0.2145,
"step": 5730
},
{
"epoch": 1.8135218387173209,
"grad_norm": 0.24944768584905086,
"learning_rate": 8.041108477989283e-06,
"loss": 0.2141,
"step": 5740
},
{
"epoch": 1.8166811468288446,
"grad_norm": 0.21757143809376578,
"learning_rate": 8.032346370666297e-06,
"loss": 0.2138,
"step": 5750
},
{
"epoch": 1.8198404549403682,
"grad_norm": 0.20693258010147997,
"learning_rate": 8.023569508438444e-06,
"loss": 0.2159,
"step": 5760
},
{
"epoch": 1.8229997630518917,
"grad_norm": 0.21253234218456654,
"learning_rate": 8.014777934012515e-06,
"loss": 0.2156,
"step": 5770
},
{
"epoch": 1.8261590711634152,
"grad_norm": 0.2199083063027526,
"learning_rate": 8.005971690166879e-06,
"loss": 0.2147,
"step": 5780
},
{
"epoch": 1.8293183792749388,
"grad_norm": 0.24052412314651891,
"learning_rate": 7.99715081975129e-06,
"loss": 0.2129,
"step": 5790
},
{
"epoch": 1.8324776873864623,
"grad_norm": 0.21391433161968806,
"learning_rate": 7.98831536568667e-06,
"loss": 0.2142,
"step": 5800
},
{
"epoch": 1.8356369954979859,
"grad_norm": 0.2071590477738613,
"learning_rate": 7.979465370964904e-06,
"loss": 0.2111,
"step": 5810
},
{
"epoch": 1.8387963036095094,
"grad_norm": 0.20813594996772833,
"learning_rate": 7.97060087864863e-06,
"loss": 0.2139,
"step": 5820
},
{
"epoch": 1.841955611721033,
"grad_norm": 0.21828875123993477,
"learning_rate": 7.961721931871023e-06,
"loss": 0.2125,
"step": 5830
},
{
"epoch": 1.8451149198325567,
"grad_norm": 0.22962238030556992,
"learning_rate": 7.9528285738356e-06,
"loss": 0.214,
"step": 5840
},
{
"epoch": 1.8482742279440803,
"grad_norm": 0.22398065321127122,
"learning_rate": 7.943920847815995e-06,
"loss": 0.2111,
"step": 5850
},
{
"epoch": 1.8514335360556038,
"grad_norm": 0.23666023387603707,
"learning_rate": 7.934998797155757e-06,
"loss": 0.2136,
"step": 5860
},
{
"epoch": 1.8545928441671276,
"grad_norm": 0.24325581332600177,
"learning_rate": 7.926062465268133e-06,
"loss": 0.2146,
"step": 5870
},
{
"epoch": 1.857752152278651,
"grad_norm": 0.22514971896352026,
"learning_rate": 7.917111895635865e-06,
"loss": 0.2143,
"step": 5880
},
{
"epoch": 1.8609114603901746,
"grad_norm": 0.21406995986557276,
"learning_rate": 7.908147131810968e-06,
"loss": 0.2147,
"step": 5890
},
{
"epoch": 1.8640707685016982,
"grad_norm": 0.22801297726410702,
"learning_rate": 7.899168217414526e-06,
"loss": 0.2124,
"step": 5900
},
{
"epoch": 1.8672300766132217,
"grad_norm": 0.20686065594624745,
"learning_rate": 7.890175196136484e-06,
"loss": 0.2128,
"step": 5910
},
{
"epoch": 1.8703893847247453,
"grad_norm": 0.219199870102877,
"learning_rate": 7.881168111735417e-06,
"loss": 0.212,
"step": 5920
},
{
"epoch": 1.8735486928362688,
"grad_norm": 0.2106377311590809,
"learning_rate": 7.872147008038335e-06,
"loss": 0.2131,
"step": 5930
},
{
"epoch": 1.8767080009477923,
"grad_norm": 0.22171325213770007,
"learning_rate": 7.863111928940465e-06,
"loss": 0.2144,
"step": 5940
},
{
"epoch": 1.8798673090593159,
"grad_norm": 0.2408651871754287,
"learning_rate": 7.854062918405034e-06,
"loss": 0.2145,
"step": 5950
},
{
"epoch": 1.8830266171708396,
"grad_norm": 0.22645870425489717,
"learning_rate": 7.845000020463058e-06,
"loss": 0.2157,
"step": 5960
},
{
"epoch": 1.8861859252823632,
"grad_norm": 0.21229610496145646,
"learning_rate": 7.835923279213124e-06,
"loss": 0.2153,
"step": 5970
},
{
"epoch": 1.8893452333938867,
"grad_norm": 0.22168579889983991,
"learning_rate": 7.826832738821182e-06,
"loss": 0.2135,
"step": 5980
},
{
"epoch": 1.8925045415054105,
"grad_norm": 0.2503714943772614,
"learning_rate": 7.817728443520324e-06,
"loss": 0.214,
"step": 5990
},
{
"epoch": 1.895663849616934,
"grad_norm": 0.21389350571126534,
"learning_rate": 7.808610437610572e-06,
"loss": 0.2139,
"step": 6000
},
{
"epoch": 1.8988231577284576,
"grad_norm": 0.21396113064462294,
"learning_rate": 7.799478765458665e-06,
"loss": 0.215,
"step": 6010
},
{
"epoch": 1.901982465839981,
"grad_norm": 0.21646665487064554,
"learning_rate": 7.790333471497831e-06,
"loss": 0.2137,
"step": 6020
},
{
"epoch": 1.9051417739515046,
"grad_norm": 0.2173302972697527,
"learning_rate": 7.781174600227587e-06,
"loss": 0.215,
"step": 6030
},
{
"epoch": 1.9083010820630282,
"grad_norm": 0.208526571079878,
"learning_rate": 7.772002196213517e-06,
"loss": 0.2144,
"step": 6040
},
{
"epoch": 1.9114603901745517,
"grad_norm": 0.23762678091854472,
"learning_rate": 7.762816304087042e-06,
"loss": 0.2154,
"step": 6050
},
{
"epoch": 1.9146196982860753,
"grad_norm": 0.23394045959731094,
"learning_rate": 7.753616968545223e-06,
"loss": 0.214,
"step": 6060
},
{
"epoch": 1.9177790063975988,
"grad_norm": 0.21611757282074848,
"learning_rate": 7.744404234350536e-06,
"loss": 0.2125,
"step": 6070
},
{
"epoch": 1.9209383145091226,
"grad_norm": 0.21843363212182215,
"learning_rate": 7.735178146330647e-06,
"loss": 0.2119,
"step": 6080
},
{
"epoch": 1.924097622620646,
"grad_norm": 0.20825290374476896,
"learning_rate": 7.7259387493782e-06,
"loss": 0.2114,
"step": 6090
},
{
"epoch": 1.9272569307321696,
"grad_norm": 0.20701940040618752,
"learning_rate": 7.716686088450601e-06,
"loss": 0.2118,
"step": 6100
},
{
"epoch": 1.9304162388436932,
"grad_norm": 0.22116343279147632,
"learning_rate": 7.707420208569793e-06,
"loss": 0.2114,
"step": 6110
},
{
"epoch": 1.933575546955217,
"grad_norm": 0.21636101800504964,
"learning_rate": 7.698141154822048e-06,
"loss": 0.216,
"step": 6120
},
{
"epoch": 1.9367348550667405,
"grad_norm": 0.21079410410879035,
"learning_rate": 7.68884897235773e-06,
"loss": 0.213,
"step": 6130
},
{
"epoch": 1.939894163178264,
"grad_norm": 0.24490335061347607,
"learning_rate": 7.679543706391088e-06,
"loss": 0.2128,
"step": 6140
},
{
"epoch": 1.9430534712897876,
"grad_norm": 0.22964720738637706,
"learning_rate": 7.670225402200037e-06,
"loss": 0.2111,
"step": 6150
},
{
"epoch": 1.946212779401311,
"grad_norm": 0.21860013623402114,
"learning_rate": 7.660894105125932e-06,
"loss": 0.2134,
"step": 6160
},
{
"epoch": 1.9493720875128346,
"grad_norm": 0.2564661587161801,
"learning_rate": 7.651549860573347e-06,
"loss": 0.2097,
"step": 6170
},
{
"epoch": 1.9525313956243582,
"grad_norm": 0.22698394312118578,
"learning_rate": 7.642192714009861e-06,
"loss": 0.2124,
"step": 6180
},
{
"epoch": 1.9556907037358817,
"grad_norm": 0.2076381468352638,
"learning_rate": 7.632822710965826e-06,
"loss": 0.2115,
"step": 6190
},
{
"epoch": 1.9588500118474053,
"grad_norm": 0.2262277080179204,
"learning_rate": 7.623439897034155e-06,
"loss": 0.2161,
"step": 6200
},
{
"epoch": 1.962009319958929,
"grad_norm": 0.22968981943777275,
"learning_rate": 7.614044317870099e-06,
"loss": 0.212,
"step": 6210
},
{
"epoch": 1.9651686280704526,
"grad_norm": 0.21734537077847677,
"learning_rate": 7.604636019191018e-06,
"loss": 0.2112,
"step": 6220
},
{
"epoch": 1.968327936181976,
"grad_norm": 0.2162091973430056,
"learning_rate": 7.595215046776165e-06,
"loss": 0.2112,
"step": 6230
},
{
"epoch": 1.9714872442934999,
"grad_norm": 0.22143440320403707,
"learning_rate": 7.585781446466464e-06,
"loss": 0.2108,
"step": 6240
},
{
"epoch": 1.9746465524050234,
"grad_norm": 0.21858157789999919,
"learning_rate": 7.5763352641642785e-06,
"loss": 0.2145,
"step": 6250
},
{
"epoch": 1.977805860516547,
"grad_norm": 0.23297936516023993,
"learning_rate": 7.566876545833197e-06,
"loss": 0.2123,
"step": 6260
},
{
"epoch": 1.9809651686280705,
"grad_norm": 0.22908027579219403,
"learning_rate": 7.55740533749781e-06,
"loss": 0.2114,
"step": 6270
},
{
"epoch": 1.984124476739594,
"grad_norm": 0.2202073126271702,
"learning_rate": 7.547921685243475e-06,
"loss": 0.2113,
"step": 6280
},
{
"epoch": 1.9872837848511176,
"grad_norm": 0.2309779316659973,
"learning_rate": 7.538425635216105e-06,
"loss": 0.2136,
"step": 6290
},
{
"epoch": 1.990443092962641,
"grad_norm": 0.2116437633996568,
"learning_rate": 7.5289172336219375e-06,
"loss": 0.2122,
"step": 6300
},
{
"epoch": 1.9936024010741646,
"grad_norm": 0.21586666623876277,
"learning_rate": 7.51939652672731e-06,
"loss": 0.2098,
"step": 6310
},
{
"epoch": 1.9967617091856882,
"grad_norm": 0.22548426849872574,
"learning_rate": 7.509863560858432e-06,
"loss": 0.2114,
"step": 6320
},
{
"epoch": 1.999921017297212,
"grad_norm": 0.2263338267805787,
"learning_rate": 7.5003183824011726e-06,
"loss": 0.2131,
"step": 6330
},
{
"epoch": 2.0031593081115235,
"grad_norm": 0.2127546790886278,
"learning_rate": 7.490761037800816e-06,
"loss": 0.1968,
"step": 6340
},
{
"epoch": 2.006318616223047,
"grad_norm": 0.2069882189331785,
"learning_rate": 7.48119157356185e-06,
"loss": 0.1914,
"step": 6350
},
{
"epoch": 2.0094779243345706,
"grad_norm": 0.20703568644153889,
"learning_rate": 7.471610036247733e-06,
"loss": 0.1897,
"step": 6360
},
{
"epoch": 2.012637232446094,
"grad_norm": 0.2055646371637049,
"learning_rate": 7.462016472480668e-06,
"loss": 0.194,
"step": 6370
},
{
"epoch": 2.0157965405576177,
"grad_norm": 0.2226339706612743,
"learning_rate": 7.452410928941378e-06,
"loss": 0.1921,
"step": 6380
},
{
"epoch": 2.0189558486691412,
"grad_norm": 0.22355427767751265,
"learning_rate": 7.442793452368879e-06,
"loss": 0.1918,
"step": 6390
},
{
"epoch": 2.0221151567806652,
"grad_norm": 0.2073037658523876,
"learning_rate": 7.433164089560251e-06,
"loss": 0.192,
"step": 6400
},
{
"epoch": 2.0252744648921888,
"grad_norm": 0.23504421002096731,
"learning_rate": 7.423522887370405e-06,
"loss": 0.1904,
"step": 6410
},
{
"epoch": 2.0284337730037123,
"grad_norm": 0.20581970412966663,
"learning_rate": 7.413869892711867e-06,
"loss": 0.1918,
"step": 6420
},
{
"epoch": 2.031593081115236,
"grad_norm": 0.21545977722772292,
"learning_rate": 7.40420515255454e-06,
"loss": 0.1953,
"step": 6430
},
{
"epoch": 2.0347523892267594,
"grad_norm": 0.21275493409460883,
"learning_rate": 7.394528713925482e-06,
"loss": 0.1926,
"step": 6440
},
{
"epoch": 2.037911697338283,
"grad_norm": 0.2100072176135639,
"learning_rate": 7.38484062390867e-06,
"loss": 0.1928,
"step": 6450
},
{
"epoch": 2.0410710054498065,
"grad_norm": 0.2152551253488428,
"learning_rate": 7.375140929644776e-06,
"loss": 0.1913,
"step": 6460
},
{
"epoch": 2.04423031356133,
"grad_norm": 0.23722168142518216,
"learning_rate": 7.365429678330938e-06,
"loss": 0.193,
"step": 6470
},
{
"epoch": 2.0473896216728535,
"grad_norm": 0.21165262894718706,
"learning_rate": 7.355706917220524e-06,
"loss": 0.1919,
"step": 6480
},
{
"epoch": 2.050548929784377,
"grad_norm": 0.19900789144822892,
"learning_rate": 7.345972693622916e-06,
"loss": 0.1934,
"step": 6490
},
{
"epoch": 2.0537082378959006,
"grad_norm": 0.22490326358812657,
"learning_rate": 7.336227054903258e-06,
"loss": 0.1947,
"step": 6500
},
{
"epoch": 2.056867546007424,
"grad_norm": 0.2140405521043961,
"learning_rate": 7.3264700484822504e-06,
"loss": 0.1916,
"step": 6510
},
{
"epoch": 2.060026854118948,
"grad_norm": 0.21185203080938914,
"learning_rate": 7.316701721835899e-06,
"loss": 0.1921,
"step": 6520
},
{
"epoch": 2.0631861622304717,
"grad_norm": 0.21223665536640837,
"learning_rate": 7.306922122495296e-06,
"loss": 0.1906,
"step": 6530
},
{
"epoch": 2.0663454703419952,
"grad_norm": 0.21789344473823863,
"learning_rate": 7.297131298046381e-06,
"loss": 0.1912,
"step": 6540
},
{
"epoch": 2.0695047784535188,
"grad_norm": 0.2186374831032629,
"learning_rate": 7.287329296129716e-06,
"loss": 0.1905,
"step": 6550
},
{
"epoch": 2.0726640865650423,
"grad_norm": 0.21221974732525253,
"learning_rate": 7.2775161644402504e-06,
"loss": 0.1911,
"step": 6560
},
{
"epoch": 2.075823394676566,
"grad_norm": 0.22468404766263275,
"learning_rate": 7.267691950727089e-06,
"loss": 0.1918,
"step": 6570
},
{
"epoch": 2.0789827027880894,
"grad_norm": 0.22280157355766247,
"learning_rate": 7.257856702793262e-06,
"loss": 0.1953,
"step": 6580
},
{
"epoch": 2.082142010899613,
"grad_norm": 0.2185553426185545,
"learning_rate": 7.248010468495486e-06,
"loss": 0.1927,
"step": 6590
},
{
"epoch": 2.0853013190111365,
"grad_norm": 0.21359923659529395,
"learning_rate": 7.238153295743936e-06,
"loss": 0.1914,
"step": 6600
},
{
"epoch": 2.08846062712266,
"grad_norm": 0.21132207896334118,
"learning_rate": 7.228285232502016e-06,
"loss": 0.1916,
"step": 6610
},
{
"epoch": 2.0916199352341835,
"grad_norm": 0.22929120062926403,
"learning_rate": 7.218406326786119e-06,
"loss": 0.1927,
"step": 6620
},
{
"epoch": 2.094779243345707,
"grad_norm": 0.21485998840313886,
"learning_rate": 7.208516626665394e-06,
"loss": 0.1937,
"step": 6630
},
{
"epoch": 2.097938551457231,
"grad_norm": 0.21872022787914924,
"learning_rate": 7.198616180261515e-06,
"loss": 0.1925,
"step": 6640
},
{
"epoch": 2.1010978595687546,
"grad_norm": 0.2221942323992186,
"learning_rate": 7.188705035748447e-06,
"loss": 0.191,
"step": 6650
},
{
"epoch": 2.104257167680278,
"grad_norm": 0.23507618605208405,
"learning_rate": 7.178783241352209e-06,
"loss": 0.1913,
"step": 6660
},
{
"epoch": 2.1074164757918017,
"grad_norm": 0.2124831637924298,
"learning_rate": 7.168850845350642e-06,
"loss": 0.1937,
"step": 6670
},
{
"epoch": 2.1105757839033252,
"grad_norm": 0.27819912434383975,
"learning_rate": 7.158907896073171e-06,
"loss": 0.1907,
"step": 6680
},
{
"epoch": 2.1137350920148488,
"grad_norm": 0.2140478018660185,
"learning_rate": 7.148954441900569e-06,
"loss": 0.1937,
"step": 6690
},
{
"epoch": 2.1168944001263723,
"grad_norm": 0.20813723734002745,
"learning_rate": 7.13899053126473e-06,
"loss": 0.1928,
"step": 6700
},
{
"epoch": 2.120053708237896,
"grad_norm": 0.21914914363643226,
"learning_rate": 7.1290162126484255e-06,
"loss": 0.1911,
"step": 6710
},
{
"epoch": 2.1232130163494194,
"grad_norm": 0.20921158225021147,
"learning_rate": 7.119031534585068e-06,
"loss": 0.1911,
"step": 6720
},
{
"epoch": 2.126372324460943,
"grad_norm": 0.21063028898492417,
"learning_rate": 7.109036545658478e-06,
"loss": 0.1909,
"step": 6730
},
{
"epoch": 2.1295316325724665,
"grad_norm": 0.20519838588236927,
"learning_rate": 7.099031294502651e-06,
"loss": 0.1926,
"step": 6740
},
{
"epoch": 2.13269094068399,
"grad_norm": 0.19866370874843575,
"learning_rate": 7.089015829801513e-06,
"loss": 0.1918,
"step": 6750
},
{
"epoch": 2.1358502487955136,
"grad_norm": 0.23526951667689947,
"learning_rate": 7.078990200288685e-06,
"loss": 0.1914,
"step": 6760
},
{
"epoch": 2.1390095569070375,
"grad_norm": 0.2119435665033379,
"learning_rate": 7.0689544547472564e-06,
"loss": 0.1926,
"step": 6770
},
{
"epoch": 2.142168865018561,
"grad_norm": 0.2144608848885566,
"learning_rate": 7.058908642009532e-06,
"loss": 0.1929,
"step": 6780
},
{
"epoch": 2.1453281731300846,
"grad_norm": 0.2359992087538063,
"learning_rate": 7.048852810956805e-06,
"loss": 0.1939,
"step": 6790
},
{
"epoch": 2.148487481241608,
"grad_norm": 0.2210600215142186,
"learning_rate": 7.038787010519117e-06,
"loss": 0.1926,
"step": 6800
},
{
"epoch": 2.1516467893531317,
"grad_norm": 0.24951718000615125,
"learning_rate": 7.0287112896750166e-06,
"loss": 0.1911,
"step": 6810
},
{
"epoch": 2.1548060974646552,
"grad_norm": 0.2175553461891265,
"learning_rate": 7.018625697451327e-06,
"loss": 0.1934,
"step": 6820
},
{
"epoch": 2.1579654055761788,
"grad_norm": 0.20956901895344687,
"learning_rate": 7.008530282922896e-06,
"loss": 0.1913,
"step": 6830
},
{
"epoch": 2.1611247136877023,
"grad_norm": 0.2224814576315156,
"learning_rate": 6.998425095212378e-06,
"loss": 0.1927,
"step": 6840
},
{
"epoch": 2.164284021799226,
"grad_norm": 0.20337670529721538,
"learning_rate": 6.9883101834899694e-06,
"loss": 0.1922,
"step": 6850
},
{
"epoch": 2.1674433299107494,
"grad_norm": 0.2198594709438925,
"learning_rate": 6.978185596973192e-06,
"loss": 0.1917,
"step": 6860
},
{
"epoch": 2.170602638022273,
"grad_norm": 0.21033108894938493,
"learning_rate": 6.968051384926634e-06,
"loss": 0.1938,
"step": 6870
},
{
"epoch": 2.173761946133797,
"grad_norm": 0.22864890990862674,
"learning_rate": 6.957907596661729e-06,
"loss": 0.1915,
"step": 6880
},
{
"epoch": 2.1769212542453205,
"grad_norm": 0.21930675227402627,
"learning_rate": 6.9477542815365025e-06,
"loss": 0.1934,
"step": 6890
},
{
"epoch": 2.180080562356844,
"grad_norm": 0.2079046170539124,
"learning_rate": 6.937591488955335e-06,
"loss": 0.1913,
"step": 6900
},
{
"epoch": 2.1832398704683675,
"grad_norm": 0.227129021822185,
"learning_rate": 6.927419268368727e-06,
"loss": 0.1928,
"step": 6910
},
{
"epoch": 2.186399178579891,
"grad_norm": 0.21476017288096094,
"learning_rate": 6.917237669273047e-06,
"loss": 0.195,
"step": 6920
},
{
"epoch": 2.1895584866914146,
"grad_norm": 0.2101092960463205,
"learning_rate": 6.907046741210308e-06,
"loss": 0.1929,
"step": 6930
},
{
"epoch": 2.192717794802938,
"grad_norm": 0.21529278522230835,
"learning_rate": 6.8968465337679056e-06,
"loss": 0.1932,
"step": 6940
},
{
"epoch": 2.1958771029144617,
"grad_norm": 0.22276990935035607,
"learning_rate": 6.886637096578395e-06,
"loss": 0.1932,
"step": 6950
},
{
"epoch": 2.1990364110259852,
"grad_norm": 0.22806514291674243,
"learning_rate": 6.876418479319238e-06,
"loss": 0.1938,
"step": 6960
},
{
"epoch": 2.202195719137509,
"grad_norm": 0.20310603052055423,
"learning_rate": 6.866190731712566e-06,
"loss": 0.1945,
"step": 6970
},
{
"epoch": 2.2053550272490323,
"grad_norm": 0.2000583112376738,
"learning_rate": 6.8559539035249405e-06,
"loss": 0.1906,
"step": 6980
},
{
"epoch": 2.208514335360556,
"grad_norm": 0.21643622482829,
"learning_rate": 6.8457080445671e-06,
"loss": 0.1928,
"step": 6990
},
{
"epoch": 2.2116736434720794,
"grad_norm": 0.21135933665048376,
"learning_rate": 6.835453204693733e-06,
"loss": 0.1927,
"step": 7000
},
{
"epoch": 2.2148329515836034,
"grad_norm": 0.21880301226347054,
"learning_rate": 6.825189433803223e-06,
"loss": 0.1912,
"step": 7010
},
{
"epoch": 2.217992259695127,
"grad_norm": 0.21873208245413148,
"learning_rate": 6.814916781837413e-06,
"loss": 0.1922,
"step": 7020
},
{
"epoch": 2.2211515678066505,
"grad_norm": 0.19919301034094944,
"learning_rate": 6.804635298781358e-06,
"loss": 0.1914,
"step": 7030
},
{
"epoch": 2.224310875918174,
"grad_norm": 0.21709044589388574,
"learning_rate": 6.7943450346630845e-06,
"loss": 0.1949,
"step": 7040
},
{
"epoch": 2.2274701840296975,
"grad_norm": 0.19238634340689642,
"learning_rate": 6.784046039553347e-06,
"loss": 0.1926,
"step": 7050
},
{
"epoch": 2.230629492141221,
"grad_norm": 0.20786949384009987,
"learning_rate": 6.7737383635653805e-06,
"loss": 0.191,
"step": 7060
},
{
"epoch": 2.2337888002527446,
"grad_norm": 0.19859606904892732,
"learning_rate": 6.763422056854666e-06,
"loss": 0.1921,
"step": 7070
},
{
"epoch": 2.236948108364268,
"grad_norm": 0.22013791343337974,
"learning_rate": 6.753097169618672e-06,
"loss": 0.1892,
"step": 7080
},
{
"epoch": 2.2401074164757917,
"grad_norm": 0.22775949599854897,
"learning_rate": 6.742763752096625e-06,
"loss": 0.1924,
"step": 7090
},
{
"epoch": 2.2432667245873152,
"grad_norm": 0.2128328590833451,
"learning_rate": 6.732421854569254e-06,
"loss": 0.191,
"step": 7100
},
{
"epoch": 2.246426032698839,
"grad_norm": 0.21644470882444383,
"learning_rate": 6.722071527358557e-06,
"loss": 0.1935,
"step": 7110
},
{
"epoch": 2.2495853408103628,
"grad_norm": 0.21312175511661602,
"learning_rate": 6.7117128208275384e-06,
"loss": 0.1931,
"step": 7120
},
{
"epoch": 2.252744648921886,
"grad_norm": 0.22730243091149457,
"learning_rate": 6.701345785379987e-06,
"loss": 0.1922,
"step": 7130
},
{
"epoch": 2.25590395703341,
"grad_norm": 0.23578039614437363,
"learning_rate": 6.69097047146021e-06,
"loss": 0.1911,
"step": 7140
},
{
"epoch": 2.2590632651449334,
"grad_norm": 0.21392963590443645,
"learning_rate": 6.6805869295528e-06,
"loss": 0.1916,
"step": 7150
},
{
"epoch": 2.262222573256457,
"grad_norm": 0.21695056856445175,
"learning_rate": 6.6701952101823885e-06,
"loss": 0.1906,
"step": 7160
},
{
"epoch": 2.2653818813679805,
"grad_norm": 0.21461312040450123,
"learning_rate": 6.659795363913389e-06,
"loss": 0.1902,
"step": 7170
},
{
"epoch": 2.268541189479504,
"grad_norm": 0.2285522952212848,
"learning_rate": 6.649387441349767e-06,
"loss": 0.1903,
"step": 7180
},
{
"epoch": 2.2717004975910275,
"grad_norm": 0.2005700632922379,
"learning_rate": 6.6389714931347825e-06,
"loss": 0.1908,
"step": 7190
},
{
"epoch": 2.274859805702551,
"grad_norm": 0.22483814706686375,
"learning_rate": 6.628547569950748e-06,
"loss": 0.1943,
"step": 7200
},
{
"epoch": 2.2780191138140746,
"grad_norm": 0.21339185695046756,
"learning_rate": 6.61811572251878e-06,
"loss": 0.1923,
"step": 7210
},
{
"epoch": 2.281178421925598,
"grad_norm": 0.22013090536921387,
"learning_rate": 6.607676001598553e-06,
"loss": 0.1931,
"step": 7220
},
{
"epoch": 2.2843377300371217,
"grad_norm": 0.21150686936631666,
"learning_rate": 6.597228457988053e-06,
"loss": 0.1933,
"step": 7230
},
{
"epoch": 2.2874970381486452,
"grad_norm": 0.22030620455805627,
"learning_rate": 6.58677314252333e-06,
"loss": 0.1913,
"step": 7240
},
{
"epoch": 2.2906563462601692,
"grad_norm": 0.20354686364957472,
"learning_rate": 6.576310106078255e-06,
"loss": 0.1935,
"step": 7250
},
{
"epoch": 2.2938156543716928,
"grad_norm": 0.2266197363553553,
"learning_rate": 6.565839399564258e-06,
"loss": 0.1943,
"step": 7260
},
{
"epoch": 2.2969749624832163,
"grad_norm": 0.21248852160067305,
"learning_rate": 6.555361073930098e-06,
"loss": 0.1923,
"step": 7270
},
{
"epoch": 2.30013427059474,
"grad_norm": 0.20476133265911461,
"learning_rate": 6.544875180161605e-06,
"loss": 0.1908,
"step": 7280
},
{
"epoch": 2.3032935787062634,
"grad_norm": 0.20464174752072192,
"learning_rate": 6.534381769281437e-06,
"loss": 0.1905,
"step": 7290
},
{
"epoch": 2.306452886817787,
"grad_norm": 0.21832471477385795,
"learning_rate": 6.523880892348824e-06,
"loss": 0.194,
"step": 7300
},
{
"epoch": 2.3096121949293105,
"grad_norm": 0.19969512810776804,
"learning_rate": 6.513372600459329e-06,
"loss": 0.1914,
"step": 7310
},
{
"epoch": 2.312771503040834,
"grad_norm": 0.21518488246695772,
"learning_rate": 6.502856944744593e-06,
"loss": 0.1937,
"step": 7320
},
{
"epoch": 2.3159308111523575,
"grad_norm": 0.22335845799273413,
"learning_rate": 6.49233397637209e-06,
"loss": 0.1909,
"step": 7330
},
{
"epoch": 2.319090119263881,
"grad_norm": 0.2258181355415704,
"learning_rate": 6.48180374654487e-06,
"loss": 0.1918,
"step": 7340
},
{
"epoch": 2.3222494273754046,
"grad_norm": 0.22062705120624707,
"learning_rate": 6.471266306501325e-06,
"loss": 0.1925,
"step": 7350
},
{
"epoch": 2.325408735486928,
"grad_norm": 0.21451058313158836,
"learning_rate": 6.4607217075149265e-06,
"loss": 0.1909,
"step": 7360
},
{
"epoch": 2.3285680435984517,
"grad_norm": 0.20119666338740025,
"learning_rate": 6.450170000893978e-06,
"loss": 0.1912,
"step": 7370
},
{
"epoch": 2.3317273517099757,
"grad_norm": 0.20548278813927204,
"learning_rate": 6.439611237981373e-06,
"loss": 0.1903,
"step": 7380
},
{
"epoch": 2.3348866598214992,
"grad_norm": 0.2220285180002872,
"learning_rate": 6.429045470154333e-06,
"loss": 0.1922,
"step": 7390
},
{
"epoch": 2.3380459679330228,
"grad_norm": 0.20989770488757611,
"learning_rate": 6.418472748824172e-06,
"loss": 0.1905,
"step": 7400
},
{
"epoch": 2.3412052760445463,
"grad_norm": 0.21011715599104358,
"learning_rate": 6.407893125436031e-06,
"loss": 0.1918,
"step": 7410
},
{
"epoch": 2.34436458415607,
"grad_norm": 0.19838344804989674,
"learning_rate": 6.397306651468641e-06,
"loss": 0.1909,
"step": 7420
},
{
"epoch": 2.3475238922675934,
"grad_norm": 0.2169687246351887,
"learning_rate": 6.386713378434064e-06,
"loss": 0.1927,
"step": 7430
},
{
"epoch": 2.350683200379117,
"grad_norm": 0.204594134231941,
"learning_rate": 6.376113357877445e-06,
"loss": 0.1925,
"step": 7440
},
{
"epoch": 2.3538425084906405,
"grad_norm": 0.20966117593645678,
"learning_rate": 6.365506641376762e-06,
"loss": 0.1897,
"step": 7450
},
{
"epoch": 2.357001816602164,
"grad_norm": 0.20527775997224504,
"learning_rate": 6.354893280542576e-06,
"loss": 0.1929,
"step": 7460
},
{
"epoch": 2.3601611247136876,
"grad_norm": 0.20065093914673782,
"learning_rate": 6.344273327017778e-06,
"loss": 0.193,
"step": 7470
},
{
"epoch": 2.363320432825211,
"grad_norm": 0.22557801837324515,
"learning_rate": 6.333646832477334e-06,
"loss": 0.1898,
"step": 7480
},
{
"epoch": 2.366479740936735,
"grad_norm": 0.21882992475206528,
"learning_rate": 6.32301384862804e-06,
"loss": 0.1941,
"step": 7490
},
{
"epoch": 2.3696390490482586,
"grad_norm": 0.21331149519342535,
"learning_rate": 6.31237442720827e-06,
"loss": 0.1928,
"step": 7500
},
{
"epoch": 2.372798357159782,
"grad_norm": 0.2135806830134628,
"learning_rate": 6.301728619987722e-06,
"loss": 0.1912,
"step": 7510
},
{
"epoch": 2.3759576652713057,
"grad_norm": 0.20992047195155106,
"learning_rate": 6.29107647876716e-06,
"loss": 0.1924,
"step": 7520
},
{
"epoch": 2.3791169733828292,
"grad_norm": 0.22347906358520409,
"learning_rate": 6.280418055378175e-06,
"loss": 0.1929,
"step": 7530
},
{
"epoch": 2.3822762814943528,
"grad_norm": 0.20738356404474784,
"learning_rate": 6.269753401682924e-06,
"loss": 0.1921,
"step": 7540
},
{
"epoch": 2.3854355896058763,
"grad_norm": 0.21987439925453983,
"learning_rate": 6.25908256957388e-06,
"loss": 0.1914,
"step": 7550
},
{
"epoch": 2.3885948977174,
"grad_norm": 0.20180229410257866,
"learning_rate": 6.248405610973579e-06,
"loss": 0.1915,
"step": 7560
},
{
"epoch": 2.3917542058289234,
"grad_norm": 0.204547582458948,
"learning_rate": 6.237722577834366e-06,
"loss": 0.1926,
"step": 7570
},
{
"epoch": 2.394913513940447,
"grad_norm": 0.20792182149152452,
"learning_rate": 6.227033522138145e-06,
"loss": 0.1933,
"step": 7580
},
{
"epoch": 2.3980728220519705,
"grad_norm": 0.20943317325322225,
"learning_rate": 6.216338495896125e-06,
"loss": 0.192,
"step": 7590
},
{
"epoch": 2.401232130163494,
"grad_norm": 0.2184828315646208,
"learning_rate": 6.205637551148567e-06,
"loss": 0.1931,
"step": 7600
},
{
"epoch": 2.4043914382750176,
"grad_norm": 0.22863771124698407,
"learning_rate": 6.194930739964529e-06,
"loss": 0.1928,
"step": 7610
},
{
"epoch": 2.4075507463865415,
"grad_norm": 0.22337051398699578,
"learning_rate": 6.1842181144416145e-06,
"loss": 0.1924,
"step": 7620
},
{
"epoch": 2.410710054498065,
"grad_norm": 0.21034627944404075,
"learning_rate": 6.17349972670572e-06,
"loss": 0.1916,
"step": 7630
},
{
"epoch": 2.4138693626095886,
"grad_norm": 0.20200815442203482,
"learning_rate": 6.162775628910781e-06,
"loss": 0.1934,
"step": 7640
},
{
"epoch": 2.417028670721112,
"grad_norm": 0.20113233320753685,
"learning_rate": 6.152045873238512e-06,
"loss": 0.1915,
"step": 7650
},
{
"epoch": 2.4201879788326357,
"grad_norm": 0.2148019344588209,
"learning_rate": 6.141310511898162e-06,
"loss": 0.1901,
"step": 7660
},
{
"epoch": 2.4233472869441592,
"grad_norm": 0.2156955634575946,
"learning_rate": 6.130569597126257e-06,
"loss": 0.1896,
"step": 7670
},
{
"epoch": 2.426506595055683,
"grad_norm": 0.22700196837396544,
"learning_rate": 6.119823181186342e-06,
"loss": 0.1923,
"step": 7680
},
{
"epoch": 2.4296659031672063,
"grad_norm": 0.20832014160995668,
"learning_rate": 6.109071316368732e-06,
"loss": 0.1943,
"step": 7690
},
{
"epoch": 2.43282521127873,
"grad_norm": 0.22039569297008865,
"learning_rate": 6.0983140549902544e-06,
"loss": 0.1918,
"step": 7700
},
{
"epoch": 2.4359845193902534,
"grad_norm": 0.2032368232732068,
"learning_rate": 6.087551449393996e-06,
"loss": 0.1908,
"step": 7710
},
{
"epoch": 2.439143827501777,
"grad_norm": 0.21501063236631873,
"learning_rate": 6.0767835519490455e-06,
"loss": 0.193,
"step": 7720
},
{
"epoch": 2.442303135613301,
"grad_norm": 0.21020124149593053,
"learning_rate": 6.066010415050246e-06,
"loss": 0.1912,
"step": 7730
},
{
"epoch": 2.445462443724824,
"grad_norm": 0.21241622539470262,
"learning_rate": 6.0552320911179295e-06,
"loss": 0.1909,
"step": 7740
},
{
"epoch": 2.448621751836348,
"grad_norm": 0.21470252522546168,
"learning_rate": 6.04444863259767e-06,
"loss": 0.1928,
"step": 7750
},
{
"epoch": 2.4517810599478715,
"grad_norm": 0.21167973032496565,
"learning_rate": 6.033660091960025e-06,
"loss": 0.1936,
"step": 7760
},
{
"epoch": 2.454940368059395,
"grad_norm": 0.2051994018828898,
"learning_rate": 6.02286652170028e-06,
"loss": 0.1938,
"step": 7770
},
{
"epoch": 2.4580996761709186,
"grad_norm": 0.20457172842704202,
"learning_rate": 6.0120679743381945e-06,
"loss": 0.19,
"step": 7780
},
{
"epoch": 2.461258984282442,
"grad_norm": 0.2188284827007198,
"learning_rate": 6.001264502417749e-06,
"loss": 0.1923,
"step": 7790
},
{
"epoch": 2.4644182923939657,
"grad_norm": 0.2196693674996576,
"learning_rate": 5.990456158506879e-06,
"loss": 0.1905,
"step": 7800
},
{
"epoch": 2.4675776005054892,
"grad_norm": 0.206778112224436,
"learning_rate": 5.979642995197231e-06,
"loss": 0.1932,
"step": 7810
},
{
"epoch": 2.470736908617013,
"grad_norm": 0.21023136041598797,
"learning_rate": 5.968825065103904e-06,
"loss": 0.1914,
"step": 7820
},
{
"epoch": 2.4738962167285363,
"grad_norm": 0.20965600357426922,
"learning_rate": 5.958002420865184e-06,
"loss": 0.1908,
"step": 7830
},
{
"epoch": 2.47705552484006,
"grad_norm": 0.20939819430473985,
"learning_rate": 5.947175115142303e-06,
"loss": 0.1923,
"step": 7840
},
{
"epoch": 2.4802148329515834,
"grad_norm": 0.21000576536235138,
"learning_rate": 5.936343200619171e-06,
"loss": 0.1906,
"step": 7850
},
{
"epoch": 2.4833741410631074,
"grad_norm": 0.21638448903532395,
"learning_rate": 5.925506730002125e-06,
"loss": 0.1922,
"step": 7860
},
{
"epoch": 2.486533449174631,
"grad_norm": 0.21066924946782678,
"learning_rate": 5.914665756019672e-06,
"loss": 0.1926,
"step": 7870
},
{
"epoch": 2.4896927572861545,
"grad_norm": 0.21573344183841558,
"learning_rate": 5.903820331422228e-06,
"loss": 0.1929,
"step": 7880
},
{
"epoch": 2.492852065397678,
"grad_norm": 0.20934900894495664,
"learning_rate": 5.8929705089818665e-06,
"loss": 0.1915,
"step": 7890
},
{
"epoch": 2.4960113735092015,
"grad_norm": 0.19194977706940777,
"learning_rate": 5.882116341492063e-06,
"loss": 0.1918,
"step": 7900
},
{
"epoch": 2.499170681620725,
"grad_norm": 0.22582333577460095,
"learning_rate": 5.8712578817674356e-06,
"loss": 0.1909,
"step": 7910
},
{
"epoch": 2.5023299897322486,
"grad_norm": 0.2083520216392237,
"learning_rate": 5.860395182643481e-06,
"loss": 0.1915,
"step": 7920
},
{
"epoch": 2.505489297843772,
"grad_norm": 0.21101939233964928,
"learning_rate": 5.84952829697633e-06,
"loss": 0.1907,
"step": 7930
},
{
"epoch": 2.5086486059552957,
"grad_norm": 0.21143622985502697,
"learning_rate": 5.838657277642484e-06,
"loss": 0.1935,
"step": 7940
},
{
"epoch": 2.5118079140668192,
"grad_norm": 0.20175953224841772,
"learning_rate": 5.8277821775385575e-06,
"loss": 0.1924,
"step": 7950
},
{
"epoch": 2.514967222178343,
"grad_norm": 0.21068052700964154,
"learning_rate": 5.816903049581021e-06,
"loss": 0.1937,
"step": 7960
},
{
"epoch": 2.5181265302898668,
"grad_norm": 0.21882832311876035,
"learning_rate": 5.806019946705942e-06,
"loss": 0.191,
"step": 7970
},
{
"epoch": 2.52128583840139,
"grad_norm": 0.21291172763480593,
"learning_rate": 5.795132921868732e-06,
"loss": 0.1909,
"step": 7980
},
{
"epoch": 2.524445146512914,
"grad_norm": 0.2062457751313165,
"learning_rate": 5.784242028043886e-06,
"loss": 0.1913,
"step": 7990
},
{
"epoch": 2.5276044546244374,
"grad_norm": 0.19898272324165425,
"learning_rate": 5.773347318224726e-06,
"loss": 0.1926,
"step": 8000
},
{
"epoch": 2.530763762735961,
"grad_norm": 0.21382734065324233,
"learning_rate": 5.762448845423136e-06,
"loss": 0.1897,
"step": 8010
},
{
"epoch": 2.5339230708474845,
"grad_norm": 0.19860488219214512,
"learning_rate": 5.751546662669319e-06,
"loss": 0.1916,
"step": 8020
},
{
"epoch": 2.537082378959008,
"grad_norm": 0.20598643966772848,
"learning_rate": 5.74064082301152e-06,
"loss": 0.1889,
"step": 8030
},
{
"epoch": 2.5402416870705316,
"grad_norm": 0.20481756196092588,
"learning_rate": 5.729731379515787e-06,
"loss": 0.1916,
"step": 8040
},
{
"epoch": 2.543400995182055,
"grad_norm": 0.21953218134084496,
"learning_rate": 5.718818385265701e-06,
"loss": 0.1921,
"step": 8050
},
{
"epoch": 2.5465603032935786,
"grad_norm": 0.21169625628604868,
"learning_rate": 5.707901893362116e-06,
"loss": 0.1925,
"step": 8060
},
{
"epoch": 2.549719611405102,
"grad_norm": 0.20604257642179108,
"learning_rate": 5.696981956922909e-06,
"loss": 0.1906,
"step": 8070
},
{
"epoch": 2.5528789195166257,
"grad_norm": 0.2041617688415921,
"learning_rate": 5.686058629082718e-06,
"loss": 0.191,
"step": 8080
},
{
"epoch": 2.5560382276281493,
"grad_norm": 0.21493038957804925,
"learning_rate": 5.6751319629926834e-06,
"loss": 0.1903,
"step": 8090
},
{
"epoch": 2.5591975357396732,
"grad_norm": 0.22786525193461765,
"learning_rate": 5.664202011820183e-06,
"loss": 0.1907,
"step": 8100
},
{
"epoch": 2.5623568438511963,
"grad_norm": 0.20645959253391757,
"learning_rate": 5.653268828748588e-06,
"loss": 0.1901,
"step": 8110
},
{
"epoch": 2.5655161519627203,
"grad_norm": 0.19358622503604414,
"learning_rate": 5.642332466976989e-06,
"loss": 0.1878,
"step": 8120
},
{
"epoch": 2.568675460074244,
"grad_norm": 0.19969978459877627,
"learning_rate": 5.631392979719945e-06,
"loss": 0.1903,
"step": 8130
},
{
"epoch": 2.5718347681857674,
"grad_norm": 0.2143548911374617,
"learning_rate": 5.620450420207227e-06,
"loss": 0.1911,
"step": 8140
},
{
"epoch": 2.574994076297291,
"grad_norm": 0.21122139807224502,
"learning_rate": 5.609504841683551e-06,
"loss": 0.1904,
"step": 8150
},
{
"epoch": 2.5781533844088145,
"grad_norm": 0.21283424203204657,
"learning_rate": 5.598556297408322e-06,
"loss": 0.1929,
"step": 8160
},
{
"epoch": 2.581312692520338,
"grad_norm": 0.20416368302758592,
"learning_rate": 5.587604840655379e-06,
"loss": 0.1924,
"step": 8170
},
{
"epoch": 2.5844720006318616,
"grad_norm": 0.20184025843385978,
"learning_rate": 5.576650524712734e-06,
"loss": 0.1912,
"step": 8180
},
{
"epoch": 2.587631308743385,
"grad_norm": 0.20798749173073353,
"learning_rate": 5.565693402882306e-06,
"loss": 0.1923,
"step": 8190
},
{
"epoch": 2.5907906168549086,
"grad_norm": 0.2110396741272342,
"learning_rate": 5.554733528479672e-06,
"loss": 0.1901,
"step": 8200
},
{
"epoch": 2.5939499249664326,
"grad_norm": 0.2166668205307208,
"learning_rate": 5.5437709548337985e-06,
"loss": 0.191,
"step": 8210
},
{
"epoch": 2.5971092330779557,
"grad_norm": 0.22301181516302854,
"learning_rate": 5.53280573528679e-06,
"loss": 0.1911,
"step": 8220
},
{
"epoch": 2.6002685411894797,
"grad_norm": 0.20791087766916325,
"learning_rate": 5.521837923193621e-06,
"loss": 0.1889,
"step": 8230
},
{
"epoch": 2.6034278493010032,
"grad_norm": 0.21867859844216495,
"learning_rate": 5.510867571921887e-06,
"loss": 0.19,
"step": 8240
},
{
"epoch": 2.606587157412527,
"grad_norm": 0.20256198649149781,
"learning_rate": 5.499894734851533e-06,
"loss": 0.1908,
"step": 8250
},
{
"epoch": 2.6097464655240503,
"grad_norm": 0.2118493538585681,
"learning_rate": 5.488919465374601e-06,
"loss": 0.1898,
"step": 8260
},
{
"epoch": 2.612905773635574,
"grad_norm": 0.20908873607701223,
"learning_rate": 5.477941816894973e-06,
"loss": 0.1904,
"step": 8270
},
{
"epoch": 2.6160650817470974,
"grad_norm": 0.18994734431648216,
"learning_rate": 5.4669618428281e-06,
"loss": 0.1895,
"step": 8280
},
{
"epoch": 2.619224389858621,
"grad_norm": 0.21464906838442485,
"learning_rate": 5.455979596600752e-06,
"loss": 0.1906,
"step": 8290
},
{
"epoch": 2.6223836979701445,
"grad_norm": 0.20545088012285806,
"learning_rate": 5.444995131650757e-06,
"loss": 0.1916,
"step": 8300
},
{
"epoch": 2.625543006081668,
"grad_norm": 0.2308591130429511,
"learning_rate": 5.434008501426739e-06,
"loss": 0.1915,
"step": 8310
},
{
"epoch": 2.6287023141931916,
"grad_norm": 0.19533870441750978,
"learning_rate": 5.423019759387851e-06,
"loss": 0.1905,
"step": 8320
},
{
"epoch": 2.631861622304715,
"grad_norm": 0.21360189157598528,
"learning_rate": 5.41202895900353e-06,
"loss": 0.1891,
"step": 8330
},
{
"epoch": 2.635020930416239,
"grad_norm": 0.2086177679970007,
"learning_rate": 5.401036153753224e-06,
"loss": 0.1894,
"step": 8340
},
{
"epoch": 2.638180238527762,
"grad_norm": 0.20511520788720256,
"learning_rate": 5.390041397126139e-06,
"loss": 0.191,
"step": 8350
},
{
"epoch": 2.641339546639286,
"grad_norm": 0.20744339959234745,
"learning_rate": 5.379044742620975e-06,
"loss": 0.1906,
"step": 8360
},
{
"epoch": 2.6444988547508097,
"grad_norm": 0.21002258208476907,
"learning_rate": 5.368046243745664e-06,
"loss": 0.19,
"step": 8370
},
{
"epoch": 2.6476581628623332,
"grad_norm": 0.20045395931004378,
"learning_rate": 5.357045954017117e-06,
"loss": 0.1918,
"step": 8380
},
{
"epoch": 2.650817470973857,
"grad_norm": 0.20206701855858772,
"learning_rate": 5.346043926960955e-06,
"loss": 0.1914,
"step": 8390
},
{
"epoch": 2.6539767790853803,
"grad_norm": 0.21096628622208463,
"learning_rate": 5.335040216111259e-06,
"loss": 0.192,
"step": 8400
},
{
"epoch": 2.657136087196904,
"grad_norm": 0.20546853070131513,
"learning_rate": 5.324034875010293e-06,
"loss": 0.1913,
"step": 8410
},
{
"epoch": 2.6602953953084274,
"grad_norm": 0.21143968803860907,
"learning_rate": 5.313027957208262e-06,
"loss": 0.19,
"step": 8420
},
{
"epoch": 2.663454703419951,
"grad_norm": 0.20813089241185626,
"learning_rate": 5.30201951626304e-06,
"loss": 0.1908,
"step": 8430
},
{
"epoch": 2.6666140115314745,
"grad_norm": 0.2065605941239096,
"learning_rate": 5.291009605739912e-06,
"loss": 0.1914,
"step": 8440
},
{
"epoch": 2.669773319642998,
"grad_norm": 0.211794362752723,
"learning_rate": 5.279998279211315e-06,
"loss": 0.194,
"step": 8450
},
{
"epoch": 2.6729326277545216,
"grad_norm": 0.21353505062189707,
"learning_rate": 5.2689855902565725e-06,
"loss": 0.1907,
"step": 8460
},
{
"epoch": 2.6760919358660455,
"grad_norm": 0.20548536484778626,
"learning_rate": 5.257971592461643e-06,
"loss": 0.1889,
"step": 8470
},
{
"epoch": 2.6792512439775686,
"grad_norm": 0.20132489673742388,
"learning_rate": 5.2469563394188485e-06,
"loss": 0.1913,
"step": 8480
},
{
"epoch": 2.6824105520890926,
"grad_norm": 0.2183068296850621,
"learning_rate": 5.235939884726624e-06,
"loss": 0.1899,
"step": 8490
},
{
"epoch": 2.685569860200616,
"grad_norm": 0.21831688267562407,
"learning_rate": 5.224922281989245e-06,
"loss": 0.1885,
"step": 8500
},
{
"epoch": 2.6887291683121397,
"grad_norm": 0.20042363588520107,
"learning_rate": 5.213903584816578e-06,
"loss": 0.1919,
"step": 8510
},
{
"epoch": 2.6918884764236632,
"grad_norm": 0.19948817447604472,
"learning_rate": 5.202883846823816e-06,
"loss": 0.1869,
"step": 8520
},
{
"epoch": 2.695047784535187,
"grad_norm": 0.20763152988351818,
"learning_rate": 5.1918631216312095e-06,
"loss": 0.1892,
"step": 8530
},
{
"epoch": 2.6982070926467103,
"grad_norm": 0.21738471086652061,
"learning_rate": 5.1808414628638206e-06,
"loss": 0.1904,
"step": 8540
},
{
"epoch": 2.701366400758234,
"grad_norm": 0.21100460968688434,
"learning_rate": 5.16981892415125e-06,
"loss": 0.1899,
"step": 8550
},
{
"epoch": 2.7045257088697574,
"grad_norm": 0.21418226702194354,
"learning_rate": 5.158795559127379e-06,
"loss": 0.191,
"step": 8560
},
{
"epoch": 2.707685016981281,
"grad_norm": 0.20136963314709463,
"learning_rate": 5.147771421430112e-06,
"loss": 0.1903,
"step": 8570
},
{
"epoch": 2.710844325092805,
"grad_norm": 0.19436149226210073,
"learning_rate": 5.136746564701113e-06,
"loss": 0.1921,
"step": 8580
},
{
"epoch": 2.714003633204328,
"grad_norm": 0.2140972731026424,
"learning_rate": 5.125721042585542e-06,
"loss": 0.1896,
"step": 8590
},
{
"epoch": 2.717162941315852,
"grad_norm": 0.20787084627216532,
"learning_rate": 5.114694908731801e-06,
"loss": 0.1915,
"step": 8600
},
{
"epoch": 2.7203222494273755,
"grad_norm": 0.21044073918475403,
"learning_rate": 5.103668216791266e-06,
"loss": 0.1924,
"step": 8610
},
{
"epoch": 2.723481557538899,
"grad_norm": 0.20735197165221705,
"learning_rate": 5.092641020418026e-06,
"loss": 0.1886,
"step": 8620
},
{
"epoch": 2.7266408656504226,
"grad_norm": 0.206930807103157,
"learning_rate": 5.0816133732686305e-06,
"loss": 0.1898,
"step": 8630
},
{
"epoch": 2.729800173761946,
"grad_norm": 0.19205839670275318,
"learning_rate": 5.070585329001819e-06,
"loss": 0.1908,
"step": 8640
},
{
"epoch": 2.7329594818734697,
"grad_norm": 0.212328301487025,
"learning_rate": 5.059556941278261e-06,
"loss": 0.1903,
"step": 8650
},
{
"epoch": 2.7361187899849932,
"grad_norm": 0.2099153942052239,
"learning_rate": 5.048528263760301e-06,
"loss": 0.1909,
"step": 8660
},
{
"epoch": 2.739278098096517,
"grad_norm": 0.2137298602899187,
"learning_rate": 5.037499350111693e-06,
"loss": 0.1886,
"step": 8670
},
{
"epoch": 2.7424374062080403,
"grad_norm": 0.20013893402808897,
"learning_rate": 5.026470253997339e-06,
"loss": 0.1918,
"step": 8680
},
{
"epoch": 2.745596714319564,
"grad_norm": 0.2029041595541199,
"learning_rate": 5.0154410290830295e-06,
"loss": 0.1896,
"step": 8690
},
{
"epoch": 2.7487560224310874,
"grad_norm": 0.21844048806518204,
"learning_rate": 5.004411729035179e-06,
"loss": 0.1903,
"step": 8700
},
{
"epoch": 2.7519153305426114,
"grad_norm": 0.20229493690485986,
"learning_rate": 4.9933824075205735e-06,
"loss": 0.1889,
"step": 8710
},
{
"epoch": 2.7550746386541345,
"grad_norm": 0.20057196812482364,
"learning_rate": 4.982353118206095e-06,
"loss": 0.1905,
"step": 8720
},
{
"epoch": 2.7582339467656585,
"grad_norm": 0.20127831749959146,
"learning_rate": 4.971323914758479e-06,
"loss": 0.192,
"step": 8730
},
{
"epoch": 2.761393254877182,
"grad_norm": 0.19694835136970584,
"learning_rate": 4.9602948508440365e-06,
"loss": 0.1899,
"step": 8740
},
{
"epoch": 2.7645525629887056,
"grad_norm": 0.20744913562422332,
"learning_rate": 4.949265980128398e-06,
"loss": 0.1903,
"step": 8750
},
{
"epoch": 2.767711871100229,
"grad_norm": 0.1938079327085531,
"learning_rate": 4.938237356276261e-06,
"loss": 0.1909,
"step": 8760
},
{
"epoch": 2.7708711792117526,
"grad_norm": 0.2007179212194115,
"learning_rate": 4.9272090329511136e-06,
"loss": 0.1897,
"step": 8770
},
{
"epoch": 2.774030487323276,
"grad_norm": 0.21505457205199155,
"learning_rate": 4.916181063814989e-06,
"loss": 0.1925,
"step": 8780
},
{
"epoch": 2.7771897954347997,
"grad_norm": 0.21435123984044802,
"learning_rate": 4.905153502528193e-06,
"loss": 0.1902,
"step": 8790
},
{
"epoch": 2.7803491035463233,
"grad_norm": 0.2061154947415264,
"learning_rate": 4.894126402749044e-06,
"loss": 0.1898,
"step": 8800
},
{
"epoch": 2.783508411657847,
"grad_norm": 0.2131667185188113,
"learning_rate": 4.883099818133624e-06,
"loss": 0.193,
"step": 8810
},
{
"epoch": 2.7866677197693708,
"grad_norm": 0.21301174700619682,
"learning_rate": 4.872073802335499e-06,
"loss": 0.1906,
"step": 8820
},
{
"epoch": 2.789827027880894,
"grad_norm": 0.2108477817285028,
"learning_rate": 4.86104840900547e-06,
"loss": 0.19,
"step": 8830
},
{
"epoch": 2.792986335992418,
"grad_norm": 0.202466254802374,
"learning_rate": 4.850023691791313e-06,
"loss": 0.1913,
"step": 8840
},
{
"epoch": 2.7961456441039414,
"grad_norm": 0.21383889045040108,
"learning_rate": 4.838999704337507e-06,
"loss": 0.1905,
"step": 8850
},
{
"epoch": 2.799304952215465,
"grad_norm": 0.22342476096804859,
"learning_rate": 4.82797650028499e-06,
"loss": 0.1929,
"step": 8860
},
{
"epoch": 2.8024642603269885,
"grad_norm": 0.2124173070769737,
"learning_rate": 4.816954133270879e-06,
"loss": 0.1902,
"step": 8870
},
{
"epoch": 2.805623568438512,
"grad_norm": 0.20879593574150696,
"learning_rate": 4.805932656928218e-06,
"loss": 0.1907,
"step": 8880
},
{
"epoch": 2.8087828765500356,
"grad_norm": 0.2009544133407569,
"learning_rate": 4.794912124885728e-06,
"loss": 0.1924,
"step": 8890
},
{
"epoch": 2.811942184661559,
"grad_norm": 0.2176494138664626,
"learning_rate": 4.78389259076752e-06,
"loss": 0.19,
"step": 8900
},
{
"epoch": 2.8151014927730826,
"grad_norm": 0.20657867775507024,
"learning_rate": 4.772874108192864e-06,
"loss": 0.1886,
"step": 8910
},
{
"epoch": 2.818260800884606,
"grad_norm": 0.19602872268511337,
"learning_rate": 4.761856730775902e-06,
"loss": 0.1901,
"step": 8920
},
{
"epoch": 2.8214201089961297,
"grad_norm": 0.21196238191751543,
"learning_rate": 4.750840512125403e-06,
"loss": 0.1883,
"step": 8930
},
{
"epoch": 2.8245794171076533,
"grad_norm": 0.22132755218766037,
"learning_rate": 4.7398255058445e-06,
"loss": 0.1884,
"step": 8940
},
{
"epoch": 2.8277387252191772,
"grad_norm": 0.21199745533152217,
"learning_rate": 4.72881176553042e-06,
"loss": 0.1893,
"step": 8950
},
{
"epoch": 2.8308980333307003,
"grad_norm": 0.20115752802009068,
"learning_rate": 4.717799344774241e-06,
"loss": 0.19,
"step": 8960
},
{
"epoch": 2.8340573414422243,
"grad_norm": 0.1971895330735378,
"learning_rate": 4.706788297160608e-06,
"loss": 0.1914,
"step": 8970
},
{
"epoch": 2.837216649553748,
"grad_norm": 0.22093757797756977,
"learning_rate": 4.69577867626749e-06,
"loss": 0.1911,
"step": 8980
},
{
"epoch": 2.8403759576652714,
"grad_norm": 0.20276757417965852,
"learning_rate": 4.684770535665917e-06,
"loss": 0.1894,
"step": 8990
},
{
"epoch": 2.843535265776795,
"grad_norm": 0.2052738035290844,
"learning_rate": 4.673763928919712e-06,
"loss": 0.1904,
"step": 9000
},
{
"epoch": 2.8466945738883185,
"grad_norm": 0.1997948903035985,
"learning_rate": 4.662758909585233e-06,
"loss": 0.1902,
"step": 9010
},
{
"epoch": 2.849853881999842,
"grad_norm": 0.21607370638898796,
"learning_rate": 4.651755531211121e-06,
"loss": 0.1885,
"step": 9020
},
{
"epoch": 2.8530131901113656,
"grad_norm": 0.2072331158936079,
"learning_rate": 4.640753847338022e-06,
"loss": 0.1903,
"step": 9030
},
{
"epoch": 2.856172498222889,
"grad_norm": 0.2095788996901622,
"learning_rate": 4.629753911498348e-06,
"loss": 0.1906,
"step": 9040
},
{
"epoch": 2.8593318063344126,
"grad_norm": 0.21107554604389273,
"learning_rate": 4.618755777215998e-06,
"loss": 0.1875,
"step": 9050
},
{
"epoch": 2.862491114445936,
"grad_norm": 0.20742194888322327,
"learning_rate": 4.607759498006105e-06,
"loss": 0.1899,
"step": 9060
},
{
"epoch": 2.8656504225574597,
"grad_norm": 0.1998899750485672,
"learning_rate": 4.596765127374781e-06,
"loss": 0.1887,
"step": 9070
},
{
"epoch": 2.8688097306689837,
"grad_norm": 0.21434577458006387,
"learning_rate": 4.5857727188188426e-06,
"loss": 0.19,
"step": 9080
},
{
"epoch": 2.871969038780507,
"grad_norm": 0.20486890206169453,
"learning_rate": 4.57478232582557e-06,
"loss": 0.1888,
"step": 9090
},
{
"epoch": 2.875128346892031,
"grad_norm": 0.19626003974934564,
"learning_rate": 4.563794001872428e-06,
"loss": 0.189,
"step": 9100
},
{
"epoch": 2.8782876550035543,
"grad_norm": 0.19598418482256752,
"learning_rate": 4.5528078004268125e-06,
"loss": 0.1908,
"step": 9110
},
{
"epoch": 2.881446963115078,
"grad_norm": 0.20771737890224903,
"learning_rate": 4.5418237749458e-06,
"loss": 0.1893,
"step": 9120
},
{
"epoch": 2.8846062712266014,
"grad_norm": 0.21369972050863945,
"learning_rate": 4.5308419788758705e-06,
"loss": 0.1914,
"step": 9130
},
{
"epoch": 2.887765579338125,
"grad_norm": 0.19830047599777365,
"learning_rate": 4.519862465652664e-06,
"loss": 0.1891,
"step": 9140
},
{
"epoch": 2.8909248874496485,
"grad_norm": 0.21728853363365294,
"learning_rate": 4.508885288700706e-06,
"loss": 0.1878,
"step": 9150
},
{
"epoch": 2.894084195561172,
"grad_norm": 0.21489567981218682,
"learning_rate": 4.497910501433153e-06,
"loss": 0.1892,
"step": 9160
},
{
"epoch": 2.8972435036726956,
"grad_norm": 0.1999440134687983,
"learning_rate": 4.486938157251544e-06,
"loss": 0.1913,
"step": 9170
},
{
"epoch": 2.900402811784219,
"grad_norm": 0.21121245524106846,
"learning_rate": 4.475968309545519e-06,
"loss": 0.192,
"step": 9180
},
{
"epoch": 2.903562119895743,
"grad_norm": 0.19232313304792983,
"learning_rate": 4.465001011692575e-06,
"loss": 0.1884,
"step": 9190
},
{
"epoch": 2.906721428007266,
"grad_norm": 0.2047713363163475,
"learning_rate": 4.454036317057804e-06,
"loss": 0.1897,
"step": 9200
},
{
"epoch": 2.90988073611879,
"grad_norm": 0.20838735166818767,
"learning_rate": 4.443074278993625e-06,
"loss": 0.1868,
"step": 9210
},
{
"epoch": 2.9130400442303137,
"grad_norm": 0.19377646969116016,
"learning_rate": 4.43211495083954e-06,
"loss": 0.1899,
"step": 9220
},
{
"epoch": 2.9161993523418372,
"grad_norm": 0.2081826810286359,
"learning_rate": 4.421158385921856e-06,
"loss": 0.1901,
"step": 9230
},
{
"epoch": 2.919358660453361,
"grad_norm": 0.19836105918450578,
"learning_rate": 4.410204637553437e-06,
"loss": 0.1897,
"step": 9240
},
{
"epoch": 2.9225179685648843,
"grad_norm": 0.20794480096225132,
"learning_rate": 4.3992537590334485e-06,
"loss": 0.1904,
"step": 9250
},
{
"epoch": 2.925677276676408,
"grad_norm": 0.21149092375760936,
"learning_rate": 4.38830580364708e-06,
"loss": 0.1897,
"step": 9260
},
{
"epoch": 2.9288365847879314,
"grad_norm": 0.20074752053463094,
"learning_rate": 4.377360824665309e-06,
"loss": 0.1876,
"step": 9270
},
{
"epoch": 2.931995892899455,
"grad_norm": 0.20279658516073718,
"learning_rate": 4.366418875344624e-06,
"loss": 0.1888,
"step": 9280
},
{
"epoch": 2.9351552010109785,
"grad_norm": 0.202921993136915,
"learning_rate": 4.3554800089267705e-06,
"loss": 0.192,
"step": 9290
},
{
"epoch": 2.938314509122502,
"grad_norm": 0.2112414397403086,
"learning_rate": 4.344544278638499e-06,
"loss": 0.1883,
"step": 9300
},
{
"epoch": 2.9414738172340256,
"grad_norm": 0.1979648780466572,
"learning_rate": 4.333611737691296e-06,
"loss": 0.188,
"step": 9310
},
{
"epoch": 2.9446331253455496,
"grad_norm": 0.19936914995735966,
"learning_rate": 4.322682439281126e-06,
"loss": 0.1876,
"step": 9320
},
{
"epoch": 2.9477924334570726,
"grad_norm": 0.2045530468116589,
"learning_rate": 4.311756436588185e-06,
"loss": 0.1861,
"step": 9330
},
{
"epoch": 2.9509517415685966,
"grad_norm": 0.2207404912444104,
"learning_rate": 4.300833782776624e-06,
"loss": 0.1894,
"step": 9340
},
{
"epoch": 2.95411104968012,
"grad_norm": 0.19298829375532547,
"learning_rate": 4.289914530994303e-06,
"loss": 0.1885,
"step": 9350
},
{
"epoch": 2.9572703577916437,
"grad_norm": 0.20201449973484348,
"learning_rate": 4.27899873437253e-06,
"loss": 0.1892,
"step": 9360
},
{
"epoch": 2.9604296659031673,
"grad_norm": 0.2038957223369961,
"learning_rate": 4.268086446025793e-06,
"loss": 0.1884,
"step": 9370
},
{
"epoch": 2.963588974014691,
"grad_norm": 0.21578914203411362,
"learning_rate": 4.25717771905152e-06,
"loss": 0.1892,
"step": 9380
},
{
"epoch": 2.9667482821262143,
"grad_norm": 0.21096600784690153,
"learning_rate": 4.2462726065298e-06,
"loss": 0.1902,
"step": 9390
},
{
"epoch": 2.969907590237738,
"grad_norm": 0.2033196759446758,
"learning_rate": 4.235371161523141e-06,
"loss": 0.1892,
"step": 9400
},
{
"epoch": 2.9730668983492614,
"grad_norm": 0.20356432378382075,
"learning_rate": 4.224473437076204e-06,
"loss": 0.1905,
"step": 9410
},
{
"epoch": 2.976226206460785,
"grad_norm": 0.21038412746729923,
"learning_rate": 4.2135794862155454e-06,
"loss": 0.1912,
"step": 9420
},
{
"epoch": 2.979385514572309,
"grad_norm": 0.2033988073630781,
"learning_rate": 4.20268936194936e-06,
"loss": 0.1897,
"step": 9430
},
{
"epoch": 2.982544822683832,
"grad_norm": 0.20236599162357977,
"learning_rate": 4.191803117267223e-06,
"loss": 0.1893,
"step": 9440
},
{
"epoch": 2.985704130795356,
"grad_norm": 0.20641674023136433,
"learning_rate": 4.180920805139835e-06,
"loss": 0.1888,
"step": 9450
},
{
"epoch": 2.9888634389068796,
"grad_norm": 0.22914246659818197,
"learning_rate": 4.170042478518759e-06,
"loss": 0.1875,
"step": 9460
},
{
"epoch": 2.992022747018403,
"grad_norm": 0.20738233476923548,
"learning_rate": 4.159168190336162e-06,
"loss": 0.187,
"step": 9470
},
{
"epoch": 2.9951820551299266,
"grad_norm": 0.20804494541317625,
"learning_rate": 4.148297993504566e-06,
"loss": 0.1902,
"step": 9480
},
{
"epoch": 2.99834136324145,
"grad_norm": 0.19974693835491714,
"learning_rate": 4.137431940916584e-06,
"loss": 0.1866,
"step": 9490
},
{
"epoch": 3.001579654055762,
"grad_norm": 0.19231070887741758,
"learning_rate": 4.12657008544466e-06,
"loss": 0.1846,
"step": 9500
},
{
"epoch": 3.0047389621672855,
"grad_norm": 0.20508127207154292,
"learning_rate": 4.115712479940821e-06,
"loss": 0.1717,
"step": 9510
},
{
"epoch": 3.007898270278809,
"grad_norm": 0.19947206755336946,
"learning_rate": 4.10485917723641e-06,
"loss": 0.1711,
"step": 9520
},
{
"epoch": 3.0110575783903326,
"grad_norm": 0.19753840539867382,
"learning_rate": 4.0940102301418375e-06,
"loss": 0.1721,
"step": 9530
},
{
"epoch": 3.014216886501856,
"grad_norm": 0.1992468045730409,
"learning_rate": 4.083165691446314e-06,
"loss": 0.1719,
"step": 9540
},
{
"epoch": 3.0173761946133797,
"grad_norm": 0.20255881958221392,
"learning_rate": 4.072325613917605e-06,
"loss": 0.1719,
"step": 9550
},
{
"epoch": 3.0205355027249032,
"grad_norm": 0.1899039859129075,
"learning_rate": 4.061490050301767e-06,
"loss": 0.1699,
"step": 9560
},
{
"epoch": 3.0236948108364268,
"grad_norm": 0.20612020049628732,
"learning_rate": 4.050659053322892e-06,
"loss": 0.1714,
"step": 9570
},
{
"epoch": 3.0268541189479503,
"grad_norm": 0.21789263057401254,
"learning_rate": 4.039832675682854e-06,
"loss": 0.1723,
"step": 9580
},
{
"epoch": 3.030013427059474,
"grad_norm": 0.2026996020270893,
"learning_rate": 4.0290109700610445e-06,
"loss": 0.17,
"step": 9590
},
{
"epoch": 3.0331727351709974,
"grad_norm": 0.19422527184239666,
"learning_rate": 4.0181939891141276e-06,
"loss": 0.1715,
"step": 9600
},
{
"epoch": 3.036332043282521,
"grad_norm": 0.19811136542886754,
"learning_rate": 4.007381785475776e-06,
"loss": 0.1707,
"step": 9610
},
{
"epoch": 3.039491351394045,
"grad_norm": 0.21473702063694924,
"learning_rate": 3.996574411756412e-06,
"loss": 0.1717,
"step": 9620
},
{
"epoch": 3.0426506595055685,
"grad_norm": 0.20093271189509854,
"learning_rate": 3.9857719205429666e-06,
"loss": 0.1698,
"step": 9630
},
{
"epoch": 3.045809967617092,
"grad_norm": 0.2054432390601117,
"learning_rate": 3.974974364398604e-06,
"loss": 0.1722,
"step": 9640
},
{
"epoch": 3.0489692757286155,
"grad_norm": 0.21106353915988532,
"learning_rate": 3.964181795862476e-06,
"loss": 0.1702,
"step": 9650
},
{
"epoch": 3.052128583840139,
"grad_norm": 0.20956851733321105,
"learning_rate": 3.9533942674494736e-06,
"loss": 0.1712,
"step": 9660
},
{
"epoch": 3.0552878919516626,
"grad_norm": 0.2151757551569205,
"learning_rate": 3.942611831649953e-06,
"loss": 0.1723,
"step": 9670
},
{
"epoch": 3.058447200063186,
"grad_norm": 0.20087929271216232,
"learning_rate": 3.931834540929498e-06,
"loss": 0.1729,
"step": 9680
},
{
"epoch": 3.0616065081747097,
"grad_norm": 0.19812955505838692,
"learning_rate": 3.9210624477286545e-06,
"loss": 0.1702,
"step": 9690
},
{
"epoch": 3.0647658162862332,
"grad_norm": 0.19934250074039184,
"learning_rate": 3.910295604462675e-06,
"loss": 0.1718,
"step": 9700
},
{
"epoch": 3.0679251243977568,
"grad_norm": 0.22292842551894693,
"learning_rate": 3.899534063521274e-06,
"loss": 0.1703,
"step": 9710
},
{
"epoch": 3.0710844325092803,
"grad_norm": 0.20621823300766834,
"learning_rate": 3.888777877268361e-06,
"loss": 0.1718,
"step": 9720
},
{
"epoch": 3.074243740620804,
"grad_norm": 0.20627308571255917,
"learning_rate": 3.8780270980417865e-06,
"loss": 0.1715,
"step": 9730
},
{
"epoch": 3.077403048732328,
"grad_norm": 0.20092589221833093,
"learning_rate": 3.867281778153103e-06,
"loss": 0.1708,
"step": 9740
},
{
"epoch": 3.0805623568438514,
"grad_norm": 0.20178715984754905,
"learning_rate": 3.856541969887284e-06,
"loss": 0.1713,
"step": 9750
},
{
"epoch": 3.083721664955375,
"grad_norm": 0.20333774590909526,
"learning_rate": 3.8458077255024985e-06,
"loss": 0.1711,
"step": 9760
},
{
"epoch": 3.0868809730668985,
"grad_norm": 0.19812370974809254,
"learning_rate": 3.835079097229834e-06,
"loss": 0.1716,
"step": 9770
},
{
"epoch": 3.090040281178422,
"grad_norm": 0.19168564591899634,
"learning_rate": 3.82435613727305e-06,
"loss": 0.1712,
"step": 9780
},
{
"epoch": 3.0931995892899455,
"grad_norm": 0.20301128877899655,
"learning_rate": 3.8136388978083318e-06,
"loss": 0.1717,
"step": 9790
},
{
"epoch": 3.096358897401469,
"grad_norm": 0.21119806157032167,
"learning_rate": 3.802927430984024e-06,
"loss": 0.1713,
"step": 9800
},
{
"epoch": 3.0995182055129926,
"grad_norm": 0.19958619654256107,
"learning_rate": 3.7922217889203815e-06,
"loss": 0.1729,
"step": 9810
},
{
"epoch": 3.102677513624516,
"grad_norm": 0.1968913471359473,
"learning_rate": 3.781522023709325e-06,
"loss": 0.172,
"step": 9820
},
{
"epoch": 3.1058368217360397,
"grad_norm": 0.20650603439149065,
"learning_rate": 3.770828187414169e-06,
"loss": 0.1714,
"step": 9830
},
{
"epoch": 3.1089961298475632,
"grad_norm": 0.20480879307691913,
"learning_rate": 3.7601403320693877e-06,
"loss": 0.1731,
"step": 9840
},
{
"epoch": 3.112155437959087,
"grad_norm": 0.19841361175349975,
"learning_rate": 3.7494585096803475e-06,
"loss": 0.17,
"step": 9850
},
{
"epoch": 3.1153147460706103,
"grad_norm": 0.20023910284222765,
"learning_rate": 3.7387827722230592e-06,
"loss": 0.1719,
"step": 9860
},
{
"epoch": 3.1184740541821343,
"grad_norm": 0.20538502307132153,
"learning_rate": 3.72811317164393e-06,
"loss": 0.1714,
"step": 9870
},
{
"epoch": 3.121633362293658,
"grad_norm": 0.2025961874013164,
"learning_rate": 3.7174497598595004e-06,
"loss": 0.1731,
"step": 9880
},
{
"epoch": 3.1247926704051814,
"grad_norm": 0.19897689525350173,
"learning_rate": 3.7067925887562035e-06,
"loss": 0.1709,
"step": 9890
},
{
"epoch": 3.127951978516705,
"grad_norm": 0.19674997421298326,
"learning_rate": 3.6961417101901004e-06,
"loss": 0.1709,
"step": 9900
},
{
"epoch": 3.1311112866282285,
"grad_norm": 0.20048513567462214,
"learning_rate": 3.6854971759866343e-06,
"loss": 0.168,
"step": 9910
},
{
"epoch": 3.134270594739752,
"grad_norm": 0.20877865427323217,
"learning_rate": 3.6748590379403837e-06,
"loss": 0.1699,
"step": 9920
},
{
"epoch": 3.1374299028512755,
"grad_norm": 0.20947180408342017,
"learning_rate": 3.664227347814796e-06,
"loss": 0.1718,
"step": 9930
},
{
"epoch": 3.140589210962799,
"grad_norm": 0.19984590881674016,
"learning_rate": 3.653602157341953e-06,
"loss": 0.1744,
"step": 9940
},
{
"epoch": 3.1437485190743226,
"grad_norm": 0.20244914750147294,
"learning_rate": 3.6429835182223028e-06,
"loss": 0.1701,
"step": 9950
},
{
"epoch": 3.146907827185846,
"grad_norm": 0.20651719839215915,
"learning_rate": 3.632371482124416e-06,
"loss": 0.1722,
"step": 9960
},
{
"epoch": 3.1500671352973697,
"grad_norm": 0.21320130534174414,
"learning_rate": 3.621766100684742e-06,
"loss": 0.1719,
"step": 9970
},
{
"epoch": 3.1532264434088937,
"grad_norm": 0.1982018761614747,
"learning_rate": 3.6111674255073415e-06,
"loss": 0.1697,
"step": 9980
},
{
"epoch": 3.1563857515204172,
"grad_norm": 0.19540192155635913,
"learning_rate": 3.600575508163643e-06,
"loss": 0.1716,
"step": 9990
},
{
"epoch": 3.1595450596319408,
"grad_norm": 0.19265346677541417,
"learning_rate": 3.5899904001922014e-06,
"loss": 0.1723,
"step": 10000
},
{
"epoch": 3.1627043677434643,
"grad_norm": 0.2602993365623357,
"learning_rate": 3.579412153098428e-06,
"loss": 0.1717,
"step": 10010
},
{
"epoch": 3.165863675854988,
"grad_norm": 0.2072576388794693,
"learning_rate": 3.568840818354359e-06,
"loss": 0.1705,
"step": 10020
},
{
"epoch": 3.1690229839665114,
"grad_norm": 0.20292312081490704,
"learning_rate": 3.5582764473983898e-06,
"loss": 0.1708,
"step": 10030
},
{
"epoch": 3.172182292078035,
"grad_norm": 0.19839564056735176,
"learning_rate": 3.5477190916350314e-06,
"loss": 0.173,
"step": 10040
},
{
"epoch": 3.1753416001895585,
"grad_norm": 0.21059840553848913,
"learning_rate": 3.5371688024346663e-06,
"loss": 0.1728,
"step": 10050
},
{
"epoch": 3.178500908301082,
"grad_norm": 0.19127556281230237,
"learning_rate": 3.5266256311332838e-06,
"loss": 0.1717,
"step": 10060
},
{
"epoch": 3.1816602164126055,
"grad_norm": 0.20280055781097764,
"learning_rate": 3.5160896290322466e-06,
"loss": 0.1718,
"step": 10070
},
{
"epoch": 3.184819524524129,
"grad_norm": 0.19870253928511597,
"learning_rate": 3.5055608473980275e-06,
"loss": 0.173,
"step": 10080
},
{
"epoch": 3.1879788326356526,
"grad_norm": 0.20124551003316218,
"learning_rate": 3.495039337461966e-06,
"loss": 0.1714,
"step": 10090
},
{
"epoch": 3.191138140747176,
"grad_norm": 0.1974863499022992,
"learning_rate": 3.484525150420024e-06,
"loss": 0.1727,
"step": 10100
},
{
"epoch": 3.1942974488587,
"grad_norm": 0.2056406098760328,
"learning_rate": 3.474018337432526e-06,
"loss": 0.1711,
"step": 10110
},
{
"epoch": 3.1974567569702237,
"grad_norm": 0.1993829016013529,
"learning_rate": 3.4635189496239147e-06,
"loss": 0.1723,
"step": 10120
},
{
"epoch": 3.2006160650817472,
"grad_norm": 0.20235980644501536,
"learning_rate": 3.4530270380825106e-06,
"loss": 0.1719,
"step": 10130
},
{
"epoch": 3.2037753731932708,
"grad_norm": 0.21045502485536316,
"learning_rate": 3.442542653860246e-06,
"loss": 0.1728,
"step": 10140
},
{
"epoch": 3.2069346813047943,
"grad_norm": 0.1983591302141842,
"learning_rate": 3.4320658479724358e-06,
"loss": 0.1714,
"step": 10150
},
{
"epoch": 3.210093989416318,
"grad_norm": 0.20220358320589318,
"learning_rate": 3.4215966713975137e-06,
"loss": 0.1721,
"step": 10160
},
{
"epoch": 3.2132532975278414,
"grad_norm": 0.20148720905716092,
"learning_rate": 3.41113517507679e-06,
"loss": 0.1722,
"step": 10170
},
{
"epoch": 3.216412605639365,
"grad_norm": 0.21958364967195199,
"learning_rate": 3.400681409914211e-06,
"loss": 0.1717,
"step": 10180
},
{
"epoch": 3.2195719137508885,
"grad_norm": 0.20443876928391433,
"learning_rate": 3.390235426776095e-06,
"loss": 0.1723,
"step": 10190
},
{
"epoch": 3.222731221862412,
"grad_norm": 0.20468900776748283,
"learning_rate": 3.3797972764909044e-06,
"loss": 0.1728,
"step": 10200
},
{
"epoch": 3.2258905299739355,
"grad_norm": 0.20712330871995108,
"learning_rate": 3.3693670098489794e-06,
"loss": 0.1717,
"step": 10210
},
{
"epoch": 3.229049838085459,
"grad_norm": 0.20539340759709276,
"learning_rate": 3.3589446776023026e-06,
"loss": 0.1735,
"step": 10220
},
{
"epoch": 3.2322091461969826,
"grad_norm": 0.20853940556733697,
"learning_rate": 3.3485303304642523e-06,
"loss": 0.1734,
"step": 10230
},
{
"epoch": 3.2353684543085066,
"grad_norm": 0.20622108613855025,
"learning_rate": 3.338124019109348e-06,
"loss": 0.1731,
"step": 10240
},
{
"epoch": 3.23852776242003,
"grad_norm": 0.20725725534935868,
"learning_rate": 3.3277257941730112e-06,
"loss": 0.1701,
"step": 10250
},
{
"epoch": 3.2416870705315537,
"grad_norm": 0.20483333136083265,
"learning_rate": 3.3173357062513156e-06,
"loss": 0.1726,
"step": 10260
},
{
"epoch": 3.2448463786430772,
"grad_norm": 0.19730208138726862,
"learning_rate": 3.30695380590074e-06,
"loss": 0.1719,
"step": 10270
},
{
"epoch": 3.2480056867546008,
"grad_norm": 0.19161529058623247,
"learning_rate": 3.2965801436379268e-06,
"loss": 0.1703,
"step": 10280
},
{
"epoch": 3.2511649948661243,
"grad_norm": 0.20183756272218625,
"learning_rate": 3.2862147699394308e-06,
"loss": 0.1707,
"step": 10290
},
{
"epoch": 3.254324302977648,
"grad_norm": 0.2077259975279863,
"learning_rate": 3.2758577352414746e-06,
"loss": 0.1724,
"step": 10300
},
{
"epoch": 3.2574836110891714,
"grad_norm": 0.19909554676713112,
"learning_rate": 3.2655090899397104e-06,
"loss": 0.1727,
"step": 10310
},
{
"epoch": 3.260642919200695,
"grad_norm": 0.19458384557088879,
"learning_rate": 3.255168884388962e-06,
"loss": 0.1706,
"step": 10320
},
{
"epoch": 3.2638022273122185,
"grad_norm": 0.20300444839784934,
"learning_rate": 3.2448371689029917e-06,
"loss": 0.17,
"step": 10330
},
{
"epoch": 3.266961535423742,
"grad_norm": 0.2037381038102816,
"learning_rate": 3.2345139937542493e-06,
"loss": 0.1707,
"step": 10340
},
{
"epoch": 3.270120843535266,
"grad_norm": 0.20261293016006007,
"learning_rate": 3.2241994091736264e-06,
"loss": 0.1716,
"step": 10350
},
{
"epoch": 3.2732801516467895,
"grad_norm": 0.20445117127878168,
"learning_rate": 3.2138934653502157e-06,
"loss": 0.1715,
"step": 10360
},
{
"epoch": 3.276439459758313,
"grad_norm": 0.20006168658634946,
"learning_rate": 3.2035962124310677e-06,
"loss": 0.1699,
"step": 10370
},
{
"epoch": 3.2795987678698366,
"grad_norm": 0.2048357399994623,
"learning_rate": 3.1933077005209413e-06,
"loss": 0.1714,
"step": 10380
},
{
"epoch": 3.28275807598136,
"grad_norm": 0.20764582211703872,
"learning_rate": 3.1830279796820655e-06,
"loss": 0.1726,
"step": 10390
},
{
"epoch": 3.2859173840928837,
"grad_norm": 0.19507013108646712,
"learning_rate": 3.17275709993389e-06,
"loss": 0.1686,
"step": 10400
},
{
"epoch": 3.2890766922044072,
"grad_norm": 0.20376140005700832,
"learning_rate": 3.1624951112528486e-06,
"loss": 0.1727,
"step": 10410
},
{
"epoch": 3.2922360003159308,
"grad_norm": 0.19814895837042856,
"learning_rate": 3.152242063572111e-06,
"loss": 0.172,
"step": 10420
},
{
"epoch": 3.2953953084274543,
"grad_norm": 0.20809655011071984,
"learning_rate": 3.1419980067813416e-06,
"loss": 0.1723,
"step": 10430
},
{
"epoch": 3.298554616538978,
"grad_norm": 0.2130288394869752,
"learning_rate": 3.131762990726457e-06,
"loss": 0.1693,
"step": 10440
},
{
"epoch": 3.3017139246505014,
"grad_norm": 0.20474107277059672,
"learning_rate": 3.1215370652093817e-06,
"loss": 0.1728,
"step": 10450
},
{
"epoch": 3.304873232762025,
"grad_norm": 0.19685492816170327,
"learning_rate": 3.1113202799878104e-06,
"loss": 0.1736,
"step": 10460
},
{
"epoch": 3.3080325408735485,
"grad_norm": 0.20719245855817442,
"learning_rate": 3.1011126847749573e-06,
"loss": 0.1718,
"step": 10470
},
{
"epoch": 3.3111918489850725,
"grad_norm": 0.19594664304604262,
"learning_rate": 3.090914329239325e-06,
"loss": 0.1705,
"step": 10480
},
{
"epoch": 3.314351157096596,
"grad_norm": 0.19759328866966638,
"learning_rate": 3.0807252630044535e-06,
"loss": 0.1738,
"step": 10490
},
{
"epoch": 3.3175104652081195,
"grad_norm": 0.2039023386475025,
"learning_rate": 3.0705455356486847e-06,
"loss": 0.1709,
"step": 10500
},
{
"epoch": 3.320669773319643,
"grad_norm": 0.20102237377028126,
"learning_rate": 3.0603751967049196e-06,
"loss": 0.1731,
"step": 10510
},
{
"epoch": 3.3238290814311666,
"grad_norm": 0.20154946206713367,
"learning_rate": 3.050214295660373e-06,
"loss": 0.1744,
"step": 10520
},
{
"epoch": 3.32698838954269,
"grad_norm": 0.20754788610456004,
"learning_rate": 3.0400628819563394e-06,
"loss": 0.1725,
"step": 10530
},
{
"epoch": 3.3301476976542137,
"grad_norm": 0.21440535676787434,
"learning_rate": 3.02992100498795e-06,
"loss": 0.1711,
"step": 10540
},
{
"epoch": 3.3333070057657372,
"grad_norm": 0.1985749560929464,
"learning_rate": 3.0197887141039295e-06,
"loss": 0.1716,
"step": 10550
},
{
"epoch": 3.336466313877261,
"grad_norm": 0.20486416168285063,
"learning_rate": 3.009666058606361e-06,
"loss": 0.1712,
"step": 10560
},
{
"epoch": 3.3396256219887843,
"grad_norm": 0.20464416545173006,
"learning_rate": 2.999553087750441e-06,
"loss": 0.1715,
"step": 10570
},
{
"epoch": 3.342784930100308,
"grad_norm": 0.19277745618987774,
"learning_rate": 2.9894498507442403e-06,
"loss": 0.1696,
"step": 10580
},
{
"epoch": 3.345944238211832,
"grad_norm": 0.19243002023701336,
"learning_rate": 2.979356396748474e-06,
"loss": 0.1722,
"step": 10590
},
{
"epoch": 3.349103546323355,
"grad_norm": 0.184939999971442,
"learning_rate": 2.969272774876246e-06,
"loss": 0.1704,
"step": 10600
},
{
"epoch": 3.352262854434879,
"grad_norm": 0.20963451528121682,
"learning_rate": 2.9591990341928233e-06,
"loss": 0.172,
"step": 10610
},
{
"epoch": 3.3554221625464025,
"grad_norm": 0.21203412019436482,
"learning_rate": 2.9491352237153924e-06,
"loss": 0.1719,
"step": 10620
},
{
"epoch": 3.358581470657926,
"grad_norm": 0.19976350882936353,
"learning_rate": 2.9390813924128187e-06,
"loss": 0.1716,
"step": 10630
},
{
"epoch": 3.3617407787694495,
"grad_norm": 0.19765127820077447,
"learning_rate": 2.9290375892054145e-06,
"loss": 0.1719,
"step": 10640
},
{
"epoch": 3.364900086880973,
"grad_norm": 0.19819054962322868,
"learning_rate": 2.9190038629646928e-06,
"loss": 0.1718,
"step": 10650
},
{
"epoch": 3.3680593949924966,
"grad_norm": 0.2018843941060178,
"learning_rate": 2.9089802625131357e-06,
"loss": 0.1715,
"step": 10660
},
{
"epoch": 3.37121870310402,
"grad_norm": 0.2053114073761089,
"learning_rate": 2.898966836623956e-06,
"loss": 0.1712,
"step": 10670
},
{
"epoch": 3.3743780112155437,
"grad_norm": 0.1834559540518353,
"learning_rate": 2.888963634020856e-06,
"loss": 0.1718,
"step": 10680
},
{
"epoch": 3.3775373193270672,
"grad_norm": 0.21217338223459672,
"learning_rate": 2.8789707033777958e-06,
"loss": 0.17,
"step": 10690
},
{
"epoch": 3.380696627438591,
"grad_norm": 0.20413368942330248,
"learning_rate": 2.868988093318755e-06,
"loss": 0.17,
"step": 10700
},
{
"epoch": 3.3838559355501143,
"grad_norm": 0.20529368260563738,
"learning_rate": 2.8590158524174847e-06,
"loss": 0.1706,
"step": 10710
},
{
"epoch": 3.3870152436616383,
"grad_norm": 0.1996839471001918,
"learning_rate": 2.849054029197299e-06,
"loss": 0.1728,
"step": 10720
},
{
"epoch": 3.390174551773162,
"grad_norm": 0.21107475110462812,
"learning_rate": 2.8391026721308048e-06,
"loss": 0.1726,
"step": 10730
},
{
"epoch": 3.3933338598846854,
"grad_norm": 0.1977207222688015,
"learning_rate": 2.8291618296396906e-06,
"loss": 0.1717,
"step": 10740
},
{
"epoch": 3.396493167996209,
"grad_norm": 0.20112571153294398,
"learning_rate": 2.819231550094482e-06,
"loss": 0.171,
"step": 10750
},
{
"epoch": 3.3996524761077325,
"grad_norm": 0.1998438118160792,
"learning_rate": 2.8093118818143054e-06,
"loss": 0.1714,
"step": 10760
},
{
"epoch": 3.402811784219256,
"grad_norm": 0.20497839555948202,
"learning_rate": 2.799402873066657e-06,
"loss": 0.1718,
"step": 10770
},
{
"epoch": 3.4059710923307795,
"grad_norm": 0.20138016538674214,
"learning_rate": 2.789504572067163e-06,
"loss": 0.1723,
"step": 10780
},
{
"epoch": 3.409130400442303,
"grad_norm": 0.2119166594988366,
"learning_rate": 2.7796170269793448e-06,
"loss": 0.1714,
"step": 10790
},
{
"epoch": 3.4122897085538266,
"grad_norm": 0.207710303365895,
"learning_rate": 2.7697402859143973e-06,
"loss": 0.1731,
"step": 10800
},
{
"epoch": 3.41544901666535,
"grad_norm": 0.2002743432404299,
"learning_rate": 2.7598743969309323e-06,
"loss": 0.1705,
"step": 10810
},
{
"epoch": 3.4186083247768737,
"grad_norm": 0.1979710890338057,
"learning_rate": 2.7500194080347652e-06,
"loss": 0.1698,
"step": 10820
},
{
"epoch": 3.4217676328883972,
"grad_norm": 0.20374184595460798,
"learning_rate": 2.740175367178671e-06,
"loss": 0.1731,
"step": 10830
},
{
"epoch": 3.424926940999921,
"grad_norm": 0.21111483778852924,
"learning_rate": 2.7303423222621532e-06,
"loss": 0.1712,
"step": 10840
},
{
"epoch": 3.4280862491114448,
"grad_norm": 0.209638712778081,
"learning_rate": 2.7205203211312113e-06,
"loss": 0.1695,
"step": 10850
},
{
"epoch": 3.4312455572229683,
"grad_norm": 0.20749198974074953,
"learning_rate": 2.710709411578108e-06,
"loss": 0.1701,
"step": 10860
},
{
"epoch": 3.434404865334492,
"grad_norm": 0.20185076643899905,
"learning_rate": 2.700909641341136e-06,
"loss": 0.1716,
"step": 10870
},
{
"epoch": 3.4375641734460154,
"grad_norm": 0.20360708501600241,
"learning_rate": 2.6911210581043827e-06,
"loss": 0.1717,
"step": 10880
},
{
"epoch": 3.440723481557539,
"grad_norm": 0.19759420416489065,
"learning_rate": 2.6813437094975058e-06,
"loss": 0.1702,
"step": 10890
},
{
"epoch": 3.4438827896690625,
"grad_norm": 0.21258398444562132,
"learning_rate": 2.6715776430954948e-06,
"loss": 0.1712,
"step": 10900
},
{
"epoch": 3.447042097780586,
"grad_norm": 0.204687059081065,
"learning_rate": 2.661822906418443e-06,
"loss": 0.1713,
"step": 10910
},
{
"epoch": 3.4502014058921096,
"grad_norm": 0.21216451437708195,
"learning_rate": 2.652079546931314e-06,
"loss": 0.172,
"step": 10920
},
{
"epoch": 3.453360714003633,
"grad_norm": 0.20495187704615014,
"learning_rate": 2.642347612043713e-06,
"loss": 0.172,
"step": 10930
},
{
"epoch": 3.4565200221151566,
"grad_norm": 0.20577240721425508,
"learning_rate": 2.632627149109653e-06,
"loss": 0.1724,
"step": 10940
},
{
"epoch": 3.45967933022668,
"grad_norm": 0.20332569317373442,
"learning_rate": 2.622918205427332e-06,
"loss": 0.1728,
"step": 10950
},
{
"epoch": 3.462838638338204,
"grad_norm": 0.20833963528994082,
"learning_rate": 2.613220828238887e-06,
"loss": 0.1723,
"step": 10960
},
{
"epoch": 3.4659979464497277,
"grad_norm": 0.20062775671760197,
"learning_rate": 2.6035350647301825e-06,
"loss": 0.1697,
"step": 10970
},
{
"epoch": 3.4691572545612512,
"grad_norm": 0.1925666545023208,
"learning_rate": 2.5938609620305697e-06,
"loss": 0.1721,
"step": 10980
},
{
"epoch": 3.4723165626727748,
"grad_norm": 0.20615750012695935,
"learning_rate": 2.584198567212663e-06,
"loss": 0.1693,
"step": 10990
},
{
"epoch": 3.4754758707842983,
"grad_norm": 0.205868926105775,
"learning_rate": 2.5745479272921035e-06,
"loss": 0.1715,
"step": 11000
},
{
"epoch": 3.478635178895822,
"grad_norm": 0.19907532345392218,
"learning_rate": 2.5649090892273394e-06,
"loss": 0.1697,
"step": 11010
},
{
"epoch": 3.4817944870073454,
"grad_norm": 0.20241604647559086,
"learning_rate": 2.5552820999193893e-06,
"loss": 0.1714,
"step": 11020
},
{
"epoch": 3.484953795118869,
"grad_norm": 0.19925318676474033,
"learning_rate": 2.5456670062116227e-06,
"loss": 0.1702,
"step": 11030
},
{
"epoch": 3.4881131032303925,
"grad_norm": 0.20118707025616442,
"learning_rate": 2.5360638548895177e-06,
"loss": 0.1687,
"step": 11040
},
{
"epoch": 3.491272411341916,
"grad_norm": 0.21011772565832204,
"learning_rate": 2.526472692680455e-06,
"loss": 0.1723,
"step": 11050
},
{
"epoch": 3.4944317194534396,
"grad_norm": 0.2114462945794177,
"learning_rate": 2.5168935662534676e-06,
"loss": 0.1713,
"step": 11060
},
{
"epoch": 3.497591027564963,
"grad_norm": 0.20322439000336912,
"learning_rate": 2.507326522219031e-06,
"loss": 0.1722,
"step": 11070
},
{
"epoch": 3.5007503356764866,
"grad_norm": 0.20423924785828376,
"learning_rate": 2.497771607128826e-06,
"loss": 0.1711,
"step": 11080
},
{
"epoch": 3.5039096437880106,
"grad_norm": 0.21367822416543628,
"learning_rate": 2.4882288674755196e-06,
"loss": 0.1702,
"step": 11090
},
{
"epoch": 3.507068951899534,
"grad_norm": 0.20077382774697636,
"learning_rate": 2.4786983496925273e-06,
"loss": 0.1723,
"step": 11100
},
{
"epoch": 3.5102282600110577,
"grad_norm": 0.21321414495009436,
"learning_rate": 2.4691801001538083e-06,
"loss": 0.1696,
"step": 11110
},
{
"epoch": 3.5133875681225812,
"grad_norm": 0.203537804839117,
"learning_rate": 2.459674165173611e-06,
"loss": 0.1698,
"step": 11120
},
{
"epoch": 3.516546876234105,
"grad_norm": 0.2009040955436558,
"learning_rate": 2.450180591006278e-06,
"loss": 0.1716,
"step": 11130
},
{
"epoch": 3.5197061843456283,
"grad_norm": 0.20513705543314245,
"learning_rate": 2.440699423845994e-06,
"loss": 0.1721,
"step": 11140
},
{
"epoch": 3.522865492457152,
"grad_norm": 0.19738195470784428,
"learning_rate": 2.43123070982658e-06,
"loss": 0.1716,
"step": 11150
},
{
"epoch": 3.5260248005686754,
"grad_norm": 0.19923618992627787,
"learning_rate": 2.4217744950212603e-06,
"loss": 0.1722,
"step": 11160
},
{
"epoch": 3.529184108680199,
"grad_norm": 0.19543045451037108,
"learning_rate": 2.4123308254424397e-06,
"loss": 0.1722,
"step": 11170
},
{
"epoch": 3.5323434167917225,
"grad_norm": 0.20098623956184636,
"learning_rate": 2.4028997470414813e-06,
"loss": 0.1721,
"step": 11180
},
{
"epoch": 3.535502724903246,
"grad_norm": 0.19666520667412346,
"learning_rate": 2.393481305708481e-06,
"loss": 0.1718,
"step": 11190
},
{
"epoch": 3.53866203301477,
"grad_norm": 0.20769071659100188,
"learning_rate": 2.38407554727204e-06,
"loss": 0.1721,
"step": 11200
},
{
"epoch": 3.541821341126293,
"grad_norm": 0.20902129214374304,
"learning_rate": 2.3746825174990586e-06,
"loss": 0.1734,
"step": 11210
},
{
"epoch": 3.544980649237817,
"grad_norm": 0.18991888715497546,
"learning_rate": 2.365302262094485e-06,
"loss": 0.1718,
"step": 11220
},
{
"epoch": 3.5481399573493406,
"grad_norm": 0.18872022535120644,
"learning_rate": 2.3559348267011265e-06,
"loss": 0.1717,
"step": 11230
},
{
"epoch": 3.551299265460864,
"grad_norm": 0.2032395005721165,
"learning_rate": 2.3465802568993974e-06,
"loss": 0.1696,
"step": 11240
},
{
"epoch": 3.5544585735723877,
"grad_norm": 0.20260872935920152,
"learning_rate": 2.3372385982071155e-06,
"loss": 0.1699,
"step": 11250
},
{
"epoch": 3.5576178816839112,
"grad_norm": 0.19808829127100105,
"learning_rate": 2.3279098960792743e-06,
"loss": 0.1693,
"step": 11260
},
{
"epoch": 3.560777189795435,
"grad_norm": 0.20833962705825296,
"learning_rate": 2.318594195907826e-06,
"loss": 0.1716,
"step": 11270
},
{
"epoch": 3.5639364979069583,
"grad_norm": 0.20099933117353708,
"learning_rate": 2.3092915430214486e-06,
"loss": 0.171,
"step": 11280
},
{
"epoch": 3.567095806018482,
"grad_norm": 0.20739851880627683,
"learning_rate": 2.3000019826853464e-06,
"loss": 0.1693,
"step": 11290
},
{
"epoch": 3.5702551141300054,
"grad_norm": 0.19883060983816717,
"learning_rate": 2.2907255601010048e-06,
"loss": 0.1706,
"step": 11300
},
{
"epoch": 3.573414422241529,
"grad_norm": 0.200914416630117,
"learning_rate": 2.2814623204059954e-06,
"loss": 0.1705,
"step": 11310
},
{
"epoch": 3.5765737303530525,
"grad_norm": 0.18451322741288337,
"learning_rate": 2.272212308673733e-06,
"loss": 0.1702,
"step": 11320
},
{
"epoch": 3.5797330384645765,
"grad_norm": 0.20604109813637425,
"learning_rate": 2.262975569913274e-06,
"loss": 0.1716,
"step": 11330
},
{
"epoch": 3.5828923465760996,
"grad_norm": 0.20483250038192008,
"learning_rate": 2.2537521490690885e-06,
"loss": 0.1692,
"step": 11340
},
{
"epoch": 3.5860516546876235,
"grad_norm": 0.20171939003906209,
"learning_rate": 2.2445420910208444e-06,
"loss": 0.1687,
"step": 11350
},
{
"epoch": 3.589210962799147,
"grad_norm": 0.20410002768909777,
"learning_rate": 2.2353454405831878e-06,
"loss": 0.1681,
"step": 11360
},
{
"epoch": 3.5923702709106706,
"grad_norm": 0.1943525698679139,
"learning_rate": 2.2261622425055275e-06,
"loss": 0.1726,
"step": 11370
},
{
"epoch": 3.595529579022194,
"grad_norm": 0.19993993998805085,
"learning_rate": 2.2169925414718084e-06,
"loss": 0.1719,
"step": 11380
},
{
"epoch": 3.5986888871337177,
"grad_norm": 0.20390963358156805,
"learning_rate": 2.207836382100314e-06,
"loss": 0.1701,
"step": 11390
},
{
"epoch": 3.6018481952452412,
"grad_norm": 0.2052658002840104,
"learning_rate": 2.1986938089434217e-06,
"loss": 0.1715,
"step": 11400
},
{
"epoch": 3.605007503356765,
"grad_norm": 0.2050183493439441,
"learning_rate": 2.1895648664874107e-06,
"loss": 0.1719,
"step": 11410
},
{
"epoch": 3.6081668114682883,
"grad_norm": 0.2133941623970417,
"learning_rate": 2.1804495991522312e-06,
"loss": 0.1704,
"step": 11420
},
{
"epoch": 3.611326119579812,
"grad_norm": 0.19332656069708545,
"learning_rate": 2.171348051291293e-06,
"loss": 0.1681,
"step": 11430
},
{
"epoch": 3.614485427691336,
"grad_norm": 0.1933434601782223,
"learning_rate": 2.1622602671912507e-06,
"loss": 0.1704,
"step": 11440
},
{
"epoch": 3.617644735802859,
"grad_norm": 0.1983561300379488,
"learning_rate": 2.1531862910717864e-06,
"loss": 0.1706,
"step": 11450
},
{
"epoch": 3.620804043914383,
"grad_norm": 0.1979155158963241,
"learning_rate": 2.1441261670853886e-06,
"loss": 0.1686,
"step": 11460
},
{
"epoch": 3.6239633520259065,
"grad_norm": 0.19862020700111335,
"learning_rate": 2.1350799393171565e-06,
"loss": 0.1729,
"step": 11470
},
{
"epoch": 3.62712266013743,
"grad_norm": 0.19911281754040644,
"learning_rate": 2.1260476517845573e-06,
"loss": 0.1715,
"step": 11480
},
{
"epoch": 3.6302819682489536,
"grad_norm": 0.19710089391849986,
"learning_rate": 2.117029348437243e-06,
"loss": 0.1713,
"step": 11490
},
{
"epoch": 3.633441276360477,
"grad_norm": 0.2043341418376342,
"learning_rate": 2.108025073156806e-06,
"loss": 0.1719,
"step": 11500
},
{
"epoch": 3.6366005844720006,
"grad_norm": 0.22322690107677232,
"learning_rate": 2.09903486975659e-06,
"loss": 0.1729,
"step": 11510
},
{
"epoch": 3.639759892583524,
"grad_norm": 0.20236434961535074,
"learning_rate": 2.090058781981464e-06,
"loss": 0.1711,
"step": 11520
},
{
"epoch": 3.6429192006950477,
"grad_norm": 0.2123665454455019,
"learning_rate": 2.0810968535076126e-06,
"loss": 0.1701,
"step": 11530
},
{
"epoch": 3.6460785088065713,
"grad_norm": 0.20276458027943825,
"learning_rate": 2.0721491279423246e-06,
"loss": 0.1716,
"step": 11540
},
{
"epoch": 3.649237816918095,
"grad_norm": 0.18698069166162326,
"learning_rate": 2.063215648823781e-06,
"loss": 0.1682,
"step": 11550
},
{
"epoch": 3.6523971250296183,
"grad_norm": 0.19593671814658578,
"learning_rate": 2.0542964596208344e-06,
"loss": 0.1704,
"step": 11560
},
{
"epoch": 3.6555564331411423,
"grad_norm": 0.18813913173493607,
"learning_rate": 2.0453916037328174e-06,
"loss": 0.1727,
"step": 11570
},
{
"epoch": 3.6587157412526654,
"grad_norm": 0.2012970901960924,
"learning_rate": 2.036501124489308e-06,
"loss": 0.1703,
"step": 11580
},
{
"epoch": 3.6618750493641894,
"grad_norm": 0.20717799227869307,
"learning_rate": 2.0276250651499346e-06,
"loss": 0.1706,
"step": 11590
},
{
"epoch": 3.665034357475713,
"grad_norm": 0.197037281134198,
"learning_rate": 2.0187634689041603e-06,
"loss": 0.1715,
"step": 11600
},
{
"epoch": 3.6681936655872365,
"grad_norm": 0.20136557601918076,
"learning_rate": 2.009916378871074e-06,
"loss": 0.1709,
"step": 11610
},
{
"epoch": 3.67135297369876,
"grad_norm": 0.20329881900316946,
"learning_rate": 2.0010838380991776e-06,
"loss": 0.1703,
"step": 11620
},
{
"epoch": 3.6745122818102836,
"grad_norm": 0.20458710965310886,
"learning_rate": 1.9922658895661816e-06,
"loss": 0.1715,
"step": 11630
},
{
"epoch": 3.677671589921807,
"grad_norm": 0.19784202689125777,
"learning_rate": 1.983462576178786e-06,
"loss": 0.1715,
"step": 11640
},
{
"epoch": 3.6808308980333306,
"grad_norm": 0.19664140408499303,
"learning_rate": 1.9746739407724913e-06,
"loss": 0.1707,
"step": 11650
},
{
"epoch": 3.683990206144854,
"grad_norm": 0.20357355393106222,
"learning_rate": 1.965900026111364e-06,
"loss": 0.1682,
"step": 11660
},
{
"epoch": 3.6871495142563777,
"grad_norm": 0.19933592060492888,
"learning_rate": 1.9571408748878495e-06,
"loss": 0.1688,
"step": 11670
},
{
"epoch": 3.6903088223679017,
"grad_norm": 0.19698317753323233,
"learning_rate": 1.9483965297225545e-06,
"loss": 0.1708,
"step": 11680
},
{
"epoch": 3.693468130479425,
"grad_norm": 0.19171609705724965,
"learning_rate": 1.9396670331640427e-06,
"loss": 0.1714,
"step": 11690
},
{
"epoch": 3.6966274385909488,
"grad_norm": 0.20128320688947327,
"learning_rate": 1.930952427688626e-06,
"loss": 0.1699,
"step": 11700
},
{
"epoch": 3.6997867467024723,
"grad_norm": 0.19818571115089004,
"learning_rate": 1.9222527557001587e-06,
"loss": 0.1726,
"step": 11710
},
{
"epoch": 3.702946054813996,
"grad_norm": 0.20461949225316534,
"learning_rate": 1.913568059529832e-06,
"loss": 0.1708,
"step": 11720
},
{
"epoch": 3.7061053629255194,
"grad_norm": 0.1969396016325418,
"learning_rate": 1.9048983814359684e-06,
"loss": 0.1726,
"step": 11730
},
{
"epoch": 3.709264671037043,
"grad_norm": 0.20508478156000626,
"learning_rate": 1.8962437636038095e-06,
"loss": 0.171,
"step": 11740
},
{
"epoch": 3.7124239791485665,
"grad_norm": 0.19143862052589067,
"learning_rate": 1.8876042481453222e-06,
"loss": 0.1703,
"step": 11750
},
{
"epoch": 3.71558328726009,
"grad_norm": 0.19462055997090028,
"learning_rate": 1.8789798770989841e-06,
"loss": 0.1695,
"step": 11760
},
{
"epoch": 3.7187425953716136,
"grad_norm": 0.20047619813511314,
"learning_rate": 1.870370692429585e-06,
"loss": 0.169,
"step": 11770
},
{
"epoch": 3.721901903483137,
"grad_norm": 0.1997610549023301,
"learning_rate": 1.8617767360280182e-06,
"loss": 0.1722,
"step": 11780
},
{
"epoch": 3.7250612115946606,
"grad_norm": 0.2040821932549548,
"learning_rate": 1.8531980497110803e-06,
"loss": 0.1715,
"step": 11790
},
{
"epoch": 3.728220519706184,
"grad_norm": 0.20953839611305236,
"learning_rate": 1.8446346752212662e-06,
"loss": 0.1723,
"step": 11800
},
{
"epoch": 3.731379827817708,
"grad_norm": 0.19797718998398306,
"learning_rate": 1.8360866542265626e-06,
"loss": 0.1683,
"step": 11810
},
{
"epoch": 3.7345391359292313,
"grad_norm": 0.20300080445572846,
"learning_rate": 1.827554028320252e-06,
"loss": 0.1714,
"step": 11820
},
{
"epoch": 3.7376984440407552,
"grad_norm": 0.19643175410416572,
"learning_rate": 1.8190368390207063e-06,
"loss": 0.1733,
"step": 11830
},
{
"epoch": 3.740857752152279,
"grad_norm": 0.1956191661879107,
"learning_rate": 1.8105351277711857e-06,
"loss": 0.1709,
"step": 11840
},
{
"epoch": 3.7440170602638023,
"grad_norm": 0.20191888545926198,
"learning_rate": 1.8020489359396353e-06,
"loss": 0.1726,
"step": 11850
},
{
"epoch": 3.747176368375326,
"grad_norm": 0.20203040508749906,
"learning_rate": 1.7935783048184868e-06,
"loss": 0.1709,
"step": 11860
},
{
"epoch": 3.7503356764868494,
"grad_norm": 0.19868848486501622,
"learning_rate": 1.7851232756244542e-06,
"loss": 0.171,
"step": 11870
},
{
"epoch": 3.753494984598373,
"grad_norm": 0.20747266365018838,
"learning_rate": 1.776683889498339e-06,
"loss": 0.1726,
"step": 11880
},
{
"epoch": 3.7566542927098965,
"grad_norm": 0.1977057128148197,
"learning_rate": 1.768260187504819e-06,
"loss": 0.1712,
"step": 11890
},
{
"epoch": 3.75981360082142,
"grad_norm": 0.20593578039273977,
"learning_rate": 1.7598522106322618e-06,
"loss": 0.1699,
"step": 11900
},
{
"epoch": 3.7629729089329436,
"grad_norm": 0.205692865090555,
"learning_rate": 1.751459999792517e-06,
"loss": 0.1693,
"step": 11910
},
{
"epoch": 3.766132217044467,
"grad_norm": 0.20536611729964754,
"learning_rate": 1.7430835958207188e-06,
"loss": 0.1695,
"step": 11920
},
{
"epoch": 3.7692915251559906,
"grad_norm": 0.20003626633545768,
"learning_rate": 1.734723039475089e-06,
"loss": 0.1707,
"step": 11930
},
{
"epoch": 3.7724508332675146,
"grad_norm": 0.19756511256291745,
"learning_rate": 1.7263783714367388e-06,
"loss": 0.1706,
"step": 11940
},
{
"epoch": 3.7756101413790377,
"grad_norm": 0.19369294547074006,
"learning_rate": 1.7180496323094609e-06,
"loss": 0.1727,
"step": 11950
},
{
"epoch": 3.7787694494905617,
"grad_norm": 0.19163574497757527,
"learning_rate": 1.7097368626195548e-06,
"loss": 0.1716,
"step": 11960
},
{
"epoch": 3.7819287576020852,
"grad_norm": 0.19945071023523384,
"learning_rate": 1.7014401028156003e-06,
"loss": 0.17,
"step": 11970
},
{
"epoch": 3.785088065713609,
"grad_norm": 0.1941150399201019,
"learning_rate": 1.6931593932682893e-06,
"loss": 0.1716,
"step": 11980
},
{
"epoch": 3.7882473738251323,
"grad_norm": 0.19627611530197875,
"learning_rate": 1.6848947742702048e-06,
"loss": 0.17,
"step": 11990
},
{
"epoch": 3.791406681936656,
"grad_norm": 0.1985759488262882,
"learning_rate": 1.6766462860356425e-06,
"loss": 0.1705,
"step": 12000
},
{
"epoch": 3.7945659900481794,
"grad_norm": 0.2084135568916697,
"learning_rate": 1.6684139687004052e-06,
"loss": 0.1703,
"step": 12010
},
{
"epoch": 3.797725298159703,
"grad_norm": 0.20212891125021623,
"learning_rate": 1.6601978623216126e-06,
"loss": 0.1719,
"step": 12020
},
{
"epoch": 3.8008846062712265,
"grad_norm": 0.20148662876122203,
"learning_rate": 1.6519980068775026e-06,
"loss": 0.1718,
"step": 12030
},
{
"epoch": 3.80404391438275,
"grad_norm": 0.19966908684766907,
"learning_rate": 1.643814442267243e-06,
"loss": 0.1703,
"step": 12040
},
{
"epoch": 3.807203222494274,
"grad_norm": 0.19758114437735802,
"learning_rate": 1.6356472083107239e-06,
"loss": 0.1704,
"step": 12050
},
{
"epoch": 3.810362530605797,
"grad_norm": 0.2043201919277624,
"learning_rate": 1.6274963447483855e-06,
"loss": 0.1709,
"step": 12060
},
{
"epoch": 3.813521838717321,
"grad_norm": 0.2065094204780346,
"learning_rate": 1.6193618912410019e-06,
"loss": 0.1719,
"step": 12070
},
{
"epoch": 3.8166811468288446,
"grad_norm": 0.19410241771240708,
"learning_rate": 1.611243887369503e-06,
"loss": 0.1699,
"step": 12080
},
{
"epoch": 3.819840454940368,
"grad_norm": 0.19582642907268447,
"learning_rate": 1.6031423726347778e-06,
"loss": 0.1703,
"step": 12090
},
{
"epoch": 3.8229997630518917,
"grad_norm": 0.1995506401309092,
"learning_rate": 1.5950573864574808e-06,
"loss": 0.1686,
"step": 12100
},
{
"epoch": 3.8261590711634152,
"grad_norm": 0.19809864311658273,
"learning_rate": 1.5869889681778411e-06,
"loss": 0.1705,
"step": 12110
},
{
"epoch": 3.829318379274939,
"grad_norm": 0.20274297131703675,
"learning_rate": 1.5789371570554729e-06,
"loss": 0.1727,
"step": 12120
},
{
"epoch": 3.8324776873864623,
"grad_norm": 0.20601567032087037,
"learning_rate": 1.570901992269177e-06,
"loss": 0.1693,
"step": 12130
},
{
"epoch": 3.835636995497986,
"grad_norm": 0.19456210693609077,
"learning_rate": 1.5628835129167662e-06,
"loss": 0.1701,
"step": 12140
},
{
"epoch": 3.8387963036095094,
"grad_norm": 0.20115987503568447,
"learning_rate": 1.5548817580148517e-06,
"loss": 0.1721,
"step": 12150
},
{
"epoch": 3.841955611721033,
"grad_norm": 0.1958665568478278,
"learning_rate": 1.54689676649868e-06,
"loss": 0.1707,
"step": 12160
},
{
"epoch": 3.8451149198325565,
"grad_norm": 0.20025006804402964,
"learning_rate": 1.5389285772219176e-06,
"loss": 0.1702,
"step": 12170
},
{
"epoch": 3.8482742279440805,
"grad_norm": 0.20824644715314478,
"learning_rate": 1.5309772289564806e-06,
"loss": 0.1713,
"step": 12180
},
{
"epoch": 3.8514335360556036,
"grad_norm": 0.20543389482537658,
"learning_rate": 1.5230427603923386e-06,
"loss": 0.1714,
"step": 12190
},
{
"epoch": 3.8545928441671276,
"grad_norm": 0.201515908971831,
"learning_rate": 1.5151252101373266e-06,
"loss": 0.1729,
"step": 12200
},
{
"epoch": 3.857752152278651,
"grad_norm": 0.2016189641289871,
"learning_rate": 1.5072246167169574e-06,
"loss": 0.1701,
"step": 12210
},
{
"epoch": 3.8609114603901746,
"grad_norm": 0.19876889094407768,
"learning_rate": 1.4993410185742374e-06,
"loss": 0.1689,
"step": 12220
},
{
"epoch": 3.864070768501698,
"grad_norm": 0.20500710077419468,
"learning_rate": 1.4914744540694697e-06,
"loss": 0.1714,
"step": 12230
},
{
"epoch": 3.8672300766132217,
"grad_norm": 0.20129831840042556,
"learning_rate": 1.4836249614800857e-06,
"loss": 0.1706,
"step": 12240
},
{
"epoch": 3.8703893847247453,
"grad_norm": 0.1945834822439153,
"learning_rate": 1.4757925790004362e-06,
"loss": 0.1709,
"step": 12250
},
{
"epoch": 3.873548692836269,
"grad_norm": 0.1987155834635916,
"learning_rate": 1.467977344741624e-06,
"loss": 0.1717,
"step": 12260
},
{
"epoch": 3.8767080009477923,
"grad_norm": 0.21759266570145575,
"learning_rate": 1.4601792967313095e-06,
"loss": 0.1712,
"step": 12270
},
{
"epoch": 3.879867309059316,
"grad_norm": 0.20382637953104524,
"learning_rate": 1.4523984729135272e-06,
"loss": 0.1714,
"step": 12280
},
{
"epoch": 3.88302661717084,
"grad_norm": 0.1916842281105115,
"learning_rate": 1.444634911148502e-06,
"loss": 0.1692,
"step": 12290
},
{
"epoch": 3.886185925282363,
"grad_norm": 0.1946183364574122,
"learning_rate": 1.4368886492124661e-06,
"loss": 0.1699,
"step": 12300
},
{
"epoch": 3.889345233393887,
"grad_norm": 0.19920320984504467,
"learning_rate": 1.429159724797467e-06,
"loss": 0.1697,
"step": 12310
},
{
"epoch": 3.8925045415054105,
"grad_norm": 0.19953879740403313,
"learning_rate": 1.421448175511202e-06,
"loss": 0.1694,
"step": 12320
},
{
"epoch": 3.895663849616934,
"grad_norm": 0.20468537426685904,
"learning_rate": 1.4137540388768107e-06,
"loss": 0.1722,
"step": 12330
},
{
"epoch": 3.8988231577284576,
"grad_norm": 0.20676153968954222,
"learning_rate": 1.4060773523327175e-06,
"loss": 0.173,
"step": 12340
},
{
"epoch": 3.901982465839981,
"grad_norm": 0.21412532800510253,
"learning_rate": 1.3984181532324291e-06,
"loss": 0.17,
"step": 12350
},
{
"epoch": 3.9051417739515046,
"grad_norm": 0.20530217895484593,
"learning_rate": 1.3907764788443651e-06,
"loss": 0.1718,
"step": 12360
},
{
"epoch": 3.908301082063028,
"grad_norm": 0.19397608625457688,
"learning_rate": 1.383152366351671e-06,
"loss": 0.171,
"step": 12370
},
{
"epoch": 3.9114603901745517,
"grad_norm": 0.19792598868494216,
"learning_rate": 1.3755458528520422e-06,
"loss": 0.1691,
"step": 12380
},
{
"epoch": 3.9146196982860753,
"grad_norm": 0.19530536153646347,
"learning_rate": 1.3679569753575321e-06,
"loss": 0.1713,
"step": 12390
},
{
"epoch": 3.917779006397599,
"grad_norm": 0.19937572369371176,
"learning_rate": 1.3603857707943934e-06,
"loss": 0.1718,
"step": 12400
},
{
"epoch": 3.9209383145091223,
"grad_norm": 0.2045657222636314,
"learning_rate": 1.3528322760028706e-06,
"loss": 0.1705,
"step": 12410
},
{
"epoch": 3.9240976226206463,
"grad_norm": 0.19538054890753861,
"learning_rate": 1.345296527737049e-06,
"loss": 0.17,
"step": 12420
},
{
"epoch": 3.9272569307321694,
"grad_norm": 0.19898434578570567,
"learning_rate": 1.3377785626646505e-06,
"loss": 0.1708,
"step": 12430
},
{
"epoch": 3.9304162388436934,
"grad_norm": 0.20793904574642982,
"learning_rate": 1.3302784173668732e-06,
"loss": 0.17,
"step": 12440
},
{
"epoch": 3.933575546955217,
"grad_norm": 0.20762680354960933,
"learning_rate": 1.322796128338207e-06,
"loss": 0.1724,
"step": 12450
},
{
"epoch": 3.9367348550667405,
"grad_norm": 0.2018512947411419,
"learning_rate": 1.315331731986253e-06,
"loss": 0.1695,
"step": 12460
},
{
"epoch": 3.939894163178264,
"grad_norm": 0.19579741223203112,
"learning_rate": 1.3078852646315532e-06,
"loss": 0.1718,
"step": 12470
},
{
"epoch": 3.9430534712897876,
"grad_norm": 0.20067280716130292,
"learning_rate": 1.3004567625074083e-06,
"loss": 0.1701,
"step": 12480
},
{
"epoch": 3.946212779401311,
"grad_norm": 0.19388666686105327,
"learning_rate": 1.2930462617596996e-06,
"loss": 0.1711,
"step": 12490
},
{
"epoch": 3.9493720875128346,
"grad_norm": 0.19875001659515246,
"learning_rate": 1.285653798446725e-06,
"loss": 0.1725,
"step": 12500
},
{
"epoch": 3.952531395624358,
"grad_norm": 0.20497115291022464,
"learning_rate": 1.278279408539006e-06,
"loss": 0.1699,
"step": 12510
},
{
"epoch": 3.9556907037358817,
"grad_norm": 0.19011102711328773,
"learning_rate": 1.270923127919128e-06,
"loss": 0.1697,
"step": 12520
},
{
"epoch": 3.9588500118474053,
"grad_norm": 0.20042416730472096,
"learning_rate": 1.2635849923815562e-06,
"loss": 0.1711,
"step": 12530
},
{
"epoch": 3.962009319958929,
"grad_norm": 0.19397058835275352,
"learning_rate": 1.2562650376324675e-06,
"loss": 0.1715,
"step": 12540
},
{
"epoch": 3.965168628070453,
"grad_norm": 0.20187863747529436,
"learning_rate": 1.2489632992895722e-06,
"loss": 0.173,
"step": 12550
},
{
"epoch": 3.968327936181976,
"grad_norm": 0.19498840804702408,
"learning_rate": 1.2416798128819446e-06,
"loss": 0.1699,
"step": 12560
},
{
"epoch": 3.9714872442935,
"grad_norm": 0.1971626109701413,
"learning_rate": 1.2344146138498414e-06,
"loss": 0.1707,
"step": 12570
},
{
"epoch": 3.9746465524050234,
"grad_norm": 0.19237195141765748,
"learning_rate": 1.2271677375445474e-06,
"loss": 0.1723,
"step": 12580
},
{
"epoch": 3.977805860516547,
"grad_norm": 0.19369835876699062,
"learning_rate": 1.2199392192281805e-06,
"loss": 0.1722,
"step": 12590
},
{
"epoch": 3.9809651686280705,
"grad_norm": 0.20431461072784543,
"learning_rate": 1.2127290940735387e-06,
"loss": 0.1688,
"step": 12600
},
{
"epoch": 3.984124476739594,
"grad_norm": 0.20529642623566896,
"learning_rate": 1.2055373971639195e-06,
"loss": 0.168,
"step": 12610
},
{
"epoch": 3.9872837848511176,
"grad_norm": 0.19084232186590003,
"learning_rate": 1.1983641634929522e-06,
"loss": 0.17,
"step": 12620
},
{
"epoch": 3.990443092962641,
"grad_norm": 0.20780766829176076,
"learning_rate": 1.1912094279644265e-06,
"loss": 0.1679,
"step": 12630
},
{
"epoch": 3.9936024010741646,
"grad_norm": 0.19468886303948824,
"learning_rate": 1.1840732253921227e-06,
"loss": 0.1686,
"step": 12640
},
{
"epoch": 3.996761709185688,
"grad_norm": 0.19648890342838896,
"learning_rate": 1.1769555904996454e-06,
"loss": 0.1704,
"step": 12650
},
{
"epoch": 3.999921017297212,
"grad_norm": 0.19284335982757503,
"learning_rate": 1.1698565579202465e-06,
"loss": 0.1704,
"step": 12660
},
{
"epoch": 4.002843377300371,
"grad_norm": 0.19754930089679965,
"learning_rate": 1.1627761621966671e-06,
"loss": 0.1487,
"step": 12670
},
{
"epoch": 4.006002685411895,
"grad_norm": 0.19520779806174013,
"learning_rate": 1.1557144377809626e-06,
"loss": 0.1588,
"step": 12680
},
{
"epoch": 4.009161993523418,
"grad_norm": 0.19859317904582857,
"learning_rate": 1.1486714190343367e-06,
"loss": 0.1596,
"step": 12690
},
{
"epoch": 4.012321301634942,
"grad_norm": 0.20028049175205914,
"learning_rate": 1.1416471402269747e-06,
"loss": 0.1581,
"step": 12700
},
{
"epoch": 4.015480609746466,
"grad_norm": 0.18619306242942488,
"learning_rate": 1.1346416355378764e-06,
"loss": 0.1598,
"step": 12710
},
{
"epoch": 4.018639917857989,
"grad_norm": 0.19084584057427986,
"learning_rate": 1.1276549390546893e-06,
"loss": 0.1598,
"step": 12720
},
{
"epoch": 4.021799225969513,
"grad_norm": 0.1936540709695895,
"learning_rate": 1.120687084773545e-06,
"loss": 0.159,
"step": 12730
},
{
"epoch": 4.024958534081036,
"grad_norm": 0.1975921539537816,
"learning_rate": 1.1137381065988878e-06,
"loss": 0.1583,
"step": 12740
},
{
"epoch": 4.02811784219256,
"grad_norm": 0.19969147674976237,
"learning_rate": 1.1068080383433188e-06,
"loss": 0.1602,
"step": 12750
},
{
"epoch": 4.031277150304083,
"grad_norm": 0.19352808568437072,
"learning_rate": 1.0998969137274234e-06,
"loss": 0.1597,
"step": 12760
},
{
"epoch": 4.034436458415607,
"grad_norm": 0.19339061581403172,
"learning_rate": 1.0930047663796117e-06,
"loss": 0.1618,
"step": 12770
},
{
"epoch": 4.03759576652713,
"grad_norm": 0.19477299915795446,
"learning_rate": 1.0861316298359537e-06,
"loss": 0.1584,
"step": 12780
},
{
"epoch": 4.040755074638654,
"grad_norm": 0.19747662712691139,
"learning_rate": 1.0792775375400143e-06,
"loss": 0.1598,
"step": 12790
},
{
"epoch": 4.043914382750177,
"grad_norm": 0.19362010628396598,
"learning_rate": 1.0724425228426938e-06,
"loss": 0.1609,
"step": 12800
},
{
"epoch": 4.047073690861701,
"grad_norm": 0.1978158624641635,
"learning_rate": 1.0656266190020648e-06,
"loss": 0.1604,
"step": 12810
},
{
"epoch": 4.050232998973225,
"grad_norm": 0.2017835581795631,
"learning_rate": 1.058829859183204e-06,
"loss": 0.1595,
"step": 12820
},
{
"epoch": 4.053392307084748,
"grad_norm": 0.19505342405207957,
"learning_rate": 1.0520522764580466e-06,
"loss": 0.1601,
"step": 12830
},
{
"epoch": 4.056551615196272,
"grad_norm": 0.19659192583114876,
"learning_rate": 1.0452939038052045e-06,
"loss": 0.1582,
"step": 12840
},
{
"epoch": 4.0597109233077955,
"grad_norm": 0.19062889408725014,
"learning_rate": 1.0385547741098222e-06,
"loss": 0.1594,
"step": 12850
},
{
"epoch": 4.0628702314193195,
"grad_norm": 0.2007575871127392,
"learning_rate": 1.0318349201634116e-06,
"loss": 0.1609,
"step": 12860
},
{
"epoch": 4.066029539530843,
"grad_norm": 0.19407495103479086,
"learning_rate": 1.02513437466369e-06,
"loss": 0.1601,
"step": 12870
},
{
"epoch": 4.0691888476423665,
"grad_norm": 0.1926256014241401,
"learning_rate": 1.01845317021442e-06,
"loss": 0.1597,
"step": 12880
},
{
"epoch": 4.07234815575389,
"grad_norm": 0.19084665134324094,
"learning_rate": 1.0117913393252632e-06,
"loss": 0.1605,
"step": 12890
},
{
"epoch": 4.075507463865414,
"grad_norm": 0.19709574860841494,
"learning_rate": 1.0051489144116e-06,
"loss": 0.1608,
"step": 12900
},
{
"epoch": 4.078666771976937,
"grad_norm": 0.20095251133089448,
"learning_rate": 9.985259277943977e-07,
"loss": 0.1602,
"step": 12910
},
{
"epoch": 4.081826080088461,
"grad_norm": 0.19685311750698353,
"learning_rate": 9.919224117000281e-07,
"loss": 0.1614,
"step": 12920
},
{
"epoch": 4.084985388199984,
"grad_norm": 0.20241005189325584,
"learning_rate": 9.853383982601294e-07,
"loss": 0.1596,
"step": 12930
},
{
"epoch": 4.088144696311508,
"grad_norm": 0.19400175840515052,
"learning_rate": 9.787739195114427e-07,
"loss": 0.1592,
"step": 12940
},
{
"epoch": 4.091304004423032,
"grad_norm": 0.20303783315409038,
"learning_rate": 9.722290073956536e-07,
"loss": 0.1597,
"step": 12950
},
{
"epoch": 4.094463312534555,
"grad_norm": 0.1936600304781095,
"learning_rate": 9.657036937592423e-07,
"loss": 0.1621,
"step": 12960
},
{
"epoch": 4.097622620646079,
"grad_norm": 0.19796367201440496,
"learning_rate": 9.59198010353326e-07,
"loss": 0.1597,
"step": 12970
},
{
"epoch": 4.100781928757602,
"grad_norm": 0.18342546130725573,
"learning_rate": 9.527119888334996e-07,
"loss": 0.1582,
"step": 12980
},
{
"epoch": 4.103941236869126,
"grad_norm": 0.20317670235586452,
"learning_rate": 9.462456607596954e-07,
"loss": 0.1603,
"step": 12990
},
{
"epoch": 4.107100544980649,
"grad_norm": 0.19569726312873856,
"learning_rate": 9.397990575960103e-07,
"loss": 0.1578,
"step": 13000
},
{
"epoch": 4.110259853092173,
"grad_norm": 0.20338486160541247,
"learning_rate": 9.333722107105725e-07,
"loss": 0.1606,
"step": 13010
},
{
"epoch": 4.113419161203696,
"grad_norm": 0.1875071227775237,
"learning_rate": 9.269651513753725e-07,
"loss": 0.1603,
"step": 13020
},
{
"epoch": 4.11657846931522,
"grad_norm": 0.1927619122892459,
"learning_rate": 9.205779107661201e-07,
"loss": 0.1581,
"step": 13030
},
{
"epoch": 4.119737777426743,
"grad_norm": 0.20452574688837152,
"learning_rate": 9.142105199620916e-07,
"loss": 0.159,
"step": 13040
},
{
"epoch": 4.122897085538267,
"grad_norm": 0.2023565306937573,
"learning_rate": 9.078630099459768e-07,
"loss": 0.1604,
"step": 13050
},
{
"epoch": 4.12605639364979,
"grad_norm": 0.19951162187159371,
"learning_rate": 9.015354116037256e-07,
"loss": 0.158,
"step": 13060
},
{
"epoch": 4.129215701761314,
"grad_norm": 0.19668566056760164,
"learning_rate": 8.952277557244077e-07,
"loss": 0.1589,
"step": 13070
},
{
"epoch": 4.132375009872838,
"grad_norm": 0.18983274506050818,
"learning_rate": 8.889400730000475e-07,
"loss": 0.1599,
"step": 13080
},
{
"epoch": 4.135534317984361,
"grad_norm": 0.19379806465727592,
"learning_rate": 8.826723940254923e-07,
"loss": 0.1614,
"step": 13090
},
{
"epoch": 4.138693626095885,
"grad_norm": 0.2015655061435355,
"learning_rate": 8.76424749298247e-07,
"loss": 0.1596,
"step": 13100
},
{
"epoch": 4.141852934207408,
"grad_norm": 0.20253336827566704,
"learning_rate": 8.701971692183365e-07,
"loss": 0.1605,
"step": 13110
},
{
"epoch": 4.145012242318932,
"grad_norm": 0.19563497160996118,
"learning_rate": 8.639896840881534e-07,
"loss": 0.1607,
"step": 13120
},
{
"epoch": 4.1481715504304555,
"grad_norm": 0.20060195439697434,
"learning_rate": 8.578023241123134e-07,
"loss": 0.16,
"step": 13130
},
{
"epoch": 4.1513308585419795,
"grad_norm": 0.19835862074182956,
"learning_rate": 8.516351193975042e-07,
"loss": 0.1631,
"step": 13140
},
{
"epoch": 4.154490166653503,
"grad_norm": 0.1869510365958818,
"learning_rate": 8.454880999523435e-07,
"loss": 0.1587,
"step": 13150
},
{
"epoch": 4.1576494747650266,
"grad_norm": 0.20004584908939843,
"learning_rate": 8.393612956872254e-07,
"loss": 0.1621,
"step": 13160
},
{
"epoch": 4.16080878287655,
"grad_norm": 0.1856765556024912,
"learning_rate": 8.332547364141891e-07,
"loss": 0.159,
"step": 13170
},
{
"epoch": 4.163968090988074,
"grad_norm": 0.19589805621856057,
"learning_rate": 8.271684518467571e-07,
"loss": 0.1602,
"step": 13180
},
{
"epoch": 4.167127399099598,
"grad_norm": 0.19689293762125867,
"learning_rate": 8.211024715998023e-07,
"loss": 0.1591,
"step": 13190
},
{
"epoch": 4.170286707211121,
"grad_norm": 0.19503964405624044,
"learning_rate": 8.150568251893992e-07,
"loss": 0.1604,
"step": 13200
},
{
"epoch": 4.173446015322645,
"grad_norm": 0.1932945597906981,
"learning_rate": 8.09031542032681e-07,
"loss": 0.1596,
"step": 13210
},
{
"epoch": 4.176605323434168,
"grad_norm": 0.1954697082605763,
"learning_rate": 8.030266514476976e-07,
"loss": 0.1596,
"step": 13220
},
{
"epoch": 4.179764631545692,
"grad_norm": 0.18739853815459867,
"learning_rate": 7.97042182653271e-07,
"loss": 0.1611,
"step": 13230
},
{
"epoch": 4.182923939657215,
"grad_norm": 0.2008506504946661,
"learning_rate": 7.910781647688515e-07,
"loss": 0.1594,
"step": 13240
},
{
"epoch": 4.186083247768739,
"grad_norm": 0.1997520617477428,
"learning_rate": 7.851346268143861e-07,
"loss": 0.1594,
"step": 13250
},
{
"epoch": 4.189242555880262,
"grad_norm": 0.19380452684319097,
"learning_rate": 7.7921159771016e-07,
"loss": 0.1608,
"step": 13260
},
{
"epoch": 4.192401863991786,
"grad_norm": 0.19204023557075717,
"learning_rate": 7.733091062766751e-07,
"loss": 0.1603,
"step": 13270
},
{
"epoch": 4.195561172103309,
"grad_norm": 0.1963780227663788,
"learning_rate": 7.674271812344935e-07,
"loss": 0.1581,
"step": 13280
},
{
"epoch": 4.198720480214833,
"grad_norm": 0.18775094836052963,
"learning_rate": 7.615658512041068e-07,
"loss": 0.1585,
"step": 13290
},
{
"epoch": 4.201879788326356,
"grad_norm": 0.1879252136754587,
"learning_rate": 7.557251447057962e-07,
"loss": 0.16,
"step": 13300
},
{
"epoch": 4.20503909643788,
"grad_norm": 0.19489071033267957,
"learning_rate": 7.499050901594896e-07,
"loss": 0.1587,
"step": 13310
},
{
"epoch": 4.208198404549404,
"grad_norm": 0.20273324039262924,
"learning_rate": 7.441057158846276e-07,
"loss": 0.1591,
"step": 13320
},
{
"epoch": 4.211357712660927,
"grad_norm": 0.1982379447248168,
"learning_rate": 7.383270501000245e-07,
"loss": 0.1599,
"step": 13330
},
{
"epoch": 4.214517020772451,
"grad_norm": 0.1856228450158758,
"learning_rate": 7.325691209237251e-07,
"loss": 0.1581,
"step": 13340
},
{
"epoch": 4.217676328883974,
"grad_norm": 0.19437453651795136,
"learning_rate": 7.268319563728831e-07,
"loss": 0.1586,
"step": 13350
},
{
"epoch": 4.220835636995498,
"grad_norm": 0.18694690942616607,
"learning_rate": 7.211155843636059e-07,
"loss": 0.1603,
"step": 13360
},
{
"epoch": 4.223994945107021,
"grad_norm": 0.19393003030705352,
"learning_rate": 7.154200327108313e-07,
"loss": 0.162,
"step": 13370
},
{
"epoch": 4.227154253218545,
"grad_norm": 0.1927448263517383,
"learning_rate": 7.097453291281887e-07,
"loss": 0.1612,
"step": 13380
},
{
"epoch": 4.230313561330068,
"grad_norm": 0.20215663142647716,
"learning_rate": 7.040915012278648e-07,
"loss": 0.1589,
"step": 13390
},
{
"epoch": 4.233472869441592,
"grad_norm": 0.21545803120847262,
"learning_rate": 6.984585765204665e-07,
"loss": 0.16,
"step": 13400
},
{
"epoch": 4.2366321775531155,
"grad_norm": 0.20075406426026432,
"learning_rate": 6.928465824148923e-07,
"loss": 0.1594,
"step": 13410
},
{
"epoch": 4.2397914856646395,
"grad_norm": 0.2394877373371559,
"learning_rate": 6.872555462181907e-07,
"loss": 0.1592,
"step": 13420
},
{
"epoch": 4.2429507937761635,
"grad_norm": 0.19216510992720218,
"learning_rate": 6.816854951354396e-07,
"loss": 0.1573,
"step": 13430
},
{
"epoch": 4.246110101887687,
"grad_norm": 0.2020202616715345,
"learning_rate": 6.761364562695993e-07,
"loss": 0.161,
"step": 13440
},
{
"epoch": 4.2492694099992105,
"grad_norm": 0.189014469828388,
"learning_rate": 6.706084566213933e-07,
"loss": 0.1589,
"step": 13450
},
{
"epoch": 4.252428718110734,
"grad_norm": 0.19279477497914185,
"learning_rate": 6.651015230891694e-07,
"loss": 0.1608,
"step": 13460
},
{
"epoch": 4.255588026222258,
"grad_norm": 0.20318784264174414,
"learning_rate": 6.596156824687722e-07,
"loss": 0.1596,
"step": 13470
},
{
"epoch": 4.258747334333781,
"grad_norm": 0.19924677539875918,
"learning_rate": 6.541509614534103e-07,
"loss": 0.1593,
"step": 13480
},
{
"epoch": 4.261906642445305,
"grad_norm": 0.20657626622701158,
"learning_rate": 6.487073866335298e-07,
"loss": 0.1598,
"step": 13490
},
{
"epoch": 4.265065950556828,
"grad_norm": 0.18954862382773452,
"learning_rate": 6.432849844966782e-07,
"loss": 0.1607,
"step": 13500
},
{
"epoch": 4.268225258668352,
"grad_norm": 0.20096767940062027,
"learning_rate": 6.378837814273886e-07,
"loss": 0.1602,
"step": 13510
},
{
"epoch": 4.271384566779875,
"grad_norm": 0.19873738285793385,
"learning_rate": 6.325038037070336e-07,
"loss": 0.1602,
"step": 13520
},
{
"epoch": 4.274543874891399,
"grad_norm": 0.2026478071540902,
"learning_rate": 6.271450775137116e-07,
"loss": 0.1579,
"step": 13530
},
{
"epoch": 4.277703183002922,
"grad_norm": 0.19853177565270944,
"learning_rate": 6.218076289221153e-07,
"loss": 0.1598,
"step": 13540
},
{
"epoch": 4.280862491114446,
"grad_norm": 0.20829772712743705,
"learning_rate": 6.164914839034008e-07,
"loss": 0.1587,
"step": 13550
},
{
"epoch": 4.28402179922597,
"grad_norm": 0.196256766990733,
"learning_rate": 6.111966683250681e-07,
"loss": 0.1604,
"step": 13560
},
{
"epoch": 4.287181107337493,
"grad_norm": 0.19618426976460837,
"learning_rate": 6.059232079508276e-07,
"loss": 0.1603,
"step": 13570
},
{
"epoch": 4.290340415449017,
"grad_norm": 0.202438781233527,
"learning_rate": 6.006711284404837e-07,
"loss": 0.1612,
"step": 13580
},
{
"epoch": 4.29349972356054,
"grad_norm": 0.19704572347653887,
"learning_rate": 5.954404553497989e-07,
"loss": 0.1602,
"step": 13590
},
{
"epoch": 4.296659031672064,
"grad_norm": 0.19168113950072857,
"learning_rate": 5.902312141303806e-07,
"loss": 0.1604,
"step": 13600
},
{
"epoch": 4.299818339783587,
"grad_norm": 0.2003879313833993,
"learning_rate": 5.850434301295494e-07,
"loss": 0.1596,
"step": 13610
},
{
"epoch": 4.302977647895111,
"grad_norm": 0.19759760667562565,
"learning_rate": 5.798771285902205e-07,
"loss": 0.1604,
"step": 13620
},
{
"epoch": 4.306136956006634,
"grad_norm": 0.19863833857948981,
"learning_rate": 5.747323346507777e-07,
"loss": 0.1592,
"step": 13630
},
{
"epoch": 4.309296264118158,
"grad_norm": 0.2024378958773524,
"learning_rate": 5.696090733449528e-07,
"loss": 0.1601,
"step": 13640
},
{
"epoch": 4.312455572229681,
"grad_norm": 0.20215707871180005,
"learning_rate": 5.645073696017028e-07,
"loss": 0.1585,
"step": 13650
},
{
"epoch": 4.315614880341205,
"grad_norm": 0.18689645886289935,
"learning_rate": 5.594272482450902e-07,
"loss": 0.1573,
"step": 13660
},
{
"epoch": 4.318774188452728,
"grad_norm": 0.2053319708006727,
"learning_rate": 5.543687339941584e-07,
"loss": 0.1615,
"step": 13670
},
{
"epoch": 4.321933496564252,
"grad_norm": 0.19171061220997784,
"learning_rate": 5.493318514628171e-07,
"loss": 0.1616,
"step": 13680
},
{
"epoch": 4.325092804675776,
"grad_norm": 0.19563207560203782,
"learning_rate": 5.443166251597187e-07,
"loss": 0.16,
"step": 13690
},
{
"epoch": 4.3282521127872995,
"grad_norm": 0.19760551499634138,
"learning_rate": 5.393230794881399e-07,
"loss": 0.1587,
"step": 13700
},
{
"epoch": 4.3314114208988235,
"grad_norm": 0.1919192060783246,
"learning_rate": 5.343512387458621e-07,
"loss": 0.1598,
"step": 13710
},
{
"epoch": 4.334570729010347,
"grad_norm": 0.19916105439716603,
"learning_rate": 5.294011271250549e-07,
"loss": 0.1581,
"step": 13720
},
{
"epoch": 4.3377300371218706,
"grad_norm": 0.1924623858324796,
"learning_rate": 5.244727687121581e-07,
"loss": 0.1585,
"step": 13730
},
{
"epoch": 4.340889345233394,
"grad_norm": 0.19222422456276647,
"learning_rate": 5.195661874877633e-07,
"loss": 0.1585,
"step": 13740
},
{
"epoch": 4.344048653344918,
"grad_norm": 0.1925599596150807,
"learning_rate": 5.14681407326495e-07,
"loss": 0.161,
"step": 13750
},
{
"epoch": 4.347207961456441,
"grad_norm": 0.1950428531940909,
"learning_rate": 5.098184519969041e-07,
"loss": 0.1581,
"step": 13760
},
{
"epoch": 4.350367269567965,
"grad_norm": 0.20354980424119892,
"learning_rate": 5.049773451613382e-07,
"loss": 0.1607,
"step": 13770
},
{
"epoch": 4.353526577679488,
"grad_norm": 0.1868628417947928,
"learning_rate": 5.001581103758374e-07,
"loss": 0.1601,
"step": 13780
},
{
"epoch": 4.356685885791012,
"grad_norm": 0.19984754637012053,
"learning_rate": 4.95360771090016e-07,
"loss": 0.159,
"step": 13790
},
{
"epoch": 4.359845193902535,
"grad_norm": 0.19703967834397978,
"learning_rate": 4.905853506469477e-07,
"loss": 0.159,
"step": 13800
},
{
"epoch": 4.363004502014059,
"grad_norm": 0.18977597668459417,
"learning_rate": 4.858318722830518e-07,
"loss": 0.1583,
"step": 13810
},
{
"epoch": 4.366163810125583,
"grad_norm": 0.19339667722600956,
"learning_rate": 4.811003591279834e-07,
"loss": 0.1585,
"step": 13820
},
{
"epoch": 4.369323118237106,
"grad_norm": 0.1938566607621067,
"learning_rate": 4.7639083420451425e-07,
"loss": 0.1593,
"step": 13830
},
{
"epoch": 4.37248242634863,
"grad_norm": 0.19830744178047272,
"learning_rate": 4.71703320428431e-07,
"loss": 0.1591,
"step": 13840
},
{
"epoch": 4.375641734460153,
"grad_norm": 0.20447285160479836,
"learning_rate": 4.6703784060841194e-07,
"loss": 0.1592,
"step": 13850
},
{
"epoch": 4.378801042571677,
"grad_norm": 0.19999302428118468,
"learning_rate": 4.623944174459238e-07,
"loss": 0.1596,
"step": 13860
},
{
"epoch": 4.3819603506832,
"grad_norm": 0.19431839969423845,
"learning_rate": 4.5777307353511103e-07,
"loss": 0.1587,
"step": 13870
},
{
"epoch": 4.385119658794724,
"grad_norm": 0.19213231385230065,
"learning_rate": 4.53173831362681e-07,
"loss": 0.1598,
"step": 13880
},
{
"epoch": 4.388278966906247,
"grad_norm": 0.19727512610076384,
"learning_rate": 4.485967133078001e-07,
"loss": 0.1595,
"step": 13890
},
{
"epoch": 4.391438275017771,
"grad_norm": 0.20137928321566095,
"learning_rate": 4.440417416419812e-07,
"loss": 0.1608,
"step": 13900
},
{
"epoch": 4.394597583129294,
"grad_norm": 0.19192751875350025,
"learning_rate": 4.395089385289747e-07,
"loss": 0.1582,
"step": 13910
},
{
"epoch": 4.397756891240818,
"grad_norm": 0.19508844609767204,
"learning_rate": 4.3499832602466764e-07,
"loss": 0.1612,
"step": 13920
},
{
"epoch": 4.400916199352342,
"grad_norm": 0.20085050538502322,
"learning_rate": 4.3050992607696354e-07,
"loss": 0.1585,
"step": 13930
},
{
"epoch": 4.404075507463865,
"grad_norm": 0.19345802602283768,
"learning_rate": 4.260437605256912e-07,
"loss": 0.1593,
"step": 13940
},
{
"epoch": 4.407234815575389,
"grad_norm": 0.19853948221248452,
"learning_rate": 4.215998511024844e-07,
"loss": 0.1593,
"step": 13950
},
{
"epoch": 4.410394123686912,
"grad_norm": 0.19645554805603088,
"learning_rate": 4.171782194306856e-07,
"loss": 0.1581,
"step": 13960
},
{
"epoch": 4.413553431798436,
"grad_norm": 0.18983249029369767,
"learning_rate": 4.127788870252358e-07,
"loss": 0.1592,
"step": 13970
},
{
"epoch": 4.4167127399099595,
"grad_norm": 0.19264647219798062,
"learning_rate": 4.084018752925728e-07,
"loss": 0.162,
"step": 13980
},
{
"epoch": 4.4198720480214835,
"grad_norm": 0.19188102638619134,
"learning_rate": 4.0404720553052225e-07,
"loss": 0.1599,
"step": 13990
},
{
"epoch": 4.423031356133007,
"grad_norm": 0.19594341326526055,
"learning_rate": 3.997148989282035e-07,
"loss": 0.1582,
"step": 14000
},
{
"epoch": 4.426190664244531,
"grad_norm": 0.19679190764934815,
"learning_rate": 3.9540497656591235e-07,
"loss": 0.16,
"step": 14010
},
{
"epoch": 4.429349972356054,
"grad_norm": 0.2001818569734459,
"learning_rate": 3.911174594150352e-07,
"loss": 0.161,
"step": 14020
},
{
"epoch": 4.432509280467578,
"grad_norm": 0.19932050541026844,
"learning_rate": 3.868523683379316e-07,
"loss": 0.1609,
"step": 14030
},
{
"epoch": 4.435668588579102,
"grad_norm": 0.19545767797055838,
"learning_rate": 3.8260972408784236e-07,
"loss": 0.1586,
"step": 14040
},
{
"epoch": 4.438827896690625,
"grad_norm": 0.19560441338063028,
"learning_rate": 3.7838954730878505e-07,
"loss": 0.1597,
"step": 14050
},
{
"epoch": 4.441987204802149,
"grad_norm": 0.19520076627630972,
"learning_rate": 3.741918585354548e-07,
"loss": 0.1601,
"step": 14060
},
{
"epoch": 4.445146512913672,
"grad_norm": 0.2021625860562286,
"learning_rate": 3.7001667819312303e-07,
"loss": 0.1589,
"step": 14070
},
{
"epoch": 4.448305821025196,
"grad_norm": 0.19657547564424072,
"learning_rate": 3.6586402659753994e-07,
"loss": 0.1593,
"step": 14080
},
{
"epoch": 4.451465129136719,
"grad_norm": 0.2371545042414208,
"learning_rate": 3.617339239548312e-07,
"loss": 0.1602,
"step": 14090
},
{
"epoch": 4.454624437248243,
"grad_norm": 0.19885312184722165,
"learning_rate": 3.5762639036140856e-07,
"loss": 0.1595,
"step": 14100
},
{
"epoch": 4.457783745359766,
"grad_norm": 0.20782243257336264,
"learning_rate": 3.5354144580385997e-07,
"loss": 0.1602,
"step": 14110
},
{
"epoch": 4.46094305347129,
"grad_norm": 0.18932292467991474,
"learning_rate": 3.494791101588657e-07,
"loss": 0.1616,
"step": 14120
},
{
"epoch": 4.464102361582813,
"grad_norm": 0.1919157424283962,
"learning_rate": 3.454394031930885e-07,
"loss": 0.1593,
"step": 14130
},
{
"epoch": 4.467261669694337,
"grad_norm": 0.2010076659233086,
"learning_rate": 3.414223445630865e-07,
"loss": 0.1599,
"step": 14140
},
{
"epoch": 4.47042097780586,
"grad_norm": 0.20255576550229257,
"learning_rate": 3.3742795381521533e-07,
"loss": 0.1593,
"step": 14150
},
{
"epoch": 4.473580285917384,
"grad_norm": 0.194621411799477,
"learning_rate": 3.334562503855321e-07,
"loss": 0.1597,
"step": 14160
},
{
"epoch": 4.476739594028908,
"grad_norm": 0.18866219829191938,
"learning_rate": 3.295072535996974e-07,
"loss": 0.1581,
"step": 14170
},
{
"epoch": 4.479898902140431,
"grad_norm": 0.19349313221379064,
"learning_rate": 3.255809826728923e-07,
"loss": 0.1601,
"step": 14180
},
{
"epoch": 4.483058210251955,
"grad_norm": 0.20115409541502788,
"learning_rate": 3.2167745670970973e-07,
"loss": 0.1601,
"step": 14190
},
{
"epoch": 4.486217518363478,
"grad_norm": 0.18780373745693515,
"learning_rate": 3.1779669470407615e-07,
"loss": 0.1589,
"step": 14200
},
{
"epoch": 4.489376826475002,
"grad_norm": 0.19486811896601117,
"learning_rate": 3.1393871553914654e-07,
"loss": 0.1587,
"step": 14210
},
{
"epoch": 4.492536134586525,
"grad_norm": 0.2025724767403799,
"learning_rate": 3.101035379872219e-07,
"loss": 0.1595,
"step": 14220
},
{
"epoch": 4.495695442698049,
"grad_norm": 0.2080673328738384,
"learning_rate": 3.06291180709653e-07,
"loss": 0.1593,
"step": 14230
},
{
"epoch": 4.498854750809572,
"grad_norm": 0.19149699902846998,
"learning_rate": 3.0250166225675115e-07,
"loss": 0.1599,
"step": 14240
},
{
"epoch": 4.502014058921096,
"grad_norm": 0.20411585929483625,
"learning_rate": 2.987350010676976e-07,
"loss": 0.1602,
"step": 14250
},
{
"epoch": 4.5051733670326195,
"grad_norm": 0.19343496961687232,
"learning_rate": 2.9499121547045426e-07,
"loss": 0.1599,
"step": 14260
},
{
"epoch": 4.5083326751441435,
"grad_norm": 0.19449547358661812,
"learning_rate": 2.912703236816722e-07,
"loss": 0.1606,
"step": 14270
},
{
"epoch": 4.511491983255667,
"grad_norm": 0.1973387536772748,
"learning_rate": 2.8757234380660857e-07,
"loss": 0.1599,
"step": 14280
},
{
"epoch": 4.514651291367191,
"grad_norm": 0.18818779692322143,
"learning_rate": 2.838972938390311e-07,
"loss": 0.1601,
"step": 14290
},
{
"epoch": 4.5178105994787146,
"grad_norm": 0.19985204703053389,
"learning_rate": 2.802451916611365e-07,
"loss": 0.1583,
"step": 14300
},
{
"epoch": 4.520969907590238,
"grad_norm": 0.19536890400548887,
"learning_rate": 2.7661605504346045e-07,
"loss": 0.1608,
"step": 14310
},
{
"epoch": 4.524129215701762,
"grad_norm": 0.18811223542835254,
"learning_rate": 2.730099016447929e-07,
"loss": 0.1596,
"step": 14320
},
{
"epoch": 4.527288523813285,
"grad_norm": 0.19468411001885044,
"learning_rate": 2.6942674901209e-07,
"loss": 0.16,
"step": 14330
},
{
"epoch": 4.530447831924809,
"grad_norm": 0.20552266635420624,
"learning_rate": 2.658666145803912e-07,
"loss": 0.1615,
"step": 14340
},
{
"epoch": 4.533607140036332,
"grad_norm": 0.1937881234664753,
"learning_rate": 2.623295156727301e-07,
"loss": 0.1578,
"step": 14350
},
{
"epoch": 4.536766448147856,
"grad_norm": 0.20412087334287093,
"learning_rate": 2.588154695000589e-07,
"loss": 0.1612,
"step": 14360
},
{
"epoch": 4.539925756259379,
"grad_norm": 0.19434151561756924,
"learning_rate": 2.55324493161152e-07,
"loss": 0.1584,
"step": 14370
},
{
"epoch": 4.543085064370903,
"grad_norm": 0.1908270600724035,
"learning_rate": 2.5185660364253515e-07,
"loss": 0.1593,
"step": 14380
},
{
"epoch": 4.546244372482426,
"grad_norm": 0.2004261876800901,
"learning_rate": 2.484118178183953e-07,
"loss": 0.1581,
"step": 14390
},
{
"epoch": 4.54940368059395,
"grad_norm": 0.19755205920634125,
"learning_rate": 2.4499015245049997e-07,
"loss": 0.1601,
"step": 14400
},
{
"epoch": 4.552562988705473,
"grad_norm": 0.198156216790078,
"learning_rate": 2.415916241881172e-07,
"loss": 0.1606,
"step": 14410
},
{
"epoch": 4.555722296816997,
"grad_norm": 0.195998760066744,
"learning_rate": 2.382162495679341e-07,
"loss": 0.1601,
"step": 14420
},
{
"epoch": 4.558881604928521,
"grad_norm": 0.18945574448552882,
"learning_rate": 2.3486404501397497e-07,
"loss": 0.158,
"step": 14430
},
{
"epoch": 4.562040913040044,
"grad_norm": 0.19748534603778248,
"learning_rate": 2.315350268375227e-07,
"loss": 0.1574,
"step": 14440
},
{
"epoch": 4.565200221151568,
"grad_norm": 0.20176443804299427,
"learning_rate": 2.2822921123703822e-07,
"loss": 0.1603,
"step": 14450
},
{
"epoch": 4.568359529263091,
"grad_norm": 0.19789674088621473,
"learning_rate": 2.249466142980844e-07,
"loss": 0.1598,
"step": 14460
},
{
"epoch": 4.571518837374615,
"grad_norm": 0.187818302106187,
"learning_rate": 2.2168725199324336e-07,
"loss": 0.159,
"step": 14470
},
{
"epoch": 4.574678145486138,
"grad_norm": 0.20046350486590453,
"learning_rate": 2.1845114018204382e-07,
"loss": 0.16,
"step": 14480
},
{
"epoch": 4.577837453597662,
"grad_norm": 0.1956285243391142,
"learning_rate": 2.1523829461087997e-07,
"loss": 0.1606,
"step": 14490
},
{
"epoch": 4.580996761709185,
"grad_norm": 0.19955861141039574,
"learning_rate": 2.12048730912936e-07,
"loss": 0.1599,
"step": 14500
},
{
"epoch": 4.584156069820709,
"grad_norm": 0.19999137325948985,
"learning_rate": 2.0888246460811168e-07,
"loss": 0.1581,
"step": 14510
},
{
"epoch": 4.587315377932233,
"grad_norm": 0.19609764005409422,
"learning_rate": 2.057395111029431e-07,
"loss": 0.1587,
"step": 14520
},
{
"epoch": 4.590474686043756,
"grad_norm": 0.19243471223634231,
"learning_rate": 2.0261988569053205e-07,
"loss": 0.1585,
"step": 14530
},
{
"epoch": 4.5936339941552795,
"grad_norm": 0.20046273900446646,
"learning_rate": 1.995236035504694e-07,
"loss": 0.1602,
"step": 14540
},
{
"epoch": 4.5967933022668035,
"grad_norm": 0.19670271834661107,
"learning_rate": 1.9645067974876086e-07,
"loss": 0.1593,
"step": 14550
},
{
"epoch": 4.5999526103783275,
"grad_norm": 0.19348803453560268,
"learning_rate": 1.9340112923775467e-07,
"loss": 0.1572,
"step": 14560
},
{
"epoch": 4.603111918489851,
"grad_norm": 0.19604507063856969,
"learning_rate": 1.9037496685606782e-07,
"loss": 0.1615,
"step": 14570
},
{
"epoch": 4.606271226601375,
"grad_norm": 0.19905957494150894,
"learning_rate": 1.873722073285156e-07,
"loss": 0.1599,
"step": 14580
},
{
"epoch": 4.609430534712898,
"grad_norm": 0.1931564026479933,
"learning_rate": 1.8439286526603816e-07,
"loss": 0.1605,
"step": 14590
},
{
"epoch": 4.612589842824422,
"grad_norm": 0.1957472742685542,
"learning_rate": 1.814369551656281e-07,
"loss": 0.1576,
"step": 14600
},
{
"epoch": 4.615749150935945,
"grad_norm": 0.19222186879935804,
"learning_rate": 1.7850449141026626e-07,
"loss": 0.158,
"step": 14610
},
{
"epoch": 4.618908459047469,
"grad_norm": 0.19288882737133697,
"learning_rate": 1.755954882688432e-07,
"loss": 0.1599,
"step": 14620
},
{
"epoch": 4.622067767158992,
"grad_norm": 0.19173903599470532,
"learning_rate": 1.7270995989609685e-07,
"loss": 0.163,
"step": 14630
},
{
"epoch": 4.625227075270516,
"grad_norm": 0.1971599904397844,
"learning_rate": 1.6984792033253873e-07,
"loss": 0.1624,
"step": 14640
},
{
"epoch": 4.62838638338204,
"grad_norm": 0.20237639199908425,
"learning_rate": 1.67009383504389e-07,
"loss": 0.1599,
"step": 14650
},
{
"epoch": 4.631545691493563,
"grad_norm": 0.19640473192083982,
"learning_rate": 1.6419436322350602e-07,
"loss": 0.1582,
"step": 14660
},
{
"epoch": 4.634704999605087,
"grad_norm": 0.19394680624920346,
"learning_rate": 1.6140287318732295e-07,
"loss": 0.1612,
"step": 14670
},
{
"epoch": 4.63786430771661,
"grad_norm": 0.19411893620985973,
"learning_rate": 1.5863492697877403e-07,
"loss": 0.1579,
"step": 14680
},
{
"epoch": 4.641023615828134,
"grad_norm": 0.19802298148744443,
"learning_rate": 1.5589053806623845e-07,
"loss": 0.1599,
"step": 14690
},
{
"epoch": 4.644182923939657,
"grad_norm": 0.19535413598264334,
"learning_rate": 1.5316971980346597e-07,
"loss": 0.1563,
"step": 14700
},
{
"epoch": 4.647342232051181,
"grad_norm": 0.19497113149503717,
"learning_rate": 1.5047248542951586e-07,
"loss": 0.1593,
"step": 14710
},
{
"epoch": 4.650501540162704,
"grad_norm": 0.2020301060995925,
"learning_rate": 1.4779884806869262e-07,
"loss": 0.1579,
"step": 14720
},
{
"epoch": 4.653660848274228,
"grad_norm": 0.19311882101118522,
"learning_rate": 1.4514882073048186e-07,
"loss": 0.1603,
"step": 14730
},
{
"epoch": 4.656820156385751,
"grad_norm": 0.19069864706937145,
"learning_rate": 1.4252241630948515e-07,
"loss": 0.159,
"step": 14740
},
{
"epoch": 4.659979464497275,
"grad_norm": 0.19826154207098273,
"learning_rate": 1.3991964758536148e-07,
"loss": 0.1594,
"step": 14750
},
{
"epoch": 4.663138772608798,
"grad_norm": 0.20463683774068114,
"learning_rate": 1.3734052722275849e-07,
"loss": 0.1607,
"step": 14760
},
{
"epoch": 4.666298080720322,
"grad_norm": 0.19219549706945396,
"learning_rate": 1.3478506777125865e-07,
"loss": 0.1574,
"step": 14770
},
{
"epoch": 4.669457388831846,
"grad_norm": 0.19753785214231598,
"learning_rate": 1.3225328166531158e-07,
"loss": 0.1599,
"step": 14780
},
{
"epoch": 4.672616696943369,
"grad_norm": 0.1970861929609217,
"learning_rate": 1.297451812241779e-07,
"loss": 0.1607,
"step": 14790
},
{
"epoch": 4.675776005054893,
"grad_norm": 0.1966183263052465,
"learning_rate": 1.2726077865186648e-07,
"loss": 0.159,
"step": 14800
},
{
"epoch": 4.678935313166416,
"grad_norm": 0.1943429806373708,
"learning_rate": 1.2480008603707627e-07,
"loss": 0.158,
"step": 14810
},
{
"epoch": 4.68209462127794,
"grad_norm": 0.19589426287899958,
"learning_rate": 1.223631153531385e-07,
"loss": 0.1577,
"step": 14820
},
{
"epoch": 4.6852539293894635,
"grad_norm": 0.19706763059063206,
"learning_rate": 1.1994987845795725e-07,
"loss": 0.1597,
"step": 14830
},
{
"epoch": 4.6884132375009875,
"grad_norm": 0.1975866024609479,
"learning_rate": 1.1756038709394902e-07,
"loss": 0.1593,
"step": 14840
},
{
"epoch": 4.691572545612511,
"grad_norm": 0.19686807427075748,
"learning_rate": 1.1519465288799325e-07,
"loss": 0.1599,
"step": 14850
},
{
"epoch": 4.694731853724035,
"grad_norm": 0.19405867333452428,
"learning_rate": 1.1285268735136634e-07,
"loss": 0.1599,
"step": 14860
},
{
"epoch": 4.697891161835558,
"grad_norm": 0.19332590972909364,
"learning_rate": 1.1053450187969383e-07,
"loss": 0.159,
"step": 14870
},
{
"epoch": 4.701050469947082,
"grad_norm": 0.19949006237363448,
"learning_rate": 1.0824010775288829e-07,
"loss": 0.1593,
"step": 14880
},
{
"epoch": 4.704209778058605,
"grad_norm": 0.19416481710856007,
"learning_rate": 1.0596951613509931e-07,
"loss": 0.1592,
"step": 14890
},
{
"epoch": 4.707369086170129,
"grad_norm": 0.19602339515974787,
"learning_rate": 1.0372273807465638e-07,
"loss": 0.1591,
"step": 14900
},
{
"epoch": 4.710528394281653,
"grad_norm": 0.1960689688312678,
"learning_rate": 1.0149978450401776e-07,
"loss": 0.1603,
"step": 14910
},
{
"epoch": 4.713687702393176,
"grad_norm": 0.2057850892961581,
"learning_rate": 9.930066623971334e-08,
"loss": 0.1591,
"step": 14920
},
{
"epoch": 4.7168470105047,
"grad_norm": 0.19301696301593,
"learning_rate": 9.712539398229637e-08,
"loss": 0.1602,
"step": 14930
},
{
"epoch": 4.720006318616223,
"grad_norm": 0.19265047597399218,
"learning_rate": 9.497397831628673e-08,
"loss": 0.1594,
"step": 14940
},
{
"epoch": 4.723165626727747,
"grad_norm": 0.19291383326551242,
"learning_rate": 9.284642971012559e-08,
"loss": 0.1556,
"step": 14950
},
{
"epoch": 4.72632493483927,
"grad_norm": 0.1985670750054423,
"learning_rate": 9.074275851611691e-08,
"loss": 0.1611,
"step": 14960
},
{
"epoch": 4.729484242950794,
"grad_norm": 0.1956461829616837,
"learning_rate": 8.866297497038435e-08,
"loss": 0.1595,
"step": 14970
},
{
"epoch": 4.732643551062317,
"grad_norm": 0.1913906186621511,
"learning_rate": 8.660708919281613e-08,
"loss": 0.1596,
"step": 14980
},
{
"epoch": 4.735802859173841,
"grad_norm": 0.20269780565703013,
"learning_rate": 8.457511118701911e-08,
"loss": 0.1585,
"step": 14990
},
{
"epoch": 4.738962167285364,
"grad_norm": 0.2006526774187281,
"learning_rate": 8.256705084026761e-08,
"loss": 0.159,
"step": 15000
},
{
"epoch": 4.742121475396888,
"grad_norm": 0.19809011682211267,
"learning_rate": 8.05829179234574e-08,
"loss": 0.1585,
"step": 15010
},
{
"epoch": 4.745280783508411,
"grad_norm": 0.1925300643932596,
"learning_rate": 7.862272209105625e-08,
"loss": 0.1593,
"step": 15020
},
{
"epoch": 4.748440091619935,
"grad_norm": 0.1928029167127188,
"learning_rate": 7.668647288106012e-08,
"loss": 0.1599,
"step": 15030
},
{
"epoch": 4.751599399731459,
"grad_norm": 0.19481339189265057,
"learning_rate": 7.47741797149415e-08,
"loss": 0.1601,
"step": 15040
},
{
"epoch": 4.754758707842982,
"grad_norm": 0.20271075417943374,
"learning_rate": 7.288585189760944e-08,
"loss": 0.1617,
"step": 15050
},
{
"epoch": 4.757918015954506,
"grad_norm": 0.19802943106559054,
"learning_rate": 7.102149861735962e-08,
"loss": 0.1585,
"step": 15060
},
{
"epoch": 4.761077324066029,
"grad_norm": 0.19714245747298037,
"learning_rate": 6.918112894583328e-08,
"loss": 0.1618,
"step": 15070
},
{
"epoch": 4.764236632177553,
"grad_norm": 0.20555489147236597,
"learning_rate": 6.736475183796887e-08,
"loss": 0.1598,
"step": 15080
},
{
"epoch": 4.767395940289076,
"grad_norm": 0.19422658967567297,
"learning_rate": 6.557237613196321e-08,
"loss": 0.1607,
"step": 15090
},
{
"epoch": 4.7705552484006,
"grad_norm": 0.19909191153347178,
"learning_rate": 6.380401054922547e-08,
"loss": 0.1594,
"step": 15100
},
{
"epoch": 4.7737145565121235,
"grad_norm": 0.18853836885626313,
"learning_rate": 6.205966369433547e-08,
"loss": 0.1607,
"step": 15110
},
{
"epoch": 4.7768738646236475,
"grad_norm": 0.2001345804485893,
"learning_rate": 6.033934405500042e-08,
"loss": 0.1618,
"step": 15120
},
{
"epoch": 4.7800331727351715,
"grad_norm": 0.20108034656670212,
"learning_rate": 5.864306000201825e-08,
"loss": 0.1625,
"step": 15130
},
{
"epoch": 4.783192480846695,
"grad_norm": 0.1957272210969326,
"learning_rate": 5.697081978922936e-08,
"loss": 0.16,
"step": 15140
},
{
"epoch": 4.786351788958218,
"grad_norm": 0.20101643906993924,
"learning_rate": 5.5322631553484385e-08,
"loss": 0.1587,
"step": 15150
},
{
"epoch": 4.789511097069742,
"grad_norm": 0.1932795778151164,
"learning_rate": 5.369850331459925e-08,
"loss": 0.1609,
"step": 15160
},
{
"epoch": 4.792670405181266,
"grad_norm": 0.2009306860005601,
"learning_rate": 5.209844297531796e-08,
"loss": 0.159,
"step": 15170
},
{
"epoch": 4.795829713292789,
"grad_norm": 0.18836162035628687,
"learning_rate": 5.052245832127434e-08,
"loss": 0.1596,
"step": 15180
},
{
"epoch": 4.798989021404313,
"grad_norm": 0.19233309651869823,
"learning_rate": 4.8970557020954215e-08,
"loss": 0.1614,
"step": 15190
},
{
"epoch": 4.802148329515836,
"grad_norm": 0.20067963469184333,
"learning_rate": 4.744274662565662e-08,
"loss": 0.16,
"step": 15200
},
{
"epoch": 4.80530763762736,
"grad_norm": 0.1907746195534059,
"learning_rate": 4.5939034569458804e-08,
"loss": 0.1595,
"step": 15210
},
{
"epoch": 4.808466945738883,
"grad_norm": 0.18618207146361934,
"learning_rate": 4.4459428169179583e-08,
"loss": 0.1596,
"step": 15220
},
{
"epoch": 4.811626253850407,
"grad_norm": 0.1924146366492045,
"learning_rate": 4.3003934624342716e-08,
"loss": 0.1581,
"step": 15230
},
{
"epoch": 4.81478556196193,
"grad_norm": 0.19971943762565272,
"learning_rate": 4.157256101714413e-08,
"loss": 0.1577,
"step": 15240
},
{
"epoch": 4.817944870073454,
"grad_norm": 0.19654001189058995,
"learning_rate": 4.016531431241533e-08,
"loss": 0.1588,
"step": 15250
},
{
"epoch": 4.821104178184978,
"grad_norm": 0.20256447137055644,
"learning_rate": 3.8782201357589475e-08,
"loss": 0.1592,
"step": 15260
},
{
"epoch": 4.824263486296501,
"grad_norm": 0.19955523886494098,
"learning_rate": 3.742322888267036e-08,
"loss": 0.159,
"step": 15270
},
{
"epoch": 4.827422794408025,
"grad_norm": 0.20426304048843397,
"learning_rate": 3.6088403500196267e-08,
"loss": 0.1585,
"step": 15280
},
{
"epoch": 4.830582102519548,
"grad_norm": 0.19123805822649273,
"learning_rate": 3.4777731705211705e-08,
"loss": 0.1599,
"step": 15290
},
{
"epoch": 4.833741410631072,
"grad_norm": 0.1961561945761796,
"learning_rate": 3.349121987523241e-08,
"loss": 0.1603,
"step": 15300
},
{
"epoch": 4.836900718742595,
"grad_norm": 0.19703665699422515,
"learning_rate": 3.222887427021537e-08,
"loss": 0.1584,
"step": 15310
},
{
"epoch": 4.840060026854119,
"grad_norm": 0.205577530927033,
"learning_rate": 3.099070103253055e-08,
"loss": 0.1599,
"step": 15320
},
{
"epoch": 4.843219334965642,
"grad_norm": 0.19037949799154982,
"learning_rate": 2.977670618692641e-08,
"loss": 0.1588,
"step": 15330
},
{
"epoch": 4.846378643077166,
"grad_norm": 0.18634230770244323,
"learning_rate": 2.8586895640504986e-08,
"loss": 0.1589,
"step": 15340
},
{
"epoch": 4.849537951188689,
"grad_norm": 0.19361637810104884,
"learning_rate": 2.7421275182691887e-08,
"loss": 0.1576,
"step": 15350
},
{
"epoch": 4.852697259300213,
"grad_norm": 0.1957214714937506,
"learning_rate": 2.6279850485206316e-08,
"loss": 0.162,
"step": 15360
},
{
"epoch": 4.855856567411736,
"grad_norm": 0.19434731826900606,
"learning_rate": 2.5162627102035543e-08,
"loss": 0.1607,
"step": 15370
},
{
"epoch": 4.85901587552326,
"grad_norm": 0.19311901028457665,
"learning_rate": 2.406961046940659e-08,
"loss": 0.1597,
"step": 15380
},
{
"epoch": 4.862175183634784,
"grad_norm": 0.19029235111786966,
"learning_rate": 2.3000805905761814e-08,
"loss": 0.1571,
"step": 15390
},
{
"epoch": 4.8653344917463075,
"grad_norm": 0.18986353610183304,
"learning_rate": 2.1956218611730028e-08,
"loss": 0.1599,
"step": 15400
},
{
"epoch": 4.8684937998578315,
"grad_norm": 0.19230056674947954,
"learning_rate": 2.0935853670103202e-08,
"loss": 0.1587,
"step": 15410
},
{
"epoch": 4.871653107969355,
"grad_norm": 0.18970480828208378,
"learning_rate": 1.9939716045811463e-08,
"loss": 0.1601,
"step": 15420
},
{
"epoch": 4.874812416080879,
"grad_norm": 0.19866752371194085,
"learning_rate": 1.8967810585898695e-08,
"loss": 0.162,
"step": 15430
},
{
"epoch": 4.877971724192402,
"grad_norm": 0.19367079176529667,
"learning_rate": 1.8020142019499755e-08,
"loss": 0.159,
"step": 15440
},
{
"epoch": 4.881131032303926,
"grad_norm": 0.19230026377170165,
"learning_rate": 1.7096714957814953e-08,
"loss": 0.1581,
"step": 15450
},
{
"epoch": 4.884290340415449,
"grad_norm": 0.18516877508195054,
"learning_rate": 1.619753389409062e-08,
"loss": 0.1577,
"step": 15460
},
{
"epoch": 4.887449648526973,
"grad_norm": 0.2030225188585794,
"learning_rate": 1.5322603203595797e-08,
"loss": 0.1584,
"step": 15470
},
{
"epoch": 4.890608956638496,
"grad_norm": 0.19372717095423658,
"learning_rate": 1.4471927143601127e-08,
"loss": 0.1597,
"step": 15480
},
{
"epoch": 4.89376826475002,
"grad_norm": 0.19197742339555968,
"learning_rate": 1.3645509853357775e-08,
"loss": 0.1568,
"step": 15490
},
{
"epoch": 4.896927572861543,
"grad_norm": 0.2003946373624122,
"learning_rate": 1.2843355354079102e-08,
"loss": 0.1588,
"step": 15500
},
{
"epoch": 4.900086880973067,
"grad_norm": 0.19449497450048703,
"learning_rate": 1.2065467548917353e-08,
"loss": 0.1563,
"step": 15510
},
{
"epoch": 4.903246189084591,
"grad_norm": 0.19730672007764802,
"learning_rate": 1.1311850222949227e-08,
"loss": 0.1608,
"step": 15520
},
{
"epoch": 4.906405497196114,
"grad_norm": 0.19426023326462366,
"learning_rate": 1.0582507043153112e-08,
"loss": 0.1587,
"step": 15530
},
{
"epoch": 4.909564805307638,
"grad_norm": 0.18665531141294303,
"learning_rate": 9.877441558395761e-09,
"loss": 0.1588,
"step": 15540
},
{
"epoch": 4.912724113419161,
"grad_norm": 0.19544733053145186,
"learning_rate": 9.196657199410097e-09,
"loss": 0.1585,
"step": 15550
},
{
"epoch": 4.915883421530685,
"grad_norm": 0.1931838965682529,
"learning_rate": 8.54015727878299e-09,
"loss": 0.16,
"step": 15560
},
{
"epoch": 4.919042729642208,
"grad_norm": 0.18609748141590166,
"learning_rate": 7.90794499093639e-09,
"loss": 0.1597,
"step": 15570
},
{
"epoch": 4.922202037753732,
"grad_norm": 0.20127723441153517,
"learning_rate": 7.300023412111779e-09,
"loss": 0.1601,
"step": 15580
},
{
"epoch": 4.925361345865255,
"grad_norm": 0.19920490400128663,
"learning_rate": 6.716395500357964e-09,
"loss": 0.1577,
"step": 15590
},
{
"epoch": 4.928520653976779,
"grad_norm": 0.19375948427344167,
"learning_rate": 6.157064095512754e-09,
"loss": 0.1599,
"step": 15600
},
{
"epoch": 4.931679962088302,
"grad_norm": 0.19750901689149744,
"learning_rate": 5.622031919191862e-09,
"loss": 0.1587,
"step": 15610
},
{
"epoch": 4.934839270199826,
"grad_norm": 0.18708793754549835,
"learning_rate": 5.1113015747755735e-09,
"loss": 0.1601,
"step": 15620
},
{
"epoch": 4.937998578311349,
"grad_norm": 0.1928749265530207,
"learning_rate": 4.624875547394325e-09,
"loss": 0.1589,
"step": 15630
},
{
"epoch": 4.941157886422873,
"grad_norm": 0.20136315326321008,
"learning_rate": 4.16275620391815e-09,
"loss": 0.1597,
"step": 15640
},
{
"epoch": 4.944317194534397,
"grad_norm": 0.19196622559471357,
"learning_rate": 3.724945792945023e-09,
"loss": 0.1594,
"step": 15650
},
{
"epoch": 4.94747650264592,
"grad_norm": 0.19581601175358024,
"learning_rate": 3.3114464447892013e-09,
"loss": 0.1596,
"step": 15660
},
{
"epoch": 4.950635810757444,
"grad_norm": 0.19173585748074187,
"learning_rate": 2.922260171470681e-09,
"loss": 0.159,
"step": 15670
},
{
"epoch": 4.9537951188689675,
"grad_norm": 0.19350532117294958,
"learning_rate": 2.5573888667079772e-09,
"loss": 0.1616,
"step": 15680
},
{
"epoch": 4.9569544269804915,
"grad_norm": 0.19845655941270632,
"learning_rate": 2.2168343059042475e-09,
"loss": 0.1588,
"step": 15690
},
{
"epoch": 4.960113735092015,
"grad_norm": 0.19860632734242958,
"learning_rate": 1.9005981461434065e-09,
"loss": 0.1609,
"step": 15700
},
{
"epoch": 4.963273043203539,
"grad_norm": 0.2001952094467118,
"learning_rate": 1.6086819261790232e-09,
"loss": 0.1585,
"step": 15710
},
{
"epoch": 4.966432351315062,
"grad_norm": 0.1971085780740881,
"learning_rate": 1.3410870664276598e-09,
"loss": 0.1583,
"step": 15720
},
{
"epoch": 4.969591659426586,
"grad_norm": 0.19391119108700927,
"learning_rate": 1.0978148689633205e-09,
"loss": 0.1594,
"step": 15730
},
{
"epoch": 4.97275096753811,
"grad_norm": 0.19649511560351368,
"learning_rate": 8.788665175085697e-10,
"loss": 0.1603,
"step": 15740
},
{
"epoch": 4.975910275649633,
"grad_norm": 0.19371704255850056,
"learning_rate": 6.842430774300913e-10,
"loss": 0.1588,
"step": 15750
},
{
"epoch": 4.979069583761156,
"grad_norm": 0.20144520587590042,
"learning_rate": 5.139454957342471e-10,
"loss": 0.1586,
"step": 15760
},
{
"epoch": 4.98222889187268,
"grad_norm": 0.18820551886405948,
"learning_rate": 3.6797460106152707e-10,
"loss": 0.1585,
"step": 15770
},
{
"epoch": 4.985388199984204,
"grad_norm": 0.1989233181042242,
"learning_rate": 2.463311036826621e-10,
"loss": 0.1583,
"step": 15780
},
{
"epoch": 4.988547508095727,
"grad_norm": 0.1988452001930714,
"learning_rate": 1.490155954947392e-10,
"loss": 0.159,
"step": 15790
},
{
"epoch": 4.991706816207251,
"grad_norm": 0.19747851657350868,
"learning_rate": 7.602855001953569e-11,
"loss": 0.1593,
"step": 15800
},
{
"epoch": 4.994866124318774,
"grad_norm": 0.19954559773754327,
"learning_rate": 2.7370322400188665e-11,
"loss": 0.1579,
"step": 15810
},
{
"epoch": 4.998025432430298,
"grad_norm": 0.1940730914471369,
"learning_rate": 3.041149399529708e-12,
"loss": 0.1603,
"step": 15820
}
],
"logging_steps": 10,
"max_steps": 15825,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 8.002555364979507e+16,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}